From 0fdbf43129883b4ba169a0d5e27609b35b15064d Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Mon, 21 Dec 2015 21:10:44 -0500 Subject: REVIEW COMMENTS --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + lib/Target/AMDGPU/SIMachineScheduler.cpp | 27 +++++++++++++++++++++++++++ lib/Target/AMDGPU/SIMachineScheduler.h | 2 ++ lib/Target/AMDGPU/SIRegisterInfo.cpp | 1 + lib/Target/AMDGPU/SIRegisterInfo.h | 1 + 5 files changed, 32 insertions(+) diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0d007325dee..6a785d4b263 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -22,6 +22,7 @@ #include "R600MachineScheduler.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" +// XXX - would be nicer to get rid of this #include, should be sufficient to move createSIMachineScheduler #include "SIMachineScheduler.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index aba519c898a..fbf20c0e7c2 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -24,6 +24,11 @@ using namespace llvm; #define DEBUG_TYPE "misched" +// XXX - general comment: would be useful for review if you could decide on +// one variant of everything that is best in your tests and submit only that +// quite likely you'll get feedback of the form "have you tried this", but that's okay + +// XXX - move this to the file comment above? // This scheduler implements a different scheduling algorithm than // GenericScheduler. // @@ -36,6 +41,7 @@ using namespace llvm; // for all the VGPR load instructions previous to the VGPR load instruction // you are interested in to finish. // . The less the register pressure, the best load latencies are hidden +// XXX - explain what you mean precisely // // Moreover some specifities (like the fact a lot of instructions in the shader // have few dependencies) makes the generic scheduler have some unpredictable @@ -43,6 +49,8 @@ using namespace llvm; // manage to prevent register pressure from going too high, or it can // increase register pressure even more than if it hadn't taken register // pressure into account. +// XXX - this is hard to understand; why, what happens in the generic scheduler +// to trigger this behavior? // // Also some other bad behaviours are generated, like loading at the beginning // of the shader a constant in VGPR you won't need until the end of the shader. @@ -125,6 +133,7 @@ using namespace llvm; #ifndef NDEBUG +// XXX - should be static const char *getReasonStr(SIScheduleCandReason Reason) { switch (Reason) { case NoCand: return "NOCAND"; @@ -471,6 +480,9 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge, void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) { for (SDep& Succ : SU->Succs) { releaseSucc(SU, &Succ, InOrOutBlock); + // XXX - since releaseSucc takes a single successor, it would be better + // to move the logic if deciding whether to actually release the successor + // in here } } @@ -742,6 +754,14 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { else { std::map, unsigned>::iterator Pos = ColorCombinations.find(SUColors); + // XXX - possible combinatorial explosion / imprecision + // High latency instructions A, B, C, followed by: + // X1 = op A, B + // X2 = op X1, C + // Y1 = op A, C + // Y2 = op Y1, B + // X2 and X3 have the same set of high latency predecessors, but are + // assigned different colors. if (Pos != ColorCombinations.end()) { CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second; } else { @@ -755,6 +775,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { ColorCombinations.clear(); // Same than before, but BottomUp. + // XXX - same *as* before for (unsigned i = 0, e = DAGSize; i != e; ++i) { SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; @@ -798,6 +819,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { unsigned DAGSize = DAG->SUnits.size(); std::map, unsigned> ColorCombinations; + // XXX - use std::pair instead of set here // Every combination of colors given by the top down // and bottom up Reserved node dependency @@ -824,6 +846,7 @@ void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { } } +// XXX - Which nodes are affected by this method? void SIScheduleBlockCreator::colorEndsAccordingToDependencies() { unsigned DAGSize = DAG->SUnits.size(); std::vector PendingColoring = CurrentColoring; @@ -901,6 +924,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) continue; + // XXX - These types of conditions are a bit magical. Replace them + // with a predicate function with a meaningful name? // No predecessor: Vgpr constant loading. // Low latency instructions usually have a predecessor (the address) @@ -918,6 +943,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { } } +// XXX - This method, the previous one and the next one do almost the same thing, perhaps merge +// them? Also, what is the meaning of reserved / non-reserved exactly? void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() { unsigned DAGSize = DAG->SUnits.size(); diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h index 8cf515ae1aa..c3642a2e3fc 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/lib/Target/AMDGPU/SIMachineScheduler.h @@ -188,6 +188,7 @@ private: void releaseSuccessors(SUnit *SU, bool InOrOutBlock); void NodeScheduled(SUnit *SU); + // XXX - First letter of method name should be lower case void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); SUnit* pickNode(); @@ -205,6 +206,7 @@ struct SIScheduleBlocks { enum SISchedulerBlockCreatorVariant { LatenciesAlone, LatenciesGroupped, + // XXX - spelling: Grouped LatenciesAlonePlusConsecutive }; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 454fc491b10..35f8bc45cbb 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -693,6 +693,7 @@ unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen, } } +// XXX - unused unsigned SIRegisterInfo::getWaveFrontsForUsage(AMDGPUSubtarget::Generation gen, unsigned SGPRsUsed, unsigned VGPRsUsed) const { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index ba5039537da..5c9165f4bb6 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -25,6 +25,7 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { private: + // XXX - magic number, use constant or enum instead unsigned SGPRsForWaveFronts[10]; unsigned SGPRsForWaveFrontsVI[10]; unsigned VGPRsForWaveFronts[10]; -- cgit v1.2.3