diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineScheduler.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIMachineScheduler.cpp | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index aba519c898a..fbf20c0e7c2 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -24,6 +24,11 @@ using namespace llvm; #define DEBUG_TYPE "misched" +// XXX - general comment: would be useful for review if you could decide on +// one variant of everything that is best in your tests and submit only that +// quite likely you'll get feedback of the form "have you tried this", but that's okay + +// XXX - move this to the file comment above? // This scheduler implements a different scheduling algorithm than // GenericScheduler. // @@ -36,6 +41,7 @@ using namespace llvm; // for all the VGPR load instructions previous to the VGPR load instruction // you are interested in to finish. // . The less the register pressure, the best load latencies are hidden +// XXX - explain what you mean precisely // // Moreover some specifities (like the fact a lot of instructions in the shader // have few dependencies) makes the generic scheduler have some unpredictable @@ -43,6 +49,8 @@ using namespace llvm; // manage to prevent register pressure from going too high, or it can // increase register pressure even more than if it hadn't taken register // pressure into account. +// XXX - this is hard to understand; why, what happens in the generic scheduler +// to trigger this behavior? // // Also some other bad behaviours are generated, like loading at the beginning // of the shader a constant in VGPR you won't need until the end of the shader. @@ -125,6 +133,7 @@ using namespace llvm; #ifndef NDEBUG +// XXX - should be static const char *getReasonStr(SIScheduleCandReason Reason) { switch (Reason) { case NoCand: return "NOCAND"; @@ -471,6 +480,9 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge, void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) { for (SDep& Succ : SU->Succs) { releaseSucc(SU, &Succ, InOrOutBlock); + // XXX - since releaseSucc takes a single successor, it would be better + // to move the logic if deciding whether to actually release the successor + // in here } } @@ -742,6 +754,14 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { else { std::map<std::set<unsigned>, unsigned>::iterator Pos = ColorCombinations.find(SUColors); + // XXX - possible combinatorial explosion / imprecision + // High latency instructions A, B, C, followed by: + // X1 = op A, B + // X2 = op X1, C + // Y1 = op A, C + // Y2 = op Y1, B + // X2 and X3 have the same set of high latency predecessors, but are + // assigned different colors. if (Pos != ColorCombinations.end()) { CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second; } else { @@ -755,6 +775,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { ColorCombinations.clear(); // Same than before, but BottomUp. + // XXX - same *as* before for (unsigned i = 0, e = DAGSize; i != e; ++i) { SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; @@ -798,6 +819,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() { void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { unsigned DAGSize = DAG->SUnits.size(); std::map<std::set<unsigned>, unsigned> ColorCombinations; + // XXX - use std::pair instead of set here // Every combination of colors given by the top down // and bottom up Reserved node dependency @@ -824,6 +846,7 @@ void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { } } +// XXX - Which nodes are affected by this method? void SIScheduleBlockCreator::colorEndsAccordingToDependencies() { unsigned DAGSize = DAG->SUnits.size(); std::vector<int> PendingColoring = CurrentColoring; @@ -901,6 +924,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) continue; + // XXX - These types of conditions are a bit magical. Replace them + // with a predicate function with a meaningful name? // No predecessor: Vgpr constant loading. // Low latency instructions usually have a predecessor (the address) @@ -918,6 +943,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { } } +// XXX - This method, the previous one and the next one do almost the same thing, perhaps merge +// them? Also, what is the meaning of reserved / non-reserved exactly? void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() { unsigned DAGSize = DAG->SUnits.size(); |