summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineScheduler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineScheduler.cpp')
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp27
1 files changed, 27 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index aba519c898a..fbf20c0e7c2 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -24,6 +24,11 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
+// XXX - general comment: would be useful for review if you could decide on
+// one variant of everything that is best in your tests and submit only that
+// quite likely you'll get feedback of the form "have you tried this", but that's okay
+
+// XXX - move this to the file comment above?
// This scheduler implements a different scheduling algorithm than
// GenericScheduler.
//
@@ -36,6 +41,7 @@ using namespace llvm;
// for all the VGPR load instructions previous to the VGPR load instruction
// you are interested in to finish.
// . The less the register pressure, the best load latencies are hidden
+// XXX - explain what you mean precisely
//
// Moreover some specifities (like the fact a lot of instructions in the shader
// have few dependencies) makes the generic scheduler have some unpredictable
@@ -43,6 +49,8 @@ using namespace llvm;
// manage to prevent register pressure from going too high, or it can
// increase register pressure even more than if it hadn't taken register
// pressure into account.
+// XXX - this is hard to understand; why, what happens in the generic scheduler
+// to trigger this behavior?
//
// Also some other bad behaviours are generated, like loading at the beginning
// of the shader a constant in VGPR you won't need until the end of the shader.
@@ -125,6 +133,7 @@ using namespace llvm;
#ifndef NDEBUG
+// XXX - should be static
const char *getReasonStr(SIScheduleCandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND";
@@ -471,6 +480,9 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge,
void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
for (SDep& Succ : SU->Succs) {
releaseSucc(SU, &Succ, InOrOutBlock);
+ // XXX - since releaseSucc takes a single successor, it would be better
+ // to move the logic if deciding whether to actually release the successor
+ // in here
}
}
@@ -742,6 +754,14 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
else {
std::map<std::set<unsigned>, unsigned>::iterator Pos =
ColorCombinations.find(SUColors);
+ // XXX - possible combinatorial explosion / imprecision
+ // High latency instructions A, B, C, followed by:
+ // X1 = op A, B
+ // X2 = op X1, C
+ // Y1 = op A, C
+ // Y2 = op Y1, B
+ // X2 and X3 have the same set of high latency predecessors, but are
+ // assigned different colors.
if (Pos != ColorCombinations.end()) {
CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
} else {
@@ -755,6 +775,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
ColorCombinations.clear();
// Same than before, but BottomUp.
+ // XXX - same *as* before
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
@@ -798,6 +819,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::map<std::set<unsigned>, unsigned> ColorCombinations;
+ // XXX - use std::pair instead of set here
// Every combination of colors given by the top down
// and bottom up Reserved node dependency
@@ -824,6 +846,7 @@ void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
}
}
+// XXX - Which nodes are affected by this method?
void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;
@@ -901,6 +924,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
continue;
+ // XXX - These types of conditions are a bit magical. Replace them
+ // with a predicate function with a meaningful name?
// No predecessor: Vgpr constant loading.
// Low latency instructions usually have a predecessor (the address)
@@ -918,6 +943,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
}
}
+// XXX - This method, the previous one and the next one do almost the same thing, perhaps merge
+// them? Also, what is the meaning of reserved / non-reserved exactly?
void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
unsigned DAGSize = DAG->SUnits.size();