summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2015-12-21 21:10:44 -0500
committerNicolai Hähnle <nicolai.haehnle@amd.com>2015-12-22 17:31:43 -0500
commit0fdbf43129883b4ba169a0d5e27609b35b15064d (patch)
treec9c66479f4f62a6cb0335fc0c14a07a33241e7b4
parentd1daeb924cd4697bf4918637650bb9c8e07e788d (diff)
REVIEW COMMENTSfold-subregs
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp1
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp27
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.h2
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h1
5 files changed, 32 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0d007325dee..6a785d4b263 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -22,6 +22,7 @@
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
+// XXX - would be nicer to get rid of this #include, should be sufficient to move createSIMachineScheduler
#include "SIMachineScheduler.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index aba519c898a..fbf20c0e7c2 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -24,6 +24,11 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
+// XXX - general comment: would be useful for review if you could decide on
+// one variant of everything that is best in your tests and submit only that
+// quite likely you'll get feedback of the form "have you tried this", but that's okay
+
+// XXX - move this to the file comment above?
// This scheduler implements a different scheduling algorithm than
// GenericScheduler.
//
@@ -36,6 +41,7 @@ using namespace llvm;
// for all the VGPR load instructions previous to the VGPR load instruction
// you are interested in to finish.
// . The less the register pressure, the best load latencies are hidden
+// XXX - explain what you mean precisely
//
// Moreover some specifities (like the fact a lot of instructions in the shader
// have few dependencies) makes the generic scheduler have some unpredictable
@@ -43,6 +49,8 @@ using namespace llvm;
// manage to prevent register pressure from going too high, or it can
// increase register pressure even more than if it hadn't taken register
// pressure into account.
+// XXX - this is hard to understand; why, what happens in the generic scheduler
+// to trigger this behavior?
//
// Also some other bad behaviours are generated, like loading at the beginning
// of the shader a constant in VGPR you won't need until the end of the shader.
@@ -125,6 +133,7 @@ using namespace llvm;
#ifndef NDEBUG
+// XXX - should be static
const char *getReasonStr(SIScheduleCandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND";
@@ -471,6 +480,9 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge,
void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
for (SDep& Succ : SU->Succs) {
releaseSucc(SU, &Succ, InOrOutBlock);
+ // XXX - since releaseSucc takes a single successor, it would be better
+ // to move the logic if deciding whether to actually release the successor
+ // in here
}
}
@@ -742,6 +754,14 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
else {
std::map<std::set<unsigned>, unsigned>::iterator Pos =
ColorCombinations.find(SUColors);
+ // XXX - possible combinatorial explosion / imprecision
+ // High latency instructions A, B, C, followed by:
+ // X1 = op A, B
+ // X2 = op X1, C
+ // Y1 = op A, C
+ // Y2 = op Y1, B
+ // X2 and X3 have the same set of high latency predecessors, but are
+ // assigned different colors.
if (Pos != ColorCombinations.end()) {
CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
} else {
@@ -755,6 +775,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
ColorCombinations.clear();
// Same than before, but BottomUp.
+ // XXX - same *as* before
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
@@ -798,6 +819,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::map<std::set<unsigned>, unsigned> ColorCombinations;
+ // XXX - use std::pair instead of set here
// Every combination of colors given by the top down
// and bottom up Reserved node dependency
@@ -824,6 +846,7 @@ void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
}
}
+// XXX - Which nodes are affected by this method?
void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;
@@ -901,6 +924,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
continue;
+ // XXX - These types of conditions are a bit magical. Replace them
+ // with a predicate function with a meaningful name?
// No predecessor: Vgpr constant loading.
// Low latency instructions usually have a predecessor (the address)
@@ -918,6 +943,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
}
}
+// XXX - This method, the previous one and the next one do almost the same thing, perhaps merge
+// them? Also, what is the meaning of reserved / non-reserved exactly?
void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
unsigned DAGSize = DAG->SUnits.size();
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 8cf515ae1aa..c3642a2e3fc 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -188,6 +188,7 @@ private:
void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
void NodeScheduled(SUnit *SU);
+ // XXX - First letter of method name should be lower case
void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
SUnit* pickNode();
@@ -205,6 +206,7 @@ struct SIScheduleBlocks {
enum SISchedulerBlockCreatorVariant {
LatenciesAlone,
LatenciesGroupped,
+ // XXX - spelling: Grouped
LatenciesAlonePlusConsecutive
};
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 454fc491b10..35f8bc45cbb 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -693,6 +693,7 @@ unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
}
}
+// XXX - unused
unsigned SIRegisterInfo::getWaveFrontsForUsage(AMDGPUSubtarget::Generation gen,
unsigned SGPRsUsed,
unsigned VGPRsUsed) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index ba5039537da..5c9165f4bb6 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -25,6 +25,7 @@ namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
private:
+ // XXX - magic number, use constant or enum instead
unsigned SGPRsForWaveFronts[10];
unsigned SGPRsForWaveFrontsVI[10];
unsigned VGPRsForWaveFronts[10];