From 0fdbf43129883b4ba169a0d5e27609b35b15064d Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Mon, 21 Dec 2015 21:10:44 -0500
Subject: REVIEW COMMENTS

---
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  1 +
 lib/Target/AMDGPU/SIMachineScheduler.cpp  | 27 +++++++++++++++++++++++++++
 lib/Target/AMDGPU/SIMachineScheduler.h    |  2 ++
 lib/Target/AMDGPU/SIRegisterInfo.cpp      |  1 +
 lib/Target/AMDGPU/SIRegisterInfo.h        |  1 +
 5 files changed, 32 insertions(+)
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0d007325dee..6a785d4b263 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -22,6 +22,7 @@
 #include "R600MachineScheduler.h"
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
+// XXX - would be nicer to get rid of this #include, should be sufficient to move createSIMachineScheduler
 #include "SIMachineScheduler.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index aba519c898a..fbf20c0e7c2 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -24,6 +24,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "misched"
 
+// XXX - general comment: would be useful for review if you could decide on
+// one variant of everything that is best in your tests and submit only that
+// quite likely you'll get feedback of the form "have you tried this", but that's okay
+
+// XXX - move this to the file comment above?
 // This scheduler implements a different scheduling algorithm than
 // GenericScheduler.
 //
@@ -36,6 +41,7 @@ using namespace llvm;
 // for all the VGPR load instructions previous to the VGPR load instruction
 // you are interested in to finish.
 // . The less the register pressure, the best load latencies are hidden
+// XXX - explain what you mean precisely
 //
 // Moreover some specifities (like the fact a lot of instructions in the shader
 // have few dependencies) makes the generic scheduler have some unpredictable
@@ -43,6 +49,8 @@ using namespace llvm;
 // manage to prevent register pressure from going too high, or it can
 // increase register pressure even more than if it hadn't taken register
 // pressure into account.
+// XXX - this is hard to understand; why, what happens in the generic scheduler
+// to trigger this behavior?
 //
 // Also some other bad behaviours are generated, like loading at the beginning
 // of the shader a constant in VGPR you won't need until the end of the shader.
@@ -125,6 +133,7 @@ using namespace llvm;
 
 #ifndef NDEBUG
 
+// XXX - should be static
 const char *getReasonStr(SIScheduleCandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND";
@@ -471,6 +480,9 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge,
 void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
   for (SDep& Succ : SU->Succs) {
     releaseSucc(SU, &Succ, InOrOutBlock);
+    // XXX - since releaseSucc takes a single successor, it would be better
+    // to move the logic if deciding whether to actually release the successor
+    // in here
   }
 }
 
@@ -742,6 +754,14 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
     else {
       std::map<std::set<unsigned>, unsigned>::iterator Pos =
         ColorCombinations.find(SUColors);
+      // XXX - possible combinatorial explosion / imprecision
+      // High latency instructions A, B, C, followed by:
+      //  X1 = op A, B
+      //  X2 = op X1, C
+      //  Y1 = op A, C
+      //  Y2 = op Y1, B
+      // X2 and X3 have the same set of high latency predecessors, but are
+      // assigned different colors.
       if (Pos != ColorCombinations.end()) {
           CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
       } else {
@@ -755,6 +775,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
   ColorCombinations.clear();
 
   // Same than before, but BottomUp.
+  // XXX - same *as* before
 
   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
     SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
@@ -798,6 +819,7 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
 void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
   unsigned DAGSize = DAG->SUnits.size();
   std::map<std::set<unsigned>, unsigned> ColorCombinations;
+  // XXX - use std::pair instead of set here
 
   // Every combination of colors given by the top down
   // and bottom up Reserved node dependency
@@ -824,6 +846,7 @@ void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
   }
 }
 
+// XXX - Which nodes are affected by this method?
 void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
   unsigned DAGSize = DAG->SUnits.size();
   std::vector<int> PendingColoring = CurrentColoring;
@@ -901,6 +924,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
 
     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
       continue;
+    // XXX - These types of conditions are a bit magical. Replace them
+    // with a predicate function with a meaningful name?
 
     // No predecessor: Vgpr constant loading.
     // Low latency instructions usually have a predecessor (the address)
@@ -918,6 +943,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
   }
 }
 
+// XXX - This method, the previous one and the next one do almost the same thing, perhaps merge
+// them? Also, what is the meaning of reserved / non-reserved exactly?
 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
   unsigned DAGSize = DAG->SUnits.size();
 
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 8cf515ae1aa..c3642a2e3fc 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -188,6 +188,7 @@ private:
   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
 
   void NodeScheduled(SUnit *SU);
+  // XXX - First letter of method name should be lower case
   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
   SUnit* pickNode();
@@ -205,6 +206,7 @@ struct SIScheduleBlocks {
 enum SISchedulerBlockCreatorVariant {
     LatenciesAlone,
     LatenciesGroupped,
+    // XXX - spelling: Grouped
     LatenciesAlonePlusConsecutive
 };
 
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 454fc491b10..35f8bc45cbb 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -693,6 +693,7 @@ unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
   }
 }
 
+// XXX - unused
 unsigned SIRegisterInfo::getWaveFrontsForUsage(AMDGPUSubtarget::Generation gen,
                                                unsigned SGPRsUsed,
                                                unsigned VGPRsUsed) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index ba5039537da..5c9165f4bb6 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -25,6 +25,7 @@ namespace llvm {
 
 struct SIRegisterInfo : public AMDGPURegisterInfo {
 private:
+  // XXX - magic number, use constant or enum instead
   unsigned SGPRsForWaveFronts[10];
   unsigned SGPRsForWaveFrontsVI[10];
   unsigned VGPRsForWaveFronts[10];
-- 
cgit v1.2.3