R600: initial scheduler code (branch: r600-wip-sched)

This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently it only tries to expose more parallelism for ALU instructions (this also makes the distribution of GPR channels more uniform and increases the chances of ALU instructions being packed together in a single VLIW group). It also tries to reduce clause switching by grouping instructions of the same kind (ALU/FETCH/CF) together.
author: Vadim Girlin <vadimgirlin@gmail.com> 2012-12-27 20:56:20 +0400
committer: Vadim Girlin <vadimgirlin@gmail.com> 2012-12-29 17:18:20 +0400
commit: eb3d66a03cd8ac5a13e84a92108fc30d2ee66fa9 (patch)
tree: 01c0c1d6fdbeb0273ef15abfafc7246a57f7a732
parent: 6143ff7e4c568acac503453118c316aed0437a22 (diff)
6 files changed, 344 insertions, 12 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 098d42e79a..906f875997 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
 #include "AMDGPU.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -39,6 +40,14 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
 }
 
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new R600SchedStrategy()); // ScheduleDAGMI driven by the R600 strategy
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+                    createR600MachineScheduler); // registers the factory above under the name "r600"
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
     StringRef CPU, StringRef FS,
   TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
+    : TargetPassConfig(TM, PM) {
+    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+      enablePass(&MachineSchedulerID);
+      MachineSchedRegistry::setDefault(createR600MachineScheduler);
+    }
+  }
 
   AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
     return getTM<AMDGPUTargetMachine>();
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 8e5a4a2127..d4f1346946 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -366,7 +366,7 @@ void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
   EmitByte(0, OS);
 
   if (Opcode == AMDGPU::TEX_SET_GRADIENTS_H ||
-      Opcode == AMDGPU::TEX_SET_GRADIENTS_H) {
+      Opcode == AMDGPU::TEX_SET_GRADIENTS_V) {
     // XXX: Emit dst select
     EmitByte(7, OS); // X
     EmitByte(7, OS); // Y
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index e19eea38e4..39a5cd9d2a 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -39,7 +39,11 @@ namespace R600_InstFlag {
     //FlagOperand bits 7, 8
     NATIVE_OPERANDS = (1 << 9),
     OP1 = (1 << 10),
-    OP2 = (1 << 11)
+    OP2 = (1 << 11),
+
+    ALU = (1<<12),
+    FETCH = (1<<13)
+
   };
 }
 
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index e997721598..27c81e0258 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -13,9 +13,24 @@
 
 include "R600Intrinsics.td"
 
+class R600GPUInst <dag outs, dag ins, string asm, list<dag> pattern>
+    : AMDGPUInst <outs, ins, asm, pattern> {
+  bit isALU = 0;   // off by default; ALU instruction classes set it to 1
+  bit isFETCH = 0; // off by default; fetch (TEX) instruction classes set it to 1
+
+  let TSFlags{12} = isALU;   // R600_InstFlag::ALU   (1 << 12)
+  let TSFlags{13} = isFETCH; // R600_InstFlag::FETCH (1 << 13)
+}
+
+class R600GPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+    : R600GPUInst<outs, ins, asm, pattern> {
+  // Replaces AMDGPUShaderInst for the INTERP_* and TXD* defs in this file.
+  field bits<32> Inst = 0xffffffff;
+}
+
 class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
                 InstrItinClass itin>
-    : AMDGPUInst <outs, ins, asm, pattern> {
+    : R600GPUInst <outs, ins, asm, pattern> {
 
   field bits<64> Inst;
   bit Trig = 0;
@@ -48,7 +63,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
 }
 
 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
-    AMDGPUInst <outs, ins, asm, pattern> {
+    R600GPUInst <outs, ins, asm, pattern> {
   field bits<64> Inst;
 
   let Namespace = "AMDGPU";
@@ -239,6 +254,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
   let HasNativeOperands = 1;
   let Op1 = 1;
   let DisableEncoding = "$literal";
+  let isALU = 1;
 
   let Inst{31-0}  = Word0;
   let Inst{63-32} = Word1;
@@ -275,6 +291,7 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
   let HasNativeOperands = 1;
   let Op2 = 1;
   let DisableEncoding = "$literal";
+  let isALU = 1;
 
   let Inst{31-0}  = Word0;
   let Inst{63-32} = Word1;
@@ -313,6 +330,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
   let HasNativeOperands = 1;
   let DisableEncoding = "$literal";
   let Op3 = 1;
+  let isALU = 1;
 
   let Inst{31-0}  = Word0;
   let Inst{63-32} = Word1;
@@ -325,7 +343,9 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
           ins,
           asm,
           pattern,
-          itin>;
+          itin>{
+  let isALU = 1;
+}
 
 class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
@@ -336,6 +356,7 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
           pattern,
           itin>{
     let Inst {10-0} = inst;
+    let isFETCH = 1;
   }
 
 } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -422,23 +443,26 @@ def isR600toCayman : Predicate<
 // Interpolation Instructions
 //===----------------------------------------------------------------------===//
 
-def INTERP_PAIR_XY :  AMDGPUShaderInst <
+
+let isALU = 1 in {
+def INTERP_PAIR_XY :  R600GPUShaderInst <
   (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
   (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
   "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
   []>;
 
-def INTERP_PAIR_ZW :  AMDGPUShaderInst <
+def INTERP_PAIR_ZW :  R600GPUShaderInst <
   (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
   (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
   "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
   []>;
 
-def INTERP_VEC_LOAD :  AMDGPUShaderInst <
+def INTERP_VEC_LOAD :  R600GPUShaderInst <
   (outs R600_Reg128:$dst),
   (ins i32imm:$src0),
   "INTERP_LOAD $src0 : $dst",
   []>;
+} // isALU = 1
 
 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
   let bank_swizzle = 5;
@@ -633,7 +657,7 @@ def MOV : R600_1OP <0x19, "MOV", []>;
 
 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
 
-class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+class MOV_IMM <ValueType vt, Operand immType> : R600GPUInst <
   (outs R600_Reg32:$dst),
   (ins immType:$imm),
   "",
@@ -880,6 +904,7 @@ multiclass CUBE_Common <bits<11> inst> {
     VecALU
   > {
     let isPseudo = 1;
+    let isALU = 1;
   }
 
   def _real : R600_2OP <inst, "CUBE", []>;
@@ -1475,6 +1500,8 @@ def PRED_X : InstR600 <
   (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
   "", [], NullALU> {
   let FlagOperandIdx = 3;
+  let isALU = 1;
+  let isTerminator = 1;
 }
 
 let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
@@ -1505,19 +1532,21 @@ def MASK_WRITE : AMDGPUShaderInst <
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
-def TXD: AMDGPUShaderInst <
+let isFETCH = 1 in {
+def TXD: R600GPUShaderInst <
   (outs R600_Reg128:$dst),
   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
 >;
 
-def TXD_SHADOW: AMDGPUShaderInst <
+def TXD_SHADOW: R600GPUShaderInst <
   (outs R600_Reg128:$dst),
   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
 >;
+} // isFETCH = 1
 
 def CLAMP_R600 :  CLAMP <R600_Reg32>;
 def FABS_R600 : FABS<R600_Reg32>;
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
new file mode 100644
index 0000000000..eb7a7255fb
--- /dev/null
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -0,0 +1,187 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+  // Bind the strategy to the given DAG, cache the R600-typed target hooks
+  // and reset all scheduling state.
+  DAG = dag;
+  TII = static_cast<const R600InstrInfo*>(DAG->TII);
+  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+  MRI = &DAG->MRI;
+
+  // Only the Available queues are emptied here; the Pending ones are left
+  // untouched.
+  for (int Kind = IDAlu; Kind <= IDOther; ++Kind)
+    Available[Kind]->clear();
+
+  // Start outside of any ALU/FETCH run, with nothing emitted so far.
+  CurEmitted = 0;
+  CurInstKind = IDOther;
+
+  // Run-length limits per kind; pickNode() permits leaving a run once more
+  // than 3/4 of its limit has been emitted. IDOther gets no limit.
+  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+  const bool UpToHD5XXX =
+      ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX;
+  InstKindLimit[IDAlu] = 128;
+  InstKindLimit[IDFetch] = UpToHD5XXX ? 8 : 16;
+
+  // Slot budget consumed by schedNode() during an ALU run. The per-device
+  // choice below is disabled:
+  //   MaxSlots = (ST.device()->getDeviceFlag() & OCL_DEVICE_CAYMAN) ? 4 : 5;
+  MaxSlots = 10;
+  Slots = 0;
+}
+
+// Transfers every unit from QSrc to QDst, leaving QSrc empty.
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
+  ReadyQueue::iterator It = QSrc->begin();
+  const ReadyQueue::iterator End = QSrc->end();
+  while (It != End) {
+    SUnit *Unit = *It++;
+    Unit->NodeQueueId &= ~QSrc->getID(); // clear QSrc's id bit
+    QDst->push(Unit);
+  }
+  QSrc->clear(); // no effect when QSrc was already empty
+}
+
+
+// Chooses the next unit to schedule, always from the top of the region.
+// Sets NextInstKind to the kind of the returned unit (-1 if none) so that
+// schedNode() can tell whether a new run begins. Returns null when the ALU,
+// FETCH and "other" queues all came up empty.
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+  IsTopNode = true;
+  NextInstKind = -1;
+
+  // Stay in the current run only while it is an ALU/FETCH run that has
+  // emitted at most 3/4 of its limit and still has available candidates.
+  bool AllowSwitch = true;
+  if (CurInstKind != IDOther &&
+      CurEmitted <= InstKindLimit[CurInstKind] * 3 / 4 &&
+      !Available[CurInstKind]->empty())
+    AllowSwitch = false;
+
+  // ALU is tried first, unless a FETCH run must be continued.
+  SUnit *Picked = 0;
+  if (AllowSwitch || CurInstKind == IDAlu) {
+    Picked = pickAlu();
+    if (Picked)
+      NextInstKind = IDAlu;
+  }
+
+  // Otherwise fall back to FETCH, then to everything else.
+  for (int Kind = IDFetch; !Picked && Kind <= IDOther; ++Kind) {
+    Picked = pickOther(Kind);
+    if (Picked)
+      NextInstKind = Kind;
+  }
+
+  DEBUG(
+      if (!Picked) {
+        dbgs() << "NO NODE ";
+        for (int Q = 0; Q != IDLast; ++Q) {
+          Available[Q]->dump();
+          Pending[Q]->dump();
+        }
+      } else {
+        dbgs() << "picked node: ";
+        Picked->dump(DAG);
+      }
+  );
+  return Picked;
+}
+
+// After SU is scheduled: track the current run, release pending units.
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  DEBUG(dbgs() << "scheduled: "; SU->dump(DAG));
+
+  if (CurInstKind != NextInstKind) { // a run of a different kind starts
+    CurInstKind = NextInstKind;
+    CurEmitted = 0;
+    Slots = 0;
+  }
+  ++CurEmitted;
+
+  // Slots advances only inside an ALU run; pending ALU units are held back
+  // until the run fills MaxSlots or has no available candidates left.
+  const bool InAluRun = CurInstKind == IDAlu;
+  if (!InAluRun || ++Slots == MaxSlots || Available[IDAlu]->empty()) {
+    MoveUnits(Pending[IDAlu], Available[IDAlu]);
+    Slots = 0;
+  }
+  if (CurInstKind != IDFetch) // pending FETCH units wait out a FETCH run
+    MoveUnits(Pending[IDFetch], Available[IDFetch]);
+  MoveUnits(Pending[IDOther], Available[IDOther]); // never held back
+}
+
+// Files SU into the pending queue of its instruction kind.
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+  const int Kind = getInstKind(SU);
+  DEBUG(dbgs() << Kind << " <= "; SU->dump(DAG));
+
+  // Pending, not Available: schedNode()/pickOther() move it over later.
+  Pending[Kind]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) { // no-op; pickNode() always reports IsTopNode = true
+}
+
+// Maps SU's instruction to IDAlu, IDFetch or IDOther. The ALU and FETCH
+// TSFlags bits are consulted first, in that order; COPY is then treated as
+// ALU, and anything left is IDOther.
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+  const int Opcode = SU->getInstr()->getOpcode();
+  const MCInstrDesc &Desc = TII->get(Opcode);
+
+  if (Desc.TSFlags & R600_InstFlag::ALU)
+    return IDAlu;
+  if (Desc.TSFlags & R600_InstFlag::FETCH)
+    return IDFetch;
+  if (Opcode == AMDGPU::COPY)
+    return IDAlu;
+
+  DEBUG(
+      dbgs() << "other inst: ";
+      SU->dump(DAG);
+  );
+  return IDOther;
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+  return pickOther(IDAlu); // ALU units are taken the same way as the other kinds
+}
+
+// Pops the head of Available[QID], refilling it from Pending[QID] if empty.
+SUnit* R600SchedStrategy::pickOther(int QID) {
+  ReadyQueue *Ready = Available[QID];
+  if (Ready->empty()) {
+    MoveUnits(Pending[QID], Ready);
+    if (QID == IDAlu)
+      Slots = 0; // the slot count restarts whenever the ALU queue ran dry
+  }
+  if (Ready->empty())
+    return 0; // both queues of this kind are empty
+  ReadyQueue::iterator Front = Ready->begin();
+  SUnit *Head = *Front;
+  Ready->remove(Front);
+  return Head;
+}
+
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.h b/lib/Target/AMDGPU/R600MachineScheduler.h
new file mode 100644
index 0000000000..33fce7ddf9
--- /dev/null
+++ b/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -0,0 +1,97 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+/// R600 scheduling strategy built on per-kind (ALU/FETCH/other) queues.
+class R600SchedStrategy : public MachineSchedStrategy {
+  const ScheduleDAGMI *DAG;
+  const R600InstrInfo *TII;
+  const R600RegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+
+  /// ReadyQueue ids; the Pending queues use the same values shifted by 4.
+  enum InstQueue {
+    QAlu = 1,
+    QFetch = 2,
+    QOther = 4
+  };
+
+  /// Instruction kinds; also the index into the per-kind arrays below.
+  enum InstKind {
+    IDAlu,
+    IDFetch,
+    IDOther,
+    IDLast
+  };
+
+  /// Owned: allocated in the constructor, deleted in the destructor.
+  ReadyQueue *Available[IDLast], *Pending[IDLast];
+  int CurInstKind;  ///< Kind of the run currently being emitted.
+  int CurEmitted;   ///< Units scheduled so far in the current run.
+  int NextInstKind; ///< Kind of the unit last chosen by pickNode().
+  int InstKindLimit[IDLast];
+  int Slots, MaxSlots;
+
+  // The queues are held through raw owning pointers, so a copy would delete
+  // them twice. Declared private and left undefined to forbid copying.
+  R600SchedStrategy(const R600SchedStrategy &);
+  R600SchedStrategy &operator=(const R600SchedStrategy &);
+
+public:
+  R600SchedStrategy() :
+    DAG(0), TII(0), TRI(0), MRI(0),
+    CurInstKind(IDOther), CurEmitted(0), NextInstKind(-1),
+    Slots(0), MaxSlots(0) {
+    Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+    Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+    Available[IDOther] = new ReadyQueue(QOther, "AOther");
+    Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+    Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+    Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+    for (unsigned I = 0; I < IDLast; ++I)
+      InstKindLimit[I] = 0;
+  }
+
+  virtual ~R600SchedStrategy() {
+    for (unsigned I = 0; I < IDLast; ++I) {
+      delete Available[I];
+      delete Pending[I];
+    }
+  }
+
+  virtual void initialize(ScheduleDAGMI *dag);
+  virtual SUnit *pickNode(bool &IsTopNode);
+  virtual void schedNode(SUnit *SU, bool IsTopNode);
+  virtual void releaseTopNode(SUnit *SU);
+  virtual void releaseBottomNode(SUnit *SU);
+private:
+  int getInstKind(SUnit *SU);
+  SUnit* pickAlu();
+  SUnit* pickOther(int QID);
+  void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
author	Vadim Girlin <vadimgirlin@gmail.com>	2012-12-27 20:56:20 +0400
committer	Vadim Girlin <vadimgirlin@gmail.com>	2012-12-29 17:18:20 +0400
commit	eb3d66a03cd8ac5a13e84a92108fc30d2ee66fa9 (patch)
tree	01c0c1d6fdbeb0273ef15abfafc7246a57f7a732
parent	6143ff7e4c568acac503453118c316aed0437a22 (diff)