diff options
author | Vadim Girlin <vadimgirlin@gmail.com> | 2012-12-27 20:56:20 +0400 |
---|---|---|
committer | Vadim Girlin <vadimgirlin@gmail.com> | 2012-12-29 17:18:20 +0400 |
commit | eb3d66a03cd8ac5a13e84a92108fc30d2ee66fa9 (patch) | |
tree | 01c0c1d6fdbeb0273ef15abfafc7246a57f7a732 | |
parent | 6143ff7e4c568acac503453118c316aed0437a22 (diff) |
R600: initial scheduler code (branch: r600-wip-sched)
This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances of ALU instructions being packed together in a single VLIW group).
It also tries to reduce clause switching by grouping instructions of the
same kind (ALU/FETCH/CF) together.
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 17 | ||||
-rw-r--r-- | lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Defines.h | 6 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 47 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineScheduler.cpp | 187 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineScheduler.h | 97 |
6 files changed, 344 insertions, 12 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 098d42e79a..906f875997 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -17,6 +17,7 @@ #include "AMDGPU.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" +#include "R600MachineScheduler.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" @@ -39,6 +40,14 @@ extern "C" void LLVMInitializeAMDGPUTarget() { RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); } +static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new R600SchedStrategy()); +} + +static MachineSchedRegistry +SchedCustomRegistry("r600", "Run R600's custom scheduler", + createR600MachineScheduler); + AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -70,7 +79,13 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + enablePass(&MachineSchedulerID); + MachineSchedRegistry::setDefault(createR600MachineScheduler); + } + } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM<AMDGPUTargetMachine>(); diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 8e5a4a2127..d4f1346946 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -366,7 +366,7 @@ void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, EmitByte(0, OS); if (Opcode == AMDGPU::TEX_SET_GRADIENTS_H || - Opcode == AMDGPU::TEX_SET_GRADIENTS_H) { + Opcode == 
AMDGPU::TEX_SET_GRADIENTS_V) { // XXX: Emit dst select EmitByte(7, OS); // X EmitByte(7, OS); // Y diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h index e19eea38e4..39a5cd9d2a 100644 --- a/lib/Target/AMDGPU/R600Defines.h +++ b/lib/Target/AMDGPU/R600Defines.h @@ -39,7 +39,11 @@ namespace R600_InstFlag { //FlagOperand bits 7, 8 NATIVE_OPERANDS = (1 << 9), OP1 = (1 << 10), - OP2 = (1 << 11) + OP2 = (1 << 11), + + ALU = (1<<12), + FETCH = (1<<13) + }; } diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index e997721598..27c81e0258 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -13,9 +13,24 @@ include "R600Intrinsics.td" +class R600GPUInst <dag outs, dag ins, string asm, list<dag> pattern> + : AMDGPUInst <outs, ins, asm, pattern> { + bit isALU = 0; + bit isFETCH = 0; + + let TSFlags{12} = isALU; + let TSFlags{13} = isFETCH; +} + +class R600GPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> + : R600GPUInst<outs, ins, asm, pattern> { + + field bits<32> Inst = 0xffffffff; +} + class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> - : AMDGPUInst <outs, ins, asm, pattern> { + : R600GPUInst <outs, ins, asm, pattern> { field bits<64> Inst; bit Trig = 0; @@ -48,7 +63,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, } class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst <outs, ins, asm, pattern> { + R600GPUInst <outs, ins, asm, pattern> { field bits<64> Inst; let Namespace = "AMDGPU"; @@ -239,6 +254,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, let HasNativeOperands = 1; let Op1 = 1; let DisableEncoding = "$literal"; + let isALU = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -275,6 +291,7 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, let HasNativeOperands = 1; let Op2 = 1; 
let DisableEncoding = "$literal"; + let isALU = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -313,6 +330,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, let HasNativeOperands = 1; let DisableEncoding = "$literal"; let Op3 = 1; + let isALU = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -325,7 +343,9 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, ins, asm, pattern, - itin>; + itin>{ + let isALU = 1; +} class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : @@ -336,6 +356,7 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern, pattern, itin>{ let Inst {10-0} = inst; + let isFETCH = 1; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -422,23 +443,26 @@ def isR600toCayman : Predicate< // Interpolation Instructions //===----------------------------------------------------------------------===// -def INTERP_PAIR_XY : AMDGPUShaderInst < + +let isALU = 1 in { +def INTERP_PAIR_XY : R600GPUShaderInst < (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", []>; -def INTERP_PAIR_ZW : AMDGPUShaderInst < +def INTERP_PAIR_ZW : R600GPUShaderInst < (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", []>; -def INTERP_VEC_LOAD : AMDGPUShaderInst < +def INTERP_VEC_LOAD : R600GPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", []>; +} // isALU = 1 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -633,7 +657,7 @@ def MOV : R600_1OP <0x19, "MOV", []>; let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { -class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < +class MOV_IMM <ValueType vt, Operand immType> : R600GPUInst < (outs R600_Reg32:$dst), (ins immType:$imm), "", @@ 
-880,6 +904,7 @@ multiclass CUBE_Common <bits<11> inst> { VecALU > { let isPseudo = 1; + let isALU = 1; } def _real : R600_2OP <inst, "CUBE", []>; @@ -1475,6 +1500,8 @@ def PRED_X : InstR600 < (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; + let isALU = 1; + let isTerminator = 1; } let isTerminator = 1, isBranch = 1, isBarrier = 1 in { @@ -1505,19 +1532,21 @@ def MASK_WRITE : AMDGPUShaderInst < } // End isPseudo = 1 } // End usesCustomInserter = 1 -def TXD: AMDGPUShaderInst < +let isFETCH = 1 in { +def TXD: R600GPUShaderInst < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] >; -def TXD_SHADOW: AMDGPUShaderInst < +def TXD_SHADOW: R600GPUShaderInst < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] >; +} // isFETCH = 1 def CLAMP_R600 : CLAMP <R600_Reg32>; def FABS_R600 : FABS<R600_Reg32>; diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp new file mode 100644 index 0000000000..eb7a7255fb --- /dev/null +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -0,0 +1,187 @@ +//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 Machine Scheduler interface +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "R600MachineScheduler.h" + +using namespace llvm; + +void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { + DAG = dag; + TII = static_cast<const R600InstrInfo*>(DAG->TII); + TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); + MRI = &DAG->MRI; + Available[IDAlu]->clear(); + Available[IDFetch]->clear(); + Available[IDOther]->clear(); + CurInstKind = IDOther; + CurEmitted = 0; + + InstKindLimit[IDAlu] = 128; + + const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) { + InstKindLimit[IDFetch] = 8; + } else { + InstKindLimit[IDFetch] = 16; + } + +/* + if (ST.device()->getDeviceFlag() & OCL_DEVICE_CAYMAN) { + MaxSlots = 4; + } else { + MaxSlots = 5; + } +*/ + + MaxSlots = 10; + Slots = 0; +} + +void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) +{ + if (QSrc->empty()) + return; + for (ReadyQueue::iterator I = QSrc->begin(), + E = QSrc->end(); I != E; ++I) { + (*I)->NodeQueueId &= ~QSrc->getID(); + QDst->push(*I); + } + QSrc->clear(); +} + + +SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { + SUnit *SU = 0; + IsTopNode = true; + NextInstKind = -1; + + // check if we might want to switch current clause type + bool AllowSwitch = (CurInstKind == IDOther) || + (CurEmitted > InstKindLimit[CurInstKind] *3 / 4) || + (Available[CurInstKind]->empty()); + + if (AllowSwitch || CurInstKind == IDAlu) { + // try to pick ALU + SU = pickAlu(); + if (SU) + NextInstKind = IDAlu; + } + + if (!SU) { + // try to pick FETCH + SU = pickOther(IDFetch); + if (SU) + NextInstKind = IDFetch; + } + + // try to pick other + if (!SU) { + SU = pickOther(IDOther); + if (SU) + NextInstKind = IDOther; + } + + DEBUG( + if (SU) 
{ + dbgs() << "picked node: "; + SU->dump(DAG); + } else { + dbgs() << "NO NODE "; + for (int i = 0; i < IDLast; ++i) { + Available[i]->dump(); + Pending[i]->dump(); + } + } + ); + return SU; +} + +void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + + DEBUG(dbgs() << "scheduled: "); + DEBUG(SU->dump(DAG)); + + if (NextInstKind != CurInstKind) { + Slots = 0; + CurEmitted = 0; + CurInstKind = NextInstKind; + } + + ++CurEmitted; + + if (CurInstKind != IDAlu || (++Slots == MaxSlots) || Available[IDAlu]->empty()) { + MoveUnits(Pending[IDAlu], Available[IDAlu]); + Slots = 0; + } + if (CurInstKind != IDFetch) { + MoveUnits(Pending[IDFetch], Available[IDFetch]); + } + MoveUnits(Pending[IDOther], Available[IDOther]); +} + +void R600SchedStrategy::releaseTopNode(SUnit *SU) { + int IK = getInstKind(SU); + + DEBUG(dbgs() << IK << " <= "); + DEBUG(SU->dump(DAG)); + + Pending[IK]->push(SU); +} + +void R600SchedStrategy::releaseBottomNode(SUnit *SU) { +} + +int R600SchedStrategy::getInstKind(SUnit* SU) { + int Opcode = SU->getInstr()->getOpcode(); + const MCInstrDesc &Desc = TII->get(Opcode); + + if (Desc.TSFlags & R600_InstFlag::ALU) + return IDAlu; + if (Desc.TSFlags & R600_InstFlag::FETCH) + return IDFetch; + + switch (Opcode) { + case AMDGPU::COPY: + return IDAlu; + default: + DEBUG( + dbgs() << "other inst: "; + SU->dump(DAG); + ); + return IDOther; + } +} + +SUnit* R600SchedStrategy::pickAlu() { + return pickOther(IDAlu); +} + +SUnit* R600SchedStrategy::pickOther(int QID) { + SUnit *SU = 0; + ReadyQueue *AQ = Available[QID]; + + if (Available[QID]->empty()) { + MoveUnits(Pending[QID], Available[QID]); + if (QID == IDAlu) + Slots = 0; + } + if (!AQ->empty()) { + SU = *AQ->begin(); + AQ->remove(AQ->begin()); + } + return SU; +} + diff --git a/lib/Target/AMDGPU/R600MachineScheduler.h b/lib/Target/AMDGPU/R600MachineScheduler.h new file mode 100644 index 0000000000..33fce7ddf9 --- /dev/null +++ b/lib/Target/AMDGPU/R600MachineScheduler.h @@ -0,0 +1,97 @@ +//===-- 
R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 Machine Scheduler interface +// +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINESCHEDULER_H_ +#define R600MACHINESCHEDULER_H_ + +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace llvm { + +class R600SchedStrategy : public MachineSchedStrategy { + + const ScheduleDAGMI *DAG; + const R600InstrInfo *TII; + const R600RegisterInfo *TRI; + MachineRegisterInfo *MRI; + + enum InstQueue { + QAlu = 1, + QFetch = 2, + QOther = 4 + }; + + enum InstKind { + IDAlu, + IDFetch, + IDOther, + IDLast + }; + + ReadyQueue *Available[IDLast], *Pending[IDLast]; + + int CurInstKind; + int CurEmitted; + int NextInstKind; + + int InstKindLimit[IDLast]; + + int Slots, MaxSlots; + +public: + + R600SchedStrategy() : + DAG(0), TII(0), TRI(0), MRI(0) { + Available[IDAlu] = new ReadyQueue(QAlu, "AAlu"); + Available[IDFetch] = new ReadyQueue(QFetch, "AFetch"); + Available[IDOther] = new ReadyQueue(QOther, "AOther"); + Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu"); + Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch"); + Pending[IDOther] = new ReadyQueue(QOther<<4, "POther"); + } + + virtual ~R600SchedStrategy() { + for (unsigned I = 0; I < IDLast; ++I) { + delete Available[I]; + delete Pending[I]; + } + } + + virtual void initialize(ScheduleDAGMI *dag); + + virtual SUnit *pickNode(bool &IsTopNode); + + virtual void schedNode(SUnit *SU, bool IsTopNode); + + virtual void releaseTopNode(SUnit *SU); + + virtual void releaseBottomNode(SUnit *SU); + +private: + + int getInstKind(SUnit *SU); + + SUnit* 
pickAlu(); + SUnit* pickOther(int QID); + + void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); +}; + +} // namespace llvm + +#endif /* R600MACHINESCHEDULER_H_ */ |