diff options
author | Vadim Girlin <vadimgirlin@gmail.com> | 2013-10-14 20:59:16 +0400 |
---|---|---|
committer | Vadim Girlin <vadimgirlin@gmail.com> | 2013-10-14 20:59:16 +0400 |
commit | 72cf2ee8207a50994564d25abe65bf0bdd485e1c (patch) | |
tree | 346b6eecbb138d06f5e271f17c8e66d770665882 | |
parent | 142d2c9e28e89883e987f2c7865d5836c52c741b (diff) |
wipsi_sched
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.cpp | 13 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.h | 7 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 28 | ||||
-rw-r--r-- | lib/Target/R600/SISchedule.td | 28 |
6 files changed, 64 insertions, 16 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 1e21c8e8b5..fa961f9801 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineScheduler.h" + #include "AMDGPUSubtarget.h" using namespace llvm; @@ -109,6 +111,17 @@ AMDGPUSubtarget::getDataLayout() const { return DataLayout; } +void +AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS && NumRegionInstrs > 32) { + Policy.ShouldTrackPressure = true; + } +} + + std::string AMDGPUSubtarget::getDeviceName() const { return DevName; diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index c5345cc764..6ffd80c2a5 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -67,9 +67,14 @@ public: bool IsIRStructurizerEnabled() const; virtual bool enableMachineScheduler() const { - return getGeneration() <= NORTHERN_ISLANDS; + return true; } + virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const; + // Helper functions to simplify if statements bool isTargetELF() const; std::string getDataLayout() const; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 098ca7f06c..20f3f9ad5d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -100,7 +100,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SETCC); - setSchedulingPreference(Sched::RegPressure); + setSchedulingPreference(Sched::Source); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ed42a2ad95..e7fdc5ad20 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -417,7 +417,7 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */, - mayLoad = 1 in { + mayLoad = 1, SchedRW = [WriteVMEM] in { let offen = 1, idxen = 0, addr64 = 0, offset = 0 in { def _OFFEN : MUBUF <op, (outs regClass:$vdata), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e1acb19805..95fea66999 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -393,17 +393,23 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1, SchedRW = [WriteFloat] -def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; -def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; + def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; + +let SchedRW = [WriteVMEM] in { + +def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; +def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>; def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>; def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>; def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>; +} // let SchedRW = [WriteVMEM] + //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; @@ -997,6 +1003,9 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; + +let SchedRW = [WriteInt] in { + defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; @@ -1017,6 +1026,9 @@ defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; } // End Uses = [VCC] } // End isCommutable = 1, Defs = [VCC] +} // let SchedRW = [WriteInt] + + defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; let SchedRW = [WriteFloat] in { // XXX: Not sure this is correct @@ -1063,13 +1075,19 @@ def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))] >; -} // let SchedRn = [WriteFloat] +} // let SchedRW = [WriteFloat] } // End neverHasSideEffects + +let SchedRW = [WriteFloat] in { + def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; + +} // let SchedRW = [WriteFloat] + def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; @@ -1136,11 +1154,15 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; let isCommutable = 1 in { +let SchedRW = [WriteInt] in { + def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; +} // let SchedRW = [WriteInt] + } // isCommutable = 1 def : Pat < diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td index d42c2e0805..5dab9511c3 100644 --- a/lib/Target/R600/SISchedule.td +++ b/lib/Target/R600/SISchedule.td @@ -32,7 +32,15 @@ def WriteIntMUL : SchedWrite; def WriteConversion : SchedWrite; def WriteI24 : SchedWrite; -def SIModel : SchedMachineModel; +def SIModel : SchedMachineModel { + int IssueWidth = 1; // Max micro-ops that may be scheduled per cycle. + int MinLatency = -1; // Determines which instructions are allowed in a group. + // (-1) inorder (0) ooo, (1): inorder +var latencies. + int MicroOpBufferSize = 0; // Max micro-ops that can be buffered. + int LoadLatency = -1; // Cycles for loads to access the cache. + int HighLatency = -1; // Approximation of cycles for "high latency" ops. + int MispredictPenalty = -1; // Extra cycles for a mispredicted branch. +} @@ -41,10 +49,10 @@ let BufferSize = 0 in { // XXX: Are the resource counts correct? def HWBranch : ProcResource<1>; -def HWExport : ProcResource<7>; // Taken from S_WAITCNT -def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT +def HWExport : ProcResource<8>; +def HWLGKM : ProcResource<8>; def HWSALU : ProcResource<1>; -def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT +def HWVMEM : ProcResource<8>; def HWVALU : ProcResource<1>; } @@ -62,12 +70,12 @@ class HWVALUWriteRes<SchedWrite write, int latency> : // The latency numbers are taken from AMD Accelerated Parallel Processing // guide. They may not be acurate. -def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ??? -def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ??? -def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64 -def : HWWriteRes<WriteSALU, [HWSALU], 1>; -def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ??? -def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600 +def : HWWriteRes<WriteBranch, [HWBranch], 16>; +def : HWWriteRes<WriteExport, [HWExport], 200>; +def : HWWriteRes<WriteLDS, [HWLGKM], 20>; +def : HWWriteRes<WriteSALU, [HWSALU], 1>; +def : HWWriteRes<WriteSMEM, [HWLGKM], 200>; +def : HWWriteRes<WriteVMEM, [HWVMEM], 200>; // XXX: These definitions assume full double-precision speed, some devices are // slower. These are also taken from the AMD Accelerated Parallel Processing |