summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVadim Girlin <vadimgirlin@gmail.com>2013-10-14 20:59:16 +0400
committerVadim Girlin <vadimgirlin@gmail.com>2013-10-14 20:59:16 +0400
commit72cf2ee8207a50994564d25abe65bf0bdd485e1c (patch)
tree346b6eecbb138d06f5e271f17c8e66d770665882
parent142d2c9e28e89883e987f2c7865d5836c52c741b (diff)
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp13
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h7
-rw-r--r--lib/Target/R600/SIISelLowering.cpp2
-rw-r--r--lib/Target/R600/SIInstrInfo.td2
-rw-r--r--lib/Target/R600/SIInstructions.td28
-rw-r--r--lib/Target/R600/SISchedule.td28
6 files changed, 64 insertions, 16 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 1e21c8e8b5..fa961f9801 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineScheduler.h"
+
#include "AMDGPUSubtarget.h"
using namespace llvm;
@@ -109,6 +111,17 @@ AMDGPUSubtarget::getDataLayout() const {
return DataLayout;
}
+void
+AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, // not read in this body
+ MachineInstr *end, // not read in this body
+ unsigned NumRegionInstrs) const { // Adjusts Policy based on subtarget generation and instruction count (presumably called per scheduling region -- confirm against the caller).
+ if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS && NumRegionInstrs > 32) { // only SOUTHERN_ISLANDS or later, and only when there are more than 32 instructions
+ Policy.ShouldTrackPressure = true; // the only Policy field written here; all others keep the caller's values
+ }
+}
+
+
std::string
AMDGPUSubtarget::getDeviceName() const {
return DevName;
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index c5345cc764..6ffd80c2a5 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -67,9 +67,14 @@ public:
bool IsIRStructurizerEnabled() const;
virtual bool enableMachineScheduler() const {
- return getGeneration() <= NORTHERN_ISLANDS;
+ return true;
}
+ virtual void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const;
+
// Helper functions to simplify if statements
bool isTargetELF() const;
std::string getDataLayout() const;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 098ca7f06c..20f3f9ad5d 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -100,7 +100,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SETCC);
- setSchedulingPreference(Sched::RegPressure);
+ setSchedulingPreference(Sched::Source);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ed42a2ad95..e7fdc5ad20 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -417,7 +417,7 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
- mayLoad = 1 in {
+ mayLoad = 1, SchedRW = [WriteVMEM] in {
let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index e1acb19805..95fea66999 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -393,17 +393,23 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End isCompare = 1, SchedRW = [WriteFloat]
-def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
-def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
+
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
+
+let SchedRW = [WriteVMEM] in {
+
+def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
+def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
+} // let SchedRW = [WriteVMEM]
+
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
@@ -997,6 +1003,9 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+
+let SchedRW = [WriteInt] in {
+
defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
@@ -1017,6 +1026,9 @@ defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
+} // let SchedRW = [WriteInt]
+
+
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
let SchedRW = [WriteFloat] in { // XXX: Not sure this is correct
@@ -1063,13 +1075,19 @@ def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
[(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))]
>;
-} // let SchedRn = [WriteFloat]
+} // let SchedRW = [WriteFloat]
} // End neverHasSideEffects
+
+let SchedRW = [WriteFloat] in {
+
def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+
+} // let SchedRW = [WriteFloat]
+
def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
@@ -1136,11 +1154,15 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
let isCommutable = 1 in {
+let SchedRW = [WriteInt] in {
+
def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+} // let SchedRW = [WriteInt]
+
} // isCommutable = 1
def : Pat <
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
index d42c2e0805..5dab9511c3 100644
--- a/lib/Target/R600/SISchedule.td
+++ b/lib/Target/R600/SISchedule.td
@@ -32,7 +32,15 @@ def WriteIntMUL : SchedWrite;
def WriteConversion : SchedWrite;
def WriteI24 : SchedWrite;
-def SIModel : SchedMachineModel;
+def SIModel : SchedMachineModel { // Scheduling machine model for SI; every field below is set explicitly.
+ int IssueWidth = 1; // Max micro-ops that may be scheduled per cycle.
+ int MinLatency = -1; // Determines which instructions are allowed in a group.
+ // (-1): in-order, (0): out-of-order, (1): in-order + variable latencies.
+ int MicroOpBufferSize = 0; // Max micro-ops that can be buffered.
+ int LoadLatency = -1; // Cycles for loads to access the cache.
+ int HighLatency = -1; // Approximation of cycles for "high latency" ops.
+ int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.
+}
@@ -41,10 +49,10 @@ let BufferSize = 0 in {
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1>;
-def HWExport : ProcResource<7>; // Taken from S_WAITCNT
-def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
+def HWExport : ProcResource<8>;
+def HWLGKM : ProcResource<8>;
def HWSALU : ProcResource<1>;
-def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
+def HWVMEM : ProcResource<8>;
def HWVALU : ProcResource<1>;
}
@@ -62,12 +70,12 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.
-def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
-def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
-def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
-def : HWWriteRes<WriteSALU, [HWSALU], 1>;
-def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
-def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+def : HWWriteRes<WriteBranch, [HWBranch], 16>;
+def : HWWriteRes<WriteExport, [HWExport], 200>;
+def : HWWriteRes<WriteLDS, [HWLGKM], 20>;
+def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+def : HWWriteRes<WriteSMEM, [HWLGKM], 200>;
+def : HWWriteRes<WriteVMEM, [HWVMEM], 200>;
// XXX: These definitions assume full double-precision speed, some devices are
// slower. These are also taken from the AMD Accelerated Parallel Processing