summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2012-12-06 14:09:51 -0800
committerTom Stellard <thomas.stellard@amd.com>2012-12-11 16:37:18 +0000
commita18026e83f5ca5e043702e6bfef567e2854d54e8 (patch)
tree92efb5e06b7e0e8c8aa0b47913cdad444666dfe5
parent8ac1a1f17d7d36869e5661d6f4219c8c6cd3015a (diff)
R600: Convert global store address to dword offset during isel
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td3
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructions.td5
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp45
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h1
-rw-r--r--lib/Target/AMDGPU/R600Instructions.td4
7 files changed, 46 insertions, 14 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 63b937f656..1f31c2ac55 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -387,6 +387,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 992dab7052..c7abaf69b4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -111,6 +111,7 @@ enum {
BRANCH_COND,
// End AMDIL ISD Opcodes
BITALIGN,
+ DWORDADDR,
FRACT,
FMAX,
SMAX,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 23ca35aadc..96368e8541 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -29,6 +29,9 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+// This argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 443c6d4bc5..e634d20b61 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -179,6 +179,11 @@ class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt rc:$src0)
>;
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
include "R600Instructions.td"
include "SIInstrInfo.td"
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 5da52af4b4..f7ae3c91ed 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -66,6 +66,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
@@ -137,23 +142,11 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- // Convert to DWORD address
- unsigned NewAddr = MRI.createVirtualRegister(
- &AMDGPU::R600_TReg32_XRegClass);
- unsigned ShiftValue = MRI.createVirtualRegister(
- &AMDGPU::R600_TReg32RegClass);
unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
- // XXX In theory, we should be able to pass ShiftValue directly to
- // the LSHR_eg instruction as an inline literal, but I tried doing it
- // this way and it didn't produce the correct results.
- TII->buildMovImm(*BB, I, ShiftValue, 2);
- TII->buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, NewAddr,
- MI->getOperand(1).getReg(),
- ShiftValue);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
- .addReg(NewAddr)
+ .addOperand(MI->getOperand(1))
.addImm(EOP); // Set End of program bit
break;
}
@@ -316,6 +309,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -763,6 +757,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return Cond;
}
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+ return SDValue();
+}
+
+
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index bdb0a55be9..2b954dab55 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -60,6 +60,7 @@ private:
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index bd5e9d6b86..31639a36ea 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1117,7 +1117,6 @@ let Predicates = [isEGorCayman] in {
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
-
let usesCustomInserter = 1 in {
class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT <
@@ -1589,4 +1588,7 @@ def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
} // End isR600toCayman Predicate