diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-12-06 14:09:51 -0800 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-12-11 16:37:18 +0000 |
commit | a18026e83f5ca5e043702e6bfef567e2854d54e8 (patch) | |
tree | 92efb5e06b7e0e8c8aa0b47913cdad444666dfe5 | |
parent | 8ac1a1f17d7d36869e5661d6f4219c8c6cd3015a (diff) |
R600: Convert global store address to dword offset during isel
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.td | 3 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstructions.td | 5 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 45 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 4 |
7 files changed, 46 insertions, 14 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 63b937f656..1f31c2ac55 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -387,6 +387,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BRANCH_COND); // AMDGPU DAG nodes + NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) NODE_NAME_CASE(FMAX) NODE_NAME_CASE(SMAX) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 992dab7052..c7abaf69b4 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -111,6 +111,7 @@ enum { BRANCH_COND, // End AMDIL ISD Opcodes BITALIGN, + DWORDADDR, FRACT, FMAX, SMAX, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 23ca35aadc..96368e8541 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -29,6 +29,9 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ // rotl(a, b) = bitalign(a, a, 32 - b) def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>; +// The argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; + // out = a - floor(a) def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 443c6d4bc5..e634d20b61 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -179,6 +179,11 @@ class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < (dt rc:$src0) >; +class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat < + (vt (AMDGPUdwordaddr (vt rc:$addr))), + (vt rc:$addr) +>; + include "R600Instructions.td" include "SIInstrInfo.td" diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 5da52af4b4..f7ae3c91ed 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -66,6 +66,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::f32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::STORE, MVT::v4f32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -137,23 +142,11 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - // Convert to DWORD address - unsigned NewAddr = MRI.createVirtualRegister( - &AMDGPU::R600_TReg32_XRegClass); - unsigned ShiftValue = MRI.createVirtualRegister( - &AMDGPU::R600_TReg32RegClass); unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; - // XXX In theory, we should be able to pass ShiftValue directly to - // the LSHR_eg instruction as an inline literal, but I tried doing it - // this way and it didn't produce the correct results. 
- TII->buildMovImm(*BB, I, ShiftValue, 2); - TII->buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, NewAddr, - MI->getOperand(1).getReg(), - ShiftValue); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) .addOperand(MI->getOperand(0)) - .addReg(NewAddr) + .addOperand(MI->getOperand(1)) .addImm(EOP); // Set End of program bit break; } @@ -316,6 +309,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -763,6 +757,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return Cond; } +SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Ptr = Op.getOperand(2); + + if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { + // Convert pointer from byte address to dword address. 
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), + DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(2, MVT::i32))); + + if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { + assert(!"Truncated and indexed stores not supported yet"); + } else { + Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); + } + return Chain; + } + return SDValue(); +} + + SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index bdb0a55be9..2b954dab55 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -60,6 +60,7 @@ private: SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index bd5e9d6b86..31639a36ea 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -1117,7 +1117,6 @@ let Predicates = [isEGorCayman] in { //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// - let usesCustomInserter = 1 in { class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT < @@ -1589,4 +1588,7 @@ def : BitConvert <f32, i32, R600_Reg32>; def : BitConvert <v4f32, v4i32, R600_Reg128>; def : BitConvert <v4i32, v4f32, R600_Reg128>; +// DWORDADDR pattern +def : DwordAddrPat <i32, R600_Reg32>; + } // End isR600toCayman Predicate |