diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2015-11-03 20:14:47 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-11-03 20:14:47 +0000 |
commit | b70e162b70e3bb112bc3fb7c4dec24c032aa6617 (patch) | |
tree | fc8770616062a699f2e812aa7990dff9ad3edc30 | |
parent | 00c306b4f2ae9e970fa6f867d3cd193372c968ac (diff) |
XXX: Struct fixes [struct-divergence-v1]
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 41 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 45 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIInstrInfo.cpp | 8 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIInstrInfo.td | 19 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIInstructions.td | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/endcf-loop-header.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/i1-copy-implicit-def.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/i1-copy-phi.ll | 10 |
8 files changed, 111 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 73b87b082e2..65f25f368e0 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -149,6 +149,7 @@ private: uint32_t Offset, uint32_t Width); SDNode *SelectS_BFEFromShifts(SDNode *N); SDNode *SelectS_BFE(SDNode *N); + SDNode *SelectBRCOND(SDNode *N); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -576,6 +577,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; return SelectS_BFE(N); + case ISD::BRCOND: + return SelectBRCOND(N); } return SelectCode(N); @@ -1449,6 +1452,44 @@ SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { return SelectCode(N); } +SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { + SDValue Cond = N->getOperand(1); + if (Cond.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return SelectCode(N); + + unsigned IntrinsicID = + cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); + if (IntrinsicID != AMDGPUIntrinsic::AMDGPU_uniform_cond) + return SelectCode(N); + + SDValue RealCond = Cond.getOperand(1); + if (RealCond.hasOneUse() && isSALUCmp(RealCond)) { + // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. + return SelectCode(N); + } + + // The result of VOPC instructions is or'd against ~EXEC before it is + // written to vcc or another SGPR. This means that the value '1' is always + // written to the corresponding bit for results that are masked. In order + // to correctly check against vccz, we need to and VCC with the EXEC + // register in order to clear the value from the masked bits. + + SDLoc SL(N); + + SDNode *MaskedCond = + CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, + CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), + RealCond); + SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, + SDValue(MaskedCond, 0), + SDValue()); // Passing SDValue() adds a + // glue output. 
+ return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, + N->getOperand(2), // Basic Block + VCC.getValue(0), // Chain + VCC.getValue(1)); // Glue +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const { diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 6fd38ca9022..3d0456052bd 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -877,6 +877,33 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, SDNode *Intr = BRCOND.getOperand(1).getNode(); SDValue Target = BRCOND.getOperand(2); SDNode *BR = nullptr; + bool InvertBranch = false; + + if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + unsigned IntrinsicID = + cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue(); + if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) + return BRCOND; + } + + if (Intr->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + unsigned IntrinsicID = + cast<ConstantSDNode>(Intr->getOperand(0)->getOperand(0))->getZExtValue(); + if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) { + SDValue UniformCond = Intr->getOperand(0); + SDValue RealCond = UniformCond.getOperand(1); + SDValue Cond = BRCOND.getOperand(1); + + //brcond <- cond <- uniformcond <- realcond + DAG.ReplaceAllUsesWith(UniformCond, RealCond); + //brcond <- cond <- realcond + SDValue NewUniformCond = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i1, + DAG.getTargetConstant(AMDGPUIntrinsic::AMDGPU_uniform_cond, DL, MVT::i32), + Cond); + return DAG.getNode(ISD::BRCOND, DL, MVT::Other, + BRCOND.getOperand(0), NewUniformCond, Target); + } + } if (Intr->getOpcode() == ISD::SETCC) { // As long as we negate the condition everything is fine @@ -885,6 +912,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() == ISD::SETNE); Intr = SetCC->getOperand(0).getNode(); + InvertBranch = true; } else { // Get 
the target from BR if we don't negate the condition @@ -892,20 +920,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, Target = BR->getOperand(1); } - #if 1 - if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) { - unsigned IntrinsicID = cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue(); - - if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) { - SDValue Cond = Intr->getOperand(1); - return DAG.getNode(AMDGPUISD::BRCOND_UNIFORM, DL, MVT::Other, - BRCOND.getOperand(0), Cond, BRCOND.getOperand(2)); - } - } - - #endif - - assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); +// DAG.dump(); +// Intr->dump(); +// assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); // Build the result and ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end()); diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 52a23c8180a..0aa32c0be31 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2298,6 +2298,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } break; + case AMDGPU::S_CBRANCH_SCC0: + case AMDGPU::S_CBRANCH_SCC1: + // Clear unused bits of vcc + BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + break; + case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 7158dd5686a..257585fa32b 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -220,7 +220,6 @@ def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), (setcc node:$lhs, node:$rhs, node:$cond), [{ for (SDNode *Use : N->uses()) { - Use->dump(); if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg) return false; @@ -231,14 +230,26 @@ def si_setcc_uniform : PatFrag < return true; }]>; +def si_br_uniform : PatFrag < + (ops node:$cond, 
node:$bb), + (brcond (int_AMDGPU_uniform_cond node:$cond), node:$bb) +>; + +/* +def si_br_uniform_inverse : PatFrag < + (ops node:$cond, node:$bb), + (brcond (setne (int_AMDGPU_uniform_cond node:$cond), -1), node:$bb) +>; +*/ + def si_br_uniform_scc : PatFrag < - (ops node:$cond, node:$bb), (SIbr_uniform node:$cond, node:$bb), [{ - SDValue SetCC = N->getOperand(1); + (ops node:$cond, node:$bb), (si_br_uniform node:$cond, node:$bb), [{ + SDValue SetCC = N->getOperand(1)->getOperand(1); return SetCC.hasOneUse() && isSALUCmp(SetCC); }]>; def si_br_uniform_vcc : PatFrag < - (ops node:$cond, node:$bb), (SIbr_uniform node:$cond, node:$bb), [{ + (ops node:$cond, node:$bb), (si_br_uniform node:$cond, node:$bb), [{ SDValue SetCC = N->getOperand(1); return !SetCC.hasOneUse() || !isSALUCmp(SetCC); }]>; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 145b7f3bff3..77d689e9f37 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -466,7 +466,7 @@ def S_CBRANCH_VCCZ : SOPP < def S_CBRANCH_VCCNZ : SOPP < 0x00000007, (ins sopp_brtarget:$simm16), "s_cbranch_vccnz $simm16", - [(si_br_uniform_vcc (i1 VCC), bb:$simm16)] + [] >; } // End Uses = [VCC] diff --git a/test/CodeGen/AMDGPU/endcf-loop-header.ll b/test/CodeGen/AMDGPU/endcf-loop-header.ll index 267a323c506..c67095438ee 100644 --- a/test/CodeGen/AMDGPU/endcf-loop-header.ll +++ b/test/CodeGen/AMDGPU/endcf-loop-header.ll @@ -12,8 +12,9 @@ ; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}} ; CHECK-NOT: s_or_b64 exec, exec ; CHECK: s_cbranch_execnz [[LOOP_LABEL]] -define void @test(i32 addrspace(1)* %out, i32 %cond) { +define void @test(i32 addrspace(1)* %out) { entry: + %cond = call i32 @llvm.r600.read.tidig.x() #0 %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %if, label %loop @@ -32,3 +33,7 @@ done: store i32 %inc, i32 addrspace(1)* %tmp3 ret void } + +declare i32 @llvm.r600.read.tidig.x() #0 + +attributes #0 = { readnone } diff --git 
a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll index b11a2113764..38cf93b85b2 100644 --- a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -4,9 +4,7 @@ ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: ; SI: BB#0: -; SI-NEXT: s_and_saveexec_b64 -; SI-NEXT: s_xor_b64 -; SI-NEXT: BB#1: +; SI-NEXT: s_cbranch_vccnz define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 { bb: br i1 undef, label %bb1, label %bb2 diff --git a/test/CodeGen/AMDGPU/i1-copy-phi.ll b/test/CodeGen/AMDGPU/i1-copy-phi.ll index 105cd06b330..e6129e62e34 100644 --- a/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -10,9 +10,11 @@ ; SI: s_and_saveexec_b64 ; SI: s_xor_b64 ; SI: s_endpgm -define void @br_i1_phi(i32 %arg, i1 %arg1) #0 { +define void @br_i1_phi(i32 %arg) { bb: - br i1 %arg1, label %bb2, label %bb3 + %tidig = call i32 @llvm.r600.read.tidig.x() #0 + %cmp = trunc i32 %tidig to i1 + br i1 %cmp, label %bb2, label %bb3 bb2: ; preds = %bb br label %bb3 @@ -28,3 +30,7 @@ bb4: ; preds = %bb3 bb6: ; preds = %bb4, %bb3 ret void } + +declare i32 @llvm.r600.read.tidig.x() #0 + +attributes #0 = { readnone } |