summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2015-11-03 20:14:47 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2015-11-03 20:14:47 +0000
commit: b70e162b70e3bb112bc3fb7c4dec24c032aa6617 (patch)
tree: fc8770616062a699f2e812aa7990dff9ad3edc30
parent: 00c306b4f2ae9e970fa6f867d3cd193372c968ac (diff)
XXX: Struct fixes [struct-divergence-v1]
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 41
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.cpp 45
-rw-r--r-- lib/Target/AMDGPU/SIInstrInfo.cpp 8
-rw-r--r-- lib/Target/AMDGPU/SIInstrInfo.td 19
-rw-r--r-- lib/Target/AMDGPU/SIInstructions.td 2
-rw-r--r-- test/CodeGen/AMDGPU/endcf-loop-header.ll 7
-rw-r--r-- test/CodeGen/AMDGPU/i1-copy-implicit-def.ll 4
-rw-r--r-- test/CodeGen/AMDGPU/i1-copy-phi.ll 10
8 files changed, 111 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 73b87b082e2..65f25f368e0 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -149,6 +149,7 @@ private:
uint32_t Offset, uint32_t Width);
SDNode *SelectS_BFEFromShifts(SDNode *N);
SDNode *SelectS_BFE(SDNode *N);
+ SDNode *SelectBRCOND(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -576,6 +577,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
return SelectS_BFE(N);
+ case ISD::BRCOND:
+ return SelectBRCOND(N);
}
return SelectCode(N);
@@ -1449,6 +1452,44 @@ SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
return SelectCode(N);
}
+SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
+  // Custom selection applies only when the branch condition is produced by
+  // the AMDGPU_uniform_cond intrinsic; every other brcond goes to tablegen.
+  SDValue Wrapper = N->getOperand(1);
+  bool IsUniformCond = false;
+  if (Wrapper.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+    unsigned ID =
+        cast<ConstantSDNode>(Wrapper.getOperand(0))->getZExtValue();
+    IsUniformCond = (ID == AMDGPUIntrinsic::AMDGPU_uniform_cond);
+  }
+  if (!IsUniformCond)
+    return SelectCode(N);
+
+  // Operand 1 of the intrinsic is the condition it wraps.
+  SDValue Compare = Wrapper.getOperand(1);
+  bool HandledByTablegen = Compare.hasOneUse() && isSALUCmp(Compare);
+  if (HandledByTablegen) {
+    // S_CBRANCH_SCC* will be used for this brcond, which tablegen selects.
+    return SelectCode(N);
+  }
+
+  // VOPC instructions or their result with ~EXEC before writing it to vcc or
+  // another SGPR, so the bit for every masked-off result is always '1'.
+  // To test the value against vccz correctly, the condition is and'ed with
+  // the EXEC register first, which clears those masked bits again.
+
+  SDLoc DL(N);
+  SDValue Exec = CurDAG->getRegister(AMDGPU::EXEC, MVT::i1);
+  SDNode *ExecMasked =
+      CurDAG->getMachineNode(AMDGPU::S_AND_B64, DL, MVT::i1, Exec, Compare);
+
+  // Handing getCopyToReg an empty SDValue as glue input makes it add a glue
+  // output to the copy.
+  SDValue NoGlue;
+  SDValue CopyToVCC =
+      CurDAG->getCopyToReg(N->getOperand(0), DL, AMDGPU::VCC,
+                           SDValue(ExecMasked, 0), NoGlue);
+
+  SDValue Target = N->getOperand(2);         // Basic Block
+  SDValue Chain = CopyToVCC.getValue(0);     // Chain
+  SDValue Glue = CopyToVCC.getValue(1);      // Glue
+  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
+                              Target, Chain, Glue);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 6fd38ca9022..3d0456052bd 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -877,6 +877,33 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
SDNode *BR = nullptr;
+ bool InvertBranch = false;
+
+ if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue();
+ if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond)
+ return BRCOND;
+ }
+
+ if (Intr->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intr->getOperand(0)->getOperand(0))->getZExtValue();
+ if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) {
+ SDValue UniformCond = Intr->getOperand(0);
+ SDValue RealCond = UniformCond.getOperand(1);
+ SDValue Cond = BRCOND.getOperand(1);
+
+ //brcond <- cond <- uniformcond <- realcond
+ DAG.ReplaceAllUsesWith(UniformCond, RealCond);
+ //brcond <- cond <- realcond
+ SDValue NewUniformCond = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i1,
+ DAG.getTargetConstant(AMDGPUIntrinsic::AMDGPU_uniform_cond, DL, MVT::i32),
+ Cond);
+ return DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ BRCOND.getOperand(0), NewUniformCond, Target);
+ }
+ }
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
@@ -885,6 +912,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
ISD::SETNE);
Intr = SetCC->getOperand(0).getNode();
+ InvertBranch = true;
} else {
// Get the target from BR if we don't negate the condition
@@ -892,20 +920,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
Target = BR->getOperand(1);
}
- #if 1
- if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- unsigned IntrinsicID = cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue();
-
- if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) {
- SDValue Cond = Intr->getOperand(1);
- return DAG.getNode(AMDGPUISD::BRCOND_UNIFORM, DL, MVT::Other,
- BRCOND.getOperand(0), Cond, BRCOND.getOperand(2));
- }
- }
-
- #endif
-
- assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+// DAG.dump();
+// Intr->dump();
+// assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
// Build the result and
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 52a23c8180a..0aa32c0be31 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2298,6 +2298,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
break;
+ case AMDGPU::S_CBRANCH_SCC0:
+ case AMDGPU::S_CBRANCH_SCC1:
+ // Clear unused bits of vcc
+ BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+ break;
+
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 7158dd5686a..257585fa32b 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -220,7 +220,6 @@ def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
(setcc node:$lhs, node:$rhs, node:$cond), [{
for (SDNode *Use : N->uses()) {
- Use->dump();
if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
return false;
@@ -231,14 +230,26 @@ def si_setcc_uniform : PatFrag <
return true;
}]>;
+def si_br_uniform : PatFrag < // Matches a brcond whose condition is wrapped in int_AMDGPU_uniform_cond.
+ (ops node:$cond, node:$bb), // $cond: the value passed to the intrinsic; $bb: the second brcond operand.
+ (brcond (int_AMDGPU_uniform_cond node:$cond), node:$bb)
+>;
+
+/*
+def si_br_uniform_inverse : PatFrag <
+ (ops node:$cond, node:$bb),
+ (brcond (setne (int_AMDGPU_uniform_cond node:$cond), -1), node:$bb)
+>;
+*/
+
def si_br_uniform_scc : PatFrag <
- (ops node:$cond, node:$bb), (SIbr_uniform node:$cond, node:$bb), [{
- SDValue SetCC = N->getOperand(1);
+ (ops node:$cond, node:$bb), (si_br_uniform node:$cond, node:$bb), [{ // Accept only when the wrapped condition has one use and satisfies isSALUCmp.
+ SDValue SetCC = N->getOperand(1)->getOperand(1); // Operand 1 of the brcond is the uniform_cond intrinsic; its operand 1 is the wrapped condition.
return SetCC.hasOneUse() && isSALUCmp(SetCC);
}]>;
def si_br_uniform_vcc : PatFrag <
- (ops node:$cond, node:$bb), (SIbr_uniform node:$cond, node:$bb), [{
- SDValue SetCC = N->getOperand(1);
+ (ops node:$cond, node:$bb), (si_br_uniform node:$cond, node:$bb), [{ // Complement of si_br_uniform_scc: accept when the wrapped condition has several uses or fails isSALUCmp.
+ SDValue SetCC = N->getOperand(1)->getOperand(1); // Operand 1 of the brcond is the uniform_cond intrinsic; look through it to the wrapped condition, as si_br_uniform_scc does.
return !SetCC.hasOneUse() || !isSALUCmp(SetCC);
}]>;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 145b7f3bff3..77d689e9f37 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -466,7 +466,7 @@ def S_CBRANCH_VCCZ : SOPP <
def S_CBRANCH_VCCNZ : SOPP <
0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16",
- [(si_br_uniform_vcc (i1 VCC), bb:$simm16)]
+ [] // No tablegen pattern: AMDGPUDAGToDAGISel::SelectBRCOND creates this instruction directly via SelectNodeTo.
>;
} // End Uses = [VCC]
diff --git a/test/CodeGen/AMDGPU/endcf-loop-header.ll b/test/CodeGen/AMDGPU/endcf-loop-header.ll
index 267a323c506..c67095438ee 100644
--- a/test/CodeGen/AMDGPU/endcf-loop-header.ll
+++ b/test/CodeGen/AMDGPU/endcf-loop-header.ll
@@ -12,8 +12,9 @@
; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
; CHECK-NOT: s_or_b64 exec, exec
; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
-define void @test(i32 addrspace(1)* %out, i32 %cond) {
+define void @test(i32 addrspace(1)* %out) {
entry:
+ %cond = call i32 @llvm.r600.read.tidig.x() #0
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %if, label %loop
@@ -32,3 +33,7 @@ done:
store i32 %inc, i32 addrspace(1)* %tmp3
ret void
}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
index b11a2113764..38cf93b85b2 100644
--- a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -4,9 +4,7 @@
; SILowerI1Copies was not handling IMPLICIT_DEF
; SI-LABEL: {{^}}br_implicit_def:
; SI: BB#0:
-; SI-NEXT: s_and_saveexec_b64
-; SI-NEXT: s_xor_b64
-; SI-NEXT: BB#1:
+; SI-NEXT: s_cbranch_vccnz
define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
bb:
br i1 undef, label %bb1, label %bb2
diff --git a/test/CodeGen/AMDGPU/i1-copy-phi.ll b/test/CodeGen/AMDGPU/i1-copy-phi.ll
index 105cd06b330..e6129e62e34 100644
--- a/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -10,9 +10,11 @@
; SI: s_and_saveexec_b64
; SI: s_xor_b64
; SI: s_endpgm
-define void @br_i1_phi(i32 %arg, i1 %arg1) #0 {
+define void @br_i1_phi(i32 %arg) {
bb:
- br i1 %arg1, label %bb2, label %bb3
+ %tidig = call i32 @llvm.r600.read.tidig.x() #0
+ %cmp = trunc i32 %tidig to i1
+ br i1 %cmp, label %bb2, label %bb3
bb2: ; preds = %bb
br label %bb3
@@ -28,3 +30,7 @@ bb4: ; preds = %bb3
bb6: ; preds = %bb4, %bb3
ret void
}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+attributes #0 = { readnone }