summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2015-10-08 15:26:25 +0000
committerTom Stellard <thomas.stellard@amd.com>2015-10-15 15:39:56 +0000
commit3d8a9964a108e04a75e00cbd5b1f410559c01d10 (patch)
tree03caacce749dc948381ef229b38ea35f44641db3
parent869ed89757ab6dbeadd9508b284f98ca0ec82f70 (diff)
XXX: Uniform branching
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp17
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPUIntrinsics.td1
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp15
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp29
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h1
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp13
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td5
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td16
11 files changed, 98 insertions, 2 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 298b6f19451..b97ebcab462 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -66,6 +66,7 @@ Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
ModulePass *createAMDGPUAlwaysInlinePass();
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
+FunctionPass *createAMDGPUAnnotateUniformBranches();
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
extern char &SIFixControlFlowLiveIntervalsID;
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 3ad900a6ef1..bb6df3e9416 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -540,6 +540,23 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::DIV_SCALE: {
return SelectDIV_SCALE(N);
}
+ case AMDGPUISD::BRCOND_UNIFORM: { // Manually selects the uniform-branch node as an S_CBRANCH_SCC1 machine node.
+ SDValue Cond = N->getOperand(0); // NOTE(review): only referenced by the disabled check below; SITargetLowering::LowerBRCOND builds this node as (chain, cond, target), so operand 0 may actually be the chain - confirm.
+
+ // Let TableGen handle this case (check currently disabled). NOTE(review): if re-enabled, '!Cond.getOpcode() == ISD::SETCC' applies '!' before '=='; '!=' is presumably what was intended.
+// if (!Cond.getOpcode() == ISD::SETCC)
+// break;
+
+ const SDValue Ops[] = { // Operands 1 and 2 of the node are forwarded unchanged to the machine node.
+ N->getOperand(1),
+ N->getOperand(2)
+ };
+
+ return CurDAG->getMachineNode(AMDGPU::S_CBRANCH_SCC1, SDLoc(N),
+ MVT::Other, Ops); // The machine node's only result type is MVT::Other.
+ break; // NOTE(review): unreachable, the return above always exits this case.
+
+ }
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a8af7ec75f0..0b59824ab36 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2749,6 +2749,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
+ NODE_NAME_CASE(BRCOND_UNIFORM)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(INTERP_MOV)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 1e060c4d708..ddc1051846b 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -299,6 +299,7 @@ enum NodeType : unsigned {
INTERP_MOV,
INTERP_P1,
INTERP_P2,
+ BRCOND_UNIFORM,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index ab489cd2a4a..52e65e6fba7 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -71,6 +71,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+ def int_AMDGPU_uniform_cond : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrNoMem]>;
}
// Legacy names for compatibility.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5a80de900c4..2b7fc3c73ff 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -38,6 +38,12 @@
using namespace llvm;
+static cl::opt<bool>
+EnableAssumeScalarBr("amdgpu-assume-scalar-br",
+ cl::desc("Disable structurizer and run uniform analysis "
+ "for branches"),
+ cl::init(false), cl::Hidden); // Hidden, off by default; read by GCNPassConfig::addPreISel to choose which pre-ISel control-flow passes are added.
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
@@ -267,10 +273,15 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
- addPass(createStructurizeCFGPass());
+ if (!EnableAssumeScalarBr) // CFG structurization is skipped when -amdgpu-assume-scalar-br is set.
+ addPass(createStructurizeCFGPass());
addPass(createSinkingPass());
addPass(createSITypeRewriter());
- addPass(createSIAnnotateControlFlowPass());
+
+ if (!EnableAssumeScalarBr) // Default path: keep the SI control-flow annotation pass.
+ addPass(createSIAnnotateControlFlowPass());
+ else // With the flag set, the uniform-branch annotation pass is added instead.
+ addPass(createAMDGPUAnnotateUniformBranches());
return false;
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6dacc742b12..9334a275656 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -80,3 +80,32 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Semi-arbitrary large amount.
return 64;
}
+
+/// Reports whether \p V is treated as a source of divergence.
+/// \returns true if the result of the value could potentially be
+/// different across threads.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+ // Arguments to Kernels are not a source of divergence (every Argument is treated this way here).
+ if (isa<Argument>(V))
+ return false;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Assume all non-constant loads are a source of divergence.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return AS != AMDGPUAS::CONSTANT_ADDRESS; // Only loads from the constant address space are reported as non-divergent.
+ }
+
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+ switch(Intrinsic->getIntrinsicID()) {
+ default: return false; // No intrinsic is listed yet, so every IntrinsicInst is reported as non-divergent and never reaches the CallInst check below.
+ }
+ }
+
+ // Assume all function calls are a source of divergence.
+ if (isa<CallInst>(I))
+ return true;
+ }
+ return false; // Any other value is not reported as a source of divergence.
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index dee0a69d1e6..aff52b33ecc 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -60,6 +60,7 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
+ bool isSourceOfDivergence(const Value *V) const;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 804b5e6075e..6fd38ca9022 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -892,6 +892,19 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
Target = BR->getOperand(1);
}
+ #if 1
+ if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) { // Chainless intrinsic: check whether it is the uniform-condition marker.
+ unsigned IntrinsicID = cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue(); // Operand 0 holds the intrinsic ID as a constant.
+
+ if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) {
+ SDValue Cond = Intr->getOperand(1); // Strip the marker: branch directly on the intrinsic's argument.
+ return DAG.getNode(AMDGPUISD::BRCOND_UNIFORM, DL, MVT::Other,
+ BRCOND.getOperand(0), Cond, BRCOND.getOperand(2)); // NOTE(review): uses BRCOND.getOperand(2) rather than the Target value assigned just above - confirm this is intended.
+ }
+ }
+
+ #endif
+
assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
// Build the result and
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 61e9022c47b..4f9206e5c5f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -136,6 +136,11 @@ def SIconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
>;
+def SIbr_uniform : SDNode < // TableGen view of the AMDGPUISD::BRCOND_UNIFORM node.
+ "AMDGPUISD::BRCOND_UNIFORM", SDTypeProfile <0, 2, // No results, two operands.
+ [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, [SDNPHasChain] // Operand 0 is an integer; operand 1 has type OtherVT; the node carries a chain.
+>;
+
//===----------------------------------------------------------------------===//
// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
// to be glued to the memory instructions.
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 9ea7d822e07..2a40581fe8f 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -2197,6 +2197,22 @@ def : Pat <
(S_BARRIER)
>;
+/*
+def : Pat <
+ (SIbr_uniform (i1 (setcc i32:$src0, i32:$src1, SETNE)), bb:$bb),
+ (S_CBRANCH_SCC0 $bb, (S_CMP_EQ_U32 $src0, $src1))
+>;
+*/
+
+/*
+
+def : Pat <
+ (SIbr_uniform (i1 NodeFrag), bb:$bb),
+ (S_CBRANCH_SCC0 $bb)
+>;
+
+*/
+
//===----------------------------------------------------------------------===//
// VOP1 Patterns
//===----------------------------------------------------------------------===//