diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2015-10-08 15:26:25 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-10-15 15:39:56 +0000 |
commit | 3d8a9964a108e04a75e00cbd5b1f410559c01d10 (patch) | |
tree | 03caacce749dc948381ef229b38ea35f44641db3 | |
parent | 869ed89757ab6dbeadd9508b284f98ca0ec82f70 (diff) |
XXX: Uniform branching
-rw-r--r-- | lib/Target/AMDGPU/AMDGPU.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 17 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUIntrinsics.td | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 15 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 29 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIInstrInfo.td | 5 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIInstructions.td | 16 |
11 files changed, 98 insertions, 2 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 298b6f19451..b97ebcab462 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -66,6 +66,7 @@ Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); ModulePass *createAMDGPUAlwaysInlinePass(); ModulePass *createAMDGPUOpenCLImageTypeLoweringPass(); +FunctionPass *createAMDGPUAnnotateUniformBranches(); void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&); extern char &SIFixControlFlowLiveIntervalsID; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 3ad900a6ef1..bb6df3e9416 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -540,6 +540,23 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { case AMDGPUISD::DIV_SCALE: { return SelectDIV_SCALE(N); } + case AMDGPUISD::BRCOND_UNIFORM: { + SDValue Cond = N->getOperand(0); + + // Let TableGen handle this case: +// if (!Cond.getOpcode() == ISD::SETCC) +// break; + + const SDValue Ops[] = { + N->getOperand(1), + N->getOperand(2) + }; + + return CurDAG->getMachineNode(AMDGPU::S_CBRANCH_SCC1, SDLoc(N), + MVT::Other, Ops); + break; + + } case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a8af7ec75f0..0b59824ab36 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2749,6 +2749,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE3) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) + NODE_NAME_CASE(BRCOND_UNIFORM) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(INTERP_MOV) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h 
b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 1e060c4d708..ddc1051846b 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -299,6 +299,7 @@ enum NodeType : unsigned { INTERP_MOV, INTERP_P1, INTERP_P2, + BRCOND_UNIFORM, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td index ab489cd2a4a..52e65e6fba7 100644 --- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -71,6 +71,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; + def int_AMDGPU_uniform_cond : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrNoMem]>; } // Legacy names for compatibility. diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 5a80de900c4..2b7fc3c73ff 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -38,6 +38,12 @@ using namespace llvm; +static cl::opt<bool> +EnableAssumeScalarBr("amdgpu-assume-scalar-br", + cl::desc("Disable structurzier and run uniform analysis " + "for branches"), + cl::init(false), cl::Hidden); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); @@ -267,10 +273,15 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); - addPass(createStructurizeCFGPass()); + if (!EnableAssumeScalarBr) + addPass(createStructurizeCFGPass()); addPass(createSinkingPass()); addPass(createSITypeRewriter()); - addPass(createSIAnnotateControlFlowPass()); + + if (!EnableAssumeScalarBr) + addPass(createSIAnnotateControlFlowPass()); + 
else + addPass(createAMDGPUAnnotateUniformBranches()); return false; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 6dacc742b12..9334a275656 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -80,3 +80,32 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Semi-arbitrary large amount. return 64; } + +/// +/// \returns true if the result of the value could potentially be +/// different across threads. +bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { + + // Arguments to Kernels are not a source of divergence. + if (isa<Argument>(V)) + return false; + + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Assume all non-constant loads are a source of divergence. + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + unsigned AS = LI->getPointerAddressSpace(); + return AS != AMDGPUAS::CONSTANT_ADDRESS; + } + + if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch(Intrinsic->getIntrinsicID()) { + default: return false; + } + } + + // Assume all function calls are a source of divergence. 
+ if (isa<CallInst>(I)) + return true; + } + return false; +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index dee0a69d1e6..aff52b33ecc 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -60,6 +60,7 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(unsigned VF); + bool isSourceOfDivergence(const Value *V) const; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 804b5e6075e..6fd38ca9022 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -892,6 +892,19 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, Target = BR->getOperand(1); } + #if 1 + if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + unsigned IntrinsicID = cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue(); + + if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_uniform_cond) { + SDValue Cond = Intr->getOperand(1); + return DAG.getNode(AMDGPUISD::BRCOND_UNIFORM, DL, MVT::Other, + BRCOND.getOperand(0), Cond, BRCOND.getOperand(2)); + } + } + + #endif + assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); // Build the result and diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 61e9022c47b..4f9206e5c5f 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -136,6 +136,11 @@ def SIconstdata_ptr : SDNode< "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]> >; +def SIbr_uniform : SDNode < + "AMDGPUISD::BRCOND_UNIFORM", SDTypeProfile <0, 2, + [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, [SDNPHasChain] +>; + //===----------------------------------------------------------------------===// // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1 // to be glued to the memory instructions. 
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 9ea7d822e07..2a40581fe8f 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -2197,6 +2197,22 @@ def : Pat < (S_BARRIER) >; +/* +def : Pat < + (SIbr_uniform (i1 (setcc i32:$src0, i32:$src1, SETNE)), bb:$bb), + (S_CBRANCH_SCC0 $bb, (S_CMP_EQ_U32 $src0, $src1)) +>; +*/ + +/* + +def : Pat < + (SIbr_uniform (i1 NodeFrag), bb:$bb), + (S_CBRANCH_SCC0 $bb) +>; + +*/ + //===----------------------------------------------------------------------===// // VOP1 Patterns //===----------------------------------------------------------------------===// |