diff options
author | Hans de Goede <hdegoede@redhat.com> | 2016-01-26 12:22:29 +0100 |
---|---|---|
committer | Hans de Goede <hdegoede@redhat.com> | 2016-06-28 11:53:40 +0200 |
commit | 7cc2bc083fdd5d1268b2c0b17ece3b31acfac1d1 (patch) | |
tree | e33adb822334172c6a757f7a2ab7839cf76a3d14 | |
parent | 0f498eab65a9e3cfb4c0f26bd3b5f20bb243eacf (diff) |
TGSI: initial Intrinsic support
Add Intrinsic support for get_local_id() and friends.
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
-rw-r--r-- | include/llvm/IR/Intrinsics.td | 1 | ||||
-rw-r--r-- | include/llvm/IR/IntrinsicsTGSI.td | 35 | ||||
-rw-r--r-- | lib/Target/TGSI/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSIISelLowering.cpp | 44 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSIISelLowering.h | 8 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSIRegisterInfo.cpp | 25 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSIRegisterInfo.h | 13 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSIRegisterInfo.td | 30 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSITargetMachine.cpp | 7 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSITargetMachine.h | 3 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSITargetTransformInfo.cpp | 68 | ||||
-rw-r--r-- | lib/Target/TGSI/TGSITargetTransformInfo.h | 58 |
12 files changed, 285 insertions, 8 deletions
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index b50f07f3263..40686728f57 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -691,3 +691,4 @@ include "llvm/IR/IntrinsicsAMDGPU.td" include "llvm/IR/IntrinsicsBPF.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" +include "llvm/IR/IntrinsicsTGSI.td" diff --git a/include/llvm/IR/IntrinsicsTGSI.td b/include/llvm/IR/IntrinsicsTGSI.td new file mode 100644 index 00000000000..1bb6c9b6309 --- /dev/null +++ b/include/llvm/IR/IntrinsicsTGSI.td @@ -0,0 +1,35 @@ +//===--- IntrinsicsTGSI.td - Defines TGSI intrinsics -----*- tablegen -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the TGSI-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "tgsi" in { + +class TGSIReadPreloadRegisterIntrinsic<string name> + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<name>; + +multiclass TGSIReadPreloadRegisterIntrinsic_xyz<string prefix> { + def _x : TGSIReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>; + def _y : TGSIReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>; + def _z : TGSIReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>; +} + +defm int_tgsi_read_blockid : TGSIReadPreloadRegisterIntrinsic_xyz < + "__builtin_tgsi_read_blockid">; +defm int_tgsi_read_blocksize : TGSIReadPreloadRegisterIntrinsic_xyz < + "__builtin_tgsi_read_blocksize">; +defm int_tgsi_read_gridsize : TGSIReadPreloadRegisterIntrinsic_xyz < + "__builtin_tgsi_read_gridsize">; +defm int_tgsi_read_threadid : TGSIReadPreloadRegisterIntrinsic_xyz < + "__builtin_tgsi_read_threadid">; + +} // End TargetPrefix = "tgsi" diff --git a/lib/Target/TGSI/CMakeLists.txt b/lib/Target/TGSI/CMakeLists.txt index f4deabeaf05..9f92bcdc5c7 100644 --- a/lib/Target/TGSI/CMakeLists.txt +++ b/lib/Target/TGSI/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(TGSICodeGen TGSIFrameLowering.cpp TGSISubtarget.cpp TGSITargetMachine.cpp + TGSITargetTransformInfo.cpp TGSIRegisterInfo.cpp TGSIPreEmitImmPass.cpp ) diff --git a/lib/Target/TGSI/TGSIISelLowering.cpp b/lib/Target/TGSI/TGSIISelLowering.cpp index 4f56a682188..28d448528e8 100644 --- a/lib/Target/TGSI/TGSIISelLowering.cpp +++ b/lib/Target/TGSI/TGSIISelLowering.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "TGSIISelLowering.h" +#include "TGSIRegisterInfo.h" #include "TGSITargetMachine.h" #include "TGSITargetObjectFile.h" #include "llvm/IR/DerivedTypes.h" @@ -127,9 +128,6 @@ TGSITargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.getArgs(); - Type *retTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; @@ -231,6 +229,8 @@ TGSITargetLowering::TGSITargetLowering(TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Expand); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); } const char *TGSITargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -247,11 +247,45 @@ const char *TGSITargetLowering::getTargetNodeName(unsigned Opcode) const { } SDValue TGSITargetLowering:: +CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned VirtualRegister; + if (!MRI.isLiveIn(Reg)) { + VirtualRegister = MRI.createVirtualRegister(RC); + MRI.addLiveIn(Reg, VirtualRegister); + } else { + VirtualRegister = MRI.getLiveInVirtReg(Reg); + } + return DAG.getRegister(VirtualRegister, VT); +} + +SDValue TGSITargetLowering:: LowerOperation(SDValue op, SelectionDAG &dag) const { switch (op.getOpcode()) { + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID = + cast<ConstantSDNode>(op.getOperand(0))->getZExtValue(); + EVT VT = op.getValueType(); + switch(IntrinsicID) { + case Intrinsic::tgsi_read_threadid_x: + return CreateLiveInRegister(dag, &TGSI::IRegsRegClass, + TGSI_THREAD_ID(x), VT); + case Intrinsic::tgsi_read_threadid_y: + return CreateLiveInRegister(dag, &TGSI::IRegsRegClass, + TGSI_THREAD_ID(y), VT); + case Intrinsic::tgsi_read_threadid_z: + return CreateLiveInRegister(dag, &TGSI::IRegsRegClass, + TGSI_THREAD_ID(z), VT); default: - llvm_unreachable("Should not custom lower this!"); - }; + llvm_unreachable("Unknown TGSI Intrinsic"); + } + break; /* Never reached */ + } + default: + llvm_unreachable("Should not custom lower this!"); + } } diff --git a/lib/Target/TGSI/TGSIISelLowering.h b/lib/Target/TGSI/TGSIISelLowering.h index 1aa96ebc091..cd6e87c0dd2 100644 --- a/lib/Target/TGSI/TGSIISelLowering.h +++ b/lib/Target/TGSI/TGSIISelLowering.h @@ -57,6 +57,14 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + private: + /// \brief Helper function that adds Reg to the LiveIn list of the DAG's + /// MachineFunction. + /// + /// \returns a RegisterSDNode representing Reg. + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const; }; } diff --git a/lib/Target/TGSI/TGSIRegisterInfo.cpp b/lib/Target/TGSI/TGSIRegisterInfo.cpp index f3be9ed636d..f849d490469 100644 --- a/lib/Target/TGSI/TGSIRegisterInfo.cpp +++ b/lib/Target/TGSI/TGSIRegisterInfo.cpp @@ -56,11 +56,36 @@ BitVector TGSIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { rsv.set(TGSI::TEMP0w); rsv.set(TGSI::TEMP0); + rsv.set(TGSI::SV0x); + rsv.set(TGSI::SV0y); + rsv.set(TGSI::SV0z); + rsv.set(TGSI::SV0w); + rsv.set(TGSI::SV0); + + rsv.set(TGSI::SV1x); + rsv.set(TGSI::SV1y); + rsv.set(TGSI::SV1z); + rsv.set(TGSI::SV1w); + rsv.set(TGSI::SV1); + + rsv.set(TGSI::SV2x); + rsv.set(TGSI::SV2y); + rsv.set(TGSI::SV2z); + rsv.set(TGSI::SV2w); + rsv.set(TGSI::SV2); + + rsv.set(TGSI::SV3x); + rsv.set(TGSI::SV3y); + rsv.set(TGSI::SV3z); + rsv.set(TGSI::SV3w); + rsv.set(TGSI::SV3); + rsv.set(TGSI::ADDR0x); rsv.set(TGSI::ADDR0y); rsv.set(TGSI::ADDR0z); rsv.set(TGSI::ADDR0w); rsv.set(TGSI::ADDR0); + return rsv; } diff --git a/lib/Target/TGSI/TGSIRegisterInfo.h b/lib/Target/TGSI/TGSIRegisterInfo.h index 1840f08e765..f409dbc962c 100644 --- a/lib/Target/TGSI/TGSIRegisterInfo.h +++ b/lib/Target/TGSI/TGSIRegisterInfo.h @@ -17,6 +17,19 @@ #define GET_REGINFO_HEADER #include "TGSIGenRegisterInfo.inc" +// These need to be kept in sync, so we keep them together here + +#define TGSI_SV_REGISTER_DECL \ + "DCL SV[0], BLOCK_ID[0]\n" \ + "DCL SV[1], BLOCK_SIZE[0]\n" \ + "DCL SV[2], GRID_SIZE[0]\n" \ + "DCL SV[3], THREAD_ID[0]\n" + +#define TGSI_BLOCK_ID(suffix) TGSI::SV0 ## suffix +#define TGSI_BLOCK_SIZE(suffix) TGSI::SV1 ## suffix +#define TGSI_GRID_SIZE(suffix) TGSI::SV2 ## suffix +#define TGSI_THREAD_ID(suffix) TGSI::SV3 ## suffix + namespace llvm { class TGSISubtarget; class TargetInstrInfo; diff --git a/lib/Target/TGSI/TGSIRegisterInfo.td b/lib/Target/TGSI/TGSIRegisterInfo.td index 7ab388033ff..8f19945079a 100644 --- a/lib/Target/TGSI/TGSIRegisterInfo.td +++ b/lib/Target/TGSI/TGSIRegisterInfo.td @@ -192,6 +192,28 @@ def TEMP31z : TGSIReg<"TEMP[31].z", 31, 4>; def TEMP31w : TGSIReg<"TEMP[31].w", 31, 8>; def TEMP31 : TGSIVReg<"TEMP[31]", 31, [TEMP31x, TEMP31y, TEMP31z, TEMP31w]>; +// SV registers +def SV0x : TGSIReg<"SV[0].x", 0, 1>; +def SV0y : TGSIReg<"SV[0].y", 0, 2>; +def SV0z : TGSIReg<"SV[0].z", 0, 4>; +def SV0w : TGSIReg<"SV[0].w", 0, 8>; +def SV0 : TGSIVReg<"SV[0]", 0, [SV0x, SV0y, SV0z, SV0w]>; +def SV1x : TGSIReg<"SV[1].x", 1, 1>; +def SV1y : TGSIReg<"SV[1].y", 1, 2>; +def SV1z : TGSIReg<"SV[1].z", 1, 4>; +def SV1w : TGSIReg<"SV[1].w", 1, 8>; +def SV1 : TGSIVReg<"SV[1]", 1, [SV1x, SV1y, SV1z, SV1w]>; +def SV2x : TGSIReg<"SV[2].x", 2, 1>; +def SV2y : TGSIReg<"SV[2].y", 2, 2>; +def SV2z : TGSIReg<"SV[2].z", 2, 4>; +def SV2w : TGSIReg<"SV[2].w", 2, 8>; +def SV2 : TGSIVReg<"SV[2]", 2, [SV2x, SV2y, SV2z, SV2w]>; +def SV3x : TGSIReg<"SV[3].x", 3, 1>; +def SV3y : TGSIReg<"SV[3].y", 3, 2>; +def SV3z : TGSIReg<"SV[3].z", 3, 4>; +def SV3w : TGSIReg<"SV[3].w", 3, 8>; +def SV3 : TGSIVReg<"SV[3]", 3, [SV3x, SV3y, SV3z, SV3w]>; + // Address register def ADDR0x : TGSIReg<"ADDR0x", 0, 1>; def ADDR0y : TGSIReg<"ADDR0y", 0, 2>; @@ -203,8 +225,11 @@ def ADDR0 : TGSIVReg<"ADDR0", 0, [ADDR0x, ADDR0y, ADDR0z, ADDR0w]>; // def IRegs : RegisterClass<"TGSI", [i32], 32, (add (sequence "TEMP%ux", 0, 31), (sequence "TEMP%uy", 0, 31), - (sequence "TEMP%uz", 0, 31), (sequence "TEMP%uw", 0, 31))>; -def IVRegs : RegisterClass<"TGSI", [v4i32], 128, (sequence "TEMP%u", 0, 31)>; + (sequence "TEMP%uz", 0, 31), (sequence "TEMP%uw", 0, 31), + (sequence "SV%ux", 0, 3), (sequence "SV%uy", 0, 3), + (sequence "SV%uz", 0, 3), (sequence "SV%uw", 0, 3))>; +def IVRegs : RegisterClass<"TGSI", [v4i32], 128, + (add (sequence "TEMP%u", 0, 31), (sequence "SV%u", 0, 3))>; def FRegs : RegisterClass<"TGSI", [f32], 32, (add (sequence "TEMP%ux", 0, 31), (sequence "TEMP%uy", 0, 31), (sequence "TEMP%uz", 0, 31), (sequence "TEMP%uw", 0, 31))>; @@ -213,4 +238,3 @@ def ARegs : RegisterClass<"TGSI", [i32], 32, (add (sequence "ADDR%ux", 0, 0), (sequence "ADDR%uy", 0, 0), (sequence "ADDR%uz", 0, 0), (sequence "ADDR%uw", 0, 0))>; def AVRegs : RegisterClass<"TGSI", [i32], 128, (sequence "ADDR%u", 0, 0)>; - diff --git a/lib/Target/TGSI/TGSITargetMachine.cpp b/lib/Target/TGSI/TGSITargetMachine.cpp index e25ad14412f..3279cdb0156 100644 --- a/lib/Target/TGSI/TGSITargetMachine.cpp +++ b/lib/Target/TGSI/TGSITargetMachine.cpp @@ -14,6 +14,7 @@ #include "TGSI.h" #include "TGSITargetMachine.h" #include "TGSITargetObjectFile.h" +#include "TGSITargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -69,3 +70,9 @@ TGSITargetMachine::TGSITargetMachine(const Target &T, const Triple &TT, TargetPassConfig *TGSITargetMachine::createPassConfig(PassManagerBase &PM) { return new TGSIPassConfig(this, PM); } + +TargetIRAnalysis TGSITargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(TGSITTIImpl(this, F)); + }); +} diff --git a/lib/Target/TGSI/TGSITargetMachine.h b/lib/Target/TGSI/TGSITargetMachine.h index e9ab038f82c..f12aeb00013 100644 --- a/lib/Target/TGSI/TGSITargetMachine.h +++ b/lib/Target/TGSI/TGSITargetMachine.h @@ -15,6 +15,7 @@ #define TGSI_TARGET_MACHINE_H #include "TGSISubtarget.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -38,6 +39,8 @@ namespace llvm { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + TargetIRAnalysis getTargetIRAnalysis() override; }; } diff --git a/lib/Target/TGSI/TGSITargetTransformInfo.cpp b/lib/Target/TGSI/TGSITargetTransformInfo.cpp new file mode 100644 index 00000000000..7c4d0d6883f --- /dev/null +++ b/lib/Target/TGSI/TGSITargetTransformInfo.cpp @@ -0,0 +1,68 @@ +//===---- TGSITargetTransformInfo.cpp - TGSI specific TTI -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "TGSITargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "TGSItti" + +// Based in the NVPTX isSourceOfDivergence() implementation +bool TGSITTIImpl::isSourceOfDivergence(const Value *V) { + // Without inter-procedural analysis, we conservatively assume that arguments + // to __device__ functions are divergent. + if (const Argument *Arg = dyn_cast<Argument>(V)) + return true; // FIXME: !isKernelFunction(*Arg->getParent()); + + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Without pointer analysis, we conservatively assume values loaded from + // generic or local address space are divergent. + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + unsigned AS = LI->getPointerAddressSpace(); + return AS == tgsi::PRIVATE || AS == tgsi::LOCAL; + } + // Atomic instructions may cause divergence. Atomic instructions are + // executed sequentially across all threads in a warp. Therefore, an earlier + // executed thread may see different memory inputs than a later executed + // thread. For example, suppose *a = 0 initially. + // + // atom.global.add.s32 d, [a], 1 + // + // returns 0 for the first thread that enters the critical region, and 1 for + // the second thread. + if (I->isAtomic()) + return true; + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::tgsi_read_blockid_x: + case Intrinsic::tgsi_read_blockid_y: + case Intrinsic::tgsi_read_blockid_z: + case Intrinsic::tgsi_read_threadid_x: + case Intrinsic::tgsi_read_threadid_y: + case Intrinsic::tgsi_read_threadid_z: + return true; + default: + return false; + } + } + // Conservatively consider the return value of function calls as divergent. + // We could analyze callees with bodies more precisely using + // inter-procedural analysis. + if (isa<CallInst>(V) || isa<InvokeInst>(V)) + return true; + } + + return false; +} diff --git a/lib/Target/TGSI/TGSITargetTransformInfo.h b/lib/Target/TGSI/TGSITargetTransformInfo.h new file mode 100644 index 00000000000..d68c531c48e --- /dev/null +++ b/lib/Target/TGSI/TGSITargetTransformInfo.h @@ -0,0 +1,58 @@ +//===---- TGSITargetTransformInfo.h - TGSI specific TTI ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file a TargetTransformInfo::Concept conforming object specific to the +/// TGSI target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_TGSI_TGSITARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_TGSI_TGSITARGETTRANSFORMINFO_H + +#include "TGSI.h" +#include "TGSITargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class TGSITTIImpl : public BasicTTIImplBase<TGSITTIImpl> { + typedef BasicTTIImplBase<TGSITTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const TGSISubtarget *ST; + const TGSITargetLowering *TLI; + + const TGSISubtarget *getST() const { return ST; }; + const TGSITargetLowering *getTLI() const { return TLI; }; + +public: + explicit TGSITTIImpl(const TGSITargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + TGSITTIImpl(const TGSITTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + TGSITTIImpl(TGSITTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + bool hasBranchDivergence() { return true; } + + bool isSourceOfDivergence(const Value *V); +}; + +} // end namespace llvm + +#endif |