diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2018-02-09 14:25:44 +0100 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2018-02-12 14:59:14 +0100 |
commit | fd0f967657d14d0a512f224e7bc5449570030cb7 (patch) | |
tree | ee9160ad2bdd6e674d22d261d21eaec9439d7e2f | |
parent | 16e83679a97746f2492a07c81faeecb5587a32ed (diff) |
WIP new-style image intrinsics and basic tests [mimg]
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
-rw-r--r-- | include/llvm/IR/Intrinsics.td | 23 | ||||
-rw-r--r-- | include/llvm/IR/IntrinsicsAMDGPU.td | 215 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPU.td | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 7 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.h | 11 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSearchableTables.td | 29 | ||||
-rw-r--r-- | lib/Target/AMDGPU/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/MIMGInstructions.td | 80 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 251 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll | 24 |
10 files changed, 413 insertions, 229 deletions
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index c8f5c64fadc..77c7d5f2d8c 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -117,6 +117,7 @@ def IntrHasSideEffects : IntrinsicProperty; class LLVMType<ValueType vt> { ValueType VT = vt; + int isAny = 0; } class LLVMQualPointerType<LLVMType elty, int addrspace> @@ -131,6 +132,8 @@ class LLVMPointerType<LLVMType elty> class LLVMAnyPointerType<LLVMType elty> : LLVMType<iPTRAny>{ LLVMType ElTy = elty; + + let isAny = 1; } // Match the type of another intrinsic parameter. Number is an index into the @@ -163,10 +166,12 @@ class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>; class LLVMHalfElementsVectorType<int num> : LLVMMatchType<num>; def llvm_void_ty : LLVMType<isVoid>; -def llvm_any_ty : LLVMType<Any>; -def llvm_anyint_ty : LLVMType<iAny>; -def llvm_anyfloat_ty : LLVMType<fAny>; -def llvm_anyvector_ty : LLVMType<vAny>; +let isAny = 1 in { + def llvm_any_ty : LLVMType<Any>; + def llvm_anyint_ty : LLVMType<iAny>; + def llvm_anyfloat_ty : LLVMType<fAny>; + def llvm_anyvector_ty : LLVMType<vAny>; +} def llvm_i1_ty : LLVMType<i1>; def llvm_i8_ty : LLVMType<i8>; def llvm_i16_ty : LLVMType<i16>; @@ -249,6 +254,16 @@ def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here +// Add 'shift' to the reference number of all LLVMMatchTypes in 'ty' +class typelistshiftmatches<list<LLVMType> types, int shift> { + list<LLVMType> Types = types; + int Shift = shift; + list<LLVMType> ret = + !foreach(type, types, + !if(!isa<LLVMMatchType>(type), + LLVMMatchType<!add(!cast<LLVMMatchType>(type).Number, shift)>, + type)); +} //===----------------------------------------------------------------------===// // Intrinsic Definitions. 
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 454b62bdfb6..3744e173ba2 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -17,6 +17,13 @@ class AMDGPUReadPreloadRegisterIntrinsic class AMDGPUReadPreloadRegisterIntrinsicNamed<string name> : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>; +// Used to tag image and resource intrinsics with information used to generate +// mem operands, +class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> { + int RsrcArg = rsrcarg; + bit IsImage = isimage; +} + let TargetPrefix = "r600" in { multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz { @@ -310,6 +317,8 @@ def int_amdgcn_atomic_fadd : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fadd">; def int_amdgcn_atomic_fmin : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmin">; def int_amdgcn_atomic_fmax : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmax">; +defset list<AMDGPURsrcIntrinsic> AMDGPUImageIntrinsics = { + class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyint_ty, // vaddr(VGPR) @@ -320,7 +329,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic < llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) !if(NoMem, [IntrNoMem], [IntrReadMem]), "", - !if(NoMem, [], [SDNPMemOperand])>; + !if(NoMem, [], [SDNPMemOperand])>, + AMDGPURsrcIntrinsic<2, 1>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; @@ -336,7 +346,8 @@ class AMDGPUImageStore : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - [IntrWriteMem], "", [SDNPMemOperand]>; + [IntrWriteMem], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<3, 1>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; @@ -353,7 +364,8 @@ class AMDGPUImageSample<bit NoMem = 0> : Intrinsic < llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) !if(NoMem, [IntrNoMem], 
[IntrReadMem]), "", - !if(NoMem, [], [SDNPMemOperand])>; + !if(NoMem, [], [SDNPMemOperand])>, + AMDGPURsrcIntrinsic<1, 1>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -445,7 +457,8 @@ class AMDGPUImageAtomic : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>; + [], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<2, 1>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; @@ -468,7 +481,179 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>; + [], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<3, 1>; + +} // defset AMDGPUImageIntrinsics + +} // TargetPrefix = "amdgcn" + +// New-style image intrinsics + +// TODO-MIMG: amdgcn.image.load{.mip} +// TODO-MIMG: amdgcn.image.getresinfo +// TODO-MIMG: amdgcn.image.store{.mip} + +class AMDGPUArg<LLVMType ty, string name> { + LLVMType Type = ty; + string Name = name; +} + +class makeArgList<list<string> names, LLVMType basety> { + list<AMDGPUArg> ret = + !listconcat([AMDGPUArg<basety, names[0]>], + !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>)); +} + +class arglistmatchshift<list<AMDGPUArg> arglist, int shift> { + list<AMDGPUArg> ret = + !foreach(arg, arglist, + !if(!isa<LLVMMatchType>(arg.Type), + AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>, + arg.Name>, + arg)); +} + +class arglistconcat<list<list<AMDGPUArg>> arglists> { + list<AMDGPUArg> ret = + !foldl([]<AMDGPUArg>, arglists, lhs, rhs, + !listconcat( + lhs, + arglistmatchshift<rhs, !foldl(0, lhs, a, b, !add(a, b.Type.isAny))>.ret)); +} + +class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> { + string Name = name; + bit DA = 0; // DA bit in MIMG encoding + list<AMDGPUArg> CoordSliceArgs = + makeArgList<!listconcat(coord_names, slice_names), 
llvm_anyfloat_ty>.ret; + list<AMDGPUArg> GradientArgs = + makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"), + !foreach(name, coord_names, "d" # name # "dv")), + llvm_anyfloat_ty>.ret; +} + +def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>; +def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>; +def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>; +def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>; +let DA = 1 in { + def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>; + def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>; +} +def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>; +let DA = 1 in { + def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>; +} + +class AMDGPUDimProfile<string opmod, + AMDGPUDimProps dim, + list<AMDGPUArg> extra_addr, // {offset} {bias} {z-compare} + bit Gradients, + string LodOrClamp> { + AMDGPUDimProps Dim = dim; + string OpMod = opmod; + list<AMDGPUArg> AddrArgs = + arglistconcat<[extra_addr, + !if(Gradients, dim.GradientArgs, []), + !listconcat(dim.CoordSliceArgs, + !if(!eq(LodOrClamp, ""), + []<AMDGPUArg>, + [AMDGPUArg<LLVMMatchType<0>, LodOrClamp>]))]>.ret; + list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type); + list<AMDGPUArg> AddrFloatArgs = + !foreach(arg, AddrArgs, + AMDGPUArg<!if(!or(!isa<LLVMMatchType>(arg.Type), + !eq(arg.Type.VT.Value, llvm_anyfloat_ty.VT.Value)), + llvm_float_ty, arg.Type), arg.Name>); + list<AMDGPUArg> AddrA16Args = + !foreach(arg, AddrArgs, + AMDGPUArg<!if(!or(!isa<LLVMMatchType>(arg.Type), + !eq(arg.Type.VT.Value, llvm_anyfloat_ty.VT.Value)), + llvm_half_ty, arg.Type), arg.Name>); +} + +class AMDGPUImageDimSample<AMDGPUDimProfile P_, + bit NoMem = 0> : Intrinsic < + [llvm_anyfloat_ty], // vdata(VGPR) + !listconcat( + typelistshiftmatches<P_.AddrTypes, 1>.ret, + [llvm_v8i32_ty, // rsrc(SGPR) + llvm_v4i32_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + 
llvm_i1_ty, // unorm(imm) + llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe) + llvm_i32_ty]), // cachepolicy(imm: bit 0 = glc, bit 1 = slc) + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>, + AMDGPURsrcIntrinsic<!size(P_.AddrTypes), 1> { + AMDGPUDimProfile P = P_; + + let TargetPrefix = "amdgcn"; +} + +multiclass AMDGPUImageDimSampleDims<string opmod, + list<AMDGPUArg> xaddr, + bit Gradients = 0, + string LodOrClamp = "", + bit NoMem = 0> { + def NAME#_1d : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDim1D, xaddr, Gradients, LodOrClamp>>; + def NAME#_2d : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDim2D, xaddr, Gradients, LodOrClamp>>; + def NAME#_3d : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDim3D, xaddr, Gradients, LodOrClamp>>; + def NAME#_cube : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDimCube, xaddr, Gradients, LodOrClamp>>; + def NAME#_1darray : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDim1DArray, xaddr, Gradients, LodOrClamp>>; + def NAME#_2darray : AMDGPUImageDimSample< + AMDGPUDimProfile<opmod, AMDGPUDim2DArray, xaddr, Gradients, LodOrClamp>>; +} + +multiclass AMDGPUImageDimSampleOffset<string opmod, + list<AMDGPUArg> extra_addr, + bit Gradients = 0, + string LodOrClamp = ""> { + defm NAME: AMDGPUImageDimSampleDims<opmod, extra_addr, Gradients, LodOrClamp>; + defm NAME#_o : AMDGPUImageDimSampleDims< + opmod#_O, + !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr), + Gradients, LodOrClamp>; +} + +multiclass AMDGPUImageDimSampleClamp<string opmod, + list<AMDGPUArg> extra_addr, + bit Gradients = 0> { + defm NAME: AMDGPUImageDimSampleOffset<opmod, extra_addr, Gradients>; + defm NAME#_cl : AMDGPUImageDimSampleOffset<opmod#_CL, extra_addr, Gradients, "clamp">; +} + +multiclass AMDGPUImageDimSampleLodBase<string opmod, + list<AMDGPUArg> zcompare_addr = []> { + defm NAME: AMDGPUImageDimSampleClamp<opmod, zcompare_addr>; + defm NAME#_b : + 
AMDGPUImageDimSampleClamp<opmod#_B, + !listconcat([AMDGPUArg<llvm_anyfloat_ty, "bias">], zcompare_addr)>; + defm NAME#_cd : AMDGPUImageDimSampleClamp<opmod#_CD, zcompare_addr, 1>; + defm NAME#_d : AMDGPUImageDimSampleClamp<opmod#_D, zcompare_addr, 1>; + defm NAME#_l : AMDGPUImageDimSampleOffset<opmod#_L, zcompare_addr, 0, "lod">; + defm NAME#_lz : AMDGPUImageDimSampleOffset<opmod#_LZ, zcompare_addr, 0>; +} + +defset list<AMDGPUImageDimSample> AMDGPUImageDimSampleIntrinsics = { + defm int_amdgcn_image_sample : AMDGPUImageDimSampleLodBase<"", []>; + defm int_amdgcn_image_sample_c : + AMDGPUImageDimSampleLodBase<"_C", [AMDGPUArg<llvm_float_ty, "zcompare">]>; +} + +// TODO-MIMG: amdgcn.image.gather4.* +// TODO-MIMG: amdgcn.image.getlod +// TODO-MIMG: amdgcn.image.atomic.* + +let TargetPrefix = "amdgcn" in { + +defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = { class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], @@ -477,7 +662,8 @@ class AMDGPUBufferLoad : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem], "", [SDNPMemOperand]>; + [IntrReadMem], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; @@ -489,7 +675,8 @@ class AMDGPUBufferStore : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem], "", [SDNPMemOperand]>; + [IntrWriteMem], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -504,7 +691,8 @@ def int_amdgcn_tbuffer_load : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem], "", [SDNPMemOperand]>; + [IntrReadMem], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<0>; def int_amdgcn_tbuffer_store : Intrinsic < [], @@ -518,7 +706,8 @@ def int_amdgcn_tbuffer_store : Intrinsic < 
llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem], "", [SDNPMemOperand]>; + [IntrWriteMem], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<1>; class AMDGPUBufferAtomic : Intrinsic < [llvm_i32_ty], @@ -527,7 +716,8 @@ class AMDGPUBufferAtomic : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>; + [], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic; @@ -546,7 +736,10 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>; + [], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<2>; + +} // defset AMDGPUBufferIntrinsics // Uses that do not set the done bit should set IntrWriteMem on the // call site. diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 6355c4fa6eb..f01d9ab5bc4 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -798,3 +798,4 @@ include "AMDGPURegisterInfo.td" include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" include "AMDGPUCallingConv.td" +include "AMDGPUSearchableTables.td" diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 9b9ec063864..248632efeb9 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -25,6 +25,13 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenInstrInfo.inc" +namespace llvm { +namespace AMDGPU { +#define GET_RSRCINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" +} +} + // Pin the vtable to this file. 
void AMDGPUInstrInfo::anchor() {} diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index a9fcd483463..7488bbcb76f 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -51,6 +51,17 @@ public: /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; }; + +namespace AMDGPU { + +struct RsrcIntrinsic { + unsigned Intr; + uint8_t RsrcArg; + bool IsImage; +}; +const RsrcIntrinsic *lookupRsrcIntrinsicByIntr(unsigned Intr); + +} // end AMDGPU namespace } // End llvm namespace #endif diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td new file mode 100644 index 00000000000..d61bdbeb3bd --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -0,0 +1,29 @@ +//===-- AMDGPUSearchableTables.td - ------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "llvm/TableGen/SearchableTable.td" + +//===----------------------------------------------------------------------===// +// Resource intrinsics table. 
+//===----------------------------------------------------------------------===// + +class RsrcIntrinsic<AMDGPURsrcIntrinsic intr> : SearchableTable { + let SearchableFields = ["Intr"]; + let EnumNameField = ?; + + Intrinsic Intr = !cast<Intrinsic>(intr); + bits<8> RsrcArg = intr.RsrcArg; + bit IsImage = intr.IsImage; +} + +foreach intr = !listconcat(AMDGPUBufferIntrinsics, + AMDGPUImageIntrinsics, + AMDGPUImageDimSampleIntrinsics) in { + def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>; +} diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 1a14db4d62c..748c1391650 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank) +tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables) add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 9fd0abd9a3d..e2935318bb6 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -429,6 +429,86 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; } +/********** ============================== **********/ +/********** Dimension-aware image patterns **********/ +/********** ============================== **********/ + +class getDwordsType<int dwords> { + string suffix = !if(!lt(dwords, 1), ?, + !if(!eq(dwords, 1), "_V1", + !if(!eq(dwords, 2), "_V2", + !if(!le(dwords, 4), "_V4", + !if(!le(dwords, 8), "_V8", + !if(!le(dwords, 16), "_V16", ?)))))); + ValueType VT = !if(!lt(dwords, 1), ?, + !if(!eq(dwords, 1), f32, + !if(!eq(dwords, 2), v2f32, 
+ !if(!le(dwords, 4), v4f32, + !if(!le(dwords, 8), v8f32, + !if(!le(dwords, 16), v16f32, ?)))))); + RegisterClass VReg = !if(!lt(dwords, 1), ?, + !if(!eq(dwords, 1), VGPR_32, + !if(!eq(dwords, 2), VReg_64, + !if(!le(dwords, 4), VReg_128, + !if(!le(dwords, 8), VReg_256, + !if(!le(dwords, 16), VReg_512, ?)))))); +} + +class makeRegSequence_Fold<int i, dag d> { + int idx = i; + dag lhs = d; +} + +class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> { + dag ret = + !if(!eq(!size(names), 1), + !dag(COPY, [?]<dag>, [names[0]]), + !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name, + makeRegSequence_Fold< + !add(f.idx, 1), + !con((INSERT_SUBREG f.lhs), + !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)], + [name, ?]))>).lhs); +} + +class ImageSampleDimPattern<AMDGPUImageDimSample I, + string dop, ValueType dty, + string suffix = ""> : GCNPat<(undef), (undef)> { + dag AddrDag = !dag(I, !foreach(arg, I.P.AddrFloatArgs, arg.Type.VT), + !foreach(arg, I.P.AddrFloatArgs, arg.Name)); + getDwordsType AddrDwords = getDwordsType<!size(I.P.AddrFloatArgs)>; + string aop = AddrDwords.suffix; + ValueType aty = AddrDwords.VT; + + let PatternToMatch = + (dty !con(AddrDag, (I v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, + 0, 0))); +// i32:$texfailctrl, i32:$cachepolicy))); + let ResultInstrs = [ + (!cast<Instruction>(!strconcat("IMAGE_SAMPLE", I.P.OpMod, dop, aop, suffix)) + makeRegSequence<AddrDwords.VT, AddrDwords.VReg, + !foreach(arg, I.P.AddrFloatArgs, arg.Name)>.ret, + $rsrc, $sampler, + (as_i32imm $dmask), (as_i1imm $unorm), 0 /*(as_i1imm $glc)*/, 0 /*(as_i1imm $slc)*/, + 0, 0, 0 /*(as_i1imm $lwe)*/, { I.P.Dim.DA }) + ]; +} + +foreach intr = AMDGPUImageDimSampleIntrinsics in { + def intr#_pat1 : ImageSampleDimPattern<intr, "_V1", f32>; + def intr#_pat2 : ImageSampleDimPattern<intr, "_V2", v2f32>; + def intr#_pat3 : ImageSampleDimPattern<intr, "_V4", v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + def intr#_pat4 : 
ImageSampleDimPattern<intr, "_V1", f16, "_D16_gfx80">;
+  } // End HasUnpackedD16VMem.
+
+  let SubtargetPredicate = HasPackedD16VMem in {
+    def intr#_pat5 : ImageSampleDimPattern<intr, "_V1", f16, "_D16">;
+    def intr#_pat6 : ImageSampleDimPattern<intr, "_V1", v2f16, "_D16">;
+  } // End HasPackedD16VMem.
+}
+
 /********** ======================= **********/
 /********** Image sampling patterns **********/
 /********** ======================= **********/
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 7dc9dcf31fc..562ca2003db 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -563,6 +563,54 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &CI,
                                           MachineFunction &MF,
                                           unsigned IntrID) const {
+  if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
+          AMDGPU::lookupRsrcIntrinsicByIntr(IntrID)) {
+    AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
+                                                  (Intrinsic::ID)IntrID);
+    // The table also lists intrinsics declared without memory access
+    // (readnone); those must not be given a memory operand.
+    if (Attr.hasFnAttribute(Attribute::ReadNone))
+      return false;
+
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+    // The table entry records which call argument is the resource descriptor.
+    if (RsrcIntr->IsImage) {
+      Info.ptrVal = MFI->getImagePSV(
+        *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+        CI.getArgOperand(RsrcIntr->RsrcArg));
+      Info.align = 0;
+    } else {
+      Info.ptrVal = MFI->getBufferPSV(
+        *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+        CI.getArgOperand(RsrcIntr->RsrcArg));
+    }
+
+    Info.flags = MachineMemOperand::MODereferenceable;
+    if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
+      // Loads and samples: the memory type is the type of the result.
+      Info.opc = ISD::INTRINSIC_W_CHAIN;
+      Info.memVT = MVT::getVT(CI.getType());
+      Info.flags |= MachineMemOperand::MOLoad;
+    } else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
+      // Stores produce no result; the stored data is the first argument.
+      Info.opc = ISD::INTRINSIC_VOID;
+      Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+      Info.flags |= MachineMemOperand::MOStore;
+    } else {
+      // Atomic
+      Info.opc = ISD::INTRINSIC_W_CHAIN;
+      Info.memVT = MVT::getVT(CI.getType());
+      Info.flags = MachineMemOperand::MOLoad |
+                   MachineMemOperand::MOStore |
+                   MachineMemOperand::MODereferenceable;
+
+      // XXX - Should this be volatile without known ordering?
+      Info.flags |= MachineMemOperand::MOVolatile;
+    }
+    return true;
+  }
+
   switch (IntrID) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
@@ -582,220 +630,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     return true;
   }
 
-  // Image load.
-  case Intrinsic::amdgcn_image_load:
-  case Intrinsic::amdgcn_image_load_mip:
-
-  // Sample.
-  case Intrinsic::amdgcn_image_sample:
-  case Intrinsic::amdgcn_image_sample_cl:
-  case Intrinsic::amdgcn_image_sample_d:
-  case Intrinsic::amdgcn_image_sample_d_cl:
-  case Intrinsic::amdgcn_image_sample_l:
-  case Intrinsic::amdgcn_image_sample_b:
-  case Intrinsic::amdgcn_image_sample_b_cl:
-  case Intrinsic::amdgcn_image_sample_lz:
-  case Intrinsic::amdgcn_image_sample_cd:
-  case Intrinsic::amdgcn_image_sample_cd_cl:
-
-  // Sample with comparison.
-  case Intrinsic::amdgcn_image_sample_c:
-  case Intrinsic::amdgcn_image_sample_c_cl:
-  case Intrinsic::amdgcn_image_sample_c_d:
-  case Intrinsic::amdgcn_image_sample_c_d_cl:
-  case Intrinsic::amdgcn_image_sample_c_l:
-  case Intrinsic::amdgcn_image_sample_c_b:
-  case Intrinsic::amdgcn_image_sample_c_b_cl:
-  case Intrinsic::amdgcn_image_sample_c_lz:
-  case Intrinsic::amdgcn_image_sample_c_cd:
-  case Intrinsic::amdgcn_image_sample_c_cd_cl:
-
-  // Sample with offsets.
-  case Intrinsic::amdgcn_image_sample_o:
-  case Intrinsic::amdgcn_image_sample_cl_o:
-  case Intrinsic::amdgcn_image_sample_d_o:
-  case Intrinsic::amdgcn_image_sample_d_cl_o:
-  case Intrinsic::amdgcn_image_sample_l_o:
-  case Intrinsic::amdgcn_image_sample_b_o:
-  case Intrinsic::amdgcn_image_sample_b_cl_o:
-  case Intrinsic::amdgcn_image_sample_lz_o:
-  case Intrinsic::amdgcn_image_sample_cd_o:
-  case Intrinsic::amdgcn_image_sample_cd_cl_o:
-
-  // Sample with comparison and offsets.
- case Intrinsic::amdgcn_image_sample_c_o: - case Intrinsic::amdgcn_image_sample_c_cl_o: - case Intrinsic::amdgcn_image_sample_c_d_o: - case Intrinsic::amdgcn_image_sample_c_d_cl_o: - case Intrinsic::amdgcn_image_sample_c_l_o: - case Intrinsic::amdgcn_image_sample_c_b_o: - case Intrinsic::amdgcn_image_sample_c_b_cl_o: - case Intrinsic::amdgcn_image_sample_c_lz_o: - case Intrinsic::amdgcn_image_sample_c_cd_o: - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - - // Basic gather4 - case Intrinsic::amdgcn_image_gather4: - case Intrinsic::amdgcn_image_gather4_cl: - case Intrinsic::amdgcn_image_gather4_l: - case Intrinsic::amdgcn_image_gather4_b: - case Intrinsic::amdgcn_image_gather4_b_cl: - case Intrinsic::amdgcn_image_gather4_lz: - - // Gather4 with comparison - case Intrinsic::amdgcn_image_gather4_c: - case Intrinsic::amdgcn_image_gather4_c_cl: - case Intrinsic::amdgcn_image_gather4_c_l: - case Intrinsic::amdgcn_image_gather4_c_b: - case Intrinsic::amdgcn_image_gather4_c_b_cl: - case Intrinsic::amdgcn_image_gather4_c_lz: - - // Gather4 with offsets - case Intrinsic::amdgcn_image_gather4_o: - case Intrinsic::amdgcn_image_gather4_cl_o: - case Intrinsic::amdgcn_image_gather4_l_o: - case Intrinsic::amdgcn_image_gather4_b_o: - case Intrinsic::amdgcn_image_gather4_b_cl_o: - case Intrinsic::amdgcn_image_gather4_lz_o: - - // Gather4 with comparison and offsets - case Intrinsic::amdgcn_image_gather4_c_o: - case Intrinsic::amdgcn_image_gather4_c_cl_o: - case Intrinsic::amdgcn_image_gather4_c_l_o: - case Intrinsic::amdgcn_image_gather4_c_b_o: - case Intrinsic::amdgcn_image_gather4_c_b_cl_o: - case Intrinsic::amdgcn_image_gather4_c_lz_o: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.align = 0; - Info.flags = MachineMemOperand::MOLoad | - 
MachineMemOperand::MODereferenceable; - return true; - } - case Intrinsic::amdgcn_image_store: - case Intrinsic::amdgcn_image_store_mip: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_VOID; - Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - Info.flags = MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - Info.align = 0; - return true; - } - case Intrinsic::amdgcn_image_atomic_swap: - case Intrinsic::amdgcn_image_atomic_add: - case Intrinsic::amdgcn_image_atomic_sub: - case Intrinsic::amdgcn_image_atomic_smin: - case Intrinsic::amdgcn_image_atomic_umin: - case Intrinsic::amdgcn_image_atomic_smax: - case Intrinsic::amdgcn_image_atomic_umax: - case Intrinsic::amdgcn_image_atomic_and: - case Intrinsic::amdgcn_image_atomic_or: - case Intrinsic::amdgcn_image_atomic_xor: - case Intrinsic::amdgcn_image_atomic_inc: - case Intrinsic::amdgcn_image_atomic_dec: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - - // XXX - Should this be volatile without known ordering? 
- Info.flags |= MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_image_atomic_cmpswap: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(3)); - - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - - // XXX - Should this be volatile without known ordering? - Info.flags |= MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_tbuffer_load: - case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(0)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MODereferenceable; - - // There is a constant offset component, but there are additional register - // offsets which could break AA if we set the offset to anything non-0. 
- return true; - } - case Intrinsic::amdgcn_tbuffer_store: - case Intrinsic::amdgcn_buffer_store: - case Intrinsic::amdgcn_buffer_store_format: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_VOID; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.flags = MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - return true; - } - case Intrinsic::amdgcn_buffer_atomic_swap: - case Intrinsic::amdgcn_buffer_atomic_add: - case Intrinsic::amdgcn_buffer_atomic_sub: - case Intrinsic::amdgcn_buffer_atomic_smin: - case Intrinsic::amdgcn_buffer_atomic_umin: - case Intrinsic::amdgcn_buffer_atomic_smax: - case Intrinsic::amdgcn_buffer_atomic_umax: - case Intrinsic::amdgcn_buffer_atomic_and: - case Intrinsic::amdgcn_buffer_atomic_or: - case Intrinsic::amdgcn_buffer_atomic_xor: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_buffer_atomic_cmpswap: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; - return true; - } default: return false; } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll 
b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll new file mode 100644 index 00000000000..53e487f4991 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}sample_1d: +; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf +define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.float(float %s, <8 x i32> %rsrc, <4 x i32> %samp, i32 15, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_2d: +; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.float(float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i32 15, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.float(float, <8 x i32>, <4 x i32>, i32, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.float(float, float, <8 x i32>, <4 x i32>, i32, i1, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } |