diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2016-07-10 21:20:29 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2016-07-10 21:20:29 +0000 |
commit | e2b4643334885124c4d140aa381d321cc1b2bb5b (patch) | |
tree | 0a0024f5d862d94ce6ea40213d7c0615d98f1cf1 | |
parent | 8f101a75895acd524833a5792212f6d61ec0dd55 (diff) |
AMDGPU/R600: Add implicitarg.ptr intrinsic
Differential Revision: http://reviews.llvm.org/D21622
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275024 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IR/IntrinsicsAMDGPU.td | 6 | ||||
-rw-r--r-- | lib/Target/AMDGPU/EvergreenInstructions.td | 10 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 5 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 3 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll | 114 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll (renamed from test/CodeGen/AMDGPU/work-item-intrinsics.ll) | 127 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll | 107 |
7 files changed, 336 insertions, 36 deletions
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 51afb83d7c6..119ce807a45 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -43,6 +43,12 @@ defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz; def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic; + +// AS 7 is PARAM_I_ADDRESS, used for kernel arguments +def int_r600_implicitarg_ptr : + GCCBuiltin<"__builtin_r600_implicitarg_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>; + def int_r600_rat_store_typed : // 1st parameter: Data // 2nd parameter: Index diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 656400517c4..1092e61980a 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -210,23 +210,23 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> // VTX Read from parameter memory space //===----------------------------------------------------------------------===// -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3, [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3, [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3, [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0, +def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3, [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3, [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index a64ffb2c8a0..8651bd84a2e 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -782,6 +782,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); } + case Intrinsic::r600_implicitarg_ptr: { + MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); + uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); + return DAG.getConstant(ByteOffset, DL, PtrVT); + } case Intrinsic::r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); case Intrinsic::r600_read_ngroups_y: diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 64e9b156dc2..8414f993bcc 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -329,7 +329,8 @@ class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern> class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), - [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }] + [{ return isConstantLoad(cast<LoadSDNode>(N), 0) || + (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] >; def load_param : LoadParamFrag<load>; diff --git a/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll new file mode 100644 index 00000000000..b1b3b9930d1 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll @@ -0,0 +1,114 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}workdim: + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; The workgroup.id values are stored in sgprs offset by the number of user +; sgprs. + +; FUNC-LABEL: {{^}}workgroup_id_x: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_y: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +define void @workgroup_id_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_z: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + +; FUNC-LABEL: {{^}}workitem_id_x: +; GCN-NOHSA: buffer_store_dword v0 +define void @workitem_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + +; FUNC-LABEL: {{^}}workitem_id_y: + +; GCN-NOHSA: buffer_store_dword v1 +define void @workitem_id_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + +; FUNC-LABEL: {{^}}workitem_id_z: +; GCN-NOHSA: buffer_store_dword v2 +define void @workitem_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 +declare i32 @llvm.amdgcn.workitem.id.z() #0 + +declare i32 @llvm.amdgcn.read.workdim() #0 diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll index 1c0a99ead51..853788b92aa 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -2,15 +2,31 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; Legacy intrinsics that just read implicit parameters + +; FUNC-LABEL: {{^}}workdim_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] -; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].X +; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z +define void @workdim_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} -; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 +; FUNC-LABEL: {{^}}ngroups_x: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].X define void @ngroups_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.x() #0 @@ -19,13 +35,13 @@ entry: } ; FUNC-LABEL: {{^}}ngroups_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y - ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y define void @ngroups_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.y() #0 @@ -34,13 +50,13 @@ entry: } ; FUNC-LABEL: {{^}}ngroups_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z - ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z define void @ngroups_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.z() #0 @@ -49,13 +65,13 @@ entry: } ; FUNC-LABEL: {{^}}global_size_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].W - ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W define void @global_size_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.x() #0 @@ -64,13 +80,13 @@ entry: } ; FUNC-LABEL: {{^}}global_size_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].X - ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X define void @global_size_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.y() #0 @@ -79,13 +95,13 @@ entry: } ; FUNC-LABEL: {{^}}global_size_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y - ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y define void @global_size_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.z() #0 @@ -93,10 +109,57 @@ entry: ret void } +; FUNC-LABEL: {{^}}local_size_x: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z +define void @local_size_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].W +define void @local_size_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].X +define void @local_size_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Legacy use of r600 intrinsics by GCN + ; The tgid values are stored in sgprs offset by the number of user ; sgprs. -; FUNC-LABEL: {{^}}tgid_x: +; FUNC-LABEL: {{^}}tgid_x_legacy: ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} ; GCN-NOHSA: buffer_store_dword [[VVAL]] @@ -105,26 +168,26 @@ entry: ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_x(i32 addrspace(1)* %out) { +define void @tgid_x_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.x() #0 store i32 %0, i32 addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}tgid_y: +; FUNC-LABEL: {{^}}tgid_y_legacy: ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 ; GCN-NOHSA: buffer_store_dword [[VVAL]] ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -define void @tgid_y(i32 addrspace(1)* %out) { +define void @tgid_y_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 store i32 %0, i32 addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}tgid_z: +; FUNC-LABEL: {{^}}tgid_z_legacy: ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} ; GCN-NOHSA: buffer_store_dword [[VVAL]] @@ -133,7 +196,7 @@ entry: ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_z(i32 addrspace(1)* %out) { +define void @tgid_z_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.z() #0 store i32 %0, i32 addrspace(1)* %out @@ -144,9 +207,9 @@ entry: ; GCN-NOHSA: .long 47180 ; GCN-NOHSA-NEXT: .long 132{{$}} -; FUNC-LABEL: {{^}}tidig_x: +; FUNC-LABEL: {{^}}tidig_x_legacy: ; GCN-NOHSA: buffer_store_dword v0 -define void @tidig_x(i32 addrspace(1)* %out) { +define void @tidig_x_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 store i32 %0, i32 addrspace(1)* %out @@ -157,10 +220,10 @@ entry: ; GCN-NOHSA: .long 47180 ; GCN-NOHSA-NEXT: .long 2180{{$}} -; FUNC-LABEL: {{^}}tidig_y: +; FUNC-LABEL: {{^}}tidig_y_legacy: ; GCN-NOHSA: buffer_store_dword v1 -define void @tidig_y(i32 addrspace(1)* %out) { +define void @tidig_y_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 store i32 %0, i32 addrspace(1)* %out @@ -171,9 +234,9 @@ entry: ; GCN-NOHSA: .long 47180 ; GCN-NOHSA-NEXT: .long 4228{{$}} -; FUNC-LABEL: {{^}}tidig_z: +; FUNC-LABEL: {{^}}tidig_z_legacy: ; GCN-NOHSA: buffer_store_dword v2 -define void @tidig_z(i32 addrspace(1)* %out) { +define void @tidig_z_legacy(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 store i32 %0, i32 addrspace(1)* %out @@ -188,6 +251,10 @@ declare i32 @llvm.r600.read.global.size.x() #0 declare i32 @llvm.r600.read.global.size.y() #0 declare i32 @llvm.r600.read.global.size.z() #0 +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 declare i32 @llvm.r600.read.tgid.z() #0 diff --git a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll new file mode 100644 index 00000000000..ff248a89ced --- /dev/null +++ b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}tgid_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.X +define void @tgid_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y +define void @tgid_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z +define void @tgid_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.X +define void @tidig_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y +define void @tidig_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z +define void @tidig_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_implicit: +; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 +; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56 +define void @test_implicit(i32 addrspace(1)* %out) #1 { + %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() + %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* + %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4 + %value = load i32, i32 addrspace(7)* %gep + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_implicit_dyn: +; 36 prepended implicit bytes + 8(out pointer + in) = 44 +; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44 +define void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 { + %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() + %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* + %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in + %value = load i32, i32 addrspace(7)* %gep + store i32 %value, i32 addrspace(1)* %out + ret void +} + + + +; DEPRECATED but R600 only + +; FUNC-LABEL: {{^}}workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.workdim() #0 + +declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0 + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +attributes #0 = { readnone } |