summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jan Vesely <jan.vesely@rutgers.edu> 2016-07-10 21:20:29 +0000
committer Jan Vesely <jan.vesely@rutgers.edu> 2016-07-10 21:20:29 +0000
commit e2b4643334885124c4d140aa381d321cc1b2bb5b (patch)
tree 0a0024f5d862d94ce6ea40213d7c0615d98f1cf1
parent 8f101a75895acd524833a5792212f6d61ec0dd55 (diff)
AMDGPU/R600: Add implicitarg.ptr intrinsic

Differential Revision: http://reviews.llvm.org/D21622

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275024 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/IR/IntrinsicsAMDGPU.td 6
-rw-r--r-- lib/Target/AMDGPU/EvergreenInstructions.td 10
-rw-r--r-- lib/Target/AMDGPU/R600ISelLowering.cpp 5
-rw-r--r-- lib/Target/AMDGPU/R600Instructions.td 3
-rw-r--r-- test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll 114
-rw-r--r-- test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll (renamed from test/CodeGen/AMDGPU/work-item-intrinsics.ll) 127
-rw-r--r-- test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll 107
7 files changed, 336 insertions, 36 deletions
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 51afb83d7c6..119ce807a45 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -43,6 +43,12 @@ defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;
+
+// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
+def int_r600_implicitarg_ptr :
+ GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;
+
def int_r600_rat_store_typed :
// 1st parameter: Data
// 2nd parameter: Index
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 656400517c4..1092e61980a 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -210,23 +210,23 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
-def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3,
[(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3,
[(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3,
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
+def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3,
[(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3,
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index a64ffb2c8a0..8651bd84a2e 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -782,6 +782,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
+ case Intrinsic::r600_implicitarg_ptr: {
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
+ uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return DAG.getConstant(ByteOffset, DL, PtrVT);
+ }
case Intrinsic::r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case Intrinsic::r600_read_ngroups_y:
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 64e9b156dc2..8414f993bcc 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -329,7 +329,8 @@ class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
- [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
+ [{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
def load_param : LoadParamFrag<load>;
diff --git a/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll
new file mode 100644
index 00000000000..b1b3b9930d1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll
@@ -0,0 +1,114 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}workdim:
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+define void @workdim (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; The workgroup.id values are stored in sgprs offset by the number of user
+; sgprs.
+
+; FUNC-LABEL: {{^}}workgroup_id_x:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @workgroup_id_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_id_y:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+define void @workgroup_id_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_id_z:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @workgroup_id_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 132{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_x:
+; GCN-NOHSA: buffer_store_dword v0
+define void @workitem_id_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 2180{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_y:
+
+; GCN-NOHSA: buffer_store_dword v1
+define void @workitem_id_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 4228{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_z:
+; GCN-NOHSA: buffer_store_dword v2
+define void @workitem_id_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.workgroup.id.z() #0
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+declare i32 @llvm.amdgcn.read.workdim() #0
diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
index 1c0a99ead51..853788b92aa 100644
--- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll
+++ b/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
@@ -2,15 +2,31 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; Legacy intrinsics that just read implicit parameters
+
+; FUNC-LABEL: {{^}}workdim_legacy:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
-; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
+define void @workdim_legacy (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
-; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; FUNC-LABEL: {{^}}ngroups_x:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -19,13 +35,13 @@ entry:
}
; FUNC-LABEL: {{^}}ngroups_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -34,13 +50,13 @@ entry:
}
; FUNC-LABEL: {{^}}ngroups_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -49,13 +65,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_x:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -64,13 +80,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -79,13 +95,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -93,10 +109,57 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}local_size_x:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
+define void @local_size_x (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_y:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
+define void @local_size_y (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_z:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
+define void @local_size_z (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Legacy use of r600 intrinsics by GCN
+
; The tgid values are stored in sgprs offset by the number of user
; sgprs.
-; FUNC-LABEL: {{^}}tgid_x:
+; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
@@ -105,26 +168,26 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_x(i32 addrspace(1)* %out) {
+define void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}tgid_y:
+; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-define void @tgid_y(i32 addrspace(1)* %out) {
+define void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}tgid_z:
+; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
@@ -133,7 +196,7 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_z(i32 addrspace(1)* %out) {
+define void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -144,9 +207,9 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}
-; FUNC-LABEL: {{^}}tidig_x:
+; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
-define void @tidig_x(i32 addrspace(1)* %out) {
+define void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
@@ -157,10 +220,10 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}
-; FUNC-LABEL: {{^}}tidig_y:
+; FUNC-LABEL: {{^}}tidig_y_legacy:
; GCN-NOHSA: buffer_store_dword v1
-define void @tidig_y(i32 addrspace(1)* %out) {
+define void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
@@ -171,9 +234,9 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}
-; FUNC-LABEL: {{^}}tidig_z:
+; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
-define void @tidig_z(i32 addrspace(1)* %out) {
+define void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -188,6 +251,10 @@ declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0
diff --git a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
new file mode 100644
index 00000000000..ff248a89ced
--- /dev/null
+++ b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}tgid_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
+define void @tgid_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tgid_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y
+define void @tgid_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tgid_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z
+define void @tgid_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
+define void @tidig_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y
+define void @tidig_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z
+define void @tidig_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_implicit:
+; 36 prepended implicit bytes + 4 (out pointer) + 4*4 (GEP index 4 x 4-byte i32) = 56
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56
+define void @test_implicit(i32 addrspace(1)* %out) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
+ %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
+ %value = load i32, i32 addrspace(7)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_implicit_dyn:
+; 36 prepended implicit bytes + 8 (out pointer + in, 4 bytes each) = 44
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44
+define void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
+ %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
+ %value = load i32, i32 addrspace(7)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+
+
+; DEPRECATED but R600 only
+
+; FUNC-LABEL: {{^}}workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
+define void @workdim (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.workdim() #0
+
+declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0
+
+declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.r600.read.tgid.y() #0
+declare i32 @llvm.r600.read.tgid.z() #0
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
+declare i32 @llvm.r600.read.tidig.z() #0
+
+attributes #0 = { readnone }