summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2016-07-11 12:03:29 +0200
committerNicolai Hähnle <nicolai.haehnle@amd.com>2016-07-11 22:42:41 +0200
commit2b88db84f1ab2fe5b80cf1dbbd82f228151cfc9a (patch)
treec6009764e5c8e72a31f581c23ea4baf3293f5700
parentfdc8303ecbdf2c8db52d638dc223381aa8cbd55a (diff)
AMDGPU: Treat texture gather instructions more like other MIMG instructions
Summary: Setting MIMG to 0 has a bunch of unexpected side effects, including that isVMEM returns false which leads to incorrect treatment in the hazard recognizer. The reason I noticed it is that it also leads to incorrect treatment in VGPR-to-SGPR copies, which is one cause of the referenced bug. The only reason why MIMG was set to 0 is to signal the special handling of dmasks, but that can be checked differently. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96877 Reviewers: arsenm, tstellarAMD Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: http://reviews.llvm.org/D22210
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp3
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td3
-rw-r--r--test/CodeGen/AMDGPU/llvm.SI.gather4.ll22
3 files changed, 24 insertions, 4 deletions
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 4455e9cb170..4be3b4631cb 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3133,7 +3133,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
- if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
+ if (TII->isMIMG(Opcode) && TII->get(Opcode).hasPostISelHook() &&
+ !TII->get(Opcode).mayStore())
adjustWritemask(Node, DAG);
if (Opcode == AMDGPU::INSERT_SUBREG ||
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 88d133f7b68..f1cf2a563d3 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -3557,8 +3557,7 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
// (red,red,red,red) etc.) The ISA document doesn't mention
// this.
- // Therefore, disable all code which updates DMASK by setting these two:
- let MIMG = 0;
+ // Therefore, disable all code which updates DMASK by setting this:
let hasPostISelHook = 0;
let WQM = wqm;
diff --git a/test/CodeGen/AMDGPU/llvm.SI.gather4.ll b/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
index aa835efcb42..7eb9eb253b3 100644
--- a/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
+++ b/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
@@ -462,7 +462,27 @@ main_body:
ret void
}
-
+;CHECK-LABEL: {{^}}gather4_sgpr_bug:
+;
+; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
+; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
+;
+;TODO: the readfirstlanes are unnecessary, see http://reviews.llvm.org/D22217
+;
+;CHECK: v_readfirstlane_b32 s[[LO:[0-9]+]], v{{[0-9]+}}
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32 s[[HI:[0-9]+]], v{{[0-9]+}}
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
+define amdgpu_ps float @gather4_sgpr_bug() {
+main_body:
+ %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
+ %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
+ %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tmp4 = extractelement <4 x float> %tmp2, i32 1
+ %tmp9 = fadd float undef, %tmp4
+ ret float %tmp9
+}
declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0