summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2015-04-23 20:32:01 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2015-04-23 20:32:01 +0000
commit 2aab32cade85edffed419419b1eb5f2beb43c358 (patch)
tree 1a9908faf0ab9865c762cf755ccb761c7db15394
parent 391b2c39f7d738a9bf49fe404cdbf0fa0825e1a5 (diff)
R600/SI: Fix indirect addressing with a negative constant offset
When the base register index of the vector plus the constant offset was less than zero, we were passing the wrong base register to the indirect addressing instruction. In this case, we need to set the base register to v0 and then add the computed (negative) index to m0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235641 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/R600/SILowerControlFlow.cpp 71
-rw-r--r-- test/CodeGen/R600/indirect-addressing-si.ll 72
2 files changed, 127 insertions, 16 deletions
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 2e08c9f5a5..c319b32111 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -88,7 +88,8 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
+ void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
unsigned Idx = MI.getOperand(3).getReg();
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(Idx)
+ .addImm(Offset);
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ }
MBB.insert(I, MovRel);
} else {
@@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
.addReg(AMDGPU::VCC);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(AMDGPU::M0)
+ .addImm(Offset);
+ }
// Do the actual move
MBB.insert(I, MovRel);
@@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MI.eraseFromParent();
}
+/// Pick the base register and the M0 adjustment for an indirect vector access.
+/// \param[in] VecReg The register which holds element zero of the vector.
+/// \param[out] Reg The base register to use in the indirect addressing
+///   instruction.
+/// \param[in,out] Offset On input, the constant part of the indirect index,
+///   e.g. v0 = v[VecReg + Offset]. On output, the constant that must be added
+///   to the value stored in M0 (zero when it was folded into \p Reg).
+void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
+ unsigned &Reg,
+ int &Offset) {
+ unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = VecReg;
+ // VecReg has no sub0 sub-register: treat VecReg itself as element zero.
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
+ int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
+ // A negative index names no register: base at index 0, remainder goes to M0.
+ if (RegIdx < 0) {
+ Offset = RegIdx;
+ RegIdx = 0;
+ } else {
+ Offset = 0;
+ }
+ // Look up the register at the (now non-negative) index within class RC.
+ Reg = RC->getRegister(RegIdx);
+}
+
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
- unsigned Off = MI.getOperand(4).getImm();
- unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Vec;
+ int Off = MI.getOperand(4).getImm();
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Vec, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(SubReg + Off)
+ .addReg(Reg)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
@@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- unsigned Off = MI.getOperand(4).getImm();
+ int Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
- unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Dst;
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Dst, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(SubReg + Off, RegState::Define)
+ .addReg(Reg, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
index 319910fcab..f551606d63 100644
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -25,6 +25,33 @@ entry:
ret void
}
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
+; CHECK: s_cbranch_execnz
+define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
@@ -47,3 +74,48 @@ entry:
store float %1, float addrspace(1)* %out
ret void
}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -16
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+attributes #1 = { nounwind readnone }