author     Tom Stellard <thomas.stellard@amd.com>  2013-10-01 23:09:05 -0400
committer  Tom Stellard <thomas.stellard@amd.com>  2013-10-14 13:19:18 -0700
commit     ac8f2bce959088ee343264c7cd00efadfd02311f (patch)
tree       6179547be6de8d365ae36b95045e79e569ea48e0
parent     2da53a76c59243918cffec9c5a47e31579be7e2d (diff)
R600/SI: Prefer SALU instructions for bit shift operations
All shift operations will be selected as SALU instructions and then, if necessary, lowered to VALU instructions in the SIFixSGPRCopies pass. This allows us to do more operations on the SALU, which will improve performance. It is also required for implementing private memory using indirect addressing, since the private memory pointers must stay in the scalar registers.
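As a minimal sketch of the effect (hypothetical input and register numbers, not part of this patch), a 32-bit shift whose value operand lives in a scalar register is now selected as S_LSHL_B32 instead of V_LSHL_B32_e64, and SIFixSGPRCopies only rewrites it to the VALU form when an operand actually needs a vector register:

    ; LLVM IR input
    define void @shl_example(i32 addrspace(1)* %out, i32 %p) {
      %i = shl i32 %p, 1
      store i32 %i, i32 addrspace(1)* %out
      ret void
    }

    ; SI selection before this patch: V_LSHL_B32_e64 VGPR1, SGPR0, 1
    ; SI selection after this patch:  S_LSHL_B32 SGPR1, SGPR0, 1

The updated lshl.ll and lshr.ll tests below check this change directly.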
-rw-r--r--  lib/Target/R600/SIDefines.h          |   3
-rw-r--r--  lib/Target/R600/SIFixSGPRCopies.cpp  |  95
-rw-r--r--  lib/Target/R600/SIInstrFormats.td    |   7
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp      | 258
-rw-r--r--  lib/Target/R600/SIInstrInfo.h        |  37
-rw-r--r--  lib/Target/R600/SIInstrInfo.td       |   5
-rw-r--r--  lib/Target/R600/SIInstructions.td    |  35
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp   |  46
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h     |  13
-rw-r--r--  test/CodeGen/R600/fneg.ll            |  47
-rw-r--r--  test/CodeGen/R600/load.ll            | 166
-rw-r--r--  test/CodeGen/R600/lshl.ll            |   2
-rw-r--r--  test/CodeGen/R600/lshr.ll            |   2
-rw-r--r--  test/CodeGen/R600/mad_uint24.ll      |  12
-rw-r--r--  test/CodeGen/R600/mul_uint24.ll      |  12
-rw-r--r--  test/CodeGen/R600/sra.ll             |  12
16 files changed, 608 insertions, 144 deletions
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 7fdaee59a57..2cbce282cbe 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -18,7 +18,8 @@ enum {
VOP1 = 1 << 5,
VOP2 = 1 << 6,
VOP3 = 1 << 7,
- VOPC = 1 << 8
+ VOPC = 1 << 8,
+ SALU = 1 << 9
};
}
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 7f07b01f087..2f7fe7a29e4 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -65,10 +65,13 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -79,9 +82,12 @@ class SIFixSGPRCopies : public MachineFunctionPass {
private:
static char ID;
- const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI,
+ const TargetRegisterClass *inferRegClass(const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const;
+ unsigned Reg,
+ unsigned SubReg) const;
+ bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const;
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
@@ -102,25 +108,42 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
return new SIFixSGPRCopies(tm);
}
+static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ continue;
+
+ if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ return true;
+ }
+ return false;
+}
+
/// This function walks the use/def chains starting with the definition of
/// \p Reg until it finds an instruction that isn't a COPY, and returns
/// the register class of that instruction.
+/// \param[out] The register defined by the first non-COPY instruction.
const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
- const TargetRegisterInfo *TRI,
+ const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ unsigned Reg,
+ unsigned SubReg) const {
// The Reg parameter to the function must always be defined by either a PHI
// or a COPY, therefore it cannot be a physical register.
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Reg cannot be a physical register");
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
E = MRI.use_end(); I != E; ++I) {
switch (I->getOpcode()) {
case AMDGPU::COPY:
RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI,
- I->getOperand(0).getReg()));
+ I->getOperand(0).getReg(),
+ I->getOperand(0).getSubReg()));
break;
}
}
@@ -128,9 +151,32 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
return RC;
}
+bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const {
+
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ DstRC == &AMDGPU::M0RegRegClass)
+ return false;
+
+ const TargetRegisterClass *SrcRC = TRI->getSubRegClass(
+ MRI.getRegClass(SrcReg), SrcSubReg);
+
+ return TRI->isSGPRClass(DstRC) &&
+ !TRI->getCommonSubClass(DstRC, SrcRC);
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -138,13 +184,38 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
- if (MI.getOpcode() != AMDGPU::PHI) {
- continue;
+ if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+
+ }
+
+ switch (MI.getOpcode()) {
+ default: continue;
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << " Fixing PHI:\n");
+ DEBUG(MI.print(dbgs()));
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
+ MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
+ }
+ break;
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
+ !hasVGPROperands(MI, TRI))
+ continue;
+
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE: \n");
+ DEBUG(MI.print(dbgs()));
+
+ TII->moveToVALU(MI);
+ TII->legalizeOperands(&MI);
+ break;
}
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg);
- if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
- MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
}
}
}
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 962e266f215..53ebaaf15a7 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -23,6 +23,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VOP2 = 0;
field bits<1> VOP3 = 0;
field bits<1> VOPC = 0;
+ field bits<1> SALU = 0;
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -33,6 +34,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{6} = VOP2;
let TSFlags{7} = VOP3;
let TSFlags{8} = VOPC;
+ let TSFlags{9} = SALU;
}
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -67,6 +69,7 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -85,6 +88,7 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -102,6 +106,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -118,6 +123,7 @@ class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
@@ -135,6 +141,7 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index d3a1bdbdffe..4640d80fdfd 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -185,11 +185,36 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
- !MI->getOperand(2).isReg())
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
return 0;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ // Cannot commute VOP2 if src0 is SGPR.
+ if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
+ return 0;
+
+ if (!MI->getOperand(2).isReg()) {
+ // XXX: Commute instructions with FPImm operands
+ if (NewMI || MI->getOperand(2).isFPImm() ||
+ (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ return 0;
+ }
+
+ // XXX: Commute VOP3 instructions with abs and neg set.
+ if (isVOP3(MI->getOpcode()) &&
+ (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::abs)).getImm() ||
+ MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::neg)).getImm()))
+ return 0;
+
+ unsigned Reg = MI->getOperand(1).getReg();
+ MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
+ MI->getOperand(2).ChangeToRegister(Reg, false);
+ } else {
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
if (MI)
MI->setDesc(get(commuteOpcode(MI->getOpcode())));
@@ -244,6 +269,10 @@ bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}
+bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+}
+
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
if(MO.isImm()) {
return MO.getImm() >= -16 && MO.getImm() <= 64;
@@ -311,7 +340,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify SRC1 for VOP2 and VOPC
if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
const MachineOperand &Src1 = MI->getOperand(Src1Idx);
- if (Src1.isImm()) {
+ if (Src1.isImm() || Src1.isFPImm()) {
ErrInfo = "VOP[2C] src1 cannot be an immediate.";
return false;
}
@@ -335,6 +364,227 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return true;
}
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1)
+ return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
+}
+
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg()) {
+ Opcode = AMDGPU::COPY;
+ } else if (RI.isSGPRClass(RC)) {
+ Opcode = AMDGPU::S_MOV_B32;
+ }
+
+ unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
+ BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
+ Reg).addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ // Legalize VOP2 instructions where src1 is not a VGPR.
+ if (Src1.isImm() || Src1.isFPImm() ||
+ (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ return;
+ }
+ legalizeOpWithMove(MI, Src1Idx);
+ }
+ }
+
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
+ unsigned SGPRReg = AMDGPU::NoRegister;
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ continue;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO)) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE
+ // The register class of the operands must be the same type as the register
+ // class of the output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+ // If any of the operands are VGPR registers, then they all must be VGPRs,
+ // otherwise we will create illegal VGPR->SGPR copies when legalizing
+ // them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
+
+ // Update all the operands so they have the same type.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
+}
+
+bool SIInstrInfo::moveToVALU(MachineInstr &MI) const {
+ unsigned NewOpcode = getVALUOp(MI);
+
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ return false;
+
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ MI.setDesc(NewDesc);
+
+ // Add the implicit register uses and definitions.
+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ MI.addOperand(MachineOperand::CreateReg(NewDesc.ImplicitUses[i],
+ false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ MI.addOperand(MachineOperand::CreateReg(NewDesc.ImplicitDefs[i],
+ true, true));
+ }
+ }
+
+ // Update the destination register class
+ const TargetRegisterClass *NewDstRC = getOpRegClass(MI, 0);
+
+ switch (MI.getOpcode()) {
+ // For generic instructions like COPY and REG_SEQUENCE, getOpRegClass just
+ // returns the virtual register class associated with the operand, so we
+ // need to find an equivalent VGPR register class in order to move the
+ // instruction to the VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ if (RI.hasVGPRs(NewDstRC))
+ return false;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ return false;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ Next = llvm::next(I);
+ I != MRI.use_end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &UseMI = *I;
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ moveToVALU(UseMI);
+ legalizeOperands(&UseMI);
+ }
+ }
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 6f833ff8a37..31ceaf35f54 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -62,6 +62,43 @@ public:
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+ bool isSALUInstr(const MachineInstr &MI) const;
+ unsigned getVALUOp(const MachineInstr &MI) const;
+ bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
+
+ /// \brief Return the correct register class for \p OpNo. For target-specific
+ /// instructions, this will return the register class that has been defined
+ /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
+ /// the register class of its machine operand.
+ const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const;
+
+ /// \returns true if it is legal for the operand at index \p OpNo
+ /// to read a VGPR.
+ bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
+
+ /// \brief Legalize the \p OpIdx operand of this instruction by inserting
+ /// a MOV. For example:
+ /// ADD_I32_e32 VGPR0, 15
+ /// to
+ /// MOV VGPR1, 15
+ /// ADD_I32_e32 VGPR0, VGPR1
+ ///
+ /// If the operand being legalized is a register, then a COPY will be used
+ /// instead of MOV.
+ void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+
+ /// \brief Legalize all operands in this instruction. This function may
+ /// create new instructions and insert them before \p MI.
+ void legalizeOperands(MachineInstr *MI) const;
+
+ /// \brief Replace this instruction's opcode with the equivalent VALU
+ /// opcode. This function will also move the users of \p MI to the
+ /// VALU if necessary.
+ /// NOTE: This function does not update the operands of MI.
+ bool moveToVALU(MachineInstr &MI) const;
+
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ed42a2ad954..b55f59d1618 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -172,6 +172,11 @@ class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
opName#" $dst, $src0, $src1", pattern
>;
+class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 94d42d55c33..51d3003fc53 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -941,9 +941,13 @@ defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+let hasPostISelHook = 1 in {
+
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
+
+}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
@@ -1172,12 +1176,31 @@ def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+ [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+ [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+ [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+>;
+
+} // End AddedComplexity = 1
+
def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 7f69ef6f286..e06a02257fe 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -76,9 +76,45 @@ bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
if (!RC) {
return false;
}
- return RC == &AMDGPU::SReg_32RegClass ||
- RC == &AMDGPU::SReg_64RegClass ||
- RC == &AMDGPU::SReg_128RegClass ||
- RC == &AMDGPU::SReg_256RegClass ||
- RC == &AMDGPU::SReg_512RegClass;
+ return !hasVGPRs(RC);
+}
+
+bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
+ return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const {
+ if (hasVGPRs(SRC)) {
+ return SRC;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
+ return &AMDGPU::VReg_32RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
+ return &AMDGPU::VReg_64RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
+ return &AMDGPU::VReg_128RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
+ return &AMDGPU::VReg_256RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
+ return &AMDGPU::VReg_512RegClass;
+ }
+ return NULL;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
+ const TargetRegisterClass *RC, unsigned SubIdx) const {
+ if (SubIdx == AMDGPU::NoSubRegister)
+ return RC;
+
+ // If this register has a sub-register, we can safely assume it is a 32-bit
+ // register, because all of SI's sub-registers are 32-bit.
+ if (isSGPRClass(RC)) {
+ return &AMDGPU::SGPR_32RegClass;
+ } else {
+ return &AMDGPU::VGPR_32RegClass;
+ }
}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index ffc57973e05..ba831b0f77b 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -48,6 +48,19 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \returns true if this class contains only SGPR registers
bool isSGPRClass(const TargetRegisterClass *RC) const;
+
+ /// \returns true if this class contains VGPR registers.
+ bool hasVGPRs(const TargetRegisterClass *RC) const;
+
+ /// \returns A VGPR reg class with the same width as \p SRC
+ const TargetRegisterClass *getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const;
+
+ /// \returns The register class that is used for a sub-register of \p RC for
+ /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
+ /// be returned.
+ const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
+ unsigned SubIdx) const;
};
} // End namespace llvm
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index f7083cd6ca0..702fb33a24e 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -1,8 +1,23 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; CHECK: @fneg_v2
-; CHECK: -PV
-; CHECK: -PV
+; R600-CHECK-LABEL: @fneg
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
+define void @fneg(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fsub float -0.000000e+00, %in
+ store float %0, float addrspace(1)* %out
+ ret void
+}
+
+; R600-CHECK-LABEL: @fneg_v2
+; R600-CHECK: -PV
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_v2
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
entry:
%0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
@@ -10,11 +25,16 @@ entry:
ret void
}
-; CHECK: @fneg_v4
-; CHECK: -PV
-; CHECK: -T
-; CHECK: -PV
-; CHECK: -PV
+; R600-CHECK-LABEL: @fneg_v4
+; R600-CHECK: -PV
+; R600-CHECK: -T
+; R600-CHECK: -PV
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_v4
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, VGPR{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, VGPR{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, VGPR{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, VGPR{{[0-9]}}, 0, 0, 0, 0, 1
define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
entry:
%0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
@@ -26,9 +46,12 @@ entry:
; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
; unless the target returns true for isNegFree()
-; CHECK-NOT: XOR
-; CHECK: -KC0[2].Z
-
+; R600-CHECK-LABEL: @fneg_free
+; R600-CHECK-NOT: XOR
+; R600-CHECK: -KC0[2].Z
+; SI-CHECK-LABEL: @fneg_free
+; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
+; SI-CHECK: V_SUB_F32_e64 VGPR{{[0-9]}}, 0.000000e+00, SGPR{{[0-9]}}, 0, 0, 0, 0
define void @fneg_free(float addrspace(1)* %out, i32 %in) {
entry:
%0 = bitcast i32 %in to float
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index f8d5e118350..ed64901759e 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -7,10 +7,10 @@
;===------------------------------------------------------------------------===;
; Load an i8 value from the global address space.
-; R600-CHECK: @load_i8
+; R600-CHECK-LABEL: @load_i8
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_i8
+; SI-CHECK-LABEL: @load_i8
; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
%1 = load i8 addrspace(1)* %in
@@ -19,13 +19,13 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
ret void
}
-; R600-CHECK: @load_i8_sext
+; R600-CHECK-LABEL: @load_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: @load_i8_sext
+; SI-CHECK-LABEL: @load_i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
@@ -35,10 +35,10 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i8
+; R600-CHECK-LABEL: @load_v2i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: @load_v2i8
+; SI-CHECK-LABEL: @load_v2i8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
@@ -49,7 +49,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i8_sext
+; R600-CHECK-LABEL: @load_v2i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -60,7 +60,7 @@ entry:
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: @load_v2i8_sext
+; SI-CHECK-LABEL: @load_v2i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
@@ -71,12 +71,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i8
+; R600-CHECK-LABEL: @load_v4i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: @load_v4i8
+; SI-CHECK-LABEL: @load_v4i8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
@@ -89,7 +89,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i8_sext
+; R600-CHECK-LABEL: @load_v4i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -110,7 +110,7 @@ entry:
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: @load_v4i8_sext
+; SI-CHECK-LABEL: @load_v4i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
@@ -124,9 +124,9 @@ entry:
}
; Load an i16 value from the global address space.
-; R600-CHECK: @load_i16
+; R600-CHECK-LABEL: @load_i16
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_i16
+; SI-CHECK-LABEL: @load_i16
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
@@ -136,13 +136,13 @@ entry:
ret void
}
-; R600-CHECK: @load_i16_sext
+; R600-CHECK-LABEL: @load_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: @load_i16_sext
+; SI-CHECK-LABEL: @load_i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
@@ -152,10 +152,10 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i16
+; R600-CHECK-LABEL: @load_v2i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: @load_v2i16
+; SI-CHECK-LABEL: @load_v2i16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
@@ -166,7 +166,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i16_sext
+; R600-CHECK-LABEL: @load_v2i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -177,7 +177,7 @@ entry:
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: @load_v2i16_sext
+; SI-CHECK-LABEL: @load_v2i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
@@ -188,12 +188,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i16
+; R600-CHECK-LABEL: @load_v4i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: @load_v4i16
+; SI-CHECK-LABEL: @load_v4i16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
@@ -206,7 +206,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i16_sext
+; R600-CHECK-LABEL: @load_v4i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -227,7 +227,7 @@ entry:
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: @load_v4i16_sext
+; SI-CHECK-LABEL: @load_v4i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
@@ -241,10 +241,10 @@ entry:
}
; load an i32 value from the global address space.
-; R600-CHECK: @load_i32
+; R600-CHECK-LABEL: @load_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_i32
+; SI-CHECK-LABEL: @load_i32
; SI-CHECK: BUFFER_LOAD_DWORD VGPR{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
@@ -254,10 +254,10 @@ entry:
}
; load a f32 value from the global address space.
-; R600-CHECK: @load_f32
+; R600-CHECK-LABEL: @load_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_f32
+; SI-CHECK-LABEL: @load_f32
; SI-CHECK: BUFFER_LOAD_DWORD VGPR{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
@@ -267,10 +267,10 @@ entry:
}
; load a v2f32 value from the global address space
-; R600-CHECK: @load_v2f32
+; R600-CHECK-LABEL: @load_v2f32
; R600-CHECK: VTX_READ_64
-; SI-CHECK: @load_v2f32
+; SI-CHECK-LABEL: @load_v2f32
; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
@@ -279,11 +279,11 @@ entry:
ret void
}
-; R600-CHECK: @load_i64
+; R600-CHECK-LABEL: @load_i64
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
-; SI-CHECK: @load_i64
+; SI-CHECK-LABEL: @load_i64
; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
@@ -292,15 +292,13 @@ entry:
ret void
}
-; R600-CHECK: @load_i64_sext
+; R600-CHECK-LABEL: @load_i64_sext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
-; SI-CHECK: @load_i64_sext
-; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:VGPR[0-9]_VGPR[0-9]]]
-; SI-CHECK: V_LSHL_B64 [[LSHL:VGPR[0-9]_VGPR[0-9]]], [[VAL]], 32
-; SI-CHECK: V_ASHR_I64 VGPR{{[0-9]}}_VGPR{{[0-9]}}, [[LSHL]], 32
+; SI-CHECK-LABEL: @load_i64_sext
+; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:VGPR[0-9]]]
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
@@ -310,7 +308,7 @@ entry:
ret void
}
-; R600-CHECK: @load_i64_zext
+; R600-CHECK-LABEL: @load_i64_zext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -326,13 +324,13 @@ entry:
;===------------------------------------------------------------------------===;
; Load a sign-extended i8 value
-; R600-CHECK: @load_const_i8_sext
+; R600-CHECK-LABEL: @load_const_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: @load_const_i8_sext
+; SI-CHECK-LABEL: @load_const_i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE VGPR{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
@@ -343,9 +341,9 @@ entry:
}
; Load an aligned i8 value
-; R600-CHECK: @load_const_i8_aligned
+; R600-CHECK-LABEL: @load_const_i8_aligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_const_i8_aligned
+; SI-CHECK-LABEL: @load_const_i8_aligned
; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
@@ -356,9 +354,9 @@ entry:
}
; Load an un-aligned i8 value
-; R600-CHECK: @load_const_i8_unaligned
+; R600-CHECK-LABEL: @load_const_i8_unaligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_const_i8_unaligned
+; SI-CHECK-LABEL: @load_const_i8_unaligned
; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
@@ -370,13 +368,13 @@ entry:
}
; Load a sign-extended i16 value
-; R600-CHECK: @load_const_i16_sext
+; R600-CHECK-LABEL: @load_const_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: @load_const_i16_sext
+; SI-CHECK-LABEL: @load_const_i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
@@ -387,9 +385,9 @@ entry:
}
; Load an aligned i16 value
-; R600-CHECK: @load_const_i16_aligned
+; R600-CHECK-LABEL: @load_const_i16_aligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_const_i16_aligned
+; SI-CHECK-LABEL: @load_const_i16_aligned
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
@@ -400,9 +398,9 @@ entry:
}
; Load an un-aligned i16 value
-; R600-CHECK: @load_const_i16_unaligned
+; R600-CHECK-LABEL: @load_const_i16_unaligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @load_const_i16_unaligned
+; SI-CHECK-LABEL: @load_const_i16_unaligned
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
@@ -414,10 +412,10 @@ entry:
}
; Load an i32 value from the constant address space.
-; R600-CHECK: @load_const_addrspace_i32
+; R600-CHECK-LABEL: @load_const_addrspace_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_const_addrspace_i32
+; SI-CHECK-LABEL: @load_const_addrspace_i32
; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
@@ -427,10 +425,10 @@ entry:
}
; Load a f32 value from the constant address space.
-; R600-CHECK: @load_const_addrspace_f32
+; R600-CHECK-LABEL: @load_const_addrspace_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_const_addrspace_f32
+; SI-CHECK-LABEL: @load_const_addrspace_f32
; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
%1 = load float addrspace(2)* %in
@@ -443,9 +441,9 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
;===------------------------------------------------------------------------===;
; Load an i8 value from the local address space.
-; R600-CHECK: @load_i8_local
+; R600-CHECK-LABEL: @load_i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK: @load_i8_local
+; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
@@ -455,10 +453,10 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
ret void
}
-; R600-CHECK: @load_i8_sext_local
+; R600-CHECK-LABEL: @load_i8_sext_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK: @load_i8_sext_local
+; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
@@ -469,10 +467,10 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i8_local
+; R600-CHECK-LABEL: @load_v2i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK: @load_v2i8_local
+; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
@@ -484,12 +482,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i8_sext_local
+; R600-CHECK-LABEL: @load_v2i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK: @load_v2i8_sext_local
+; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
@@ -501,12 +499,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i8_local
+; R600-CHECK-LABEL: @load_v4i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK: @load_v4i8_local
+; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
@@ -520,7 +518,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i8_sext_local
+; R600-CHECK-LABEL: @load_v4i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
@@ -529,7 +527,7 @@ entry:
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK: @load_v4i8_sext_local
+; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
@@ -544,9 +542,9 @@ entry:
}
; Load an i16 value from the local address space.
-; R600-CHECK: @load_i16_local
+; R600-CHECK-LABEL: @load_i16_local
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK: @load_i16_local
+; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
@@ -557,10 +555,10 @@ entry:
ret void
}
-; R600-CHECK: @load_i16_sext_local
+; R600-CHECK-LABEL: @load_i16_sext_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK: @load_i16_sext_local
+; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
@@ -571,10 +569,10 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i16_local
+; R600-CHECK-LABEL: @load_v2i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK: @load_v2i16_local
+; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
@@ -586,12 +584,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v2i16_sext_local
+; R600-CHECK-LABEL: @load_v2i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK: @load_v2i16_sext_local
+; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
@@ -603,12 +601,12 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i16_local
+; R600-CHECK-LABEL: @load_v4i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK: @load_v4i16_local
+; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
@@ -622,7 +620,7 @@ entry:
ret void
}
-; R600-CHECK: @load_v4i16_sext_local
+; R600-CHECK-LABEL: @load_v4i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
@@ -631,7 +629,7 @@ entry:
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK: @load_v4i16_sext_local
+; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
@@ -646,9 +644,9 @@ entry:
}
; load an i32 value from the local address space.
-; R600-CHECK: @load_i32_local
+; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: @load_i32_local
+; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
@@ -659,9 +657,9 @@ entry:
}
; load a f32 value from the local address space.
-; R600-CHECK: @load_f32_local
+; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: @load_f32_local
+; SI-CHECK-LABEL: @load_f32_local
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
@@ -671,10 +669,10 @@ entry:
}
; load a v2f32 value from the local address space
-; R600-CHECK: @load_v2f32_local
+; R600-CHECK-LABEL: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: @load_v2f32_local
+; SI-CHECK-LABEL: @load_v2f32_local
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index 06736c0b730..029c00da8af 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 1
+;CHECK: S_LSHL_B32 SGPR{{[0-9]}}, SGPR{{[0-9]}}, 1
define void @test(i32 %p) {
%i = mul i32 %p, 2
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index 5b1620bbc8f..7217ee61dc1 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]}}, SGPR{{[0-9]}}, 1
+;CHECK: S_LSHR_B32 {{SGPR[0-9]}}, SGPR{{[0-9]}}, 1
define void @test(i32 %p) {
%i = udiv i32 %p, 2
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
index 7fba50c03d9..f5225d34d42 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; EG-CHECK: @u32_mad24
+; EG-CHECK-LABEL: @u32_mad24
; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
-; SI-CHECK: @u32_mad24
+; SI-CHECK-LABEL: @u32_mad24
; SI-CHECK: V_MAD_U32_U24
define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
@@ -19,7 +19,7 @@ entry:
ret void
}
-; EG-CHECK: @i16_mad24
+; EG-CHECK-LABEL: @i16_mad24
; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
@@ -30,7 +30,7 @@ entry:
; EG-CHECK: 16
; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
; EG-CHECK: 16
-; SI-CHECK: @i16_mad24
+; SI-CHECK-LABEL: @i16_mad24
; SI-CHECK: V_MAD_U32_U24 [[MAD:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}}
; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 16, [[MAD]]
; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 16, [[LSHL]]
@@ -44,7 +44,7 @@ entry:
ret void
}
-; EG-CHECK: @i8_mad24
+; EG-CHECK-LABEL: @i8_mad24
; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
@@ -55,7 +55,7 @@ entry:
; EG-CHECK: 24
; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
; EG-CHECK: 24
-; SI-CHECK: @i8_mad24
+; SI-CHECK-LABEL: @i8_mad24
; SI-CHECK: V_MAD_U32_U24 [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}}
; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 24, [[MUL]]
; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 24, [[LSHL]]
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
index 2244bbd663e..be6c12d4bf2 100644
--- a/test/CodeGen/R600/mul_uint24.ll
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; EG-CHECK: @u32_mul24
+; EG-CHECK-LABEL: @u32_mul24
; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: @u32_mul24
+; SI-CHECK-LABEL: @u32_mul24
; SI-CHECK: V_MUL_U32_U24
define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
@@ -18,7 +18,7 @@ entry:
ret void
}
-; EG-CHECK: @i16_mul24
+; EG-CHECK-LABEL: @i16_mul24
; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
; The order of A and B does not matter.
@@ -28,7 +28,7 @@ entry:
; EG-CHECK: 16
; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
; EG-CHECK: 16
-; SI-CHECK: @i16_mul24
+; SI-CHECK-LABEL: @i16_mul24
; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}}
; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 16, [[MUL]]
; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 16, [[LSHL]]
@@ -41,7 +41,7 @@ entry:
ret void
}
-; EG-CHECK: @i8_mul24
+; EG-CHECK-LABEL: @i8_mul24
; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
; The order of A and B does not matter.
@@ -51,7 +51,7 @@ entry:
; EG-CHECK: 24
; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
; EG-CHECK: 24
-; SI-CHECK: @i8_mul24
+; SI-CHECK-LABEL: @i8_mul24
; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}}
; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 24, [[MUL]]
; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 24, [[LSHL]]
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index fbf4fec9499..3ede6a5d39d 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -1,11 +1,11 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;EG-CHECK: @ashr_v2i32
+;EG-CHECK-LABEL: @ashr_v2i32
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @ashr_v2i32
+;SI-CHECK-LABEL: @ashr_v2i32
;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
@@ -18,13 +18,13 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
ret void
}
-;EG-CHECK: @ashr_v4i32
+;EG-CHECK-LABEL: @ashr_v4i32
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @ashr_v4i32
+;SI-CHECK-LABEL: @ashr_v4i32
;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
@@ -39,10 +39,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
ret void
}
-;EG-CHECK: @ashr_i64
+;EG-CHECK-LABEL: @ashr_i64
;EG-CHECK: ASHR
-;SI-CHECK: @ashr_i64
+;SI-CHECK-LABEL: @ashr_i64
;SI-CHECK: V_ASHR_I64
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry: