diff options
field | value | date |
---|---|---|
author | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-19 21:10:14 +0000 |
committer | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-19 21:10:14 +0000 |
commit | 345cadb517bf60943d7b58fbef982799d31679c4 (patch) | |
tree | 9509bdb1d1ec7911a977cef87301d6811e79ce13 | |
parent | def88f36a1ea9263b6397539dbd720aea97784e7 (diff) | |
R600: Use native operands for CUBE*, DOT4* instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@166334 91177308-0d34-0410-b5e6-96231b3b80d8
mode | file | lines changed |
---|---|---|
-rw-r--r-- | lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 52 |
-rw-r--r-- | lib/Target/AMDGPU/R600InstrInfo.cpp | 4 |
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 58 |
3 files changed, 46 insertions, 68 deletions
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index cd012d4884..fabdb4dd6d 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -290,47 +290,51 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { } // Determine the correct destination registers; - unsigned Flags = 0; + bool Mask = false; + bool NotLast = true; if (IsCube) { unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); DstReg = TRI.getSubReg(DstReg, SubRegIndex); } else { // Mask the write if the original instruction does not write to // the current Channel. - Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); + Mask = (Chan != TRI.getHWRegChan(DstReg)); unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); } // Set the IsLast bit - Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0); + NotLast = (Chan != 3 ); // Add the new instruction - unsigned Opcode; - if (IsCube) { - switch (MI.getOpcode()) { - case AMDGPU::CUBE_r600_pseudo: - Opcode = AMDGPU::CUBE_r600_real; - break; - case AMDGPU::CUBE_eg_pseudo: - Opcode = AMDGPU::CUBE_eg_real; - break; - default: - assert(!"Unknown CUBE instruction"); - Opcode = 0; - break; - } - } else { - Opcode = MI.getOpcode(); + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AMDGPU::CUBE_r600_pseudo: + Opcode = AMDGPU::CUBE_r600_real; + break; + case AMDGPU::CUBE_eg_pseudo: + Opcode = AMDGPU::CUBE_eg_real; + break; + case AMDGPU::DOT4_r600_pseudo: + Opcode = AMDGPU::DOT4_r600_real; + break; + case AMDGPU::DOT4_eg_pseudo: + Opcode = AMDGPU::DOT4_eg_real; + break; + default: + break; } + MachineInstr *NewMI = - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) - .addReg(Src0) - .addReg(Src1) - .addImm(0); // Flag + TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); NewMI->setIsInsideBundle(Chan != 0); - 
TII->addFlag(NewMI, 0, Flags); + if (Mask) { + TII->addFlag(NewMI, 0, MO_FLAG_MASK); + } + if (NotLast) { + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } } MI.eraseFromParent(); } diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index d1649345ce..499b706670 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -121,8 +121,8 @@ bool R600InstrInfo::isReductionOp(unsigned Opcode) const { switch(Opcode) { default: return false; - case AMDGPU::DOT4_r600: - case AMDGPU::DOT4_eg: + case AMDGPU::DOT4_r600_pseudo: + case AMDGPU::DOT4_eg_pseudo: return true; } } diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 961e395daa..11c29f04f5 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -320,11 +320,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, ins, asm, pattern, - itin>{ - bits<7> dst; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } + itin>; class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : @@ -764,23 +760,16 @@ class CNDGE_Common <bits<5> inst> : R600_3OP < COND_GE))] >; -class DOT4_Common <bits<11> inst> : R600_REDUCTION < - inst, - (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), - "DOT4 $dst $src0, $src1", - [] - > { - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let FlagOperandIdx = 3; -} +multiclass DOT4_Common <bits<11> inst> { -class DOT4_Pat <Instruction dot4> : Pat < - (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1), - (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) ->; + def _pseudo : R600_REDUCTION <inst, + (ins R600_Reg128:$src0, R600_Reg128:$src1), + "DOT4 $dst $src0, $src1", + [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + >; + + def _real : R600_2OP <inst, "DOT4", []>; +} let mayLoad = 0, mayStore = 0, hasSideEffects = 
0 in { multiclass CUBE_Common <bits<11> inst> { @@ -792,24 +781,11 @@ multiclass CUBE_Common <bits<11> inst> { "CUBE $dst $src", [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], VecALU - >; - - def _real : InstR600 < - inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), - "CUBE $dst, $src0, $src1", - [], VecALU - >{ - let FlagOperandIdx = 3; - bits<7> dst; - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let Inst{49-39} = inst; - let Inst{59-53} = dst; + > { + let isPseudo = 1; } + + def _real : R600_2OP <inst, "CUBE", []>; } } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 @@ -917,8 +893,7 @@ let Predicates = [isR600] in { def CNDE_r600 : CNDE_Common<0x18>; def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; - def DOT4_r600 : DOT4_Common<0x50>; - def : DOT4_Pat <DOT4_r600>; + defm DOT4_r600 : DOT4_Common<0x50>; defm CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; @@ -1037,8 +1012,7 @@ let Predicates = [isEGorCayman] in { def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; - def DOT4_eg : DOT4_Common<0xBE>; - def : DOT4_Pat <DOT4_eg>; + defm DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; |