diff options
author | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-31 15:12:20 +0000 |
---|---|---|
committer | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-31 15:12:20 +0000 |
commit | 761b5e4d7d62f7dc7ac5561f355a1eb0d7dd766e (patch) | |
tree | 40d27ba19701bf184d2f5bc01e61bcdf79b573ae | |
parent | 412f51ae015f94286d4efcdd39907e8d720fcb2b (diff) |
R600: use specialised R600.store.pixel.* for fragment shader
Patch by: Vincent Lejeune
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@167120 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 11 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 66 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 94 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Intrinsics.td | 8 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.h | 2 |
8 files changed, 185 insertions, 2 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 8021fc473d..57dcaac19a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -346,5 +346,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const NODE_NAME_CASE(URECIP) NODE_NAME_CASE(INTERP) NODE_NAME_CASE(INTERP_P0) + NODE_NAME_CASE(EXPORT) } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 2d8ed82c11..58d2287a34 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -121,6 +121,7 @@ enum URECIP, INTERP, INTERP_P0, + EXPORT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 35aa8327f6..5178157d6b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -101,7 +101,8 @@ enum InstrTypes { INSTR_TEX, INSTR_FC, INSTR_NATIVE, - INSTR_VTX + INSTR_VTX, + INSTR_EXPORT }; enum FCInstr { @@ -177,6 +178,14 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(InstWord2, OS); break; } + case AMDGPU::EG_Export: + case AMDGPU::R600_Export: + { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_EXPORT, OS); + Emit(Inst, OS); + break; + } default: EmitALUInstr(MI, Fixups, OS); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 38d68f4d9f..75a2a90b31 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -278,6 +278,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( return BB; } + case AMDGPU::EG_Export: + case AMDGPU::R600_Export: + { + bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; + if (!EOP) + return BB; + unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_Export)? 84 : 40; + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)) + .addImm(CfInst) + .addImm(1); + break; + } } MI->eraseFromParent(); @@ -316,6 +335,53 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } + case AMDGPUIntrinsic::R600_store_pixel_color: { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Slot = RegIndex / 4; + + SDNode **OutputsMap = MFI->Outputs; + + if (!OutputsMap[Slot]) { + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, + Op.getDebugLoc(), MVT::v4f32, + DAG.getUNDEF(MVT::v4f32), + Op.getOperand(2), + DAG.getConstant(RegIndex % 4, MVT::i32)); + + const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32), + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32), + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32), + DAG.getConstant(3, MVT::i32)}; + + SDValue Res = DAG.getNode( + AMDGPUISD::EXPORT, + Op.getDebugLoc(), + MVT::Other, + Ops, 8); + OutputsMap[Slot] = Res.getNode(); + return Res; + } + + SDNode *ExportInstruction = (SDNode *) OutputsMap[Slot] ; + SDValue PreviousVector = ExportInstruction->getOperand(1); + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, + Op.getDebugLoc(), MVT::v4f32, + PreviousVector, + Op.getOperand(2), + DAG.getConstant(RegIndex % 4, MVT::i32)); + + const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(0, MVT::i32), + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32), + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32), + DAG.getConstant(3, MVT::i32)}; + + DAG.UpdateNodeOperands(ExportInstruction, + Ops, 8); + + return Chain; + } // default for switch(IntrinsicID) default: break; } diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 2f9ac0cd19..c8cf0123db 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -456,6 +456,78 @@ def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; +//===----------------------------------------------------------------------===// +// Export Instructions +//===----------------------------------------------------------------------===// + +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, + [SDNPHasChain, SDNPSideEffect]>; + +multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { + def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), + (ExportInst + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + 0, 61, 0, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), + (ExportInst + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + 0, 61, 7, 0, 7, 7, cf_inst, 0) + >; + + def : Pat<(int_R600_store_pixel_dummy), + (ExportInst + (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), + (i32 imm:$type), (i32 imm:$arraybase), + (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + imm:$sw_x, imm:$sw_y, imm:$sw_z, imm:$sw_w, cf_inst, 0) + >; +} + +let isTerminator = 1, usesCustomInserter = 1 in { + +class ExportInst : InstR600ISA<( + outs), + (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase, + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, + i32imm:$eop), + !strconcat("EXPORT", " $src"), + []> +{ + bits<13> arraybase; + bits<2> type; + bits<7> src; + + bits<3> sw_x; + bits<3> sw_y; + bits<3> sw_z; + bits<3> sw_w; + + bits<1> eop; + bits<8> inst; + + let Inst{12-0} = arraybase; + let Inst{14-13} = type; + let Inst{21-15} = src; + let Inst{22} = 0; // RW_REL + let Inst{29-23} = 0; // INDEX_GPR + let Inst{31-30} = 3; // ELEM_SIZE + let Inst{34-32} = sw_x; + let Inst{37-35} = sw_y; + let Inst{40-38} = sw_z; + let Inst{43-41} = sw_w; +} + +} // End isTerminator = 1, usesCustomInserter = 1 + + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -894,6 +966,16 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; + def R600_Export : ExportInst + { + let Inst{52-49} = 1; // BURST_COUNT + let Inst{53} = 1; // VALID_PIXEL_MODE + let Inst{54} = eop; + let Inst{62-55} = inst; + let Inst{63} = 1; // BARRIER + } + + defm : ExportPattern<R600_Export, 39>; } // Helper pattern for normalizing inputs to triginomic instructions for R700+ @@ -1024,6 +1106,18 @@ let Predicates = [isEGorCayman] in { def : Pat<(fp_to_uint R600_Reg32:$src0), (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; + def EG_Export : ExportInst + { + let Inst{51-48} = 1; // BURST_COUNT + let Inst{52} = 1; // VALID_PIXEL_MODE + let Inst{53} = eop; + let Inst{61-54} = inst; + let Inst{62} = 0; // MARK + let Inst{63} = 1; // BARRIER + } + + defm : ExportPattern<EG_Export, 83>; + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td index d661366d89..3a7e3ee0d0 100644 --- a/lib/Target/AMDGPU/R600Intrinsics.td +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -19,4 +19,12 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; def int_R600_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_store_pixel_color : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; + def int_R600_store_pixel_depth : + Intrinsic<[], [llvm_float_ty], []>; + def int_R600_store_pixel_stencil : + Intrinsic<[], [llvm_float_ty], []>; + def int_R600_store_pixel_dummy : + Intrinsic<[], [], []>; } diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp index a31848efc9..49e662f340 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo(), HasLinearInterpolation(false), HasPerspectiveInterpolation(false) - { } + { + memset(Outputs, 0, sizeof(Outputs)); + } unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h index 4444546ace..9f01379caf 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -16,6 +16,7 @@ #define R600MACHINEFUNCTIONINFO_H #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include <vector> namespace llvm { @@ -25,6 +26,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector<unsigned> ReservedRegs; + SDNode *Outputs[16]; bool HasLinearInterpolation; bool HasPerspectiveInterpolation; |