summary refs log tree commit diff
diff options
context:
space:
mode:
author tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-31 15:12:20 +0000
committer tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-31 15:12:20 +0000
commit 761b5e4d7d62f7dc7ac5561f355a1eb0d7dd766e (patch)
tree 40d27ba19701bf184d2f5bc01e61bcdf79b573ae
parent 412f51ae015f94286d4efcdd39907e8d720fcb2b (diff)
R600: use specialised R600.store.pixel.* for fragment shader
Patch by: Vincent Lejeune
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@167120 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.cpp 1
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.h 1
-rw-r--r-- lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp 11
-rw-r--r-- lib/Target/AMDGPU/R600ISelLowering.cpp 66
-rw-r--r-- lib/Target/AMDGPU/R600Instructions.td 94
-rw-r--r-- lib/Target/AMDGPU/R600Intrinsics.td 8
-rw-r--r-- lib/Target/AMDGPU/R600MachineFunctionInfo.cpp 4
-rw-r--r-- lib/Target/AMDGPU/R600MachineFunctionInfo.h 2
8 files changed, 185 insertions, 2 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8021fc473d..57dcaac19a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -346,5 +346,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(INTERP)
NODE_NAME_CASE(INTERP_P0)
+ NODE_NAME_CASE(EXPORT)
}
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 2d8ed82c11..58d2287a34 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -121,6 +121,7 @@ enum
URECIP,
INTERP,
INTERP_P0,
+ EXPORT,
LAST_AMDGPU_ISD_NUMBER
};
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 35aa8327f6..5178157d6b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -101,7 +101,8 @@ enum InstrTypes {
INSTR_TEX,
INSTR_FC,
INSTR_NATIVE,
- INSTR_VTX
+ INSTR_VTX,
+ INSTR_EXPORT
};
enum FCInstr {
@@ -177,6 +178,14 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(InstWord2, OS);
break;
}
+ case AMDGPU::EG_Export:
+ case AMDGPU::R600_Export:
+ {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_EXPORT, OS);
+ Emit(Inst, OS);
+ break;
+ }
default:
EmitALUInstr(MI, Fixups, OS);
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 38d68f4d9f..75a2a90b31 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -278,6 +278,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
return BB;
}
+ case AMDGPU::EG_Export:
+ case AMDGPU::R600_Export:
+ {
+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
+ if (!EOP)
+ return BB;
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_Export)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(1);
+ break;
+ }
}
MI->eraseFromParent();
@@ -316,6 +335,53 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
}
+ case AMDGPUIntrinsic::R600_store_pixel_color: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Slot = RegIndex / 4;
+
+ SDNode **OutputsMap = MFI->Outputs;
+
+ if (!OutputsMap[Slot]) {
+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ Op.getDebugLoc(), MVT::v4f32,
+ DAG.getUNDEF(MVT::v4f32),
+ Op.getOperand(2),
+ DAG.getConstant(RegIndex % 4, MVT::i32));
+
+ const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32)};
+
+ SDValue Res = DAG.getNode(
+ AMDGPUISD::EXPORT,
+ Op.getDebugLoc(),
+ MVT::Other,
+ Ops, 8);
+ OutputsMap[Slot] = Res.getNode();
+ return Res;
+ }
+
+ SDNode *ExportInstruction = (SDNode *) OutputsMap[Slot] ;
+ SDValue PreviousVector = ExportInstruction->getOperand(1);
+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ Op.getDebugLoc(), MVT::v4f32,
+ PreviousVector,
+ Op.getOperand(2),
+ DAG.getConstant(RegIndex % 4, MVT::i32));
+
+ const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32)};
+
+ DAG.UpdateNodeOperands(ExportInstruction,
+ Ops, 8);
+
+ return Chain;
+ }
// default for switch(IntrinsicID)
default: break;
}
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 2f9ac0cd19..c8cf0123db 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -456,6 +456,78 @@ def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []>
def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_dummy),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src),
+ (i32 imm:$type), (i32 imm:$arraybase),
+ (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ imm:$sw_x, imm:$sw_y, imm:$sw_z, imm:$sw_w, cf_inst, 0)
+ >;
+}
+
+let isTerminator = 1, usesCustomInserter = 1 in {
+
+class ExportInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $src"),
+ []>
+{
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> src;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+
+ bits<1> eop;
+ bits<8> inst;
+
+ let Inst{12-0} = arraybase;
+ let Inst{14-13} = type;
+ let Inst{21-15} = src;
+ let Inst{22} = 0; // RW_REL
+ let Inst{29-23} = 0; // INDEX_GPR
+ let Inst{31-30} = 3; // ELEM_SIZE
+ let Inst{34-32} = sw_x;
+ let Inst{37-35} = sw_y;
+ let Inst{40-38} = sw_z;
+ let Inst{43-41} = sw_w;
+}
+
+} // End isTerminator = 1, usesCustomInserter = 1
+
+
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
@@ -894,6 +966,16 @@ let Predicates = [isR600] in {
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+ def R600_Export : ExportInst
+ {
+ let Inst{52-49} = 1; // BURST_COUNT
+ let Inst{53} = 1; // VALID_PIXEL_MODE
+ let Inst{54} = eop;
+ let Inst{62-55} = inst;
+ let Inst{63} = 1; // BARRIER
+ }
+
+ defm : ExportPattern<R600_Export, 39>;
}
// Helper pattern for normalizing inputs to triginomic instructions for R700+
@@ -1024,6 +1106,18 @@ let Predicates = [isEGorCayman] in {
def : Pat<(fp_to_uint R600_Reg32:$src0),
(FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+ def EG_Export : ExportInst
+ {
+ let Inst{51-48} = 1; // BURST_COUNT
+ let Inst{52} = 1; // VALID_PIXEL_MODE
+ let Inst{53} = eop;
+ let Inst{61-54} = inst;
+ let Inst{62} = 0; // MARK
+ let Inst{63} = 1; // BARRIER
+ }
+
+ defm : ExportPattern<EG_Export, 83>;
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index d661366d89..3a7e3ee0d0 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,4 +19,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_store_pixel_color :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_dummy :
+ Intrinsic<[], [], []>;
}
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index a31848efc9..49e662f340 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
HasLinearInterpolation(false),
HasPerspectiveInterpolation(false)
- { }
+ {
+ memset(Outputs, 0, sizeof(Outputs));
+ }
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
{
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 4444546ace..9f01379caf 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -16,6 +16,7 @@
#define R600MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include <vector>
namespace llvm {
@@ -25,6 +26,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
+ SDNode *Outputs[16];
bool HasLinearInterpolation;
bool HasPerspectiveInterpolation;