summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp23
-rw-r--r--lib/Target/R600/R600Instructions.td11
-rw-r--r--lib/Target/R600/R600Intrinsics.td2
3 files changed, 34 insertions, 2 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 37272b568f..95bed0885c 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -271,8 +271,27 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
+ // Instruction is left unmodified if its not the last one of its type
+ bool isLastInstructionOfItsType;
+ {
+ isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ }
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
- if (!EOP)
+ if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
@@ -284,7 +303,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addImm(CfInst)
- .addImm(1);
+ .addImm(EOP);
break;
}
}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index b66f6cc8e0..1b3df999d4 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -592,6 +592,17 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
(ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
0, 1, 2, 3, cf_inst, 0)
>;
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 0, 1, 2, 3, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase,
+ imm:$type),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 0, 1, 2, 3, cf_inst, 0)
+ >;
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
index 06a734123f..1394a85461 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :