summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPan Xiuli <xiuli.pan@intel.com>2015-11-25 11:00:03 +0800
committerYang Rong <rong.r.yang@intel.com>2015-12-10 16:52:43 +0800
commit8a667122c06e6f430c84adb1de3c27d77ebfd55e (patch)
tree9681dc8aba0cc46b110280cd664d55fa9460c97b
parent7bb0da6cf4937aaa5300c7181634517902244f19 (diff)
Backend: refine mix with hardware lrp function
The EU supports an lrp function that is similar to mix, but only for float, so only the float-related mix is refined to use lrp. Results of mix may now differ slightly because it is computed with lrp. V2: Rebase the patch. Signed-off-by: Pan Xiuli <xiuli.pan@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/backend/gen/gen_mesa_disasm.c1
-rw-r--r--backend/src/backend/gen_context.cpp1
-rw-r--r--backend/src/backend/gen_encoder.cpp1
-rw-r--r--backend/src/backend/gen_encoder.hpp1
-rw-r--r--backend/src/backend/gen_insn_selection.cpp6
-rw-r--r--backend/src/backend/gen_insn_selection.hxx1
-rw-r--r--backend/src/ir/context.hpp1
-rw-r--r--backend/src/ir/instruction.cpp4
-rw-r--r--backend/src/ir/instruction.hpp2
-rw-r--r--backend/src/ir/instruction.hxx1
-rw-r--r--backend/src/libocl/tmpl/ocl_common.tmpl.cl3
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp13
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx3
13 files changed, 37 insertions, 1 deletions
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 3198da79..94bae485 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -77,6 +77,7 @@ static const struct {
[GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+ [GEN_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
[GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b78706a5..6d0e8a53 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1801,6 +1801,7 @@ namespace gbe
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
+ case SEL_OP_LRP: p->LRP(dst, src0, src1, src2); break;
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b8ea736c..d8eedb80 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -780,6 +780,7 @@ namespace gbe
ALU2(PLN)
ALU2(MACH)
ALU3(MAD)
+ ALU3(LRP)
// ALU2(BRC)
// ALU1(ENDIF)
// ALU1(IF)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 503577fd..8cc0e4d1 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -128,6 +128,7 @@ namespace gbe
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
+ ALU3(LRP)
ALU2(BRC)
ALU1(BRD)
#undef ALU1
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd7b2ebd..edd50456 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -550,6 +550,7 @@ namespace gbe
ALU2(MACH)
ALU1(LZD)
ALU3(MAD)
+ ALU3(LRP)
ALU2WithTemp(MUL_HI)
ALU1(FBH)
ALU1(FBL)
@@ -5225,6 +5226,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.MAD(dst, src2, src0, src1);
break;
}
+ case OP_LRP:
+ {
+ sel.LRP(dst, src0, src1, src2);
+ break;
+ }
default:
NOT_IMPLEMENTED;
}
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index bc09522f..5611a4f8 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -43,6 +43,7 @@ DECL_SELECTION_IR(CMP, CompareInstruction)
DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
DECL_SELECTION_IR(MAD, TernaryInstruction)
+DECL_SELECTION_IR(LRP, TernaryInstruction)
DECL_SELECTION_IR(JMPI, JumpInstruction)
DECL_SELECTION_IR(EOT, EotInstruction)
DECL_SELECTION_IR(INDIRECT_MOVE, IndirectMoveInstruction)
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 65864daf..b95741fa 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -175,6 +175,7 @@ namespace ir {
DECL_THREE_SRC_INSN(SEL);
DECL_THREE_SRC_INSN(I64MADSAT);
DECL_THREE_SRC_INSN(MAD);
+ DECL_THREE_SRC_INSN(LRP);
#undef DECL_THREE_SRC_INSN
/*! For all nullary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 734e66b7..c0695239 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2181,6 +2181,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))
Instruction MAD(Type type, Register dst, Tuple src) {
return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
}
+
+ Instruction LRP(Type type, Register dst, Tuple src) {
+ return internal::TernaryInstruction(OP_LRP, type, dst, src).convert();
+ }
// All compare functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 0bf15f22..ec7b9b58 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -679,6 +679,8 @@ namespace ir {
Instruction I64MADSAT(Type type, Register dst, Tuple src);
/*! mad.type dst src */
Instruction MAD(Type type, Register dst, Tuple src);
+ /*! lrp.type dst src */
+ Instruction LRP(Type type, Register dst, Tuple src);
/*! upsample_short.type dst src */
Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
/*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 27c71594..498861c0 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -103,6 +103,7 @@ DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
DECL_INSN(I64MADSAT, TernaryInstruction)
DECL_INSN(MAD, TernaryInstruction)
+DECL_INSN(LRP, TernaryInstruction)
DECL_INSN(IF, BranchInstruction)
DECL_INSN(ENDIF, BranchInstruction)
DECL_INSN(ELSE, BranchInstruction)
diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
index b6b09b52..0b6a8fb6 100644
--- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
@@ -24,6 +24,7 @@
/////////////////////////////////////////////////////////////////////////////
PURE CONST OVERLOADABLE float __gen_ocl_fmax(float a, float b);
PURE CONST OVERLOADABLE float __gen_ocl_fmin(float a, float b);
+PURE CONST OVERLOADABLE float __gen_ocl_lrp(float a, float b, float c);
OVERLOADABLE float step(float edge, float x) {
return x < edge ? 0.0 : 1.0;
@@ -36,7 +37,7 @@ OVERLOADABLE float min(float a, float b) {
return __gen_ocl_fmin(a, b);
}
OVERLOADABLE float mix(float x, float y, float a) {
- return x + (y-x)*a;
+ return __gen_ocl_lrp(a,y,x); // lrp takes its operands in a different order than mix
}
OVERLOADABLE float clamp(float v, float l, float u) {
return max(min(v, u), l);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 8d18c4ac..c335b43c 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3722,6 +3722,7 @@ namespace gbe
case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD:
case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX:
case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN:
+ case GEN_OCL_LRP:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -4587,6 +4588,18 @@ namespace gbe
this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MAX); break;
case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN:
this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MIN); break;
+ case GEN_OCL_LRP:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ GBE_ASSERT(AI != AE);
+ const ir::Register src0 = this->getRegister(*(AI++));
+ GBE_ASSERT(AI != AE);
+ const ir::Register src1 = this->getRegister(*(AI++));
+ GBE_ASSERT(AI != AE);
+ const ir::Register src2 = this->getRegister(*(AI++));
+ ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 1855e6fe..8023744b 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -198,3 +198,6 @@ DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_
DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all)
DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any)
+
+// common function
+DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)