diff options
author | Pan Xiuli <xiuli.pan@intel.com> | 2015-11-25 11:00:03 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-12-10 16:52:43 +0800 |
commit | 8a667122c06e6f430c84adb1de3c27d77ebfd55e (patch) | |
tree | 9681dc8aba0cc46b110280cd664d55fa9460c97b | |
parent | 7bb0da6cf4937aaa5300c7181634517902244f19 (diff) |
Backend: refine mix with hardware lrp function
The EU supports an lrp function that is similar to mix, but only
for float, so refine only the float-related mix with lrp.
There will be small errors with mix now that it uses lrp.
V2:
Rebase the patch
Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/backend/gen/gen_mesa_disasm.c | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.cpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 6 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hxx | 1 | ||||
-rw-r--r-- | backend/src/ir/context.hpp | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 4 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 2 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 1 | ||||
-rw-r--r-- | backend/src/libocl/tmpl/ocl_common.tmpl.cl | 3 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 13 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 3 |
13 files changed, 37 insertions, 1 deletions
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 3198da79..94bae485 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -77,6 +77,7 @@ static const struct { [GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, + [GEN_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 }, [GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index b78706a5..6d0e8a53 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1801,6 +1801,7 @@ namespace gbe const GenRegister src2 = ra->genReg(insn.src(2)); switch (insn.opcode) { case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break; + case SEL_OP_LRP: p->LRP(dst, src0, src1, src2); break; default: NOT_IMPLEMENTED; } } diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index b8ea736c..d8eedb80 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -780,6 +780,7 @@ namespace gbe ALU2(PLN) ALU2(MACH) ALU3(MAD) + ALU3(LRP) // ALU2(BRC) // ALU1(ENDIF) // ALU1(IF) diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 503577fd..8cc0e4d1 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -128,6 +128,7 @@ namespace gbe ALU2(LINE) ALU2(PLN) ALU3(MAD) + ALU3(LRP) ALU2(BRC) ALU1(BRD) #undef ALU1 diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index cd7b2ebd..edd50456 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ 
b/backend/src/backend/gen_insn_selection.cpp @@ -550,6 +550,7 @@ namespace gbe ALU2(MACH) ALU1(LZD) ALU3(MAD) + ALU3(LRP) ALU2WithTemp(MUL_HI) ALU1(FBH) ALU1(FBL) @@ -5225,6 +5226,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.MAD(dst, src2, src0, src1); break; } + case OP_LRP: + { + sel.LRP(dst, src0, src1, src2); + break; + } default: NOT_IMPLEMENTED; } diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index bc09522f..5611a4f8 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -43,6 +43,7 @@ DECL_SELECTION_IR(CMP, CompareInstruction) DECL_SELECTION_IR(I64CMP, I64CompareInstruction) DECL_SELECTION_IR(SEL_CMP, CompareInstruction) DECL_SELECTION_IR(MAD, TernaryInstruction) +DECL_SELECTION_IR(LRP, TernaryInstruction) DECL_SELECTION_IR(JMPI, JumpInstruction) DECL_SELECTION_IR(EOT, EotInstruction) DECL_SELECTION_IR(INDIRECT_MOVE, IndirectMoveInstruction) diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 65864daf..b95741fa 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -175,6 +175,7 @@ namespace ir { DECL_THREE_SRC_INSN(SEL); DECL_THREE_SRC_INSN(I64MADSAT); DECL_THREE_SRC_INSN(MAD); + DECL_THREE_SRC_INSN(LRP); #undef DECL_THREE_SRC_INSN /*! 
For all nullary functions */ diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 734e66b7..c0695239 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -2181,6 +2181,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg)) Instruction MAD(Type type, Register dst, Tuple src) { return internal::TernaryInstruction(OP_MAD, type, dst, src).convert(); } + + Instruction LRP(Type type, Register dst, Tuple src) { + return internal::TernaryInstruction(OP_LRP, type, dst, src).convert(); + } // All compare functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \ diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 0bf15f22..ec7b9b58 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -679,6 +679,8 @@ namespace ir { Instruction I64MADSAT(Type type, Register dst, Tuple src); /*! mad.type dst src */ Instruction MAD(Type type, Register dst, Tuple src); + /*! lrp.type dst src */ + Instruction LRP(Type type, Register dst, Tuple src); /*! upsample_short.type dst src */ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! 
upsample_int.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 27c71594..498861c0 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -103,6 +103,7 @@ DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) DECL_INSN(I64MADSAT, TernaryInstruction) DECL_INSN(MAD, TernaryInstruction) +DECL_INSN(LRP, TernaryInstruction) DECL_INSN(IF, BranchInstruction) DECL_INSN(ENDIF, BranchInstruction) DECL_INSN(ELSE, BranchInstruction) diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl index b6b09b52..0b6a8fb6 100644 --- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl @@ -24,6 +24,7 @@ ///////////////////////////////////////////////////////////////////////////// PURE CONST OVERLOADABLE float __gen_ocl_fmax(float a, float b); PURE CONST OVERLOADABLE float __gen_ocl_fmin(float a, float b); +PURE CONST OVERLOADABLE float __gen_ocl_lrp(float a, float b, float c); OVERLOADABLE float step(float edge, float x) { return x < edge ? 
0.0 : 1.0; @@ -36,7 +37,7 @@ OVERLOADABLE float min(float a, float b) { return __gen_ocl_fmin(a, b); } OVERLOADABLE float mix(float x, float y, float a) { - return x + (y-x)*a; + return __gen_ocl_lrp(a,y,x); //The lrp using a different order with mix } OVERLOADABLE float clamp(float v, float l, float u) { return max(min(v, u), l); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 8d18c4ac..c335b43c 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3722,6 +3722,7 @@ namespace gbe case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD: case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX: case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN: + case GEN_OCL_LRP: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -4587,6 +4588,18 @@ namespace gbe this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MAX); break; case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN: this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MIN); break; + case GEN_OCL_LRP: + { + const ir::Register dst = this->getRegister(&I); + GBE_ASSERT(AI != AE); + const ir::Register src0 = this->getRegister(*(AI++)); + GBE_ASSERT(AI != AE); + const ir::Register src1 = this->getRegister(*(AI++)); + GBE_ASSERT(AI != AE); + const ir::Register src2 = this->getRegister(*(AI++)); + ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2); + break; + } default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 1855e6fe..8023744b 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -198,3 +198,6 @@ DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_ DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all) DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any) + +// common function +DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp) |