summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luo Xionghu <xionghu.luo@intel.com> 2015-03-10 13:59:48 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2015-03-12 17:27:09 +0800
commit: 1303fc4fb2e291f9624184f4706546358b469665 (patch)
tree: e31ec7b859382ab08765efeaaf0ea4494e864ece
parent: 2330a7651b34e2f63c7d4cb893f6c33fd52e87e8 (diff)
replace pow with llvm intrinsic.
Translate native pow to llvm.pow for the fast path.

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math.tmpl.cl 2
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 18
-rw-r--r-- backend/src/llvm/llvm_gen_ocl_function.hxx 1
3 files changed, 10 insertions, 11 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index d9e677b9..da5b9a95 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -30,7 +30,7 @@ CONST float __gen_ocl_sqrt(float x) __asm("llvm.sqrt" ".f32");
PURE CONST float __gen_ocl_rsqrt(float x);
CONST float __gen_ocl_log(float x) __asm("llvm.log2" ".f32");
CONST float __gen_ocl_exp(float x) __asm("llvm.exp2" ".f32");
-PURE CONST float __gen_ocl_pow(float x, float y);
+PURE CONST float __gen_ocl_pow(float x, float y) __asm("llvm.pow" ".f32");
PURE CONST float __gen_ocl_rcp(float x);
CONST float __gen_ocl_rndz(float x) __asm("llvm.trunc" ".f32");
CONST float __gen_ocl_rnde(float x) __asm("llvm.rint" ".f32");
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d9ac6e00..773300bf 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2618,6 +2618,7 @@ namespace gbe
case Intrinsic::cos:
case Intrinsic::log2:
case Intrinsic::exp2:
+ case Intrinsic::pow:
this->newRegister(&I);
break;
default:
@@ -2672,7 +2673,6 @@ namespace gbe
case GEN_OCL_FBL:
case GEN_OCL_CBIT:
case GEN_OCL_RSQ:
- case GEN_OCL_POW:
case GEN_OCL_RCP:
case GEN_OCL_ABS:
case GEN_OCL_GET_IMAGE_WIDTH:
@@ -3005,6 +3005,14 @@ namespace gbe
case Intrinsic::exp2: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
case Intrinsic::bswap:
this->emitUnaryCallInst(I,CS,ir::OP_BSWAP, getUnsignedType(ctx, I.getType())); break;
+ case Intrinsic::pow:
+ {
+ const ir::Register src0 = this->getRegister(*AI); ++AI;
+ const ir::Register src1 = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
+ break;
+ }
default: NOT_IMPLEMENTED;
}
} else {
@@ -3021,14 +3029,6 @@ namespace gbe
#endif /* GBE_DEBUG */
switch (genIntrinsicID) {
- case GEN_OCL_POW:
- {
- const ir::Register src0 = this->getRegister(*AI); ++AI;
- const ir::Register src1 = this->getRegister(*AI);
- const ir::Register dst = this->getRegister(&I);
- ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
- break;
- }
case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT, getUnsignedType(ctx, (*AI)->getType())); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 5f5451c3..9536a3c4 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -20,7 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
// Math function
DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
-DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow)
DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp)
DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax)
DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin)