diff options
author | Luo Xionghu <xionghu.luo@intel.com> | 2015-03-10 13:59:48 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-03-12 17:27:09 +0800 |
commit | 1303fc4fb2e291f9624184f4706546358b469665 (patch) | |
tree | e31ec7b859382ab08765efeaaf0ea4494e864ece | |
parent | 2330a7651b34e2f63c7d4cb893f6c33fd52e87e8 (diff) |
Replace pow with the LLVM intrinsic.
Translate native pow to llvm.pow for the fast path.
Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math.tmpl.cl | 2 |
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 18 |
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 1 |
3 files changed, 10 insertions, 11 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index d9e677b9..da5b9a95 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -30,7 +30,7 @@ CONST float __gen_ocl_sqrt(float x) __asm("llvm.sqrt" ".f32");
 PURE CONST float __gen_ocl_rsqrt(float x);
 CONST float __gen_ocl_log(float x) __asm("llvm.log2" ".f32");
 CONST float __gen_ocl_exp(float x) __asm("llvm.exp2" ".f32");
-PURE CONST float __gen_ocl_pow(float x, float y);
+PURE CONST float __gen_ocl_pow(float x, float y) __asm("llvm.pow" ".f32");
 PURE CONST float __gen_ocl_rcp(float x);
 CONST float __gen_ocl_rndz(float x) __asm("llvm.trunc" ".f32");
 CONST float __gen_ocl_rnde(float x) __asm("llvm.rint" ".f32");
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d9ac6e00..773300bf 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2618,6 +2618,7 @@ namespace gbe
 case Intrinsic::cos:
 case Intrinsic::log2:
 case Intrinsic::exp2:
+ case Intrinsic::pow:
 this->newRegister(&I);
 break;
 default:
@@ -2672,7 +2673,6 @@ namespace gbe
 case GEN_OCL_FBL:
 case GEN_OCL_CBIT:
 case GEN_OCL_RSQ:
- case GEN_OCL_POW:
 case GEN_OCL_RCP:
 case GEN_OCL_ABS:
 case GEN_OCL_GET_IMAGE_WIDTH:
@@ -3005,6 +3005,14 @@ namespace gbe
 case Intrinsic::exp2: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
 case Intrinsic::bswap:
 this->emitUnaryCallInst(I,CS,ir::OP_BSWAP, getUnsignedType(ctx, I.getType())); break;
+ case Intrinsic::pow:
+ {
+ const ir::Register src0 = this->getRegister(*AI); ++AI;
+ const ir::Register src1 = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
+ break;
+ }
 default: NOT_IMPLEMENTED;
 }
 } else {
@@ -3021,14 +3029,6 @@ namespace gbe
 #endif /* GBE_DEBUG */
 
 switch (genIntrinsicID) {
- case GEN_OCL_POW:
- {
- const ir::Register src0 = this->getRegister(*AI); ++AI;
- const ir::Register src1 = this->getRegister(*AI);
- const ir::Register dst = this->getRegister(&I);
- ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
- break;
- }
 case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
 case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
 case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT, getUnsignedType(ctx, (*AI)->getType())); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 5f5451c3..9536a3c4 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -20,7 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
 
 // Math function
 DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
-DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow)
 DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp)
 DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax)
 DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin)