summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuo Xionghu <xionghu.luo@intel.com>2015-01-28 11:49:50 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-01-28 12:44:08 +0800
commit3fe24b30395224b904ba926ed71d9898135ef181 (patch)
tree493d1d634ffcd7313a391feb24a764e9512c580f
parenta25c520a9b340b71086eb82464489f02f0bcecd9 (diff)
reimplement the LZD instruction in backend.
handle the byte/word/dword/qword input accordingly. v2: fix build issue. v3: remove duplicate code and unnecessary code. v4: remove the inefficient qword implementation. Signed-off-by: Luo Xionghu <xionghu.luo@intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp37
1 files changed, 35 insertions, 2 deletions
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 86030b9a..b9eaf56b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3286,10 +3286,43 @@ error:
break;
case Intrinsic::ctlz:
{
- ir::Type srcType = getType(ctx, I.getType());
+ Type *llvmDstType = I.getType();
+ ir::Type dstType = getType(ctx, llvmDstType);
+ Type *llvmSrcType = I.getOperand(0)->getType();
+ ir::Type srcType = getUnsignedType(ctx, llvmSrcType);
+
+ // llvm.ctlz.i64 is lowered to two llvm.ctlz.i32 calls in ocl_clz.ll
+ GBE_ASSERT(srcType != ir::TYPE_U64);
+
const ir::Register dst = this->getRegister(&I);
const ir::Register src = this->getRegister(I.getOperand(0));
- ctx.ALU1(ir::OP_LZD, srcType, dst, src);
+ int imm_value = 0;
+ if(srcType == ir::TYPE_U16) {
+ imm_value = 16;
+ }else if(srcType == ir::TYPE_U8) {
+ imm_value = 24;
+ }
+
+ if(srcType == ir::TYPE_U16 || srcType == ir::TYPE_U8) {
+ ir::ImmediateIndex imm;
+ ir::Type tmpType = ir::TYPE_S32;
+ imm = ctx.newIntegerImmediate(imm_value, tmpType);
+ const ir::RegisterFamily family = getFamily(tmpType);
+ const ir::Register immReg = ctx.reg(family);
+ ctx.LOADI(ir::TYPE_S32, immReg, imm);
+
+ ir::Register tmp0 = ctx.reg(getFamily(tmpType));
+ ir::Register tmp1 = ctx.reg(getFamily(tmpType));
+ ir::Register tmp2 = ctx.reg(getFamily(tmpType));
+ ctx.CVT(tmpType, srcType, tmp0, src);
+ ctx.ALU1(ir::OP_LZD, tmpType, tmp1, tmp0);
+ ctx.SUB(tmpType, tmp2, tmp1, immReg);
+ ctx.CVT(dstType, tmpType, dst, tmp2);
+ }
+ else
+ {
+ ctx.ALU1(ir::OP_LZD, dstType, dst, src);
+ }
}
break;
case Intrinsic::fma: