summaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
authorLuo Xionghu <xionghu.luo@intel.com>2014-11-04 06:42:35 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-11-04 19:04:54 +0800
commit79a0abf7e78d79ec339ea04265b2821f7303dfcb (patch)
treeeb884ccaa7ca02620a6c2f8e9a08f899a9d6d8af /backend
parent66a9a2480a2141a318e0076d69300839f3ef6864 (diff)
enable llvm intrinsic call bswap function.
This intrinsic call is currently implemented at the GEN IR level and should be optimized later. Signed-off-by: Luo Xionghu <xionghu.luo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'backend')
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp89
1 files changed, 89 insertions, 0 deletions
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index feb881d8..6ccedbdc 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2827,6 +2827,95 @@ namespace gbe
NOT_IMPLEMENTED;
break;
case Intrinsic::bswap:
+ {
+ // FIXME, this is an unoptimized version, could be optimized by
+ // leveraging GEN's register region/indirect address feature.
+ Type *llvmDstType = I.getType();
+ uint32_t elementSize = getTypeByteSize(unit, llvmDstType);
+
+ const ir::Register dst0 = this->getRegister(&I);
+ const ir::Register src0 = this->getRegister(I.getOperand(0));
+ switch(elementSize)
+ {
+ case 2:
+ {
+ ir::Type srcType = getUnsignedType(ctx, llvmDstType);
+ ir::Register tmp1 = ctx.reg(getFamily(srcType));
+ ir::Register tmp2 = ctx.reg(getFamily(srcType));
+
+ ir::Register regWMask = ctx.reg( ir::FAMILY_WORD );
+ const ir::ImmediateIndex wMask = ctx.newIntegerImmediate(0x00FF, ir::TYPE_S16);
+ ir::Register regShift = ctx.reg( ir::FAMILY_WORD );
+ const ir::ImmediateIndex shift = ctx.newIntegerImmediate(8, ir::TYPE_S16);
+
+ ctx.LOADI(ir::TYPE_S16, regWMask, wMask);
+ ctx.AND(srcType, tmp1, src0, regWMask);
+
+ ctx.LOADI(ir::TYPE_S16, regShift, shift);
+ ctx.SHL(srcType, tmp2, tmp1, regShift);
+
+ ir::Register tmp3 = ctx.reg( getFamily(srcType) );
+ ctx.SHR(srcType, tmp3, src0, regShift);
+
+ ctx.OR(srcType, dst0, tmp2, tmp3);
+ }
+ break;
+ case 4:
+ {
+ ir::Type srcType = getUnsignedType(ctx, llvmDstType);
+ ir::Register tmp1 = ctx.reg(getFamily(srcType));
+ ir::Register tmp2 = ctx.reg(getFamily(srcType));
+ ir::Register tmp3 = ctx.reg(getFamily(srcType));
+ ir::Register tmp4 = ctx.reg(getFamily(srcType));
+ ir::Register tmp5 = ctx.reg(getFamily(srcType));
+ ir::Register tmp6 = ctx.reg(getFamily(srcType));
+ ir::Register tmp7 = ctx.reg(getFamily(srcType));
+ ir::Register tmp8 = ctx.reg(getFamily(srcType));
+
+ ir::Register regDWMask = ctx.reg( ir::FAMILY_DWORD );
+ ir::Register regShift = ctx.reg( ir::FAMILY_DWORD );
+ ir::ImmediateIndex wMask = ctx.newIntegerImmediate(0x000000FF, ir::TYPE_S32);
+ ir::ImmediateIndex shift = ctx.newIntegerImmediate(24, ir::TYPE_S32);
+ ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
+ ctx.AND(srcType, tmp1, src0, regDWMask);
+ ctx.LOADI(ir::TYPE_S32, regShift, shift);
+ ctx.SHL(srcType, tmp2, tmp1, regShift);
+
+ wMask = ctx.newIntegerImmediate(0x0000FF00, ir::TYPE_S32);
+ shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
+ ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
+ ctx.AND(srcType, tmp3, src0, regDWMask);
+ ctx.LOADI(ir::TYPE_S32, regShift, shift);
+ ctx.SHL(srcType, tmp4, tmp3, regShift);
+
+ wMask = ctx.newIntegerImmediate(0x00FF0000, ir::TYPE_S32);
+ shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
+ ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
+ ctx.AND(srcType, tmp5, src0, regDWMask);
+ ctx.LOADI(ir::TYPE_S32, regShift, shift);
+ ctx.SHR(srcType, tmp6, tmp5, regShift);
+
+ wMask = ctx.newIntegerImmediate(0xFF000000, ir::TYPE_S32);
+ shift = ctx.newIntegerImmediate(24, ir::TYPE_S32);
+ ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
+ ctx.AND(srcType, tmp7, src0, regDWMask);
+ ctx.LOADI(ir::TYPE_S32, regShift, shift);
+ ctx.SHR(srcType, tmp8, tmp7, regShift);
+
+ ir::Register tmp9 = ctx.reg(getFamily(srcType));
+ ir::Register tmp10 = ctx.reg(getFamily(srcType));
+ ctx.OR(srcType, tmp9, tmp2, tmp4);
+ ctx.OR(srcType, tmp10, tmp6, tmp8);
+ ctx.OR(srcType, dst0, tmp9, tmp10);
+ }
+ break;
+ case 8:
+ NOT_IMPLEMENTED;
+ break;
+ default:
+ GBE_ASSERT(0);
+ }
+ }
break;
default: NOT_IMPLEMENTED;
}