diff options
author | Guo Yejun <yejun.guo@intel.com> | 2015-05-12 16:27:49 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-05-12 17:20:00 +0800 |
commit | 7179227e123b028cb9e63eb8d9eb97af0fa4be38 (patch) | |
tree | 29004285e787d8722f186ea0be8e27c9eb67a88c /backend | |
parent | 9eb83c751b070268d077c5b71a25fee6ca1676f9 (diff) |
Add sub-group functions intel_sub_group_shuffle:
floatN intel_sub_group_shuffle(floatN x, uint c);
intN intel_sub_group_shuffle(intN x, uint c);
uintN intel_sub_group_shuffle(uintN x, uint c);
For every SIMD channel, the value of x held by the c-th channel of the SIMD is returned.
The behavior is undefined if c is larger than simdsize - 1.
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 23 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 32 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 12 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hxx | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 2 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 1 | ||||
-rw-r--r-- | backend/src/libocl/script/ocl_simd.def | 3 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 9 |
9 files changed, 81 insertions, 3 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index bf5d9c7f..834a3bef 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -237,6 +237,9 @@ namespace gbe } void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) { + const GenRegister dst = ra->genReg(insn.dst(0)); + const GenRegister src0 = ra->genReg(insn.src(0)); + const GenRegister src1 = ra->genReg(insn.src(1)); switch (insn.opcode) { case SEL_OP_SEL_INT64: case SEL_OP_I64AND: @@ -247,14 +250,28 @@ namespace gbe break; case SEL_OP_UPSAMPLE_LONG: { - const GenRegister dst = ra->genReg(insn.dst(0)); - const GenRegister src0 = ra->genReg(insn.src(0)); - const GenRegister src1 = ra->genReg(insn.src(1)); p->MOV(dst, src0); p->SHL(dst, dst, GenRegister::immud(32)); p->ADD(dst, dst, src1); break; } + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + uint32_t offset = src1.value.ud % simd; + GenRegister reg = GenRegister::suboffset(src0, offset); + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } + break; + } default: GenContext::emitBinaryInstruction(insn); } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 62fd5962..08a67fc3 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -597,6 +597,38 @@ namespace gbe p->MOV(xdst.bottom_half(), xsrc1.bottom_half()); } break; + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + 
uint32_t offset = src1.value.ud % simd; + GenRegister reg = GenRegister::suboffset(src0, offset); + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + + p->push(); + if (simd == 8) { + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } else if (simd == 16) { + p->curr.execWidth = 8; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + + p->curr.quarterControl = 1; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + p->MOV(GenRegister::offset(dst, 1, 0), indirect); + } else + NOT_IMPLEMENTED; + p->pop(); + } + } + break; default: NOT_IMPLEMENTED; } } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 39ccf7d0..dc4cdf4e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -523,6 +523,7 @@ namespace gbe ALU1(RNDD) ALU1(RNDU) ALU2(MACH) + ALU2(SIMD_SHUFFLE) ALU1(LZD) ALU3(MAD) ALU2WithTemp(MUL_HI) @@ -2662,6 +2663,17 @@ namespace gbe case OP_UPSAMPLE_LONG: sel.UPSAMPLE_LONG(dst, src0, src1); break; + case OP_SIMD_SHUFFLE: + { + if (src1.file == GEN_IMMEDIATE_VALUE) + sel.SIMD_SHUFFLE(dst, src0, src1); + else { + GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD)); + sel.SHL(shiftL, src1, GenRegister::immud(0x2)); + sel.SIMD_SHUFFLE(dst, src0, shiftL); + } + } + break; default: NOT_IMPLEMENTED; } sel.pop(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx 
index 09f5aaf8..79f2ce1c 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -26,6 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction) DECL_SELECTION_IR(RSR, BinaryInstruction) DECL_SELECTION_IR(RSL, BinaryInstruction) DECL_SELECTION_IR(ASR, BinaryInstruction) +DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction) DECL_SELECTION_IR(I64SHR, I64ShiftInstruction) DECL_SELECTION_IR(I64SHL, I64ShiftInstruction) DECL_SELECTION_IR(I64ASR, I64ShiftInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 7723b90c..c38c4275 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1677,6 +1677,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex DECL_EMIT_FUNCTION(RHADD) DECL_EMIT_FUNCTION(I64HADD) DECL_EMIT_FUNCTION(I64RHADD) + DECL_EMIT_FUNCTION(SIMD_SHUFFLE) #undef DECL_EMIT_FUNCTION diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 436bfd28..e1bd05be 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -686,6 +686,8 @@ namespace ir { Instruction GT(Type type, Register dst, Register src0, Register src1); /*! ord.type dst src0 src1 */ Instruction ORD(Type type, Register dst, Register src0, Register src1); + /*! sub_group_shuffle.type dst src0 src1 */ + Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1); /*! BITCAST.{dstType <- srcType} dst src */ Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, uint8_t dstNum, uint8_t srcNum); /*! 
cvt.{dstType <- srcType} dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 3f08a923..76269bdd 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -59,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR, BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND, BinaryInstruction) +DECL_INSN(SIMD_SHUFFLE, BinaryInstruction) DECL_INSN(SEL, SelectInstruction) DECL_INSN(EQ, CompareInstruction) DECL_INSN(NE, CompareInstruction) diff --git a/backend/src/libocl/script/ocl_simd.def b/backend/src/libocl/script/ocl_simd.def index 80115462..e26243ea 100644 --- a/backend/src/libocl/script/ocl_simd.def +++ b/backend/src/libocl/script/ocl_simd.def @@ -1 +1,4 @@ ##simd level functions +floatn intel_sub_group_shuffle(floatn x, uint c) +intn intel_sub_group_shuffle(intn x, uint c) +uintn intel_sub_group_shuffle(uintn x, uint c) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index f46bc793..f5743ba3 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2807,6 +2807,7 @@ namespace gbe case GEN_OCL_READ_TM: case GEN_OCL_REGION: case GEN_OCL_SIMD_ID: + case GEN_OCL_SIMD_SHUFFLE: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -3468,6 +3469,14 @@ namespace gbe ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); break; } + case GEN_OCL_SIMD_SHUFFLE: + { + const ir::Register src0 = this->getRegister(*AI); ++AI; + const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); + break; + } default: break; } } |