summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Guo Yejun <yejun.guo@intel.com> 2015-05-12 16:27:49 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2015-05-12 17:20:00 +0800
commit: 7179227e123b028cb9e63eb8d9eb97af0fa4be38 (patch)
tree: 29004285e787d8722f186ea0be8e27c9eb67a88c
parent: 9eb83c751b070268d077c5b71a25fee6ca1676f9 (diff)
add sub group functions intel_sub_group_shuffle
floatN intel_sub_group_shuffle(floatN x, uint c); intN intel_sub_group_shuffle(intN x, uint c); uintN intel_sub_group_shuffle(uintN x, uint c); For every SIMD channel, the value of x held by the c-th channel of the SIMD is returned. The behavior is undefined if c is larger than simdsize - 1. Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- backend/src/backend/gen8_context.cpp | 23
-rw-r--r-- backend/src/backend/gen_context.cpp | 32
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp | 12
-rw-r--r-- backend/src/backend/gen_insn_selection.hxx | 1
-rw-r--r-- backend/src/ir/instruction.cpp | 1
-rw-r--r-- backend/src/ir/instruction.hpp | 2
-rw-r--r-- backend/src/ir/instruction.hxx | 1
-rw-r--r-- backend/src/libocl/script/ocl_simd.def | 3
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp | 9
9 files changed, 81 insertions, 3 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index bf5d9c7f..834a3bef 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -237,6 +237,9 @@ namespace gbe
}
void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const GenRegister src1 = ra->genReg(insn.src(1));
switch (insn.opcode) {
case SEL_OP_SEL_INT64:
case SEL_OP_I64AND:
@@ -247,14 +250,28 @@ namespace gbe
break;
case SEL_OP_UPSAMPLE_LONG:
{
- const GenRegister dst = ra->genReg(insn.dst(0));
- const GenRegister src0 = ra->genReg(insn.src(0));
- const GenRegister src1 = ra->genReg(insn.src(1));
p->MOV(dst, src0);
p->SHL(dst, dst, GenRegister::immud(32));
p->ADD(dst, dst, src1);
break;
}
+ case SEL_OP_SIMD_SHUFFLE:
+ {
+ uint32_t simd = p->curr.execWidth;
+ if (src1.file == GEN_IMMEDIATE_VALUE) {
+ uint32_t offset = src1.value.ud % simd;
+ GenRegister reg = GenRegister::suboffset(src0, offset);
+ p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+ } else {
+ uint32_t base = src0.nr * 32 + src0.subnr * 4;
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+ }
+ break;
+ }
default:
GenContext::emitBinaryInstruction(insn);
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 62fd5962..08a67fc3 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -597,6 +597,38 @@ namespace gbe
p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
}
break;
+ case SEL_OP_SIMD_SHUFFLE:
+ {
+ uint32_t simd = p->curr.execWidth;
+ if (src1.file == GEN_IMMEDIATE_VALUE) {
+ uint32_t offset = src1.value.ud % simd;
+ GenRegister reg = GenRegister::suboffset(src0, offset);
+ p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+ } else {
+ uint32_t base = src0.nr * 32 + src0.subnr * 4;
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
+
+ p->push();
+ if (simd == 8) {
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+ } else if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+
+ p->curr.quarterControl = 1;
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+ } else
+ NOT_IMPLEMENTED;
+ p->pop();
+ }
+ }
+ break;
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 39ccf7d0..dc4cdf4e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -523,6 +523,7 @@ namespace gbe
ALU1(RNDD)
ALU1(RNDU)
ALU2(MACH)
+ ALU2(SIMD_SHUFFLE)
ALU1(LZD)
ALU3(MAD)
ALU2WithTemp(MUL_HI)
@@ -2662,6 +2663,17 @@ namespace gbe
case OP_UPSAMPLE_LONG:
sel.UPSAMPLE_LONG(dst, src0, src1);
break;
+ case OP_SIMD_SHUFFLE:
+ {
+ if (src1.file == GEN_IMMEDIATE_VALUE)
+ sel.SIMD_SHUFFLE(dst, src0, src1);
+ else {
+ GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD));
+ sel.SHL(shiftL, src1, GenRegister::immud(0x2));
+ sel.SIMD_SHUFFLE(dst, src0, shiftL);
+ }
+ }
+ break;
default: NOT_IMPLEMENTED;
}
sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 09f5aaf8..79f2ce1c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -26,6 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
DECL_SELECTION_IR(RSR, BinaryInstruction)
DECL_SELECTION_IR(RSL, BinaryInstruction)
DECL_SELECTION_IR(ASR, BinaryInstruction)
+DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 7723b90c..c38c4275 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1677,6 +1677,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
DECL_EMIT_FUNCTION(RHADD)
DECL_EMIT_FUNCTION(I64HADD)
DECL_EMIT_FUNCTION(I64RHADD)
+ DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
#undef DECL_EMIT_FUNCTION
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 436bfd28..e1bd05be 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -686,6 +686,8 @@ namespace ir {
Instruction GT(Type type, Register dst, Register src0, Register src1);
/*! ord.type dst src0 src1 */
Instruction ORD(Type type, Register dst, Register src0, Register src1);
+ /*! sub_group_shuffle.type dst src0 src1 */
+ Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1);
/*! BITCAST.{dstType <- srcType} dst src */
Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, uint8_t dstNum, uint8_t srcNum);
/*! cvt.{dstType <- srcType} dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 3f08a923..76269bdd 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -59,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction)
DECL_INSN(OR, BinaryInstruction)
DECL_INSN(XOR, BinaryInstruction)
DECL_INSN(AND, BinaryInstruction)
+DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
DECL_INSN(SEL, SelectInstruction)
DECL_INSN(EQ, CompareInstruction)
DECL_INSN(NE, CompareInstruction)
diff --git a/backend/src/libocl/script/ocl_simd.def b/backend/src/libocl/script/ocl_simd.def
index 80115462..e26243ea 100644
--- a/backend/src/libocl/script/ocl_simd.def
+++ b/backend/src/libocl/script/ocl_simd.def
@@ -1 +1,4 @@
##simd level functions
+floatn intel_sub_group_shuffle(floatn x, uint c)
+intn intel_sub_group_shuffle(intn x, uint c)
+uintn intel_sub_group_shuffle(uintn x, uint c)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index f46bc793..f5743ba3 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2807,6 +2807,7 @@ namespace gbe
case GEN_OCL_READ_TM:
case GEN_OCL_REGION:
case GEN_OCL_SIMD_ID:
+ case GEN_OCL_SIMD_SHUFFLE:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -3468,6 +3469,14 @@ namespace gbe
ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
break;
}
+ case GEN_OCL_SIMD_SHUFFLE:
+ {
+ const ir::Register src0 = this->getRegister(*AI); ++AI;
+ const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
default: break;
}
}