summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuo Yejun <yejun.guo@intel.com>2014-04-18 13:42:16 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-04-22 11:22:07 +0800
commitbe73d25fc4bd3e68b94a37e524f7edf4aca53ce3 (patch)
tree66061a5755266d7e8f83c4ea59d5360fa15cf7b5
parent8f0015e349c1428496a19236c1dd4132ef4554e5 (diff)
support __gen_ocl_simd_any and __gen_ocl_simd_all
short __gen_ocl_simd_any(short x): if x is nonzero in any of the active threads of the same SIMD, the return value for all these threads is nonzero; otherwise, zero is returned. short __gen_ocl_simd_all(short x): only if x is nonzero in all of the active threads of the same SIMD is the return value for all these threads nonzero; otherwise, zero is returned. For example: to check whether a specific value exists in a global buffer, use one SIMD to do the search in parallel; the whole SIMD can stop the task once the value is found. The key kernel code looks like: for(; ; ) { ... if (__gen_ocl_simd_any(...)) break; //the whole SIMD stops searching } Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/backend/gen_insn_selection.cpp61
-rw-r--r--backend/src/ir/instruction.hpp4
-rw-r--r--backend/src/ir/instruction.hxx2
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp16
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx4
-rwxr-xr-xbackend/src/ocl_stdlib.tmpl.h8
6 files changed, 95 insertions, 0 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 75a68754..bcbf1152 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1730,6 +1730,67 @@ namespace gbe
case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
+ case ir::OP_SIMD_ANY:
+ {
+ const GenRegister constZero = GenRegister::immuw(0);;
+ const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one);
+ const GenRegister flag01 = GenRegister::flag(0, 1);
+
+ sel.push();
+ int simdWidth = sel.curr.execWidth;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(flag01, constZero);
+
+ sel.curr.execWidth = simdWidth;
+ sel.curr.noMask = 0;
+
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero);
+
+ if (sel.curr.execWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else if (sel.curr.execWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else
+ NOT_IMPLEMENTED;
+ sel.SEL(dst, regOne, constZero);
+ sel.pop();
+ }
+ break;
+ case ir::OP_SIMD_ALL:
+ {
+ const GenRegister constZero = GenRegister::immuw(0);
+ const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one);
+ const GenRegister flag01 = GenRegister::flag(0, 1);
+
+ sel.push();
+ int simdWidth = sel.curr.execWidth;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(flag01, regOne);
+
+ sel.curr.execWidth = simdWidth;
+ sel.curr.noMask = 0;
+
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero);
+
+ if (sel.curr.execWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+ else if (sel.curr.execWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+ else
+ NOT_IMPLEMENTED;
+ sel.SEL(dst, regOne, constZero);
+ sel.pop();
+ }
+ break;
+
default: NOT_SUPPORTED;
}
return true;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 457b5b4c..582e22db 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -567,6 +567,10 @@ namespace ir {
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
Instruction ABS(Type type, Register dst, Register src);
+ /*! simd_all.type dst src */
+ Instruction SIMD_ALL(Type type, Register dst, Register src);
+ /*! simd_any.type dst src */
+ Instruction SIMD_ANY(Type type, Register dst, Register src);
/*! log.type dst src */
Instruction LOG(Type type, Register dst, Register src);
/*! exp.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index bebceff9..587517be 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -38,6 +38,8 @@ DECL_INSN(RNDD, UnaryInstruction)
DECL_INSN(RNDE, UnaryInstruction)
DECL_INSN(RNDU, UnaryInstruction)
DECL_INSN(RNDZ, UnaryInstruction)
+DECL_INSN(SIMD_ANY, UnaryInstruction)
+DECL_INSN(SIMD_ALL, UnaryInstruction)
DECL_INSN(POW, BinaryInstruction)
DECL_INSN(MUL, BinaryInstruction)
DECL_INSN(ADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index b46e991b..6c2b45d4 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2282,6 +2282,8 @@ namespace gbe
case GEN_OCL_SAT_CONV_F32_TO_U32:
case GEN_OCL_CONV_F16_TO_F32:
case GEN_OCL_CONV_F32_TO_F16:
+ case GEN_OCL_SIMD_ANY:
+ case GEN_OCL_SIMD_ALL:
this->newRegister(&I);
break;
default:
@@ -2422,6 +2424,20 @@ namespace gbe
ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src);
break;
}
+ case GEN_OCL_SIMD_ALL:
+ {
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src);
+ break;
+ }
+ case GEN_OCL_SIMD_ANY:
+ {
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
+ break;
+ }
case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7058a609..42362985 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -175,3 +175,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
+
+// SIMD level function for internal usage
+DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
+DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 22e3aecc..cd8b918e 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -638,6 +638,14 @@ INLINE_OVERLOADABLE ulong abs_diff (ulong x, ulong y) {
return y > x ? (y - x) : (x - y);
}
+
+/////////////////////////////////////////////////////////////////////////////
+// SIMD level function
+/////////////////////////////////////////////////////////////////////////////
+short __gen_ocl_simd_any(short);
+short __gen_ocl_simd_all(short);
+
+
/////////////////////////////////////////////////////////////////////////////
// Work Items functions (see 6.11.1 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////