diff options
author | Guo Yejun <yejun.guo@intel.com> | 2014-04-18 13:42:16 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-04-22 11:22:07 +0800 |
commit | be73d25fc4bd3e68b94a37e524f7edf4aca53ce3 (patch) | |
tree | 66061a5755266d7e8f83c4ea59d5360fa15cf7b5 | |
parent | 8f0015e349c1428496a19236c1dd4132ef4554e5 (diff) |
support __gen_ocl_simd_any and __gen_ocl_simd_all
short __gen_ocl_simd_any(short x):
if x is non-zero in any of the active threads in the same SIMD,
the return value for all of these threads is non-zero; otherwise, zero is returned.
short __gen_ocl_simd_all(short x):
only if x is non-zero in all of the active threads in the same SIMD
is the return value for all of these threads non-zero; otherwise, zero is returned.
for example:
to check whether a particular value exists in a global buffer, one SIMD
can perform the search in parallel, and the whole SIMD can stop the task
once the value is found. The key kernel code looks like:
for(; ; ) {
...
if (__gen_ocl_simd_any(...))
break; // the whole SIMD stops searching
}
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 61 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 4 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 2 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 16 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 4 | ||||
-rwxr-xr-x | backend/src/ocl_stdlib.tmpl.h | 8 |
6 files changed, 95 insertions, 0 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 75a68754..bcbf1152 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1730,6 +1730,67 @@ namespace gbe case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; + case ir::OP_SIMD_ANY: + { + const GenRegister constZero = GenRegister::immuw(0);; + const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one); + const GenRegister flag01 = GenRegister::flag(0, 1); + + sel.push(); + int simdWidth = sel.curr.execWidth; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.execWidth = 1; + sel.curr.noMask = 1; + sel.MOV(flag01, constZero); + + sel.curr.execWidth = simdWidth; + sel.curr.noMask = 0; + + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero); + + if (sel.curr.execWidth == 16) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; + else if (sel.curr.execWidth == 8) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; + else + NOT_IMPLEMENTED; + sel.SEL(dst, regOne, constZero); + sel.pop(); + } + break; + case ir::OP_SIMD_ALL: + { + const GenRegister constZero = GenRegister::immuw(0); + const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one); + const GenRegister flag01 = GenRegister::flag(0, 1); + + sel.push(); + int simdWidth = sel.curr.execWidth; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.execWidth = 1; + sel.curr.noMask = 1; + sel.MOV(flag01, regOne); + + sel.curr.execWidth = simdWidth; + sel.curr.noMask = 0; + + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero); + + if (sel.curr.execWidth == 16) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H; + else if (sel.curr.execWidth == 8) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; + else + NOT_IMPLEMENTED; + 
sel.SEL(dst, regOne, constZero); + sel.pop(); + } + break; + default: NOT_SUPPORTED; } return true; diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 457b5b4c..582e22db 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -567,6 +567,10 @@ namespace ir { Instruction RCP(Type type, Register dst, Register src); /*! abs.type dst src */ Instruction ABS(Type type, Register dst, Register src); + /*! simd_all.type dst src */ + Instruction SIMD_ALL(Type type, Register dst, Register src); + /*! simd_any.type dst src */ + Instruction SIMD_ANY(Type type, Register dst, Register src); /*! log.type dst src */ Instruction LOG(Type type, Register dst, Register src); /*! exp.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index bebceff9..587517be 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -38,6 +38,8 @@ DECL_INSN(RNDD, UnaryInstruction) DECL_INSN(RNDE, UnaryInstruction) DECL_INSN(RNDU, UnaryInstruction) DECL_INSN(RNDZ, UnaryInstruction) +DECL_INSN(SIMD_ANY, UnaryInstruction) +DECL_INSN(SIMD_ALL, UnaryInstruction) DECL_INSN(POW, BinaryInstruction) DECL_INSN(MUL, BinaryInstruction) DECL_INSN(ADD, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b46e991b..6c2b45d4 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2282,6 +2282,8 @@ namespace gbe case GEN_OCL_SAT_CONV_F32_TO_U32: case GEN_OCL_CONV_F16_TO_F32: case GEN_OCL_CONV_F32_TO_F16: + case GEN_OCL_SIMD_ANY: + case GEN_OCL_SIMD_ALL: this->newRegister(&I); break; default: @@ -2422,6 +2424,20 @@ namespace gbe ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src); break; } + case GEN_OCL_SIMD_ALL: + { + const ir::Register src = this->getRegister(*AI); + const ir::Register dst = this->getRegister(&I); + ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src); + break; + } + case 
GEN_OCL_SIMD_ANY: + { + const ir::Register src = this->getRegister(*AI); + const ir::Register dst = this->getRegister(&I); + ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); + break; + } case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 7058a609..42362985 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -175,3 +175,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) + +// SIMD level function for internal usage +DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) +DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 22e3aecc..cd8b918e 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -638,6 +638,14 @@ INLINE_OVERLOADABLE ulong abs_diff (ulong x, ulong y) { return y > x ? (y - x) : (x - y); } + +///////////////////////////////////////////////////////////////////////////// +// SIMD level function +///////////////////////////////////////////////////////////////////////////// +short __gen_ocl_simd_any(short); +short __gen_ocl_simd_all(short); + + ///////////////////////////////////////////////////////////////////////////// // Work Items functions (see 6.11.1 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// |