diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-11-18 14:06:45 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-11-25 13:22:16 +0800 |
commit | e21b1d92dd811e207fad3d5923d585001d7b0a42 (patch) | |
tree | 1a7d5598a31b9e83a85adaac31669c7e6da1047b | |
parent | 7e89f3b8e0506f0d4184a52165971ba7452f65ae (diff) |
Add WorkGroup functions to Gen IR logic in llvm_gen_backend.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 79 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 18 |
2 files changed, 96 insertions, 1 deletion
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 33ab8711..36c59134 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -470,6 +470,7 @@ namespace gbe /*! legacyMode is for hardware before BDW, * which do not support stateless memory access */ bool legacyMode; + int32_t wgBroadcastSLM; public: static char ID; explicit GenWriter(ir::Unit &unit) @@ -480,7 +481,8 @@ namespace gbe LI(0), TheModule(0), btiBase(BTI_RESERVED_NUM), - legacyMode(true) + legacyMode(true), + wgBroadcastSLM(-1) { #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); @@ -644,6 +646,8 @@ namespace gbe void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode, ir::Type = ir::TYPE_FLOAT); // Emit unary instructions from gen native function void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); + // Emit workgroup instructions + void emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode); uint8_t appendSampler(CallSite::arg_iterator AI); uint8_t getImageID(CallInst &I); @@ -3602,6 +3606,18 @@ namespace gbe case GEN_OCL_SIMD_ID: case GEN_OCL_SIMD_SHUFFLE: case GEN_OCL_VME: + case GEN_OCL_WORK_GROUP_ALL: + case GEN_OCL_WORK_GROUP_ANY: + case GEN_OCL_WORK_GROUP_BROADCAST: + case GEN_OCL_WORK_GROUP_REDUCE_ADD: + case GEN_OCL_WORK_GROUP_REDUCE_MAX: + case GEN_OCL_WORK_GROUP_REDUCE_MIN: + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_ADD: + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MAX: + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MIN: + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD: + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX: + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -3685,6 +3701,45 @@ namespace gbe } } + void GenWriter::emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode) { + if (wgBroadcastSLM < 0 && opcode == ir::WORKGROUP_OP_BROADCAST) { + 
ir::Function &f = ctx.getFunction(); + uint32_t mapSize = 8; + f.setUseSLM(true); + uint32_t oldSlm = f.getSLMSize(); + f.setSLMSize(oldSlm + mapSize); + wgBroadcastSLM = oldSlm; + GBE_ASSERT(wgBroadcastSLM >= 0); + } + + CallSite::arg_iterator AI = CS.arg_begin(); + CallSite::arg_iterator AE = CS.arg_end(); + GBE_ASSERT(AI != AE); + + if (opcode == ir::WORKGROUP_OP_ALL || opcode == ir::WORKGROUP_OP_ANY) { + GBE_ASSERT(getType(ctx, (*AI)->getType()) == ir::TYPE_S32); + const ir::Register src = this->getRegister(*(AI++)); + const ir::Tuple srcTuple = ctx.arrayTuple(&src, 1); + ctx.WORKGROUP(opcode, (uint32_t)0, getRegister(&I), srcTuple, 1, ir::TYPE_S32); + } else if (opcode == ir::WORKGROUP_OP_BROADCAST) { + int argNum = CS.arg_size(); + ir::Register src[argNum]; + for (int i = 0; i < argNum; i++) { + src[i] = this->getRegister(*(AI++)); + } + const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], argNum); + ctx.WORKGROUP(ir::WORKGROUP_OP_BROADCAST, (uint32_t)wgBroadcastSLM, getRegister(&I), srcTuple, argNum, + getType(ctx, (*CS.arg_begin())->getType())); + } else { + const ir::Register src = this->getRegister(*(AI++)); + const ir::Tuple srcTuple = ctx.arrayTuple(&src, 1); + ctx.WORKGROUP(opcode, (uint32_t)0, getRegister(&I), srcTuple, 1, + getType(ctx, (*CS.arg_begin())->getType())); + } + + GBE_ASSERT(AI == AE); + } + /* append a new sampler. should be called before any reference to * a sampler_t value. 
*/ uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) { @@ -4406,6 +4461,28 @@ namespace gbe ctx.WAIT(); break; } + case GEN_OCL_WORK_GROUP_ALL: this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_ALL); break; + case GEN_OCL_WORK_GROUP_ANY: this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_ANY); break; + case GEN_OCL_WORK_GROUP_BROADCAST: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_BROADCAST); break; + case GEN_OCL_WORK_GROUP_REDUCE_ADD: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_ADD); break; + case GEN_OCL_WORK_GROUP_REDUCE_MAX: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_MAX); break; + case GEN_OCL_WORK_GROUP_REDUCE_MIN: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_MIN); break; + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_ADD: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_ADD); break; + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MAX: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_MAX); break; + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MIN: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_MIN); break; + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_ADD); break; + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MAX); break; + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN: + this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MIN); break; default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index d0e36144..1855e6fe 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -180,3 +180,21 @@ DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling) // debug wait function DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait) + +// work group function +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_BROADCAST, __gen_ocl_work_group_broadcast) + 
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_ADD, __gen_ocl_work_group_reduce_add) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_MAX, __gen_ocl_work_group_reduce_max) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_MIN, __gen_ocl_work_group_reduce_min) + +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_ADD, __gen_ocl_work_group_scan_exclusive_add) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_MAX, __gen_ocl_work_group_scan_exclusive_max) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_MIN, __gen_ocl_work_group_scan_exclusive_min) + +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_ADD, __gen_ocl_work_group_scan_inclusive_add) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MAX, __gen_ocl_work_group_scan_inclusive_max) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_inclusive_min) + +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all) +DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any) |