summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Junyan He <junyan.he@linux.intel.com> 2015-11-18 14:06:45 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2015-11-25 13:22:16 +0800
commit: e21b1d92dd811e207fad3d5923d585001d7b0a42 (patch)
tree: 1a7d5598a31b9e83a85adaac31669c7e6da1047b
parent: 7e89f3b8e0506f0d4184a52165971ba7452f65ae (diff)
Add WorkGroup functions to Gen IR logic in llvm_gen_backend.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 79
-rw-r--r-- backend/src/llvm/llvm_gen_ocl_function.hxx 18
2 files changed, 96 insertions, 1 deletions
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 33ab8711..36c59134 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -470,6 +470,7 @@ namespace gbe
/*! legacyMode is for hardware before BDW,
* which do not support stateless memory access */
bool legacyMode;
+ int32_t wgBroadcastSLM;
public:
static char ID;
explicit GenWriter(ir::Unit &unit)
@@ -480,7 +481,8 @@ namespace gbe
LI(0),
TheModule(0),
btiBase(BTI_RESERVED_NUM),
- legacyMode(true)
+ legacyMode(true),
+ wgBroadcastSLM(-1)
{
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
@@ -644,6 +646,8 @@ namespace gbe
void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode, ir::Type = ir::TYPE_FLOAT);
// Emit unary instructions from gen native function
void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
+ // Emit workgroup instructions
+ void emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);
uint8_t appendSampler(CallSite::arg_iterator AI);
uint8_t getImageID(CallInst &I);
@@ -3602,6 +3606,18 @@ namespace gbe
case GEN_OCL_SIMD_ID:
case GEN_OCL_SIMD_SHUFFLE:
case GEN_OCL_VME:
+ case GEN_OCL_WORK_GROUP_ALL:
+ case GEN_OCL_WORK_GROUP_ANY:
+ case GEN_OCL_WORK_GROUP_BROADCAST:
+ case GEN_OCL_WORK_GROUP_REDUCE_ADD:
+ case GEN_OCL_WORK_GROUP_REDUCE_MAX:
+ case GEN_OCL_WORK_GROUP_REDUCE_MIN:
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_ADD:
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MAX:
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MIN:
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD:
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX:
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -3685,6 +3701,45 @@ namespace gbe
}
}
+ /*! Emit one ctx.WORKGROUP instruction for a work-group built-in call.
+  *  The destination is always the register bound to the call I.
+  *   - ALL / ANY : one source, asserted to be TYPE_S32, emitted as TYPE_S32.
+  *   - BROADCAST : every call argument becomes a source, the offset kept in
+  *                 wgBroadcastSLM is passed along, and the type is taken
+  *                 from the first argument.
+  *   - otherwise : one source, typed after the first argument.
+  *  The first BROADCAST handled while wgBroadcastSLM is still negative marks
+  *  the current function as using SLM, enlarges its SLM size by 8 and
+  *  remembers the previous size in wgBroadcastSLM.
+  *  NOTE(review): wgBroadcastSLM belongs to the writer, not to the function;
+  *  if one writer handles several functions, later ones would reuse the
+  *  stored offset without reserving SLM themselves -- please confirm. */
+ void GenWriter::emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode) {
+   const bool isBroadcast = (opcode == ir::WORKGROUP_OP_BROADCAST);
+
+   // Reserve the broadcast slot only once, on the first broadcast seen.
+   if (isBroadcast && wgBroadcastSLM < 0) {
+     ir::Function &fn = ctx.getFunction();
+     uint32_t slotSize = 8;
+     fn.setUseSLM(true);
+     uint32_t slmBefore = fn.getSLMSize();
+     fn.setSLMSize(slmBefore + slotSize);
+     wgBroadcastSLM = slmBefore;
+     // Fires if the previous SLM size does not fit in the signed member.
+     GBE_ASSERT(wgBroadcastSLM >= 0);
+   }
+
+   CallSite::arg_iterator argIt = CS.arg_begin();
+   CallSite::arg_iterator argEnd = CS.arg_end();
+   // Every work-group built-in takes at least one argument.
+   GBE_ASSERT(argIt != argEnd);
+
+   switch (opcode) {
+     case ir::WORKGROUP_OP_ALL:
+     case ir::WORKGROUP_OP_ANY: {
+       GBE_ASSERT(getType(ctx, (*argIt)->getType()) == ir::TYPE_S32);
+       const ir::Register pred = this->getRegister(*argIt);
+       ++argIt;
+       const ir::Tuple operands = ctx.arrayTuple(&pred, 1);
+       ctx.WORKGROUP(opcode, static_cast<uint32_t>(0), getRegister(&I), operands, 1, ir::TYPE_S32);
+       break;
+     }
+     case ir::WORKGROUP_OP_BROADCAST: {
+       int operandCount = CS.arg_size();
+       // NOTE(review): runtime-sized array is a compiler extension, not ISO C++.
+       ir::Register operandRegs[operandCount];
+       for (int idx = 0; idx < operandCount; ++idx, ++argIt)
+         operandRegs[idx] = this->getRegister(*argIt);
+       const ir::Tuple operands = ctx.arrayTuple(&operandRegs[0], operandCount);
+       ctx.WORKGROUP(ir::WORKGROUP_OP_BROADCAST, static_cast<uint32_t>(wgBroadcastSLM),
+                     getRegister(&I), operands, operandCount,
+                     getType(ctx, (*CS.arg_begin())->getType()));
+       break;
+     }
+     default: {
+       // All remaining opcodes take a single source operand.
+       const ir::Register value = this->getRegister(*argIt);
+       ++argIt;
+       const ir::Tuple operands = ctx.arrayTuple(&value, 1);
+       ctx.WORKGROUP(opcode, static_cast<uint32_t>(0), getRegister(&I), operands, 1,
+                     getType(ctx, (*CS.arg_begin())->getType()));
+       break;
+     }
+   }
+
+   // Each branch must have consumed every argument of the call.
+   GBE_ASSERT(argIt == argEnd);
+ }
+
/* append a new sampler. should be called before any reference to
* a sampler_t value. */
uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) {
@@ -4406,6 +4461,28 @@ namespace gbe
ctx.WAIT();
break;
}
+ case GEN_OCL_WORK_GROUP_ALL: this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_ALL); break;
+ case GEN_OCL_WORK_GROUP_ANY: this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_ANY); break;
+ case GEN_OCL_WORK_GROUP_BROADCAST:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_BROADCAST); break;
+ case GEN_OCL_WORK_GROUP_REDUCE_ADD:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_ADD); break;
+ case GEN_OCL_WORK_GROUP_REDUCE_MAX:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_MAX); break;
+ case GEN_OCL_WORK_GROUP_REDUCE_MIN:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_REDUCE_MIN); break;
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_ADD:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_ADD); break;
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MAX:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_MAX); break;
+ case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MIN:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_EXCLUSIVE_MIN); break;
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_ADD); break;
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MAX); break;
+ case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN:
+ this->emitWorkGroupInst(I, CS, ir::WORKGROUP_OP_INCLUSIVE_MIN); break;
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index d0e36144..1855e6fe 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -180,3 +180,21 @@ DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)
// debug wait function
DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait)
+
+// Work-group built-ins. The first argument is the suffix of the GEN_OCL_* label handled in llvm_gen_backend.cpp; the second is the __gen_ocl_* function name.
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_BROADCAST, __gen_ocl_work_group_broadcast)
+// Reductions: add / max / min.
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_ADD, __gen_ocl_work_group_reduce_add)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_MAX, __gen_ocl_work_group_reduce_max)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_REDUCE_MIN, __gen_ocl_work_group_reduce_min)
+// Exclusive scans: add / max / min.
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_ADD, __gen_ocl_work_group_scan_exclusive_add)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_MAX, __gen_ocl_work_group_scan_exclusive_max)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_EXCLUSIVE_MIN, __gen_ocl_work_group_scan_exclusive_min)
+// Inclusive scans: add / max / min.
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_ADD, __gen_ocl_work_group_scan_inclusive_add)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MAX, __gen_ocl_work_group_scan_inclusive_max)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_inclusive_min)
+// Predicates: all / any.
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all)
+DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any)