summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- backend/src/backend/context.cpp 6
-rw-r--r-- backend/src/backend/gen_program.cpp 28
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 24
3 files changed, 45 insertions, 13 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd172..c9500c87 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -340,7 +340,6 @@ namespace gbe
///////////////////////////////////////////////////////////////////////////
// Generic Context (shared by the simulator and the HW context)
///////////////////////////////////////////////////////////////////////////
- IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
Context::Context(const ir::Unit &unit, const std::string &name) :
unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL), useDWLabel(false)
@@ -361,10 +360,7 @@ namespace gbe
}
void Context::startNewCG(uint32_t simdWidth) {
- if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
- this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
- else
- this->simdWidth = simdWidth;
+ this->simdWidth = simdWidth;
GBE_SAFE_DELETE(this->registerAllocator);
GBE_SAFE_DELETE(this->scratchAllocator);
GBE_ASSERT(dag != NULL && liveness != NULL);
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 383f2f27..cfb23fed 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -59,6 +59,7 @@
#include <clang/CodeGen/CodeGenAction.h>
#endif
+#include "sys/cvar.hpp"
#include <cstring>
#include <sstream>
#include <memory>
@@ -138,17 +139,24 @@ namespace gbe {
}
/*! We must avoid spilling at all cost with Gen */
- static const struct CodeGenStrategy {
+ struct CodeGenStrategy {
uint32_t simdWidth;
uint32_t reservedSpillRegs;
bool limitRegisterPressure;
- } codeGenStrategy[] = {
+ };
+ static const struct CodeGenStrategy codeGenStrategyDefault[] = {
{16, 0, false},
{8, 0, false},
{8, 8, false},
{8, 16, false},
};
+ static const struct CodeGenStrategy codeGenStrategySimd16[] = {
+ {16, 0, false},
+ {16, 8, false},
+ {16, 16, false},
+ };
+ IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name,
bool relaxMath, int profiling) {
#ifdef GBE_COMPILER_AVAILABLE
@@ -156,19 +164,23 @@ namespace gbe {
// when the function already provides the simd width we need to use (i.e.
// non zero)
const ir::Function *fn = unit.getFunction(name);
+ const struct CodeGenStrategy* codeGenStrategy = codeGenStrategyDefault;
if(fn == NULL)
GBE_ASSERT(0);
- uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]);
+ uint32_t codeGenNum = sizeof(codeGenStrategyDefault) / sizeof(codeGenStrategyDefault[0]);
uint32_t codeGen = 0;
GenContext *ctx = NULL;
- if (fn->getSimdWidth() == 8) {
+ if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) {
+ GBE_ASSERTM(0, "unsupported SIMD width!");
+ }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) {
codeGen = 1;
- } else if (fn->getSimdWidth() == 16) {
- codeGenNum = 1;
- } else if (fn->getSimdWidth() == 0) {
+ } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){
+ codeGenStrategy = codeGenStrategySimd16;
+ codeGenNum = sizeof(codeGenStrategySimd16) / sizeof(codeGenStrategySimd16[0]);
+ } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) {
codeGen = 0;
} else
- GBE_ASSERT(0);
+ GBE_ASSERTM(0, "unsupported SIMD width!");
Kernel *kernel = NULL;
// Stop when compilation is successful
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 31b8bf27..96c81b92 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2124,6 +2124,7 @@ namespace gbe
// Loop over the kernel metadatas to set the required work group size.
size_t reqd_wg_sz[3] = {0, 0, 0};
size_t hint_wg_sz[3] = {0, 0, 0};
+ size_t reqd_sg_sz = 0;
ir::FunctionArgument::InfoFromLLVM llvmInfo;
MDNode *addrSpaceNode = NULL;
MDNode *typeNameNode = NULL;
@@ -2219,6 +2220,27 @@ namespace gbe
functionAttributes += buffer;
functionAttributes += " ";
}
+ if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) {
+ GBE_ASSERT(attrNode->getNumOperands() == 1);
+ ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
+ GBE_ASSERT(sz);
+ reqd_sg_sz = sz->getZExtValue();
+ if(!(reqd_sg_sz == 8 || reqd_sg_sz == 16)){
+ F.getContext().emitError("Required sub group size is illegal!");
+ ctx.getUnit().setValid(false);
+ return;
+ }
+ functionAttributes += "intel_reqd_sub_group_size";
+ std::stringstream param;
+ char buffer[100] = {0};
+ param << "(";
+ param << reqd_sg_sz;
+ param << ")";
+ param >> buffer;
+ functionAttributes += buffer;
+ functionAttributes += " ";
+ }
+
#else
/* First find the meta data belong to this function. */
MDNode *node = getKernelFunctionMetadata(&F);
@@ -2344,6 +2366,8 @@ namespace gbe
#endif /* LLVM 3.9 Function metadata */
ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]);
+ if (reqd_sg_sz)
+ ctx.setSimdWidth(reqd_sg_sz);
ctx.getFunction().setFunctionAttributes(functionAttributes);
// Loop over the arguments and output registers for them