3 files changed, 45 insertions, 13 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd172..c9500c87 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -340,7 +340,6 @@ namespace gbe
   ///////////////////////////////////////////////////////////////////////////
   // Generic Context (shared by the simulator and the HW context)
   ///////////////////////////////////////////////////////////////////////////
-  IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
 
   Context::Context(const ir::Unit &unit, const std::string &name) :
     unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL), useDWLabel(false)
@@ -361,10 +360,7 @@ namespace gbe
   }
 
   void Context::startNewCG(uint32_t simdWidth) {
-    if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
-      this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
-    else
-      this->simdWidth = simdWidth;
+    this->simdWidth = simdWidth;
     GBE_SAFE_DELETE(this->registerAllocator);
     GBE_SAFE_DELETE(this->scratchAllocator);
     GBE_ASSERT(dag != NULL && liveness != NULL);
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 383f2f27..cfb23fed 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -59,6 +59,7 @@
 #include <clang/CodeGen/CodeGenAction.h>
 #endif
 
+#include "sys/cvar.hpp"
 #include <cstring>
 #include <sstream>
 #include <memory>
@@ -138,17 +139,24 @@ namespace gbe {
   }
 
   /*! We must avoid spilling at all cost with Gen */
-  static const struct CodeGenStrategy {
+  struct CodeGenStrategy {
     uint32_t simdWidth;
     uint32_t reservedSpillRegs;
     bool limitRegisterPressure;
-  } codeGenStrategy[] = {
+  };
+  static const struct CodeGenStrategy codeGenStrategyDefault[] = {
     {16, 0, false},
     {8, 0, false},
     {8, 8, false},
     {8, 16, false},
   };
+  static const struct CodeGenStrategy codeGenStrategySimd16[] = {
+    {16, 0, false},
+    {16, 8, false},
+    {16, 16, false},
+  };
 
+  IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
   Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name,
                                     bool relaxMath, int profiling) {
 #ifdef GBE_COMPILER_AVAILABLE
@@ -156,19 +164,23 @@ namespace gbe {
     // when the function already provides the simd width we need to use (i.e.
     // non zero)
     const ir::Function *fn = unit.getFunction(name);
+    const struct CodeGenStrategy* codeGenStrategy = codeGenStrategyDefault;
     if(fn == NULL)
       GBE_ASSERT(0);
-    uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]);
+    uint32_t codeGenNum = sizeof(codeGenStrategyDefault) / sizeof(codeGenStrategyDefault[0]);
     uint32_t codeGen = 0;
     GenContext *ctx = NULL;
-    if (fn->getSimdWidth() == 8) {
+    if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) {
+      GBE_ASSERTM(0, "unsupported SIMD width!");
+    }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) {
       codeGen = 1;
-    } else if (fn->getSimdWidth() == 16) {
-      codeGenNum = 1;
-    } else if (fn->getSimdWidth() == 0) {
+    } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){
+      codeGenStrategy = codeGenStrategySimd16;
+      codeGenNum = sizeof(codeGenStrategySimd16) / sizeof(codeGenStrategySimd16[0]);
+    } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) {
       codeGen = 0;
     } else
-      GBE_ASSERT(0);
+      GBE_ASSERTM(0, "unsupported SIMD width!");
     Kernel *kernel = NULL;
 
     // Stop when compilation is successful
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 31b8bf27..96c81b92 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2124,6 +2124,7 @@ namespace gbe
     // Loop over the kernel metadatas to set the required work group size.
     size_t reqd_wg_sz[3] = {0, 0, 0};
     size_t hint_wg_sz[3] = {0, 0, 0};
+    size_t reqd_sg_sz = 0;
     ir::FunctionArgument::InfoFromLLVM llvmInfo;
     MDNode *addrSpaceNode = NULL;
     MDNode *typeNameNode = NULL;
@@ -2219,6 +2220,27 @@ namespace gbe
       functionAttributes += buffer;
       functionAttributes += " ";
     }
+    if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) {
+      GBE_ASSERT(attrNode->getNumOperands() == 1);
+      ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
+      GBE_ASSERT(sz);
+      reqd_sg_sz = sz->getZExtValue();
+      if(!(reqd_sg_sz == 8 || reqd_sg_sz == 16)){
+        F.getContext().emitError("Required sub group size is illegal!");
+        ctx.getUnit().setValid(false);
+        return;
+      }
+      functionAttributes += "intel_reqd_sub_group_size";
+      std::stringstream param;
+      char buffer[100] = {0};
+      param << "(";
+      param << reqd_sg_sz;
+      param << ")";
+      param >> buffer;
+      functionAttributes += buffer;
+      functionAttributes += " ";
+    }
+
 #else
     /* First find the meta data belong to this function. */
     MDNode *node = getKernelFunctionMetadata(&F);
@@ -2344,6 +2366,8 @@ namespace gbe
 #endif /* LLVM 3.9 Function metadata */
 
     ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]);
+    if (reqd_sg_sz)
+      ctx.setSimdWidth(reqd_sg_sz);
 
     ctx.getFunction().setFunctionAttributes(functionAttributes);
     // Loop over the arguments and output registers for them