Backend: Fix performance regression with sampler refine for LLVM40

After the refine we cannot know whether a sampler is constant-initialized or not. As a result, the compiler optimization for constant samplers breaks and we have to decide at runtime which SAMPLE instruction to use. Now fix the sampler refine for LLVM40 to enable the constant check. V2: Fix a typo in the type of function __gen_ocl_sampler_to_int. Signed-off-by: Pan Xiuli <xiuli.pan@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
author: Pan Xiuli <xiuli.pan@intel.com> 2017-05-17 17:01:57 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-05-18 17:35:18 +0800
commit: b2d51e273edb999ab9251868bb7199b7ddbcec5f (patch)
tree: fd165164cd83d940aa9bcc8c43e063f3dfcb83e6
parent: 47adba6564b73d85c1ea88cb753a1ee92ab4a518 (diff)
2 files changed, 41 insertions, 9 deletions
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index e66aa155..2febfdac 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -295,18 +295,17 @@ GEN_VALIDATE_ARRAY_INDEX(int, read_write image1d_buffer_t)
 // The work around is to use a LD message instead of normal sample message.
 ///////////////////////////////////////////////////////////////////////////////
 
-bool __gen_ocl_sampler_need_fix(int);
-bool __gen_ocl_sampler_need_rounding_fix(int);
-int __gen_ocl_sampler_to_int(sampler_t);
+bool __gen_ocl_sampler_need_fix(sampler_t);
+bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
 
 bool __gen_sampler_need_fix(const sampler_t sampler)
 {
-  return __gen_ocl_sampler_need_fix(__gen_ocl_sampler_to_int(sampler));
+  return __gen_ocl_sampler_need_fix(sampler);
 }
 
 bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
 {
-  return __gen_ocl_sampler_need_rounding_fix(__gen_ocl_sampler_to_int(sampler));
+  return __gen_ocl_sampler_need_rounding_fix(sampler);
 }
 
 INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
index 2e8bcf93..c2497558 100644
--- a/backend/src/llvm/llvm_sampler_fix.cpp
+++ b/backend/src/llvm/llvm_sampler_fix.cpp
@@ -55,9 +55,17 @@ namespace gbe {
         //          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
         bool needFix = true;
         Value *needFixVal;
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+        CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
+        if (init && init->getCalledValue()->getName().compare("__translate_sampler_initializer"))
+        {
+          const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
+          uint32_t samplerInt = ci->getZExtValue();
+#else
         if (dyn_cast<ConstantInt>(I->getOperand(0))) {
           const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
           uint32_t samplerInt = ci->getZExtValue();
+#endif
           needFix = ((samplerInt & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP &&
                      (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
           needFixVal = ConstantInt::get(boolTy, needFix);
@@ -65,14 +73,24 @@ namespace gbe {
           IRBuilder<> Builder(I->getParent());
 
           Builder.SetInsertPoint(I);
+
           Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
-          Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask);
           Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
-          Value *isClampMode = Builder.CreateICmpEQ(addressMode, clampInt);
           Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
-          Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
           Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
+
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+          Module *M = I->getParent()->getParent()->getParent();
+          Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr);
+          Value *samplerVal = Builder.CreateCall(samplerCvt, {I->getOperand(0)});
+#else
+          Value *samplerVal = I->getOperand(0);
+#endif
+          Value *addressMode = Builder.CreateAnd(samplerVal, addressMask);
+          Value *isClampMode = Builder.CreateICmpEQ(addressMode, clampInt);
+          Value *filterMode = Builder.CreateAnd(samplerVal, filterMask);
           Value *isNearestMode = Builder.CreateICmpEQ(filterMode, nearestInt);
+
           needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
         }
 
@@ -83,16 +101,31 @@ namespace gbe {
         //  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
         bool needFix = true;
         Value *needFixVal;
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+        CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
+        if (init && init->getCalledValue()->getName().compare("__translate_sampler_initializer"))
+        {
+          const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
+          uint32_t samplerInt = ci->getZExtValue();
+#else
         if (dyn_cast<ConstantInt>(I->getOperand(0))) {
           const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
           uint32_t samplerInt = ci->getZExtValue();
+#endif
           needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
           needFixVal = ConstantInt::get(boolTy, needFix);
         } else {
           IRBuilder<> Builder(I->getParent());
           Builder.SetInsertPoint(I);
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+          Module *M = I->getParent()->getParent()->getParent();
+          Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr);
+          Value *samplerVal = Builder.CreateCall(samplerCvt, {I->getOperand(0)});
+#else
+          Value *samplerVal = I->getOperand(0);
+#endif
           Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE);
-          Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask);
+          Value *normalizeMode = Builder.CreateAnd(samplerVal, normalizeMask);
           needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0));
         }
         I->replaceAllUsesWith(needFixVal);
author	Pan Xiuli <xiuli.pan@intel.com>	2017-05-17 17:01:57 +0800
committer	Yang Rong <rong.r.yang@intel.com>	2017-05-18 17:35:18 +0800
commit	b2d51e273edb999ab9251868bb7199b7ddbcec5f (patch)
tree	fd165164cd83d940aa9bcc8c43e063f3dfcb83e6
parent	47adba6564b73d85c1ea88cb753a1ee92ab4a518 (diff)