summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2015-11-17 07:40:09 +0800
committerYang Rong <rong.r.yang@intel.com>2015-11-17 16:23:27 +0800
commit228de1e01fa8eb1036396be373406c581216c297 (patch)
tree520464bc79972fa9bd9171a1343403023d713442
parentcc66e3cb9d83474fb1c845468b0757ff3489adac (diff)
Backend: Add CalcTimestamp and StoreProfiling.
When profiling is enabled, the profiling inserter function inserts a calc_timestamp call at each point we are interested in. At the end of the kernel, just before the return, we insert a store_profiling function call. That call holds a reference to the global variable profiling_buf, which keeps it from being released when the optimization passes run. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp42
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx5
2 files changed, 47 insertions, 0 deletions
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 858bd497..84a4f0d0 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1089,6 +1089,9 @@ namespace gbe
} else if (origin->getName().equals(StringRef("__gen_ocl_printf_index_buf"))) {
new_bti = btiBase;
incBtiBase();
+ } else if (origin->getName().equals(StringRef("__gen_ocl_profiling_buf"))) {
+ new_bti = btiBase; // profiling buffer takes the current btiBase, advanced on the next line
+ incBtiBase();
}
else if (isa<GlobalVariable>(origin)
&& dyn_cast<GlobalVariable>(origin)->isConstant()) {
@@ -2567,6 +2570,9 @@ namespace gbe
} else if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) {
ctx.getFunction().getPrintfSet()->setIndexBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
regTranslator.newScalarProxy(ir::ocl::printfiptr, const_cast<GlobalVariable*>(&v));
+ } else if(v.getName().equals(StringRef("__gen_ocl_profiling_buf"))) {
+ ctx.getUnit().getProfilingInfo()->setBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
+ regTranslator.newScalarProxy(ir::ocl::profilingbptr, const_cast<GlobalVariable*>(&v));
} else if(v.getName().str().substr(0, 4) == ".str") {
/* When there are multi printf statements in multi kernel fucntions within the same
translate unit, if they have the same sting parameter, such as
@@ -3591,6 +3597,8 @@ namespace gbe
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
+ case GEN_OCL_CALC_TIMESTAMP:
+ case GEN_OCL_STORE_PROFILING:
break;
case GEN_OCL_NOT_FOUND:
default:
@@ -4330,6 +4338,40 @@ namespace gbe
assert(fmt);
break;
}
+ case GEN_OCL_CALC_TIMESTAMP:
+ {
+ GBE_ASSERT(AI != AE);
+ ConstantInt *CI = dyn_cast<ConstantInt>(*AI);
+ GBE_ASSERT(CI);
+ uint32_t pointNum = CI->getZExtValue();
+ AI++;
+ GBE_ASSERT(AI != AE);
+ CI = dyn_cast<ConstantInt>(*AI);
+ GBE_ASSERT(CI);
+ uint32_t tsType = CI->getZExtValue();
+ ctx.CALC_TIMESTAMP(pointNum, tsType);
+ break;
+ }
+ case GEN_OCL_STORE_PROFILING:
+ {
+ /* The profiling log always begins at offset 0, so we
+ never need the buffer ptr value or ptrBase, and there is
+ no need for a SUB to calculate the real address either.
+ We just pass down the BTI value to the instruction. */
+ GBE_ASSERT(AI != AE);
+ Value* llvmPtr = *AI;
+ Value *bti = getBtiRegister(llvmPtr);
+ GBE_ASSERT(isa<ConstantInt>(bti)); //Should never be mixed pointer.
+ uint32_t index = cast<ConstantInt>(bti)->getZExtValue();
+ GBE_ASSERT(btiToGen(index) == ir::MEM_GLOBAL);
+ ++AI;
+ GBE_ASSERT(AI != AE);
+ ConstantInt *CI = dyn_cast<ConstantInt>(*AI);
+ GBE_ASSERT(CI);
+ uint32_t ptype = CI->getZExtValue();
+ ctx.getUnit().getProfilingInfo()->setProfilingType(ptype);
+ break;
+ }
case GEN_OCL_SIMD_SIZE:
{
const ir::Register dst = this->getRegister(&I);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 3fbf847b..65bf0c1c 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -172,3 +172,8 @@ DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme)
// printf function
DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
+
+// calculate timestamp function
+DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp)
+// store profiling info to the mem.
+DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)