diff options
author | Wei Wei <weiwei@multicorewareinc.com> | 2013-11-17 20:43:18 -0600 |
---|---|---|
committer | I-Jui (Ray) Sung <ray@multicorewareinc.com> | 2013-11-17 21:37:55 -0600 |
commit | ff983712ecaf91b3dc1ef432fc3fc64de2e29f13 (patch) | |
tree | 648d92d5fe9477998407c0d3a714243e21bd1b33 | |
parent | f99736820a23cb7e37139607713658dea1c69dd4 (diff) |
GPU calc: refactor code for sum reduction and sumifs naming rule
Change-Id: I685d263337bebe236befa5e5f45356336936c998
Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
-rw-r--r-- | sc/source/core/opencl/formulagroupcl.cxx | 37 | ||||
-rw-r--r-- | sc/source/core/opencl/op_math.cxx | 4 |
2 files changed, 21 insertions, 20 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 43aa30d584ef..a25f729e3d80 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base public: DynamicKernelSlidingArgument(const std::string &s, FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen): - Base(s, ft), mpCodeGen(CodeGen), needReductionKernel(true), mpClmem2(NULL) + Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL) { FormulaToken *t = ft->GetFormulaToken(); if (t->GetType() != formula::svDoubleVectorRef) @@ -428,7 +428,7 @@ public: (!GetStartFixed() && !GetEndFixed()) ) ; } virtual void GenSlidingWindowFunction(std::stringstream &ss) { - if (dynamic_cast<OpSum*>(mpCodeGen.get())) + if (dynamic_cast<OpSum*>(mpCodeGen.get()) && NeedParallelReduction()) { std::string name = Base::GetName(); ss << "__kernel void "<<name; @@ -436,20 +436,23 @@ public: "__global double *result,int arrayLength,int windowSize){\n"; ss << " double tmp, current_result = 0.0;\n"; ss << " int writePos = get_group_id(1);\n"; - ss << " int offset = get_group_id(1);\n"; ss << " int lidx = get_local_id(0);\n"; ss << " __local double shm_buf[256];\n"; - ss << " if (arrayLength == windowSize)\n"; - ss << " offset = 0;\n"; + if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) + ss << " int offset = 0;\n"; + else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) + ss << " int offset = get_group_id(1);\n"; + else + throw Unhandled(); ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " int loop = arrayLength/512 + 1;\n"; ss << " for (int l=0; l<loop; l++){\n"; ss << " tmp = 0.0;\n"; ss << " int loopOffset = l*512;\n"; - ss << " if((loopOffset + lidx + offset + 256) < ( offset + windowSize))\n"; + ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n"; ss << " tmp = A[loopOffset + lidx + offset] + " "A[loopOffset + lidx + offset + 
256];\n"; - ss << " else if ((loopOffset + lidx + offset) < ( offset + windowSize))\n"; + ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n"; ss << " tmp = A[loopOffset + lidx + offset];\n"; ss << " shm_buf[lidx] = tmp;\n"; ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; @@ -487,38 +490,35 @@ public: { if (!bIsStartFixed && !bIsEndFixed) { - // set 100 as a threshold for invoking reduction kernel - // Ray: temporarily turn off parallel sum reduction - if (false /*nCurWindowSize > 100*/) + // set 100 as a temporary threshold for invoking reduction + // kernel in NeedParalleLReduction function + if (NeedParallelReduction()) { std::string temp = Base::GetName() + "[gid0]"; ss << "tmp = "; ss << mpCodeGen->Gen2(temp, "tmp"); ss << ";\n\t"; needBody = false; - needReductionKernel = false; return nCurWindowSize; } } if (bIsStartFixed && bIsEndFixed) { - // set 100 as a threshold for invoking reduction kernel - // Ray: temporarily turn off parallel sum reduction - if (false /* nCurWindowSize > 100 */) + // set 100 as a temporary threshold for invoking reduction + // kernel in NeedParalleLReduction function + if (NeedParallelReduction()) { std::string temp = Base::GetName() + "[0]"; ss << "tmp = "; ss << mpCodeGen->Gen2(temp, "tmp"); ss << ";\n\t"; needBody = false; - needReductionKernel = false; return nCurWindowSize; } } } needBody = true; - needReductionKernel = true; ss << "for (int i = "; if (!bIsStartFixed && bIsEndFixed) { @@ -561,7 +561,7 @@ public: virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram) { - if (needReductionKernel) + if (!NeedParallelReduction()) return Base::Marshal(k, argno, w, mpProgram); assert(Base::mpClmem == NULL); @@ -654,7 +654,6 @@ protected: // from parent nodes boost::shared_ptr<SlidingFunctionBase> mpCodeGen; // controls whether to invoke the reduction kernel during marshaling or not - bool needReductionKernel; cl_mem mpClmem2; }; @@ -1096,7 +1095,7 @@ public: if (CL_SUCCESS != err) 
throw OpenCLError(err); - std::string kernelName = "SumIfs_reduction"; + std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction"; cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err); if (err != CL_SUCCESS) throw OpenCLError(err); diff --git a/sc/source/core/opencl/op_math.cxx b/sc/source/core/opencl/op_math.cxx index 30eb759303f5..e76a5d0dcdc0 100644 --- a/sc/source/core/opencl/op_math.cxx +++ b/sc/source/core/opencl/op_math.cxx @@ -453,8 +453,10 @@ void OpSumIfs::GenSlidingWindowFunction(std::stringstream &ss, if (mNeedReductionKernel) { // generate reduction functions + ss << "__kernel void "; - ss << "SumIfs_reduction( "; + ss << vSubArguments[0]->GetName(); + ss << "_SumIfs_reduction( "; for (unsigned i = 0; i < vSubArguments.size(); i++) { if (i) |