summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wei Wei <weiwei@multicorewareinc.com> 2013-11-17 20:43:18 -0600
committer I-Jui (Ray) Sung <ray@multicorewareinc.com> 2013-11-17 21:37:55 -0600
commit ff983712ecaf91b3dc1ef432fc3fc64de2e29f13 (patch)
tree 648d92d5fe9477998407c0d3a714243e21bd1b33
parent f99736820a23cb7e37139607713658dea1c69dd4 (diff)
GPU calc: refactor code for sum reduction and sumifs naming rule
Change-Id: I685d263337bebe236befa5e5f45356336936c998
Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
-rw-r--r-- sc/source/core/opencl/formulagroupcl.cxx 37
-rw-r--r-- sc/source/core/opencl/op_math.cxx 4
2 files changed, 21 insertions, 20 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 43aa30d584ef..a25f729e3d80 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base
public:
DynamicKernelSlidingArgument(const std::string &s,
FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen):
- Base(s, ft), mpCodeGen(CodeGen), needReductionKernel(true), mpClmem2(NULL)
+ Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL)
{
FormulaToken *t = ft->GetFormulaToken();
if (t->GetType() != formula::svDoubleVectorRef)
@@ -428,7 +428,7 @@ public:
(!GetStartFixed() && !GetEndFixed()) ) ;
}
virtual void GenSlidingWindowFunction(std::stringstream &ss) {
- if (dynamic_cast<OpSum*>(mpCodeGen.get()))
+ if (dynamic_cast<OpSum*>(mpCodeGen.get()) && NeedParallelReduction())
{
std::string name = Base::GetName();
ss << "__kernel void "<<name;
@@ -436,20 +436,23 @@ public:
"__global double *result,int arrayLength,int windowSize){\n";
ss << " double tmp, current_result = 0.0;\n";
ss << " int writePos = get_group_id(1);\n";
- ss << " int offset = get_group_id(1);\n";
ss << " int lidx = get_local_id(0);\n";
ss << " __local double shm_buf[256];\n";
- ss << " if (arrayLength == windowSize)\n";
- ss << " offset = 0;\n";
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int offset = 0;\n";
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int offset = get_group_id(1);\n";
+ else
+ throw Unhandled();
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
ss << " int loop = arrayLength/512 + 1;\n";
ss << " for (int l=0; l<loop; l++){\n";
ss << " tmp = 0.0;\n";
ss << " int loopOffset = l*512;\n";
- ss << " if((loopOffset + lidx + offset + 256) < ( offset + windowSize))\n";
+ ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
ss << " tmp = A[loopOffset + lidx + offset] + "
"A[loopOffset + lidx + offset + 256];\n";
- ss << " else if ((loopOffset + lidx + offset) < ( offset + windowSize))\n";
+ ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
ss << " tmp = A[loopOffset + lidx + offset];\n";
ss << " shm_buf[lidx] = tmp;\n";
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
@@ -487,38 +490,35 @@ public:
{
if (!bIsStartFixed && !bIsEndFixed)
{
- // set 100 as a threshold for invoking reduction kernel
- // Ray: temporarily turn off parallel sum reduction
- if (false /*nCurWindowSize > 100*/)
+ // set 100 as a temporary threshold for invoking reduction
+ // kernel in the NeedParallelReduction function
+ if (NeedParallelReduction())
{
std::string temp = Base::GetName() + "[gid0]";
ss << "tmp = ";
ss << mpCodeGen->Gen2(temp, "tmp");
ss << ";\n\t";
needBody = false;
- needReductionKernel = false;
return nCurWindowSize;
}
}
if (bIsStartFixed && bIsEndFixed)
{
- // set 100 as a threshold for invoking reduction kernel
- // Ray: temporarily turn off parallel sum reduction
- if (false /* nCurWindowSize > 100 */)
+ // set 100 as a temporary threshold for invoking reduction
+ // kernel in the NeedParallelReduction function
+ if (NeedParallelReduction())
{
std::string temp = Base::GetName() + "[0]";
ss << "tmp = ";
ss << mpCodeGen->Gen2(temp, "tmp");
ss << ";\n\t";
needBody = false;
- needReductionKernel = false;
return nCurWindowSize;
}
}
}
needBody = true;
- needReductionKernel = true;
ss << "for (int i = ";
if (!bIsStartFixed && bIsEndFixed)
{
@@ -561,7 +561,7 @@ public:
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
{
- if (needReductionKernel)
+ if (!NeedParallelReduction())
return Base::Marshal(k, argno, w, mpProgram);
assert(Base::mpClmem == NULL);
@@ -654,7 +654,6 @@ protected:
// from parent nodes
boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
// controls whether to invoke the reduction kernel during marshaling or not
- bool needReductionKernel;
cl_mem mpClmem2;
};
@@ -1096,7 +1095,7 @@ public:
if (CL_SUCCESS != err)
throw OpenCLError(err);
- std::string kernelName = "SumIfs_reduction";
+ std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
if (err != CL_SUCCESS)
throw OpenCLError(err);
diff --git a/sc/source/core/opencl/op_math.cxx b/sc/source/core/opencl/op_math.cxx
index 30eb759303f5..e76a5d0dcdc0 100644
--- a/sc/source/core/opencl/op_math.cxx
+++ b/sc/source/core/opencl/op_math.cxx
@@ -453,8 +453,10 @@ void OpSumIfs::GenSlidingWindowFunction(std::stringstream &ss,
if (mNeedReductionKernel)
{
// generate reduction functions
+
ss << "__kernel void ";
- ss << "SumIfs_reduction( ";
+ ss << vSubArguments[0]->GetName();
+ ss << "_SumIfs_reduction( ";
for (unsigned i = 0; i < vSubArguments.size(); i++)
{
if (i)