summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: I-Jui (Ray) Sung <ray@multicorewareinc.com> 2013-11-18 15:05:25 -0600
committer: I-Jui (Ray) Sung <ray@multicorewareinc.com> 2013-11-18 15:59:15 -0600
commit: 912d23636a55473221e3e35768fb9ac42c3e9b76 (patch)
tree: 9dff9f1ceb0b6126e590cb341b0a729975a447c8
parent: af223ecdf01b76bc1005c8fcc342165639a8823f (diff)
GPU Calc: turn on parallel sumifs and parallel sum reduce
Change-Id: Id615ea0f5f16a4dfc517aacb30715c2df84553e3
-rw-r--r-- sc/source/core/opencl/formulagroupcl.cxx 34
1 file changed, 11 insertions, 23 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 3a63c9205943..9d1e2a9e3a3a 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -426,9 +426,10 @@ public:
}
virtual bool NeedParallelReduction(void) const
{
- if (dynamic_cast<OpSum*>(mpCodeGen.get())
- && !dynamic_cast<OpAverage*>(mpCodeGen.get()))
- return GetWindowSize()> 100 &&
+ if ((dynamic_cast<OpSum*>(mpCodeGen.get())
+ && !dynamic_cast<OpAverage*>(mpCodeGen.get())) ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
+ return GetWindowSize()> 4 &&
( (GetStartFixed() && GetEndFixed()) ||
(!GetStartFixed() && !GetEndFixed()) ) ;
else
@@ -457,10 +458,10 @@ public:
ss << " tmp = 0.0;\n";
ss << " int loopOffset = l*512;\n";
ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset] + "
- "A[loopOffset + lidx + offset + 256];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0) + "
+ "fsum(A[loopOffset + lidx + offset + 256], 0);\n";
ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0);\n";
ss << " shm_buf[lidx] = tmp;\n";
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
ss << " for (int i = 128; i >0; i/=2) {\n";
@@ -496,7 +497,8 @@ public:
size_t nCurWindowSize = mpDVR->GetRefRowSize();
if (dynamic_cast<OpSum*>(mpCodeGen.get()))
{
- if (!bIsStartFixed && !bIsEndFixed)
+ if ((!bIsStartFixed && !bIsEndFixed) ||
+ (bIsStartFixed && bIsEndFixed))
{
// set 100 as a temporary threshold for invoking reduction
// kernel in NeedParalleLReduction function
@@ -510,21 +512,6 @@ public:
return nCurWindowSize;
}
}
-
- if (bIsStartFixed && bIsEndFixed)
- {
- // set 100 as a temporary threshold for invoking reduction
- // kernel in NeedParalleLReduction function
- if (NeedParallelReduction())
- {
- std::string temp = Base::GetName() + "[0]";
- ss << "tmp = ";
- ss << mpCodeGen->Gen2(temp, "tmp");
- ss << ";\n\t";
- needBody = false;
- return nCurWindowSize;
- }
- }
}
needBody = true;
@@ -576,7 +563,8 @@ public:
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
{
- if (!NeedParallelReduction())
+ if (!NeedParallelReduction() ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
return Base::Marshal(k, argno, w, mpProgram);
assert(Base::mpClmem == NULL);