diff options
author | I-Jui (Ray) Sung <ray@multicorewareinc.com> | 2013-11-18 15:05:25 -0600 |
---|---|---|
committer | I-Jui (Ray) Sung <ray@multicorewareinc.com> | 2013-11-18 15:59:15 -0600 |
commit | 912d23636a55473221e3e35768fb9ac42c3e9b76 (patch) | |
tree | 9dff9f1ceb0b6126e590cb341b0a729975a447c8 | |
parent | af223ecdf01b76bc1005c8fcc342165639a8823f (diff) |
GPU Calc: turn on parallel sumifs and parallel sum reduce
Change-Id: Id615ea0f5f16a4dfc517aacb30715c2df84553e3
-rw-r--r-- | sc/source/core/opencl/formulagroupcl.cxx | 34 |
1 files changed, 11 insertions, 23 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 3a63c9205943..9d1e2a9e3a3a 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -426,9 +426,10 @@ public:
  }
  virtual bool NeedParallelReduction(void) const
  {
- if (dynamic_cast<OpSum*>(mpCodeGen.get())
- && !dynamic_cast<OpAverage*>(mpCodeGen.get()))
- return GetWindowSize()> 100 &&
+ if ((dynamic_cast<OpSum*>(mpCodeGen.get())
+ && !dynamic_cast<OpAverage*>(mpCodeGen.get())) ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
+ return GetWindowSize()> 4 &&
  ( (GetStartFixed() && GetEndFixed()) ||
  (!GetStartFixed() && !GetEndFixed()) ) ;
  else
@@ -457,10 +458,10 @@ public:
  ss << " tmp = 0.0;\n";
  ss << " int loopOffset = l*512;\n";
  ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset] + "
- "A[loopOffset + lidx + offset + 256];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0) + "
+ "fsum(A[loopOffset + lidx + offset + 256], 0);\n";
  ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0);\n";
  ss << " shm_buf[lidx] = tmp;\n";
  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
  ss << " for (int i = 128; i >0; i/=2) {\n";
@@ -496,7 +497,8 @@ public:
  size_t nCurWindowSize = mpDVR->GetRefRowSize();
  if (dynamic_cast<OpSum*>(mpCodeGen.get()))
  {
- if (!bIsStartFixed && !bIsEndFixed)
+ if ((!bIsStartFixed && !bIsEndFixed) ||
+ (bIsStartFixed && bIsEndFixed))
  {
  // set 100 as a temporary threshold for invoking reduction
  // kernel in NeedParalleLReduction function
@@ -510,21 +512,6 @@ public:
  return nCurWindowSize;
  }
  }
-
- if (bIsStartFixed && bIsEndFixed)
- {
- // set 100 as a temporary threshold for invoking reduction
- // kernel in NeedParalleLReduction function
- if (NeedParallelReduction())
- {
- std::string temp = Base::GetName() + "[0]";
- ss << "tmp = ";
- ss << mpCodeGen->Gen2(temp, "tmp");
- ss << ";\n\t";
- needBody = false;
- return nCurWindowSize;
- }
- }
  }

  needBody = true;
@@ -576,7 +563,8 @@ public:

  virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
  {
- if (!NeedParallelReduction())
+ if (!NeedParallelReduction() ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
  return Base::Marshal(k, argno, w, mpProgram);

  assert(Base::mpClmem == NULL);