diff options
author | hongyu zhong <hongyu@multicorewareinc.com> | 2013-12-10 16:06:47 +0800 |
---|---|---|
committer | I-Jui (Ray) Sung <ray@multicorewareinc.com> | 2013-12-19 17:53:48 -0600 |
commit | 19410777d2d0f2f4d492ebb93c393f74b0783ac6 (patch) | |
tree | bc791710ce79403c89fe22bc7dcb9808f5fbda6d | |
parent | 2664b6fdebbeacf4ac047fd1b622e76cfb294d70 (diff) |
GPU Calc: Optimized NORMSDIST
AMLOEXT-345
Change-Id: If355844275d0789b7f0851e9061de1c3ddf5b325
Signed-off-by: haochen <haochen@multicorewareinc.com>
Signed-off-by: Wei Wei <weiwei@multicorewareinc.com>
Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
-rw-r--r-- | sc/source/core/opencl/op_statistical.cxx | 77 | ||||
-rw-r--r-- | sc/source/core/opencl/op_statistical.hxx | 1 | ||||
-rw-r--r-- | sc/source/core/opencl/opinlinefun_statistical.cxx | 175 |
3 files changed, 237 insertions, 16 deletions
diff --git a/sc/source/core/opencl/op_statistical.cxx b/sc/source/core/opencl/op_statistical.cxx index 79f2cebfb8ff..b4884ef1fabb 100644 --- a/sc/source/core/opencl/op_statistical.cxx +++ b/sc/source/core/opencl/op_statistical.cxx @@ -4995,6 +4995,21 @@ void OpNormdist::GenSlidingWindowFunction( ss << "return tmp;\n"; ss << "}\n"; } +void OpNormsdist::BinInlineFun(std::set<std::string>& decls, + std::set<std::string>& funs) +{ + decls.insert(lcl_Erfc0600Decl); + funs.insert(lcl_Erfc0600); + decls.insert(lcl_Erfc2654Decl); + funs.insert(lcl_Erfc2654); + decls.insert(lcl_Erf0065Decl); + funs.insert(lcl_Erf0065); + decls.insert(rtl_math_erf_rdDecl); + funs.insert(rtl_math_erf_rd); + decls.insert(rtl_math_erfc_rdDecl); + funs.insert(rtl_math_erfc_rd); +} + void OpNormsdist::GenSlidingWindowFunction( std::stringstream &ss,const std::string sSymName, SubArguments &vSubArguments) @@ -5009,28 +5024,58 @@ void OpNormsdist::GenSlidingWindowFunction( } ss << ")\n"; ss << "{\n"; - ss << " double x;\n"; + ss << " double x = 0,tmp0 = 0;\n"; ss << " int gid0=get_global_id(0);\n"; -#ifdef ISNAN - FormulaToken *tmpCur0 = vSubArguments[0]->GetFormulaToken(); - const formula::SingleVectorRefToken*tmpCurDVR0= dynamic_cast<const - formula::SingleVectorRefToken *>(tmpCur0); - ss << "int buffer_x_len = "; - ss << tmpCurDVR0->GetArrayLength(); - ss << ";\n"; + size_t i = vSubArguments.size(); + ss <<"\n "; + for (i = 0; i < vSubArguments.size(); i++) + { + FormulaToken *pCur = vSubArguments[i]->GetFormulaToken(); + assert(pCur); + if (pCur->GetType() == formula::svSingleVectorRef) + { +#ifdef ISNAN + const formula::SingleVectorRefToken* pSVR = + dynamic_cast< const formula::SingleVectorRefToken* >(pCur); + ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n"; #endif -#ifdef ISNAN - ss <<" if((gid0)>=buffer_x_len || isNan("; - ss << vSubArguments[0]->GenSlidingWindowDeclRef(); - ss <<"))\n"; - ss <<" x = 0;\nelse \n"; + } + else if (pCur->GetType() == formula::svDouble) + { +#ifdef ISNAN + ss << "{\n"; #endif - ss << " x = "<<vSubArguments[0]->GenSlidingWindowDeclRef(); - ss << ";\n"; - ss << " double tmp = 0.5 *erfc(-x * 0.7071067811865475);\n"; + } + else + { +#ifdef ISNAN +#endif + } +#ifdef ISNAN + if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode()) + { + ss << " if (isNan("; + ss << vSubArguments[i]->GenSlidingWindowDeclRef(); + ss << "))\n"; + ss << " tmp"<<i<<"= 0;\n"; + ss << " else\n"; + ss << " tmp"<<i<<"=\n"; + ss << vSubArguments[i]->GenSlidingWindowDeclRef(); + ss << ";\n}\n"; + } + else + { + ss << "tmp"<<i<<"="<<vSubArguments[i]->GenSlidingWindowDeclRef(); + ss <<";\n"; + } +#endif + } + ss << " x = tmp0;\n"; + ss << " double tmp = 0.5 * rtl_math_erfc_rd((-1)*x * 0.7071067811865475);\n"; ss << " return tmp;\n"; ss << "}\n"; } + void OpVariationen::GenSlidingWindowFunction( std::stringstream &ss,const std::string sSymName, SubArguments &vSubArguments) diff --git a/sc/source/core/opencl/op_statistical.hxx b/sc/source/core/opencl/op_statistical.hxx index 2d59ad49274c..2edad6e28376 100644 --- a/sc/source/core/opencl/op_statistical.hxx +++ b/sc/source/core/opencl/op_statistical.hxx @@ -240,6 +240,7 @@ class OpNormsdist:public Normal{ public: virtual void GenSlidingWindowFunction(std::stringstream &ss, const std::string sSymName, SubArguments &vSubArguments); + virtual void BinInlineFun(std::set<std::string>& ,std::set<std::string>&); virtual std::string BinFuncName(void) const { return "OpNormsdist"; } }; class OpNorminv:public Normal{ diff --git a/sc/source/core/opencl/opinlinefun_statistical.cxx b/sc/source/core/opencl/opinlinefun_statistical.cxx index 8fbd1eb92c32..9e9b8cc7c58c 100644 --- a/sc/source/core/opencl/opinlinefun_statistical.cxx +++ b/sc/source/core/opencl/opinlinefun_statistical.cxx @@ -1367,6 +1367,181 @@ std::string gauss = " else\n" " return nVal;\n" "}\n"; +std::string lcl_Erfc0600Decl= +"void lcl_Erfc0600( double x, double *fVal );\n"; +std::string lcl_Erfc0600 = +"void lcl_Erfc0600( double x, double *fVal )\n" +"{\n" +" double fPSum = 0.0;\n" +" double fQSum = 0.0;\n" +" double fXPow = 1.0;\n" +" double *pn;\n" +" double *qn;\n" +" if ( x < 2.2 )\n" +" {\n" +" double pn22[] = { \n" +" 9.99999992049799098E-1, \n" +" 1.33154163936765307, \n" +" 8.78115804155881782E-1, \n" +" 3.31899559578213215E-1, \n" +" 7.14193832506776067E-2, \n" +" 7.06940843763253131E-3 \n" +" }; \n" +" double qn22[] = { \n" +" 1.00000000000000000, \n" +" 2.45992070144245533, \n" +" 2.65383972869775752, \n" +" 1.61876655543871376, \n" +" 5.94651311286481502E-1, \n" +" 1.26579413030177940E-1, \n" +" 1.25304936549413393E-2 \n" +" }; \n" +" pn = pn22; \n" +" qn = qn22; \n" +" } \n" +" else \n" +" \n" +" { \n" +" double pn60[] = {\n" +" 9.99921140009714409E-1,\n" +" 1.62356584489366647,\n" +" 1.26739901455873222,\n" +" 5.81528574177741135E-1,\n" +" 1.57289620742838702E-1,\n" +" 2.25716982919217555E-2\n" +" };\n" +" double qn60[] = {\n" +" 1.00000000000000000,\n" +" 2.75143870676376208,\n" +" 3.37367334657284535,\n" +" 2.38574194785344389,\n" +" 1.05074004614827206,\n" +" 2.78788439273628983E-1,\n" +" 4.00072964526861362E-2\n" +" };\n" +" pn = pn60;\n" +" qn = qn60;\n" +" }\n" +" for ( unsigned int i = 0; i < 6; ++i ) \n" +" {\n" +" fPSum += pn[i]*fXPow;\n" +" fQSum += qn[i]*fXPow;\n" +" fXPow *= x;\n" +" }\n" +" fQSum += qn[6]*fXPow;\n" +" *fVal = exp((-1.0)*x*x)*fPSum*pow(fQSum, -1.0);\n" +" }\n"; +std::string lcl_Erfc2654Decl= +"void lcl_Erfc2654( double x, double *fVal );\n"; +std::string lcl_Erfc2654 = +"void lcl_Erfc2654( double x, double *fVal )\n" +"{\n" +" double pn[] = {\n" +" 5.64189583547756078E-1,\n" +" 8.80253746105525775,\n" +" 3.84683103716117320E1,\n" +" 4.77209965874436377E1,\n" +" 8.08040729052301677\n" +" };\n" +" double qn[] = {\n" +" 1.00000000000000000,\n" +" 1.61020914205869003E1,\n" +" 7.54843505665954743E1,\n" +" 1.12123870801026015E2,\n" +" 3.73997570145040850E1\n" +" };\n" +"\n" +" double fPSum = 0.0;\n" +" double fQSum = 0.0;\n" +" double fXPow = 1.0;\n" +"\n" +" for ( unsigned int i = 0; i <= 4; ++i )\n" +" {\n" +" fPSum += pn[i]*fXPow; \n" +" fQSum += qn[i]*fXPow;\n" +" fXPow *= pow(x*x, -1.0);\n" +" }\n" +" *fVal = exp((-1.0)*x*x)*fPSum*pow(x*fQSum, -1.0);\n" +"}\n"; +std::string lcl_Erf0065Decl= +"void lcl_Erf0065( double x, double *fVal );\n"; +std::string lcl_Erf0065 = +"void lcl_Erf0065( double x, double *fVal )\n" +" {\n" +" double pn[] = {\n" +" 1.12837916709551256,\n" +" 1.35894887627277916E-1,\n" +" 4.03259488531795274E-2,\n" +" 1.20339380863079457E-3,\n" +" 6.49254556481904354E-5\n" +" };\n" +" double qn[] = {\n" +" 1.00000000000000000,\n" +" 4.53767041780002545E-1,\n" +" 8.69936222615385890E-2,\n" +" 8.49717371168693357E-3,\n" +" 3.64915280629351082E-4\n" +" };\n" +" double fPSum = 0.0;\n" +" double fQSum = 0.0;\n" +" double fXPow = 1.0;\n" +" for ( unsigned int i = 0; i <= 4; ++i )\n" +" {\n" +" fPSum += pn[i]*fXPow;\n" +" fQSum += qn[i]*fXPow;\n" +" fXPow *= x*x;\n" +" }\n" +" *fVal = x * fPSum * pow(fQSum, -1.0);\n" +" }\n"; +std::string rtl_math_erf_rdDecl= +"double rtl_math_erf_rd( double x );\n"; +std::string rtl_math_erf_rd = +" double rtl_math_erf_rd( double x )\n" +" {\n" +" if( x == 0.0 )\n" +" return 0.0;\n" +" bool bNegative = false;\n" +" if ( x < 0.0 )\n" +" {\n" +" x = fabs( x );\n" +" bNegative = true;\n" +" }\n" +" double fErf = 1.0;\n" +" if ( x < 1.0e-10 )\n" +" fErf = (double) (x*1.1283791670955125738961589031215452);\n" +" else if ( x < 0.65 )\n" +" lcl_Erf0065( x, &fErf );\n" +" if ( bNegative )\n" +" fErf *= (-1.0);\n" +" return fErf;\n" +" }\n"; +std::string rtl_math_erfc_rdDecl= +"double rtl_math_erfc_rd( double x );\n"; +std::string rtl_math_erfc_rd = +" double rtl_math_erfc_rd( double x )\n" +" {\n" +" if ( x == 0.0 )\n" +" return 1.0;\n" +" bool bNegative = false;\n" +" if ( x < 0.0 )\n" +" {\n" +" x = fabs( x );\n" +" bNegative = true;\n" +" }\n" +" double fErfc = 0.0;\n" +" if ( x >= 0.65 )\n" +" {\n" +" if ( x < 6.0 )\n" +" lcl_Erfc0600( x, &fErfc );\n" +" else\n" +" lcl_Erfc2654( x, &fErfc );\n" +" }\n" +" else\n" +" fErfc = 1.0 - rtl_math_erf_rd( x );\n" +" if ( bNegative )\n" +" fErfc = 2.0 - fErfc;\n" +" return fErfc;\n" +" }\n"; #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |