GPU Calc: Optimized NORMSDIST

AMLOEXT-345

Change-Id: If355844275d0789b7f0851e9061de1c3ddf5b325
Signed-off-by: haochen <haochen@multicorewareinc.com>
Signed-off-by: Wei Wei <weiwei@multicorewareinc.com>
Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
author: hongyu zhong <hongyu@multicorewareinc.com> 2013-12-10 16:06:47 +0800
committer: I-Jui (Ray) Sung <ray@multicorewareinc.com> 2013-12-19 17:53:48 -0600
commit: 19410777d2d0f2f4d492ebb93c393f74b0783ac6 (patch)
tree: bc791710ce79403c89fe22bc7dcb9808f5fbda6d
parent: 2664b6fdebbeacf4ac047fd1b622e76cfb294d70 (diff)
3 files changed, 237 insertions, 16 deletions
diff --git a/sc/source/core/opencl/op_statistical.cxx b/sc/source/core/opencl/op_statistical.cxx
index 79f2cebfb8ff..b4884ef1fabb 100644
--- a/sc/source/core/opencl/op_statistical.cxx
+++ b/sc/source/core/opencl/op_statistical.cxx
@@ -4995,6 +4995,21 @@ void OpNormdist::GenSlidingWindowFunction(
     ss << "return tmp;\n";
     ss << "}\n";
 }
+// Registers the helper sources the NORMSDIST kernel calls: prototypes go to decls, bodies to funs.
+void OpNormsdist::BinInlineFun(std::set<std::string>& decls, std::set<std::string>& funs)
+{
+    decls.insert(lcl_Erf0065Decl);
+    decls.insert(lcl_Erfc0600Decl);
+    decls.insert(lcl_Erfc2654Decl);
+    decls.insert(rtl_math_erf_rdDecl);
+    decls.insert(rtl_math_erfc_rdDecl);
+    funs.insert(lcl_Erf0065);
+    funs.insert(lcl_Erfc0600);
+    funs.insert(lcl_Erfc2654);
+    funs.insert(rtl_math_erf_rd);
+    funs.insert(rtl_math_erfc_rd);
+}
+
 void OpNormsdist::GenSlidingWindowFunction(
     std::stringstream &ss,const std::string sSymName,
     SubArguments &vSubArguments)
@@ -5009,28 +5024,58 @@ void OpNormsdist::GenSlidingWindowFunction(
     }
     ss << ")\n";
     ss << "{\n";
-    ss << "    double x;\n";
+    ss << "    double x = 0,tmp0 = 0;\n";
     ss << "    int gid0=get_global_id(0);\n";
-#ifdef ISNAN
-    FormulaToken *tmpCur0 = vSubArguments[0]->GetFormulaToken();
-    const formula::SingleVectorRefToken*tmpCurDVR0= dynamic_cast<const
-    formula::SingleVectorRefToken *>(tmpCur0);
-    ss << "int buffer_x_len = ";
-    ss << tmpCurDVR0->GetArrayLength();
-    ss << ";\n";
+    size_t i = vSubArguments.size();
+    ss <<"\n    ";
+    for (i = 0; i < vSubArguments.size(); i++)
+    {
+        FormulaToken *pCur = vSubArguments[i]->GetFormulaToken();
+        assert(pCur);
+        if (pCur->GetType() == formula::svSingleVectorRef)
+        {
+#ifdef  ISNAN
+                const formula::SingleVectorRefToken* pSVR =
+                dynamic_cast< const formula::SingleVectorRefToken* >(pCur);
+            ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n";
 #endif
-#ifdef ISNAN
-    ss <<"    if((gid0)>=buffer_x_len || isNan(";
-    ss << vSubArguments[0]->GenSlidingWindowDeclRef();
-    ss <<"))\n";
-    ss <<"        x = 0;\nelse \n";
+        }
+        else if (pCur->GetType() == formula::svDouble)
+        {
+#ifdef  ISNAN
+            ss << "{\n";
 #endif
-    ss << "       x = "<<vSubArguments[0]->GenSlidingWindowDeclRef();
-    ss << ";\n";
-    ss << "    double tmp = 0.5 *erfc(-x * 0.7071067811865475);\n";
+        }
+        else
+        {
+#ifdef  ISNAN
+#endif
+        }
+#ifdef  ISNAN
+        if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
+        {
+            ss << "    if (isNan(";
+            ss << vSubArguments[i]->GenSlidingWindowDeclRef();
+            ss << "))\n";
+            ss << "        tmp"<<i<<"= 0;\n";
+            ss << "    else\n";
+            ss << "        tmp"<<i<<"=\n";
+            ss << vSubArguments[i]->GenSlidingWindowDeclRef();
+            ss << ";\n}\n";
+        }
+        else
+        {
+            ss << "tmp"<<i<<"="<<vSubArguments[i]->GenSlidingWindowDeclRef();
+            ss <<";\n";
+        }
+#endif
+    }
+    ss << "    x = tmp0;\n";
+    ss << "    double tmp = 0.5 * rtl_math_erfc_rd((-1)*x * 0.7071067811865475);\n";
     ss << "    return tmp;\n";
     ss << "}\n";
 }
+
 void OpVariationen::GenSlidingWindowFunction(
     std::stringstream &ss,const std::string sSymName,
     SubArguments &vSubArguments)
diff --git a/sc/source/core/opencl/op_statistical.hxx b/sc/source/core/opencl/op_statistical.hxx
index 2d59ad49274c..2edad6e28376 100644
--- a/sc/source/core/opencl/op_statistical.hxx
+++ b/sc/source/core/opencl/op_statistical.hxx
@@ -240,6 +240,7 @@ class OpNormsdist:public Normal{
     public:
     virtual void GenSlidingWindowFunction(std::stringstream &ss,
             const std::string sSymName, SubArguments &vSubArguments);
+    virtual void BinInlineFun(std::set<std::string>& ,std::set<std::string>&);
     virtual std::string BinFuncName(void) const { return "OpNormsdist"; }
 };
 class OpNorminv:public Normal{
diff --git a/sc/source/core/opencl/opinlinefun_statistical.cxx b/sc/source/core/opencl/opinlinefun_statistical.cxx
index 8fbd1eb92c32..9e9b8cc7c58c 100644
--- a/sc/source/core/opencl/opinlinefun_statistical.cxx
+++ b/sc/source/core/opencl/opinlinefun_statistical.cxx
@@ -1367,6 +1367,181 @@ std::string gauss =
 "    else\n"
 "        return nVal;\n"
 "}\n";
+std::string lcl_Erfc0600Decl= // prototype text for lcl_Erfc0600, registered through decls in OpNormsdist::BinInlineFun
+"void lcl_Erfc0600( double x, double *fVal );\n"; // the computed value is handed back through fVal
+// Kernel-source text of lcl_Erfc0600( x, fVal ): stores
+//     exp(-x*x) * P(x) / Q(x)
+// in *fVal, where P has 6 and Q has 7 coefficients. One coefficient pair
+// (pn22/qn22) is used for x < 2.2 and the other (pn60/qn60) otherwise.
+// All four tables are declared at function scope: pn/qn are read after the
+// range selection, so they must not point into a block that has already ended.
+std::string lcl_Erfc0600 =
+"void lcl_Erfc0600( double  x, double *fVal )\n"
+"{\n"
+"    double fPSum = 0.0;\n"
+"    double fQSum = 0.0;\n"
+"    double fXPow = 1.0;\n"
+// Coefficients for x < 2.2.
+"    double pn22[] = {\n"
+"        9.99999992049799098E-1,\n"
+"        1.33154163936765307,\n"
+"        8.78115804155881782E-1,\n"
+"        3.31899559578213215E-1,\n"
+"        7.14193832506776067E-2,\n"
+"        7.06940843763253131E-3\n"
+"    };\n"
+"    double qn22[] = {\n"
+"        1.00000000000000000,\n"
+"        2.45992070144245533,\n"
+"        2.65383972869775752,\n"
+"        1.61876655543871376,\n"
+"        5.94651311286481502E-1,\n"
+"        1.26579413030177940E-1,\n"
+"        1.25304936549413393E-2\n"
+"    };\n"
+// Coefficients for the remaining range (x >= 2.2).
+"    double pn60[] = {\n"
+"        9.99921140009714409E-1,\n"
+"        1.62356584489366647,\n"
+"        1.26739901455873222,\n"
+"        5.81528574177741135E-1,\n"
+"        1.57289620742838702E-1,\n"
+"        2.25716982919217555E-2\n"
+"    };\n"
+"    double qn60[] = {\n"
+"        1.00000000000000000,\n"
+"        2.75143870676376208,\n"
+"        3.37367334657284535,\n"
+"        2.38574194785344389,\n"
+"        1.05074004614827206,\n"
+"        2.78788439273628983E-1,\n"
+"        4.00072964526861362E-2\n"
+"    };\n"
+// Select the table pair once; both pointers stay valid until the function returns.
+"    double *pn = ( x < 2.2 ) ? pn22 : pn60;\n"
+"    double *qn = ( x < 2.2 ) ? qn22 : qn60;\n"
+// Sum the x^0..x^5 terms of both polynomials with a shared running power.
+"    for ( unsigned int i = 0; i < 6; ++i )\n"
+"    {\n"
+"        fPSum += pn[i]*fXPow;\n"
+"        fQSum += qn[i]*fXPow;\n"
+"        fXPow *= x;\n"
+"    }\n"
+// Q has one more term than P (x^6).
+"    fQSum += qn[6]*fXPow;\n"
+"    *fVal = exp((-1.0)*x*x)*fPSum*pow(fQSum, -1.0);\n"
+"}\n";
+std::string lcl_Erfc2654Decl= // prototype text for lcl_Erfc2654, registered through decls in OpNormsdist::BinInlineFun
+"void lcl_Erfc2654( double x, double *fVal );\n"; // the computed value is handed back through fVal
+std::string lcl_Erfc2654 = // kernel-source text; rtl_math_erfc_rd calls this helper when its argument is not below 6.0
+"void lcl_Erfc2654( double x, double *fVal )\n" // writes exp(-x*x) * P(1/x^2) / (x * Q(1/x^2)) to *fVal
+"{\n"
+"    double pn[] = {\n" // numerator coefficients, lowest power of 1/x^2 first
+"        5.64189583547756078E-1,\n"
+"        8.80253746105525775,\n"
+"        3.84683103716117320E1,\n"
+"        4.77209965874436377E1,\n"
+"        8.08040729052301677\n"
+"    };\n"
+"    double qn[] = {\n" // denominator coefficients, same ordering
+"        1.00000000000000000,\n"
+"        1.61020914205869003E1,\n"
+"        7.54843505665954743E1,\n"
+"        1.12123870801026015E2,\n"
+"        3.73997570145040850E1\n"
+"    };\n"
+"\n"
+"    double fPSum = 0.0;\n"
+"    double fQSum = 0.0;\n"
+"    double fXPow = 1.0;\n" // running power (1/x^2)^i shared by both sums
+"\n"
+"    for ( unsigned int i = 0; i <= 4; ++i )\n" // five terms per polynomial
+"    {\n"
+"        fPSum += pn[i]*fXPow;       \n"
+"        fQSum += qn[i]*fXPow;\n"
+"        fXPow *= pow(x*x, -1.0);\n" // multiply by 1/(x*x); no guard against x == 0 here
+"    }\n"
+"    *fVal = exp((-1.0)*x*x)*fPSum*pow(x*fQSum, -1.0);\n" // pow(..., -1.0) is used as a reciprocal
+"}\n";
+std::string lcl_Erf0065Decl= // prototype text for lcl_Erf0065, registered through decls in OpNormsdist::BinInlineFun
+"void lcl_Erf0065( double x, double *fVal );\n"; // the computed value is handed back through fVal
+std::string lcl_Erf0065 = // kernel-source text; rtl_math_erf_rd calls this helper for 1.0e-10 <= x < 0.65
+"void lcl_Erf0065( double x, double *fVal )\n" // writes x * P(x^2) / Q(x^2) to *fVal
+"   {\n"
+"        double pn[] = {\n" // numerator coefficients, lowest power of x^2 first
+"            1.12837916709551256,\n"
+"            1.35894887627277916E-1,\n"
+"            4.03259488531795274E-2,\n"
+"            1.20339380863079457E-3,\n"
+"            6.49254556481904354E-5\n"
+"            };\n"
+"        double qn[] = {\n" // denominator coefficients, same ordering
+"            1.00000000000000000,\n"
+"            4.53767041780002545E-1,\n"
+"            8.69936222615385890E-2,\n"
+"            8.49717371168693357E-3,\n"
+"            3.64915280629351082E-4\n"
+"            };\n"
+"        double fPSum = 0.0;\n"
+"        double fQSum = 0.0;\n"
+"        double fXPow = 1.0;\n" // running power (x^2)^i shared by both sums
+"        for ( unsigned int i = 0; i <= 4; ++i )\n" // five terms per polynomial
+"        {\n"
+"            fPSum += pn[i]*fXPow;\n"
+"            fQSum += qn[i]*fXPow;\n"
+"            fXPow *= x*x;\n"
+"        }\n"
+"        *fVal = x * fPSum * pow(fQSum, -1.0);\n" // pow(..., -1.0) is used as a reciprocal
+"   }\n";
+std::string rtl_math_erf_rdDecl= // prototype text for rtl_math_erf_rd, registered through decls in OpNormsdist::BinInlineFun
+"double rtl_math_erf_rd( double x );\n"; // unlike the lcl_* helpers, the value is returned directly
+// Kernel source for rtl_math_erf_rd(x). For |x| >= 0.65 it returns 1 - erfc(|x|) using
+// lcl_Erfc0600 / lcl_Erfc2654 (which must be registered alongside it) instead of a flat
+// 1.0; rtl_math_erfc_rd is not used there because it calls back into this function.
+std::string rtl_math_erf_rd =
+" double rtl_math_erf_rd( double x )\n"
+" {\n"
+"     double fAbs = fabs( x );\n"
+"     double fErf = 0.0;\n"
+"     if ( fAbs < 1.0e-10 )\n"
+"         fErf = (double) (fAbs*1.1283791670955125738961589031215452);\n"
+"     else if ( fAbs < 0.65 )\n"
+"         lcl_Erf0065( fAbs, &fErf );\n"
+"     else if ( fAbs < 6.0 )\n"
+"         lcl_Erfc0600( fAbs, &fErf );\n"
+"     else\n"
+"         lcl_Erfc2654( fAbs, &fErf );\n"
+"     if ( fAbs >= 0.65 )\n"
+"         fErf = 1.0 - fErf;\n"
+"     return ( x < 0.0 ) ? -fErf : fErf;\n"
+" }\n";
+std::string rtl_math_erfc_rdDecl= // prototype text for rtl_math_erfc_rd, registered through decls in OpNormsdist::BinInlineFun
+"double rtl_math_erfc_rd( double x );\n"; // the value is returned directly
+std::string rtl_math_erfc_rd = // kernel-source text; the code emitted by OpNormsdist::GenSlidingWindowFunction calls this
+" double rtl_math_erfc_rd( double x )\n"
+" {\n"
+"     if ( x == 0.0 )\n" // exact zero short-circuits to 1.0
+"        return 1.0;\n"
+"     bool bNegative = false;\n"
+"     if ( x < 0.0 )\n" // continue with |x| and remember the sign
+"     {\n"
+"         x = fabs( x );\n"
+"         bNegative = true;\n"
+"     }\n"
+"     double fErfc = 0.0;\n"
+"     if ( x >= 0.65 )\n" // 0.65 <= |x| < 6.0 uses lcl_Erfc0600, anything larger uses lcl_Erfc2654
+"     {\n"
+"         if ( x < 6.0 )\n"
+"             lcl_Erfc0600( x, &fErfc );\n"
+"         else\n"
+"             lcl_Erfc2654( x, &fErfc );\n"
+"     }\n"
+"     else\n" // |x| < 0.65: derived from rtl_math_erf_rd
+"         fErfc = 1.0 - rtl_math_erf_rd( x );\n"
+"     if ( bNegative )\n" // negative input: 2 minus the value computed for |x|
+"         fErfc = 2.0 - fErfc;\n"
+"     return fErfc;\n"
+" }\n";
 #endif
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
author	hongyu zhong <hongyu@multicorewareinc.com>	2013-12-10 16:06:47 +0800
committer	I-Jui (Ray) Sung <ray@multicorewareinc.com>	2013-12-19 17:53:48 -0600
commit	19410777d2d0f2f4d492ebb93c393f74b0783ac6 (patch)
tree	bc791710ce79403c89fe22bc7dcb9808f5fbda6d
parent	2664b6fdebbeacf4ac047fd1b622e76cfb294d70 (diff)