diff options
author | Michael Meeks <michael.meeks@suse.com> | 2013-07-08 21:35:26 +0100 |
---|---|---|
committer | Kohei Yoshida <kohei.yoshida@gmail.com> | 2013-07-11 00:25:36 -0400 |
commit | c6fd693a8a1efb9e301bd623d29411dfa2378e5c (patch) | |
tree | c907ed85ad080fc393778d09fd65bbf4f6d1ff70 /sc | |
parent | d5ae61ed2394b6945b969c61aea0adac2234f702 (diff) |
cleanup formulagroupcl and add opencl kernel for averagedelta.
Conflicts:
sc/source/core/opencl/openclwrapper.hxx
Change-Id: Id4777d3854d34ab34dd29b050cd329a803023a39
Diffstat (limited to 'sc')
-rwxr-xr-x | sc/source/core/opencl/formulagroupcl.cxx | 51 | ||||
-rwxr-xr-x | sc/source/core/opencl/oclkernels.hxx | 28 | ||||
-rwxr-xr-x | sc/source/core/opencl/openclwrapper.cxx | 177 | ||||
-rwxr-xr-x | sc/source/core/opencl/openclwrapper.hxx | 7 |
4 files changed, 196 insertions, 67 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index d92a4719f5d9..8bc0224b874f 100755 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -310,11 +310,8 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd OpCode eOp; // type of operation: ocAverage, ocMax, ocMin const double *pArrayToSubtractOneElementFrom; const double *pGroundWaterDataArray; - size_t nGroundWaterDataArrayLen; // Output: - double *pResult = new double[xGroup->mnLength]; - RETURN_IF_FAIL(pResult != NULL, "buffer alloc failed"); std::vector<double> aMatrixContent; const formula::FormulaToken *p; @@ -338,36 +335,13 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd p = rCode.NextNoSpaces(); RETURN_IF_FAIL(p != NULL, "no function argument"); - if (p->GetType() == formula::svDoubleVectorRef) - { - // FIXME: this is what I would expect; but table1.cxx's - // ScColumn::ResolveStaticReference as called from - // GroupTokenConverter::convert returns an ScMatrixToken un-conditionally - const formula::DoubleVectorRefToken* pDvr = static_cast<const formula::DoubleVectorRefToken*>(p); - const std::vector<const double*>& rArrays = pDvr->GetArrays(); - RETURN_IF_FAIL(rArrays.size() == 1, "unexpectedly large double ref array"); - RETURN_IF_FAIL(pDvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong double ref length"); - RETURN_IF_FAIL(pDvr->IsStartFixed() && pDvr->IsEndFixed(), "non-fixed ranges )"); - pGroundWaterDataArray = rArrays[0]; - nGroundWaterDataArrayLen = xGroup->mnLength; - } - else - { - RETURN_IF_FAIL(p->GetType() == formula::svMatrix, "unexpected fn. param type"); - const ScMatrixToken *pMatTok = static_cast<const ScMatrixToken *>(p); - pMatTok->GetMatrix()->GetDoubleArray( aMatrixContent ); - // FIXME: horrible hackery: the legacy / excel shared formula oddness, - // such that the 1st entry is not truly shared, making these a different - // shape. - if (aMatrixContent.size() > (size_t)xGroup->mnLength + 1) - { - fprintf(stderr, "Error size range mismatch: %ld vs %ld\n", - (long)aMatrixContent.size(), (long)xGroup->mnLength); - return false; - } - pGroundWaterDataArray = &aMatrixContent[0]; - nGroundWaterDataArrayLen = aMatrixContent.size(); - } + RETURN_IF_FAIL(p->GetType() == formula::svDoubleVectorRef, "wrong type of fn argument"); + const formula::DoubleVectorRefToken* pDvr = static_cast<const formula::DoubleVectorRefToken*>(p); + const std::vector<const double*>& rArrays = pDvr->GetArrays(); + RETURN_IF_FAIL(rArrays.size() == 1, "unexpectedly large double ref array"); + RETURN_IF_FAIL(pDvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong double ref length"); + RETURN_IF_FAIL(pDvr->IsStartFixed() && pDvr->IsEndFixed(), "non-fixed ranges )"); + pGroundWaterDataArray = rArrays[0]; p = rCode.NextNoSpaces(); RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocClose, "missing closing )"); @@ -400,17 +374,22 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd // =AVERAGE(L$6:L$7701) - L6 // we would get: // eOp => ocAverage - // pGroundWaterDataArray => contains L$6:L$7701 - // pGroundWaterDataArrayLen => 7701 - 6 + 1 - // pArrayToSubtractOneElementFrom => contains L$5:L$7701 (overlapping) + // pGroundWaterDataArray => contains L$5:L$7701 + // pArrayToSubtractOneElementFrom => contains L$5:L$7701 (ie. a copy) // length of this array -> xGroup->mnLength fprintf (stderr, "Calculate !\n"); + double *pResult = ocl_calc.OclSimpleDeltaOperation( eOp, pGroundWaterDataArray, + pArrayToSubtractOneElementFrom, + (size_t) xGroup->mnLength ); + RETURN_IF_FAIL(pResult != NULL, "buffer alloc / calculaton failed"); + // Insert the double data, in rResult[i] back into the document rDoc.SetFormulaResults(rTopPos, pResult, xGroup->mnLength); delete [] pResult; + SAL_DEBUG ("exit cleanly !"); return true; } diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx index 6c9012641b54..e13c24a3a158 100755 --- a/sc/source/core/opencl/oclkernels.hxx +++ b/sc/source/core/opencl/oclkernels.hxx @@ -142,7 +142,6 @@ __kernel void oclFormulaAverage(__global float *input,__global int *start,__glob for(i = start[id];i<=end[id];i++) sum += input[i]; output[id] = sum / (end[id]-start[id]+1); - } //Sumproduct @@ -162,6 +161,33 @@ __kernel void oclFormulaMinverse(__global float *data, } +// Double precision is a requirement of spreadsheets +#if 0 +#if defined(cl_khr_fp64) // Khronos extension +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#elif defined(cl_amd_fp64) // AMD extension +#pragma OPENCL EXTENSION cl_amd_fp64 : enable +#endif +typedef double fp_t; +#else +typedef float fp_t; +#endif + +__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, __global int start, __global int end, __global fp_t *output) +{ + const unsigned int id = get_global_id(0); + + // Average + int i; + fp_t sum = 0.0; + for(i = start; i < end; i++) + sum += values[i]; + fp_t val = sum/(end-start); + + // Subtract & output + output[id] = val - subtract[id]; +} + ); #endif // USE_EXTERNAL_KERNEL diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx index 597f37097ce8..3030a2e8b703 100755 --- a/sc/source/core/opencl/openclwrapper.cxx +++ b/sc/source/core/opencl/openclwrapper.cxx @@ -76,16 +76,17 @@ int OpenclDevice::ReleaseOpenclRunEnv() { } /////////////////////////////////////////////////////// /////////////////////////////////////////////////////// -inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) { +inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) +{ strcpy(gpuEnv.mArrykernelNames[kCount], kName); gpuEnv.mnKernelCount++; return 0; } -int OpenclDevice::RegistOpenclKernel() { - if (!gpuEnv.mnIsUserCreated) { +int OpenclDevice::RegistOpenclKernel() +{ + if (!gpuEnv.mnIsUserCreated) memset(&gpuEnv, 0, sizeof(gpuEnv)); - } gpuEnv.mnFileCount = 0; //argc; gpuEnv.mnKernelCount = 0UL; @@ -100,17 +101,22 @@ int OpenclDevice::RegistOpenclKernel() { AddKernelConfig(7, (const char*) "oclFormulaSumproduct"); AddKernelConfig(8, (const char*) "oclFormulaMinverse"); - AddKernelConfig(9, (const char*) "oclSignedAdd"); + AddKernelConfig(9, (const char*) "oclSignedAdd"); AddKernelConfig(10, (const char*) "oclSignedSub"); AddKernelConfig(11, (const char*) "oclSignedMul"); AddKernelConfig(12, (const char*) "oclSignedDiv"); + AddKernelConfig(13, (const char*) "oclAverageDelta"); + return 0; } -OpenclDevice::OpenclDevice(){ + +OpenclDevice::OpenclDevice() +{ //InitEnv(); } -OpenclDevice::~OpenclDevice() { +OpenclDevice::~OpenclDevice() +{ //ReleaseOpenclRunEnv(); } @@ -122,13 +128,15 @@ int OpenclDevice::SetKernelEnv(KernelEnv *envInfo) return 1; } -int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){ + +int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName) +{ //printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount); int kCount; for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) { if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) { - printf("match %s kernel right\n",kernelName); - break; + printf("match %s kernel right\n",kernelName); + break; } } envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount]; @@ -141,7 +149,8 @@ int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){ return 1; } -int OpenclDevice::ConvertToString(const char *filename, char **source) { +int OpenclDevice::ConvertToString(const char *filename, char **source) +{ int file_size; size_t result; FILE *file = NULL; @@ -174,8 +183,9 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) { return 0; } -int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) { - unsigned int i = 0; +int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) +{ + unsigned int i = 0; cl_int status; char *str = NULL; FILE *fd = NULL; @@ -208,7 +218,8 @@ int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) { } int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary, - size_t numBytes) { + size_t numBytes) +{ FILE *output = NULL; output = fopen(fileName, "wb"); if (output == NULL) { @@ -223,7 +234,8 @@ int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary, } int OpenclDevice::GeneratBinFromKernelSource(cl_program program, - const char * clFileName) { + const char * clFileName) +{ unsigned int i = 0; cl_int status; size_t *binarySizes, numDevices; @@ -319,10 +331,10 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, return 1; } -int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) { - if (gpuEnv.mnIsUserCreated) { +int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) +{ + if (gpuEnv.mnIsUserCreated) return 1; - } gpuEnv.mpContext = env->mpOclContext; gpuEnv.mpPlatformID = env->mpOclPlatformID; @@ -334,21 +346,24 @@ int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) { return 0; } -int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) { +int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) +{ int status; - env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status); + env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status); env->mpkContext = gpuEnv.mpContext; env->mpkCmdQueue = gpuEnv.mpCmdQueue; return status != CL_SUCCESS ? 1 : 0; } -int OpenclDevice::ReleaseKernel(KernelEnv * env) { +int OpenclDevice::ReleaseKernel(KernelEnv * env) +{ int status = clReleaseKernel(env->mpkKernel); return status != CL_SUCCESS ? 1 : 0; } -int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) { +int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) +{ int i = 0; int status = 0; @@ -378,7 +393,8 @@ int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) { } int OpenclDevice::RunKernelWrapper(cl_kernel_function function, - const char * kernelName, void **usrdata) { + const char * kernelName, void **usrdata) +{ printf("oclwrapper:RunKernel_wrapper...\n"); if (RegisterKernelWrapper(kernelName, function) != 1) { fprintf(stderr, @@ -389,8 +405,9 @@ int OpenclDevice::RunKernelWrapper(cl_kernel_function function, } int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached, - const char * clFileName) { - int i; + const char * clFileName) +{ + int i; for (i = 0; i < gpuEnvCached->mnFileCount; i++) { if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) { if (gpuEnvCached->mpArryPrograms[i] != NULL) { @@ -574,6 +591,7 @@ int OpenclDevice::RunKernel(const char *kernelName, void **userdata) { } return 0; } + int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles) { int status = 0; @@ -1007,6 +1025,7 @@ int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *outpu CHECK_OPENCL(clStatus); return 0; } + int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) { KernelEnv kEnv; @@ -1590,7 +1609,6 @@ int OclCalc::OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npE clStatus = clReleaseMemObject(outputCl); CHECK_OPENCL(clStatus); return 0; - } int OclCalc::OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size) @@ -2066,4 +2084,111 @@ int OclCalc::OclHostFormulaSumProduct(float *dpSrcData,int *npStart,int *npEnd,f } #endif +#if 0 +typedef double fp_; +#else +typedef float fp_t; +#endif + +// FIXME: should be templatised in <double> - double buffering [sic] rocks +static cl_mem allocateDoubleBuffer(KernelEnv &rEnv, const double *_pValues, + size_t nElements, cl_int *pStatus) +{ + // Ugh - horrible redundant copying ... + cl_mem xValues = clCreateBuffer(rEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE), + nElements * sizeof(double), NULL, pStatus); + fp_t *pValues = (fp_t *)clEnqueueMapBuffer(rEnv.mpkCmdQueue,xValues,CL_TRUE,CL_MAP_WRITE,0, + nElements * sizeof(fp_t),0,NULL,NULL,NULL); + for(int i=0;i<nElements;i++) + pValues[i] = (fp_t)_pValues[i]; + + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL); + + return xValues; +} + +double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray, + const double *pSubtractSingle, size_t nElements) +{ + KernelEnv kEnv; + + // select a kernel: cut & paste coding is utterly evil. + const char *kernelName; + switch (eOp) { + case ocMax: + case ocMin: + ; // FIXME: fallthrough for now + case ocAverage: + kernelName = "oclAverageDelta"; + break; + default: + assert(false); + } + CheckKernelName(&kEnv,kernelName); + + cl_int clStatus; + size_t global_work_size[1]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + + // Ugh - horrible redundant copying ... + cl_mem valuesCl = allocateDoubleBuffer(kEnv, pOpArray, nElements, &clStatus); + cl_mem subtractCl = allocateDoubleBuffer(kEnv, pSubtractSingle, nElements, &clStatus); + + cl_int start = 0; + cl_int end = (cl_int) nElements; + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + nElements * sizeof(fp_t), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&valuesCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&subtractCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&start); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&end); + clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem), + (void *)&outputCl); + CHECK_OPENCL(clStatus); + + fprintf(stderr, "prior to enqueue range kernel\n"); + + global_work_size[0] = nElements; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + double *pResult = new double[nElements]; + if(!pResult) + return NULL; // leak. + + fp_t *pOutput = (fp_t *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE, + CL_MAP_READ,0,nElements*sizeof(fp_t), + 0,NULL,NULL,NULL); + for(int i = 0; i < nElements; i++) + pResult[i] = (double)pOutput[i]; + + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,pOutput,0,NULL,NULL); + + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(valuesCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(subtractCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + + fprintf(stderr, "completed opencl delta operation\n"); + + return pResult; +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx index 3e87f8445e7d..a0c132a2c49f 100755 --- a/sc/source/core/opencl/openclwrapper.hxx +++ b/sc/source/core/opencl/openclwrapper.hxx @@ -11,10 +11,8 @@ #define _OPENCL_WRAPPER_H_ #include <config_features.h> - -#ifdef __APPLE__ -#include <OpenCL/cl.h> -#else +#include <formula/opcode.hxx> +#include <cassert> #include <CL/cl.h> #endif @@ -212,6 +210,7 @@ public: int OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); int OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); int OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); + double *OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements); //int OclHostFormulaCount(int *startPos,int *endPos,float *output,int outputSize); //int OclHostFormulaSum(float *srcData,int *startPos,int *endPos,float *output,int outputSize); |