diff options
author | Tor Lillqvist <tml@collabora.com> | 2013-11-06 21:21:56 +0200 |
---|---|---|
committer | Tor Lillqvist <tml@collabora.com> | 2013-11-13 20:28:11 +0200 |
commit | 3b7c9033e8b0f28544468a15bb902c1029c47084 (patch) | |
tree | fcf067bb36364766a6e98b5dfd5d483300cfae1d | |
parent | cb0755e8d1ebb88a8f72f87cd6c822d2bd022c93 (diff) |
We don't use the static OpenCL kernels any more
So remove them and code related only to them.
Change-Id: Ibd09e7a801b9757443b6f87018570ec007e201d5
-rw-r--r-- | sc/inc/formulagroup.hxx | 1 | ||||
-rw-r--r-- | sc/source/core/opencl/formulagroupcl.cxx | 17 | ||||
-rw-r--r-- | sc/source/core/opencl/oclkernels.hxx | 390 | ||||
-rw-r--r-- | sc/source/core/opencl/openclwrapper.cxx | 1884 | ||||
-rw-r--r-- | sc/source/core/opencl/openclwrapper.hxx | 88 | ||||
-rw-r--r-- | sc/source/core/tool/formulagroup.cxx | 26 | ||||
-rw-r--r-- | sc/source/filter/ftools/clkernelthread.cxx | 2 |
7 files changed, 18 insertions, 2390 deletions
diff --git a/sc/inc/formulagroup.hxx b/sc/inc/formulagroup.hxx index fb7692bcd2ed..7c1636457809 100644 --- a/sc/inc/formulagroup.hxx +++ b/sc/inc/formulagroup.hxx @@ -89,7 +89,6 @@ class SC_DLLPUBLIC FormulaGroupInterpreter static FormulaGroupInterpreter *getStatic(); static void fillOpenCLInfo(std::vector<OpenclPlatformInfo>& rPlatforms); static bool switchOpenCLDevice(const OUString& rDeviceId, bool bAutoSelect); - static void compileOpenCLKernels(); static void enableOpenCL(bool bEnable); virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat) = 0; diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index fbe43c896d3d..81ac09d5bda6 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -72,7 +72,7 @@ size_t DynamicKernelArgument::Marshal(cl_kernel k, int argno, int) } // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); cl_int err; mpClmem = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, @@ -139,7 +139,7 @@ public: // marshaling // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); // Pass the scalar result back to the rest of the formula kernel cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode); if (CL_SUCCESS != err) @@ -220,7 +220,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int) assert(mpClmem == NULL); // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); cl_int err; formula::VectorRefArray vRef; size_t nStrings = 0; @@ -1367,7 +1367,7 @@ public: { // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); cl_int err; // The results mpResClmem = clCreateBuffer(kEnv.mpkContext, @@ -1424,7 +1424,7 @@ void DynamicKernel::CreateKernel(void) // Compile kernel here!!! // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); const char *src = mFullProgramSrc.c_str(); if (OpenclDevice::buildProgramFromBinary("", &OpenclDevice::gpuEnv, @@ -1546,7 +1546,7 @@ bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc, mpKernel->CodeGen(); // Obtain cl context KernelEnv kEnv; - OclCalc::setKernelEnv(&kEnv); + OpenclDevice::setKernelEnv(&kEnv); // Compile kernel here!!! mpKernel->CreateKernel(); // Run the kernel. @@ -1636,11 +1636,6 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice( return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect); } -SAL_DLLPUBLIC_EXPORT void compileOpenCLKernels(const OUString* pDeviceId) -{ - sc::opencl::compileOpenCLKernels(pDeviceId); -} - } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx deleted file mode 100644 index 3e0af5b6ca47..000000000000 --- a/sc/source/core/opencl/oclkernels.hxx +++ /dev/null @@ -1,390 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -#ifndef SC_OCLKERNELS_HXX -#define SC_OCLKERNELS_HXX - -#ifndef USE_EXTERNAL_KERNEL -#define KERNEL( ... )# __VA_ARGS__ - -namespace sc { namespace opencl { - -// Double precision is a default of spreadsheets -// cl_khr_fp64: Khronos extension -// cl_amd_fp64: AMD extension -// use build option outside to define fp_t -///////////////////////////////////////////// -const char *kernel_src = KERNEL( -\n#ifdef KHR_DP_EXTENSION\n -\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n -\n#elif AMD_DP_EXTENSION\n -\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n -\n#else\n -\n#endif\n -inline fp_t oclAverage( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray) -{ - uint start = startArray[id]; - uint end = endArray[id]; - fp_t fSum = 0.0; - fp_t zero[16] = {0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f}; - fp_t16 vSum=vload16(0,zero); - fp_t16 ptr; - __global fp_t *p = values; - p+= start; - - for(int i = 0; i < (end - start + 1)/16; ++i) - { - ptr=vload16(0,p); - vSum += ptr; - p+=16; - } - int lastData = (end-start+1)%16; - for(int i = 0; i <lastData; i++) - { - fSum += *p; - p+=1; - } - vSum.s01234567 = vSum.s01234567+vSum.s89abcdef; - vSum.s0123 = vSum.s0123+vSum.s4567; - vSum.s01 = vSum.s01+vSum.s23; - vSum.s0 = vSum.s0+vSum.s1; - fSum = vSum.s0+fSum; - fp_t fVal = fSum/(end-start+1); - return fVal; -} -inline fp_t oclMax( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray) -{ - uint start = startArray[id]; - uint end = endArray[id]; - fp_t fMax = values[start]; - fp_t zero[16] = {fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax}; - fp_t16 vMax=vload16(0,zero); - //Max - fp_t16 ptr; - __global fp_t *p = values; - p+= start; - for(int i = 0; i < (end - start + 1)/16; ++i) - { - ptr=vload16(0,p); - vMax = fmax(vMax,ptr); - p+=16; - } - int lastData = (end-start+1)%16; - for(int i = 0; i <lastData; i++) - { - fMax = fmax(fMax,*p); - p+=1; - } - vMax.s01234567 = fmax(vMax.s01234567, vMax.s89abcdef); - vMax.s0123 = fmax(vMax.s0123, vMax.s4567); - vMax.s01 = fmax(vMax.s01, vMax.s23); - vMax.s0 = fmax(vMax.s0, vMax.s1); - fMax = fmax(vMax.s0, fMax); - return fMax; -} -inline fp_t oclMin( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray) -{ - uint start = startArray[id]; - uint end = endArray[id]; - fp_t fMin = values[start]; - fp_t zero[16] = {fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin}; - fp_t16 vMin=vload16(0,zero); - //Min - fp_t16 ptr; - __global fp_t *p = values; - p+= start; - for(int i = 0; i < (end - start + 1)/16; ++i) - { - ptr=vload16(0,p); - vMin = fmin(vMin,ptr); - p+=16; - } - int lastData = (end-start+1)%16; - for(int i = 0; i <lastData; i++) - { - fMin = fmin(fMin,*p); - p+=1; - } - vMin.s01234567 = fmin(vMin.s01234567, vMin.s89abcdef); - vMin.s0123 = fmin(vMin.s0123, vMin.s4567); - vMin.s01 = fmin(vMin.s01, vMin.s23); - vMin.s0 = fmin(vMin.s0, vMin.s1); - fMin = fmin(vMin.s0, fMin); - return fMin; -} - -__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData) -{ - const unsigned int id = get_global_id(0); - otData[id] = ltData[id] + rtData[id]; -} - -__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData) -{ - const unsigned int id = get_global_id(0); - otData[id] = ltData[id] - rtData[id]; -} - -__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData) -{ - int id = get_global_id(0); - otData[id] = ltData[id] * rtData[id]; -} - -__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData) -{ - const unsigned int id = get_global_id(0); - fp_t divisor = rtData[id]; - if ( divisor != 0 ) - otData[id] = ltData[id] / divisor; - else - otData[id] = 0.0; -} - -__kernel void oclFormulaMin(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclMin(id,input,start,end); - output[id] = fVal ; -} - -__kernel void oclFormulaMax(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclMax(id,input,start,end); - output[id] = fVal ; -} -//Sum -__kernel void oclFormulaSum(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output) -{ - const unsigned int nId = get_global_id(0); - fp_t fSum = 0.0; - for(int i = start[nId]; i<=end[nId]; i++) - fSum += input[i]; - output[nId] = fSum ; -} -//Count -__kernel void oclFormulaCount(__global uint *start,__global uint *end,__global fp_t *output) -{ - const unsigned int nId = get_global_id(0); - output[nId] = end[nId] - start[nId] + 1; -} - -__kernel void oclFormulaAverage(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclAverage(id,input,start,end); - output[id] = fVal ; - -} -//Sumproduct -__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global uint* npSumSize,__global fp_t *output,uint nMatixSize) -{ - const unsigned int id = get_global_id(0); - unsigned int nSumSize = npSumSize[id]; - fp_t fSum = 0.0; - for(int i=0;i<nSumSize;i++) - fSum += firstCol[i + nMatixSize * id]; - output[id] = fSum; -} - -__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - - // Average - fp_t fSum = 0.0; - for(int i = start; i < end; i++) - fSum += values[i]; - fp_t fVal = fSum/(end-start); - - // Subtract & output - output[id] = fVal - subtract[id]; -} - -__kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - - // Max - fp_t fMaxVal = values[start]; - for ( int i = start + 1; i < end; i++ ) - { - if(values[i]>fMaxVal) - fMaxVal = values[i]; - } - - // Subtract & output - output[id] = fMaxVal - subtract[id]; -} - -__kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - - // Min - fp_t fMinVal = values[start]; - for ( int i = start + 1; i < end; i++ ) - { - if(values[i]<fMinVal) - fMinVal = values[i]; - } - - // Subtract & output - output[id] = fMinVal - subtract[id]; -} - -__kernel void oclSubDelta( fp_t ltData, __global fp_t *rtData, __global fp_t *outData ) -{ - const unsigned int id = get_global_id(0); - outData[id] = ltData - rtData[id]; -} - -__kernel void oclFormulaMtxInv(__global fp_t * fpMatrixInput, __global fp_t * fpP, int nOffset, int nMax) -{ - //get the global id of the workitem - int nId = get_global_id(0); - int nDimension = get_global_size(0); - fp_t dMovebuffer; - dMovebuffer = fpMatrixInput[nOffset*nDimension+nId]; - fpMatrixInput[nOffset*nDimension+nId] = fpMatrixInput[nMax*nDimension+nId]; - fpMatrixInput[nMax*nDimension+nId] = dMovebuffer; - - dMovebuffer = fpP[nOffset*nDimension+nId]; - fpP[nOffset*nDimension+nId] = fpP[nMax*nDimension+nId]; - fpP[nMax*nDimension+nId] = dMovebuffer; -} -__kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMatrixOutput,__global fp_t * fpP,__global fp_t * fpY,__global uint* npDim) -{ - int nId = get_global_id(0); - int nDimension = npDim[nId]; - fp_t fsum = 0.0; - for ( int yi=0; yi < nDimension; yi++ ) - { - fsum = 0.0; - for ( int yj=0; yj < nDimension; yj++ ) - { - fsum += fpMatrixInput[yi*nDimension+yj] * fpY[nId+yj*nDimension]; - } - - fpY[nId+yi*nDimension] = fpP[yi*nDimension+nId] - fsum; - } - for ( int xi = nDimension - 1; xi >= 0; xi-- ) - { - fsum = 0.0; - for ( int xj = 0; xj < nDimension; xj++ ) - { - fsum += fpMatrixInput[xi*nDimension+xj] * fpMatrixOutput[nId+nDimension*xj]; - } - fpMatrixOutput[nId+xi*nDimension] = (fpY[xi*nDimension+nId] - fsum) / fpMatrixInput[xi*nDimension+xi]; - } -} - -__kernel void oclAverageAdd(__global fp_t *values,__global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclAverage(id,values,startArray,endArray); - output[id] = fVal + addend[id]; -} - -__kernel void oclAverageSub(__global fp_t *values,__global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclAverage(id,values,startArray,endArray); - output[id] = fVal - subtract[id]; -} - -__kernel void oclAverageMul(__global fp_t *values,__global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclAverage(id,values,startArray,endArray); - output[id] = fVal * multiplier[id]; -} -__kernel void oclAverageDiv(__global fp_t *values,__global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fVal = oclAverage(id,values,startArray,endArray); - fp_t divisor = div[id]; - if ( divisor != 0 ) - output[id] = fVal / divisor; - else - output[id] = 0.0; -} - -__kernel void oclMinAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMin = oclMin(id,values,startArray,endArray); - output[id] = fMin + addend[id]; -} - -__kernel void oclMinSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMin = oclMin(id,values,startArray,endArray); - output[id] = fMin - subtract[id]; -} -__kernel void oclMinMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMin = oclMin(id,values,startArray,endArray); - output[id] = fMin * multiplier[id]; -} -__kernel void oclMinDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMin = oclMin(id,values,startArray,endArray); - fp_t divisor = div[id]; - if ( divisor != 0 ) - output[id] = fMin / divisor; - else - output[id] = 0.0; -} -__kernel void oclMaxAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMax = oclMax(id,values,startArray,endArray); - output[id] = fMax + addend[id]; -} - -__kernel void oclMaxSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMax = oclMax(id,values,startArray,endArray); - output[id] = fMax - subtract[id]; -} -__kernel void oclMaxMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMax = oclMax(id,values,startArray,endArray); - output[id] = fMax * multiplier[id]; -} -__kernel void oclMaxDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output) -{ - const unsigned int id = get_global_id(0); - fp_t fMax = oclMax(id,values,startArray,endArray); - fp_t divisor = div[id]; - if ( divisor != 0 ) - output[id] = fMax / divisor; - else - output[id] = 0.0; -} - -__kernel void oclSub( fp_t ltData, __global fp_t *rtData, __global fp_t *outData ) -{ - const unsigned int id = get_global_id(0); - outData[id] = ltData - rtData[id]; -} -); - -}} - -#endif // USE_EXTERNAL_KERNEL -#endif //_OCL_KERNEL_H_ -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx index 61eed493dd26..aa411123a300 100644 --- a/sc/source/core/opencl/openclwrapper.cxx +++ b/sc/source/core/opencl/openclwrapper.cxx @@ -19,7 +19,6 @@ #include "sal/config.h" #include <osl/file.hxx> -#include "oclkernels.hxx" #include <stdio.h> #include <stdlib.h> @@ -43,8 +42,6 @@ using namespace std; namespace sc { namespace opencl { -Kernel::Kernel( const char* pName ) : mpName(pName), mpKernel(NULL) {} - GPUEnv OpenclDevice::gpuEnv; int OpenclDevice::isInited =0; @@ -68,12 +65,6 @@ OString generateMD5(const void* pData, size_t length) return aBuffer.makeStringAndClear(); } -OString generateHashForSource() -{ - size_t nLength = strlen(kernel_src); - return generateMD5(kernel_src, nLength); -} - OString getCacheFolder() { OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); @@ -86,6 +77,15 @@ OString getCacheFolder() void clearCache() { +#if 0 + // We used to delete all files that did not end with the hash of + // the static kernel source string from oclkernels.hxx. But as + // those static kernels were not used for anything, it was + // pointless, that hash never changed. The static kernels are now + // removed, their hash is not part of the .bin file names any + // more. So there is little this function can do until we come up + // with some other way to figure out which cached .bin files are + // "current". OUString aCacheDirURL(rtl::OStringToOUString(OpenclDevice::maCacheFolder, RTL_TEXTENCODING_UTF8)); osl::Directory aCacheDir(aCacheDirURL); osl::FileBase::RC status = aCacheDir.open(); @@ -93,7 +93,6 @@ void clearCache() return; osl::DirectoryItem aItem; - OUString aSourceString = rtl::OStringToOUString(OpenclDevice::maSourceHash + ".bin", RTL_TEXTENCODING_UTF8); while(osl::FileBase::E_None == aCacheDir.getNextItem(aItem)) { osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileName|osl_FileStatus_Mask_FileURL); @@ -104,7 +103,7 @@ void clearCache() OUString aFileName = aFileStatus.getFileName(); if(aFileName.endsWith(".bin")) { - if(!aFileName.endsWith(aSourceString)) + if ( file is in some way obsolete ) { // delete the file OUString aFileUrl = aFileStatus.getFileURL(); @@ -112,11 +111,11 @@ void clearCache() } } } +#endif } } -OString OpenclDevice::maSourceHash = generateHashForSource(); OString OpenclDevice::maCacheFolder = getCacheFolder(); int OpenclDevice::releaseOpenclRunEnv() @@ -126,64 +125,11 @@ int OpenclDevice::releaseOpenclRunEnv() return 1; } -namespace { - -const char* pKernelNames[] = { - - "oclFormulaMin", - "oclFormulaMax", - "oclFormulaSum", - "oclFormulaCount", - "oclFormulaAverage", - "oclFormulaSumproduct", - "oclFormulaMtxInv", - - "oclSignedAdd", - "oclSignedSub", - "oclSignedMul", - "oclSignedDiv", - "oclAverageDelta", - "oclMaxDelta", - "oclMinDelta", - "oclSubDelta", - "oclLUDecomposition", - "oclAverageDeltaRPN", - "oclMaxDeltaRPN", - "oclMinDeltaRPN", - "oclMoreColArithmeticOperator", - "oclColumnH", - "oclColumnL", - "oclColumnN", - "oclColumnJ", - "oclMaxSub", - "oclAverageSub", - "oclMinSub", - "oclMaxAdd", - "oclAverageAdd", - "oclMinAdd", - "oclMaxMul", - "oclAverageMul" - "oclMinMul", - "oclMaxDiv", - "oclAverageDiv" - "oclMinDiv", - "oclSub", - - "oclMatrixSolve" -}; - -} - int OpenclDevice::registOpenclKernel() { if ( !gpuEnv.mnIsUserCreated ) memset( &gpuEnv, 0, sizeof(gpuEnv) ); - gpuEnv.mnFileCount = 0; //argc; - - for (size_t i = 0, n = SAL_N_ELEMENTS(pKernelNames); i < n; ++i) - gpuEnv.maKernels.push_back(Kernel(pKernelNames[i])); - return 0; } @@ -196,32 +142,6 @@ int OpenclDevice::setKernelEnv( KernelEnv *envInfo ) return 1; } -Kernel* OpenclDevice::fetchKernel( const char *kernelName ) -{ - cl_int nStatus; - for (size_t i = 0, n = gpuEnv.maKernels.size(); i < n; ++i) - { - Kernel* pKernel = &gpuEnv.maKernels[i]; - if (!strcasecmp(kernelName, pKernel->mpName)) - { - printf("found the kernel named %s.\n", kernelName); - if (!pKernel->mpKernel && gpuEnv.mpArryPrograms[0]) - { - pKernel->mpKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelName, &nStatus); - if (nStatus != CL_SUCCESS) - pKernel->mpKernel = NULL; - - printf("Kernel named '%s' has been compiled\n", kernelName); - } - - return pKernel->mpKernel ? pKernel : NULL; - } - } - - printf("No kernel named %s found.\n", kernelName); - return NULL; -} - namespace { OString createFileName(cl_device_id deviceId, const char* clFileName) @@ -252,7 +172,7 @@ OString createFileName(cl_device_id deviceId, const char* clFileName) OString aHash = generateMD5(aString.getStr(), aString.getLength()); return OpenclDevice::maCacheFolder + fileName + "-" + - aHash + "-" + OpenclDevice::maSourceHash + ".bin"; + aHash + ".bin"; } } @@ -402,27 +322,11 @@ int OpenclDevice::initOpenclAttr( OpenCLEnv * env ) int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo ) { - int clStatus = 0; - if ( !isInited ) { return 1; } - // Release all cached kernels. - for (size_t i = 0, n = gpuInfo->maKernels.size(); i < n; ++i) - clReleaseKernel(gpuInfo->maKernels[i].mpKernel); - gpuInfo->maKernels.clear(); - - for (int i = 0; i < gpuEnv.mnFileCount; i++) - { - if ( gpuEnv.mpArryPrograms[i] ) - { - clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] ); - CHECK_OPENCL( clStatus, "clReleaseProgram" ); - gpuEnv.mpArryPrograms[i] = NULL; - } - } if ( gpuEnv.mpCmdQueue ) { clReleaseCommandQueue( gpuEnv.mpCmdQueue ); @@ -440,23 +344,6 @@ int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo ) return 1; } -int OpenclDevice::cachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName ) -{ - int i; - for ( i = 0; i < gpuEnvCached->mnFileCount; i++ ) - { - if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 ) - { - if ( gpuEnvCached->mpArryPrograms[i] != NULL ) - { - return 1; - } - } - } - - return 0; -} - namespace { bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx) @@ -532,25 +419,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx) } -bool OpenclDevice::buildProgramFromSource(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx) -{ - cl_int clStatus = 0; - // create a CL program using the kernel source - fprintf(stderr, "Create kernel from source\n"); - size_t source_size[1]; - - source_size[0] = strlen( kernel_src ); - gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &kernel_src, - source_size, &clStatus); - - if(clStatus != CL_SUCCESS) - return false; - - bool bSuccess = buildProgram(buildOption, gpuInfo, idx); - generatBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename ); - return bSuccess; -} - bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx) { size_t numDevices; @@ -851,1706 +719,6 @@ int OpenclDevice::getOpenclState() return isInited; } -OclCalc::OclCalc() -{ - fprintf(stderr,"OclCalc:: init opencl ...\n"); - nFormulaColSize = 0; - nFormulaRowSize = 0; - nArithmeticLen = 0; - nFormulaLen = 0; - mpClmemSrcData = NULL; - mpClmemStartPos = NULL; - mpClmemEndPos = NULL; - mpClmemLeftData = NULL; - mpClmemRightData = NULL; - mpClmemMergeLfData = NULL; - mpClmemMergeRtData = NULL; - mpClmemMatixSumSize = NULL; - mpClmemeOp = NULL; -} - -OclCalc::~OclCalc() -{ - releaseOclBuffer(); -} - -void OclCalc::releaseOclBuffer() -{ - cl_int clStatus = 0; - CHECK_OPENCL_RELEASE( clStatus, mpClmemSrcData ); - CHECK_OPENCL_RELEASE( clStatus, mpClmemStartPos ); - CHECK_OPENCL_RELEASE( clStatus, mpClmemEndPos ); - CHECK_OPENCL_RELEASE( clStatus, mpClmemLeftData ); - CHECK_OPENCL_RELEASE( clStatus, mpClmemRightData ); - fprintf(stderr,"OclCalc:: opencl end ...\n"); -} - -///////////////////////////////////////////////////////////////////////////// - -bool OclCalc::createBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize ) -{ - cl_int clStatus = 0; - setKernelEnv( &kEnv ); - - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), - nBufferSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), - nBufferSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus,"clCreateBuffer" ); - dpLeftData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,CL_MAP_WRITE,0, - nBufferSize * sizeof(double),0,NULL,NULL,&clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish(kEnv.mpkCmdQueue); - dpRightData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,CL_MAP_WRITE,0, - nBufferSize * sizeof(double),0,NULL,NULL,&clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos); - return true; -} - -bool OclCalc::mapAndCopy64Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize) -{ - cl_int clStatus = 0; - double * dpSrcDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE,CL_MAP_WRITE, 0, - nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE,CL_MAP_WRITE, 0, - nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0, - nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for(int i=0;i<nBufferSize;i++) - dpSrcDataMap[i] = dpTempSrcData[i]; - for(int i=0;i<nRowsize;i++) - { - npStartPosMap[i] = unStartPos[i]; - npEndPosMap[i] = unEndPos[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, dpSrcDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPosMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPosMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopy64Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize ) -{ - cl_int clStatus = 0; - double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - double *dpRightDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nBufferSize; i++ ) - { - dpLeftDataMap[i] = dpTempLeftData[i]; - dpRightDataMap[i] = dpTempRightData[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, dpRightDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopyArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize ) -{ - cl_int clStatus = 0; - double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nBufferSize; i++ ) - { - dpLeftDataMap[i] = dpMoreColArithmetic[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopyMoreColArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize ) -{ - cl_int clStatus = 0; - double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE, - 0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nBufferSize; i++ ) - { - dpLeftDataMap[i] = dpMoreColArithmetic[i]; - } - for( uint i = 0; i<neOpSize; i++) - { - dpeOpMap[i] = npeOp[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createFormulaBuf64Bits( int nBufferSize, int rowSize ) -{ - cl_int clStatus = 0; - setKernelEnv( &kEnv ); - mpClmemSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - nFormulaLen = nBufferSize; - mpClmemStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - rowSize * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - rowSize * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createArithmeticOptBuf64Bits( int nBufferSize ) -{ - cl_int clStatus = 0; - nArithmeticLen = nBufferSize; - setKernelEnv( &kEnv ); - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(double), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createMoreColArithmeticBuf64Bits( int nBufferSize, int neOpSize ) -{ - cl_int clStatus = 0; - nArithmeticLen = nBufferSize; - setKernelEnv( &kEnv ); - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(double), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - neOpSize * sizeof(uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult,int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - clFinish( kEnv.mpkCmdQueue ); - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, - nRowSize * sizeof(double), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE,CL_MAP_READ, - 0, nRowSize*sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - for ( int i = 0; i < nRowSize; i++ ) - rResult[i] = dpOutPut[i]; - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, rResult, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - const char *aKernelName = "oclMoreColArithmeticOperator"; - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - double * hostMapResult = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0, - nRowSize*sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nRowSize; i++) - rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostArithmeticStash64Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - setKernelEnv( &kEnv ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - clFinish( kEnv.mpkCmdQueue ); - - cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - nRowSize * sizeof(double), (void *)dpLeftData, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - nRowSize * sizeof(double), (void *)dpRightData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, - nRowSize * sizeof(double), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(double), (double *)rResult, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" ); - - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clLeftData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clRightData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostFormulaStash64Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - setKernelEnv( &kEnv ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_mem clSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - nBufferSize * sizeof(double), (void *)dpSrcData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - size * sizeof(unsigned int), (void *)nStartPos, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - size * sizeof(unsigned int), (void *)nEndPos, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(double), (double *)output, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clReadBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( outputCl ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clSrcData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clStartPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clEndPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&output, int size ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE,CL_MAP_READ, - 0, size*sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - for ( int i = 0; i < size; i++ ) - { - output[i] = dpOutPut[i]; - } - - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, output, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( outputCl ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize ) -{ - const char *cpKernelName = "oclFormulaCount"; - Kernel* pKernel = fetchKernel(cpKernelName); - if (!pKernel) - return false; - - cl_int clStatus; - - size_t global_work_size[1]; - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - nSize* sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos); - CHECK_OPENCL( clStatus,"clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos); - CHECK_OPENCL( clStatus,"clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - dpOutput = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ, - 0, nSize*sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, dpOutput, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clpOutput ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -/* - * The dpsrcData is double rows,one col is the first column data,second is the second column data.if get a cell data range,the range - *save the npStart array eg:a4-a8;b10-b14,the npStart will store a4,b10,and the npEnd will store a8,b14 range.So it can if(i +1)%2 to judge - * the a cloumn or b cloumn npStart range.so as b bolumn. - */ -bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, double *dpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize ) -{ - cl_int clStatus; - size_t global_work_size[1]; - memset(dpOutput,0,nSize); - const char *cpFirstKernelName = "oclSignedMul"; - const char *cpSecondKernelName = "oclFormulaSumproduct"; - Kernel* pKernel1 = fetchKernel(cpFirstKernelName); - if (!pKernel1) - return false; - - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, dpSumProMergeLfData, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish(kEnv.mpkCmdQueue); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, dpSumProMergeRrData, 0, NULL, NULL ); - clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int nMulResultSize = nFormulaRowSize + nFormulaRowSize * nSize * nFormulaColSize - 1; - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(double), - NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemMergeLfData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nMulResultSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clReleaseMemObject( mpClmemMergeLfData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( mpClmemMergeRtData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - - Kernel* pKernel2 = fetchKernel(cpSecondKernelName); - if (!pKernel2) - return false; - - cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize; - clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - double * outputMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ, - 0, nSize*sizeof(double), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nSize; i++ ) - dpOutput[i] = outputMap[i]; - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, outputMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( mpClmemMatixSumSize ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpOutput ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - - return true; -} - -bool OclCalc::createMoreColArithmeticBuf32Bits( int nBufferSize, int neOpSize ) -{ - cl_int clStatus = 0; - nArithmeticLen = nBufferSize; - setKernelEnv( &kEnv ); - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(float), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - neOpSize * sizeof(uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createArithmeticOptBuf32Bits( int nBufferSize ) -{ - cl_int clStatus = 0; - setKernelEnv( &kEnv ); - nArithmeticLen = nBufferSize; - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(float), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createFormulaBuf32Bits( int nBufferSize, int rowSize ) -{ - cl_int clStatus = 0; - setKernelEnv( &kEnv ); - nFormulaLen = nBufferSize; - - mpClmemSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nBufferSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - mpClmemStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - rowSize * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - rowSize * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::createBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize ) -{ - cl_int clStatus = 0; - setKernelEnv( &kEnv ); - mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nBufferSize * sizeof(float), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nBufferSize * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos); - return true; -} - -bool OclCalc::mapAndCopy32Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize) -{ - cl_int clStatus = 0; - float *fpSrcData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE, CL_MAP_WRITE, 0, - nBufferSize * sizeof(float) , 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int *npStartPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE, CL_MAP_WRITE, 0, - nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int *npEndPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0, - nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for(int i=0;i<nBufferSize;i++) - { - fpSrcData[i] = (float)dpTempSrcData[i]; - } - for(int i=0;i<nRowsize;i++) - { - npStartPos[i] = unStartPos[i]; - npEndPos[i] = unEndPos[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, fpSrcData, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopy32Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize ) -{ - cl_int clStatus = 0; - float *fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - float *fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for(int i=0;i<nBufferSize;i++) - { - fpLeftData[i] = (float)dpTempLeftData[i]; - fpRightData[i] = (float)dpTempRightData[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftData, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, fpRightData, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopyArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize ) -{ - cl_int clStatus = 0; - float *dpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nBufferSize; i++ ) - { - dpLeftDataMap[i] = (float)dpMoreColArithmetic[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::mapAndCopyMoreColArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize ) -{ - cl_int clStatus = 0; - float *fpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE, - 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE, - 0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nBufferSize; i++ ) - { - fpLeftDataMap[i] = (float)dpMoreColArithmetic[i]; - } - for( uint i = 0; i<neOpSize; i++ ) - { - dpeOpMap[i] = npeOp[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftDataMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - return true; -} - -bool OclCalc::oclHostArithmeticOperator32Bits( const char* aKernelName,double *rResult, int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - float * hostMapResult = (float *)clEnqueueMapBuffer( - kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0, nRowSize*sizeof(float), 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nRowSize; i++) - rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - - return true; -} - -bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - const char *aKernelName = "oclMoreColArithmeticOperator"; - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - float * hostMapResult = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0, - nRowSize*sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < nRowSize; i++) - rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - - return true; -} - -bool OclCalc::oclHostFormulaStatistics32Bits(const char* aKernelName,double *output,int size) -{ - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - cl_int clStatus = 0; - size_t global_work_size[1]; - - cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemSrcData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - float * outputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, - 0, size*sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( int i = 0; i < size; i++ ) - output[i] = outputMap[i]; // from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, outputMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( outputCl ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostArithmeticStash32Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - setKernelEnv( &kEnv ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - float *fpLeftData = (float *)malloc( sizeof(float) * nRowSize ); - float *fpRightData = (float *)malloc( sizeof(float) * nRowSize ); - float *fpResult = (float *)malloc( sizeof(float) * nRowSize ); - for(int i=0;i<nRowSize;i++) - { - fpLeftData[i] = (float)dpLeftData[i]; - fpRightData[i] = (float)dpRightData[i]; - } - cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - nRowSize * sizeof(float), (void *)fpLeftData, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR), - nRowSize * sizeof(float), (void *)fpRightData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, - nRowSize * sizeof(float), NULL, &clStatus); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - clFinish( kEnv.mpkCmdQueue ); - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - global_work_size[0] = nRowSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(float), (float *)fpResult, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" ); - for(int i=0;i<nRowSize;i++) - rResult[i] = (double)fpResult[i]; - if(fpResult) - { - free(fpResult); - fpResult = NULL; - } - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clLeftData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clRightData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size ) -{ - cl_int clStatus = 0; - size_t global_work_size[1]; - setKernelEnv( &kEnv ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - float *fpSrcData = (float *)malloc( sizeof(float) * nBufferSize ); - float *fpResult = (float *)malloc( sizeof(float) * size ); - for(int i=0;i<nBufferSize;i++) - fpSrcData[i] = (float)dpSrcData[i]; - cl_mem clSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR), - nBufferSize * sizeof(float), (void *)fpSrcData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR), - size * sizeof(unsigned int), (void *)nStartPos, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR), - size * sizeof(unsigned int), (void *)nEndPos, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(float), (double *)fpResult, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clReadBuffer" ); - for(int i = 0;i<size;i++) - output[i] = (float)fpResult[i]; - clFinish( kEnv.mpkCmdQueue ); - if(fpResult) - { - free(fpResult); - fpResult = NULL; - } - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( outputCl ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clSrcData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clStartPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clEndPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -bool OclCalc::oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize ) -{ - const char *cpKernelName = "oclFormulaCount"; - Kernel* pKernel = fetchKernel(cpKernelName); - if (!pKernel) - return false; - - cl_int clStatus; - size_t global_work_size[1]; - - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - nSize* sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - global_work_size[0] = nSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, - CL_MAP_READ, 0, nSize*sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for (int i = 0; i < nSize; i++ ) - dpOutput[i] = fpOutputMap[i];// from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject(mpClmemSrcData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( mpClmemStartPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( mpClmemEndPos ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - clStatus = clReleaseMemObject( clpOutput ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - return true; -} - -//sumproduct -bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize ) -{ - cl_int clStatus; - size_t global_work_size[1]; - memset(dpOutput,0,nSize); - const char *cpFirstKernelName = "oclSignedMul"; - const char *cpSecondKernelName = "oclFormulaSumproduct"; - Kernel* pKernel1 = fetchKernel(cpFirstKernelName); - if (!pKernel1) - return false; - - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, fpSumProMergeLfData, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, fpSumProMergeRrData, 0, NULL, NULL ); - clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int nMulResultSize = nFormulaRowSize + nFormulaRowSize * nSize * nFormulaColSize - 1; - cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemMergeLfData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nMulResultSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clReleaseMemObject( mpClmemMergeLfData ); - CHECK_OPENCL( clStatus,"clReleaseMemObject" ); - clStatus = clReleaseMemObject( mpClmemMergeRtData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - - Kernel* pKernel2 = fetchKernel(cpSecondKernelName); - if (!pKernel2) - return false; - - cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize; - clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - global_work_size[0] = nSize; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ, 0, - nSize*sizeof(float), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for (int i = 0; i < nSize; i++ ) - { - dpOutput[i] = fpOutputMap[i]; // from gpu float type to cpu double type - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( clResult ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( mpClmemMatixSumSize ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpOutput ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - return true; -} - - -// FIXME: should be templatised in <double> - double buffering [sic] rocks -static cl_mem allocateDoubleBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus ) -{ - // Ugh - horrible redundant copying ... - cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nElements * sizeof(double), NULL, pStatus); - double *pValues = (double *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(double), 0, NULL, NULL, NULL); - clFinish(rEnv.mpkCmdQueue); - for ( int i = 0; i < (int)nElements; i++ ) - pValues[i] = _pValues[i]; - clEnqueueUnmapMemObject( rEnv.mpkCmdQueue, xValues, pValues, 0, NULL, NULL ); - clFinish( rEnv.mpkCmdQueue ); - return xValues; -} - -static cl_mem allocateFloatBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus ) -{ - // Ugh - horrible redundant copying ... - cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nElements * sizeof(float), NULL, pStatus); - float *pValues = (float *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(float), 0, NULL, NULL, NULL ); - clFinish( rEnv.mpkCmdQueue ); - for ( int i = 0; i < (int)nElements; i++ ) - pValues[i] = (float)_pValues[i]; - - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL); - clFinish( rEnv.mpkCmdQueue ); - return xValues; -} - -bool OclCalc::oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle, size_t nSrcDataSize,size_t nElements, double del ,uint *nStartPos,uint *nEndPos,double *dpResult) -{ - setKernelEnv( &kEnv ); - - char kernelName[256] = ""; - double delta = del; - bool subFlag = false; - strcat(kernelName,"ocl"); - for ( size_t i = 0; i < eOpNum; i++ ) - { - switch ( eOp[i] ) - { - case ocAdd: - strcat(kernelName,"Add"); - break; - case ocSub: - strcat(kernelName,"Sub"); - break; - case ocMul: - strcat(kernelName,"Mul"); - break; - case ocDiv: - strcat(kernelName,"Div"); - break; - case ocMax: - strcat(kernelName,"Max"); - break; - case ocMin: - strcat(kernelName,"Min"); - break; - case ocAverage: - strcat(kernelName,"Average"); - break; - default: - assert( false ); - break; - } - } - Kernel* pKernel = fetchKernel(kernelName); - if (!pKernel) - return false; - - cl_int clStatus; - size_t global_work_size[1]; - if ( ( eOpNum == 1 ) && ( eOp[0] == ocSub ) ) - subFlag = true; - - cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL, startPosCL = NULL, endPosCL = NULL; - - if(!subFlag) - { - startPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nElements * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - endPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), - nElements * sizeof(unsigned int), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, startPosCL, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, endPosCL, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(uint), 0, NULL, NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - - for(uint i=0;i<nElements;i++) - { - npStartPosMap[i]=nStartPos[i]; - npEndPosMap[i]=nEndPos[i]; - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, startPosCL, npStartPosMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clFinish( kEnv.mpkCmdQueue ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, endPosCL, npEndPosMap, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus ); - subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nElements * sizeof(double), NULL, &clStatus ); - } - else - { - valuesCl = allocateFloatBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus ); - subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE , nElements * sizeof(float), NULL, &clStatus); - } - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&startPosCL); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&endPosCL); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - fprintf( stderr, "prior to enqueue range kernel\n" ); - } - else - { - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - } - else - { - float fTmp = (float)delta; - subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - } - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - } - global_work_size[0] = nElements; - clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, - outputCl, - CL_TRUE,0, - nElements * sizeof(double), - (void *)dpResult,0,NULL,NULL); - CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - } - else - { - float *afBuffer = new float[nElements]; - if ( !afBuffer ) - return false; - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, - outputCl, - CL_TRUE,0, - nElements * sizeof(float), - (void *)afBuffer,0,NULL,NULL); - CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" ); - clFinish( kEnv.mpkCmdQueue ); - for ( size_t i = 0; i < nElements; i++ ) - { - dpResult[i] = (double)afBuffer[i]; - } - delete [] afBuffer; - } - - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - - CHECK_OPENCL_RELEASE( clStatus, valuesCl ); - CHECK_OPENCL_RELEASE( clStatus, subtractCl ); - CHECK_OPENCL_RELEASE( clStatus, outputCl ); - CHECK_OPENCL_RELEASE( clStatus, startPosCL ); - CHECK_OPENCL_RELEASE( clStatus, endPosCL ); - - fprintf( stderr, "completed opencl operation\n" ); - - return true; -} -double *OclCalc::oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double del ) -{ - setKernelEnv( &kEnv ); - - // select a kernel: cut & paste coding is utterly evil. - const char *kernelName = NULL; - double delta = del; - bool subFlag = false; - switch ( eOp ) { - case ocAdd: - fprintf( stderr, "ocSub is %d\n", ocSub ); - case ocMul: - case ocDiv: - ; // FIXME: fallthrough for now - case ocMax: - kernelName = "oclMaxDelta"; - break; - case ocMin: - kernelName = "oclMinDelta"; - break; - case ocAverage: - kernelName = "oclAverageDelta"; - break; - case ocSub: - kernelName = "oclSubDelta"; - subFlag = true; - break; - default: - assert( false ); - } - - Kernel* pKernel = fetchKernel(kernelName); - if (!pKernel) - return NULL; - - cl_int clStatus; - size_t global_work_size[1]; - - // Ugh - horrible redundant copying ... - - cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL; - if(!subFlag) - { - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nElements, &clStatus ); - subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus ); - } - else - { - valuesCl = allocateFloatBuffer( kEnv, pOpArray, nElements, &clStatus ); - subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus); - } - CHECK_OPENCL_PTR( clStatus, "clCreateBuffer" ); - - cl_uint start = 0; - cl_uint end = (cl_uint)nElements; - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_uint), (void *)&start); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_uint), (void *)&end); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" ); - - fprintf( stderr, "prior to enqueue range kernel\n" ); - } - else - { - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg"); - } - else - { - float fTmp = (float)delta; - subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus ); - outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus ); - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg"); - } - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl); - CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" ); - } - global_work_size[0] = nElements; - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL_PTR( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - - double *pResult = new double[nElements]; - if ( !pResult ) - return NULL; // leak. - if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag ) - { - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, - outputCl, - CL_TRUE,0, - nElements * sizeof(double), - (void *)pResult,0,NULL,NULL); - } - else - { - float *afBuffer = new float[nElements]; - if ( !afBuffer ) - return NULL; - clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, - outputCl, - CL_TRUE,0, - nElements * sizeof(float), - (void *)afBuffer,0,NULL,NULL); - for ( int i = 0; i < (int)nElements; i++ ) - pResult[i] = (double)afBuffer[i]; - if ( !afBuffer ) - delete [] afBuffer; - } - CHECK_OPENCL_PTR( clStatus, "clEnqueueReadBuffer" ); - - clStatus = clFinish( kEnv.mpkCmdQueue ); - CHECK_OPENCL_PTR( clStatus, "clFinish" ); - - if ( valuesCl != NULL ) - { - clStatus = clReleaseMemObject( valuesCl ); - CHECK_OPENCL_PTR( clStatus, "clReleaseMemObject" ); - } - if ( subtractCl != NULL ) - { - clStatus = clReleaseMemObject( subtractCl ); - CHECK_OPENCL_PTR( clStatus, "clReleaseMemObject" ); - } - if ( outputCl != NULL ) - { - clStatus = clReleaseMemObject( outputCl ); - CHECK_OPENCL_PTR( clStatus, "clReleaseMemObject" ); - } - fprintf( stderr, "completed opencl delta operation\n" ); - - return pResult; -} - -bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst,std::vector<double>&dpResult, uint nDim ) -{ - cl_int clStatus = 0; - uint nMatrixSize = nDim * nDim; - size_t global_work_size[1] = { nDim }; - cl_mem clpPData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), nMatrixSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clpYData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), nMatrixSize * sizeof(double), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clpNData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE ), nDim * sizeof(uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - double * dpY = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpYData, CL_TRUE, CL_MAP_WRITE, 0, nMatrixSize * sizeof(double), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - memset( dpY, 0, nMatrixSize*sizeof(double) ); - memset( dpOclMatrixDst, 0, nMatrixSize*sizeof(double) ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpYData, dpY, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - double * dpP = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpPData, CL_TRUE, CL_MAP_WRITE, 0, nMatrixSize * sizeof(double), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - for (uint i=0;i<nDim;i++) - { - for (uint j=0;j<nDim;j++) - { - if ( i == j ) - dpP[i*nDim+j] = 1.0; - else - dpP[i*nDim+j] = 0.0; - } - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpPData, dpP, 0, NULL, NULL ); - uint * npDim = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpNData, CL_TRUE, CL_MAP_WRITE, 0, nDim * sizeof(uint), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - for ( uint i = 0; i < nDim; i++ ) - npDim[i] = nDim; - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL ); - - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - for ( uint nOffset = 0; nOffset < nDim- 1; nOffset++ ) - { - int nMax = nOffset; - for ( uint i = nOffset + 1; i < nDim; i++ ) - { - if( fabs(dpOclMatrixSrc[nMax*nDim+nOffset]) < fabs(dpOclMatrixSrc[i*nDim+nOffset])) - nMax=i; - } - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&nOffset); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&nMax); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - clFinish( kEnv.mpkCmdQueue ); - for ( uint i = nOffset + 1; i < nDim; i++ ) - { - dpOclMatrixSrc[i*nDim+nOffset] = dpOclMatrixSrc[i*nDim+nOffset] / dpOclMatrixSrc[nOffset*nDim+nOffset]; - for ( uint j = nOffset+ 1; j < nDim; j++ ) - dpOclMatrixSrc[i*nDim+j] = dpOclMatrixSrc[i*nDim+j] - dpOclMatrixSrc[nOffset*nDim+j] * dpOclMatrixSrc[i*nDim+nOffset]; - } - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpOclMatrixSrc, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve"); - if (!pKernelMatrix) - return false; - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - for ( uint i = 0; i < nDim; i++ ) - for ( uint j = 0; j < nDim; j++ ) - dpResult[i*nDim+j] = dpOclMatrixDst[j*nDim+i]; - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, dpOclMatrixDst, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( mpClmemLeftData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - mpClmemLeftData = NULL; - clStatus = clReleaseMemObject( mpClmemRightData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - mpClmemRightData = NULL; - clStatus = clReleaseMemObject( clpPData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpYData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpNData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - - return true; -} - -bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim ) -{ - cl_int clStatus = 0; - uint nMatrixSize = nDim * nDim; - size_t global_work_size[1] = { nDim }; - - cl_mem clpPData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), nMatrixSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clpYData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ), nMatrixSize * sizeof(float), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - cl_mem clpNData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE ), nDim * sizeof(uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - float * fpY = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpYData, CL_TRUE, CL_MAP_WRITE, 0, nMatrixSize * sizeof(float), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - memset( fpY, 0, nMatrixSize*sizeof(float) ); - memset( fpOclMatrixDst, 0, nMatrixSize*sizeof(float) ); - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpYData, fpY, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - float * fpP = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpPData, CL_TRUE, CL_MAP_WRITE, 0, nMatrixSize * sizeof(float), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer" ); - for ( uint i = 0;i < nDim; i++ ) - { - for ( uint j = 0;j < nDim; j++ ) - { - if( i == j ) - fpP[i*nDim+j]=1.0f; - else - fpP[i*nDim+j]=0.0f; - } - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpPData, fpP, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - uint * npDim = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpNData, CL_TRUE, CL_MAP_WRITE, 0, nDim * sizeof(uint), 0, NULL,NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" ); - for ( uint i = 0; i < nDim; i++ ) - npDim[i] = nDim; - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL ); - Kernel* pKernel = fetchKernel(aKernelName); - if (!pKernel) - return false; - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - for ( uint nOffset = 0; nOffset < nDim- 1; nOffset++ ) - { - int nMax = nOffset; - for( uint i = nOffset+1; i < nDim; i++ ) - { - if( fabs(fpOclMatrixSrc[nMax*nDim+nOffset]) < fabs(fpOclMatrixSrc[i*nDim+nOffset])) - nMax=i; - } - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&nOffset); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&nMax); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - clFinish( kEnv.mpkCmdQueue ); - - for ( uint i= nOffset + 1; i < nDim; i++ ) - { - fpOclMatrixSrc[i*nDim+nOffset] = fpOclMatrixSrc[i*nDim+nOffset] / fpOclMatrixSrc[nOffset*nDim+nOffset]; - for ( uint j= nOffset + 1; j < nDim; j++ ) - fpOclMatrixSrc[i*nDim+j] = fpOclMatrixSrc[i*nDim+j] - fpOclMatrixSrc[nOffset*nDim+j] * fpOclMatrixSrc[i*nDim+nOffset]; - } - } - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpOclMatrixSrc, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - - Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve"); - if (!pKernelMatrix) - return false; - - clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - clFinish( kEnv.mpkCmdQueue ); - for ( uint i = 0; i < nDim; i++ ) - for ( uint j = 0; j < nDim; j++ ) - dpResult[i*nDim+j] = fpOclMatrixDst[j*nDim+i]; // from gpu float type to cpu double type - clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, fpOclMatrixDst, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" ); - clStatus = clFinish(kEnv.mpkCmdQueue ); - CHECK_OPENCL( clStatus, "clFinish" ); - clStatus = clReleaseMemObject( mpClmemLeftData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - mpClmemLeftData = NULL; - clStatus = clReleaseMemObject( mpClmemRightData ); - CHECK_OPENCL( clStatus, "clReleaseMemObject" ); - mpClmemRightData = NULL; - clStatus = clReleaseMemObject( clpPData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpYData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - clStatus = clReleaseMemObject( clpNData ); - CHECK_OPENCL( clStatus, "clReleaseKernel" ); - return true; -} - namespace { void createDeviceInfo(cl_device_id aDeviceId, OpenclPlatformInfo& rPlatformInfo) @@ -2796,34 +964,6 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect) return !OpenclDevice::initOpenclRunEnv(0); } -void compileOpenCLKernels(const OUString* pDeviceId) -{ - if (!pDeviceId) - return; - - if (pDeviceId->isEmpty()) - return; - - if (!switchOpenclDevice(pDeviceId, false)) - return; - - cl_program pProgram = OpenclDevice::gpuEnv.mpArryPrograms[0]; - if (!pProgram) - return; - - cl_int nStatus; - for (size_t i = 0, n = OpenclDevice::gpuEnv.maKernels.size(); i < n; ++i) - { - Kernel& r = OpenclDevice::gpuEnv.maKernels[i]; - if (r.mpKernel) - continue; - - r.mpKernel = clCreateKernel(pProgram, r.mpName, &nStatus); - if (nStatus != CL_SUCCESS) - r.mpKernel = NULL; - } -} - }} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx index aee27789ba59..0dfe24c63873 100644 --- a/sc/source/core/opencl/openclwrapper.hxx +++ b/sc/source/core/opencl/openclwrapper.hxx @@ -111,14 +111,6 @@ struct OpenCLEnv cl_command_queue mpOclCmdQueue; }; -struct Kernel -{ - const char* mpName; - cl_kernel mpKernel; - - Kernel( const char* pName ); -}; - struct GPUEnv { //share vb in all modules in hb library @@ -128,11 +120,7 @@ struct GPUEnv cl_device_id *mpArryDevsID; cl_device_id mpDevID; cl_command_queue mpCmdQueue; - cl_kernel mpArryKernels[MAX_CLFILE_NUM]; cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file - char mArryKnelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 - std::vector<Kernel> maKernels; - int mnFileCount; // only one kernel file int mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper int mnKhrFp64Flag; int mnAmdFp64Flag; @@ -165,7 +153,6 @@ class OpenclDevice public: static GPUEnv gpuEnv; static int isInited; - static OString maSourceHash; static OString maCacheFolder; static int registOpenclKernel(); @@ -173,91 +160,18 @@ public: static int initOpenclRunEnv( GPUEnv *gpu ); static int releaseOpenclEnv( GPUEnv *gpuInfo ); static int initOpenclRunEnv( int argc ); - static int cachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName ); static int generatBinFromKernelSource( cl_program program, const char * clFileName ); static int writeBinaryToFile( const OString& rName, const char* birary, size_t numBytes ); static std::vector<boost::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context); - static bool buildProgramFromSource(const char* buildOption, GPUEnv* gpuEnv, const char* filename, int idx); static bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuEnv, const char* filename, int idx); static int initOpenclAttr( OpenCLEnv * env ); static int setKernelEnv( KernelEnv *envInfo ); - static Kernel* fetchKernel( const char *kernelName ); static int getOpenclState(); static void setOpenclState( int state ); }; -class OclCalc: public OpenclDevice -{ - -public: - KernelEnv kEnv; - cl_mem mpClmemSrcData; - cl_mem mpClmemStartPos; - cl_mem mpClmemEndPos; - cl_mem mpClmemLeftData; - cl_mem mpClmemRightData; - cl_mem mpClmemMergeLfData; - cl_mem mpClmemMergeRtData; - cl_mem mpClmemMatixSumSize; - cl_mem mpClmemeOp; - unsigned int nArithmeticLen; - unsigned int nFormulaLen; - unsigned int nClmemLen; - unsigned int nFormulaColSize; - unsigned int nFormulaRowSize; - - OclCalc(); - ~OclCalc(); - -// for 64bits double - bool oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult, int nRowSize ); - bool oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize ); - bool oclHostFormulaStatistics64Bits( const char* aKernelName, double *&output, int outputSize); - bool oclHostFormulaStash64Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size); - bool oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize ); - bool oclHostFormulaSumProduct64Bits( double *fpSumProMergeLfData, double *fpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize); - bool oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst, std::vector<double>&dpResult, uint nDim ); -// for 32bits float - bool oclHostArithmeticOperator32Bits( const char* aKernelName, double *rResult, int nRowSize ); - bool oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize ); - bool oclHostFormulaStatistics32Bits( const char* aKernelName, double *output, int outputSize); - bool oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize ); - bool oclHostArithmeticStash64Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize ); - bool oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize ); - bool oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim ); -// for groundwater - bool oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle,size_t nSrcDataSize, size_t nElements, double delta ,uint *nStartPos,uint *nEndPos,double *deResult); - double *oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double delta ); - - /////////////////////////////////////////////////////////////// - bool createBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize ); - bool mapAndCopy64Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize ); - bool mapAndCopy64Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize); - bool mapAndCopyArithmetic64Bits( const double *dpMoreArithmetic,int nBufferSize ); - bool mapAndCopyMoreColArithmetic64Bits( const double *dpMoreColArithmetic,int nBufferSize ,uint *npeOp,uint neOpSize ); - bool createMoreColArithmeticBuf64Bits( int nBufferSize, int neOpSize ); - - bool createFormulaBuf64Bits( int nBufferSize, int rowSize ); - bool createArithmeticOptBuf64Bits( int nBufferSize ); - - bool createBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize ); - bool mapAndCopy32Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize ); - bool mapAndCopy32Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize); - bool mapAndCopyArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize ); - bool mapAndCopyMoreColArithmetic32Bits( const double *dpMoreColArithmetic,int nBufferSize ,uint *npeOp,uint neOpSize ); - bool createMoreColArithmeticBuf32Bits( int nBufferSize, int neOpSize ); - bool createFormulaBuf32Bits( int nBufferSize, int rowSize ); - bool createArithmeticOptBuf32Bits( int nBufferSize ); - bool oclHostFormulaStash32Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size ); - bool oclHostArithmeticStash32Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize ); - - void releaseOclBuffer(); - - friend class agency; -}; - size_t getOpenCLPlatformCount(); const std::vector<OpenclPlatformInfo>& fillOpenCLInfo(); @@ -271,8 +185,6 @@ const std::vector<OpenclPlatformInfo>& fillOpenCLInfo(); */ bool switchOpenclDevice(const OUString* pDeviceId, bool bAutoSelect); -void compileOpenCLKernels(const OUString* pDeviceId); - }} #endif diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx index 8d87e246607a..072f162052c8 100644 --- a/sc/source/core/tool/formulagroup.cxx +++ b/sc/source/core/tool/formulagroup.cxx @@ -33,7 +33,6 @@ extern "C" size_t getOpenCLPlatformCount(void); extern "C" void fillOpenCLInfo(sc::OpenclPlatformInfo*, size_t); extern "C" bool switchOpenClDevice(const OUString*, bool); extern "C" sc::FormulaGroupInterpreter* createFormulaGroupOpenCLInterpreter(); -extern "C" void compileOpenCLKernels(const OUString*); #endif @@ -494,7 +493,6 @@ typedef FormulaGroupInterpreter* (*__createFormulaGroupOpenCLInterpreter)(void); typedef size_t (*__getOpenCLPlatformCount)(void); typedef void (*__fillOpenCLInfo)(OpenclPlatformInfo*, size_t); typedef bool (*__switchOpenClDevice)(const OUString*, bool); -typedef void (*__compileOpenCLKernels)(const OUString*); #endif @@ -644,30 +642,6 @@ bool FormulaGroupInterpreter::switchOpenCLDevice(const OUString& rDeviceId, bool return false; } -void FormulaGroupInterpreter::compileOpenCLKernels() -{ - const ScCalcConfig& rConfig = ScInterpreter::GetGlobalConfig(); - if (!rConfig.mbOpenCLEnabled) - // OpenCL is not enabled. - return; - -#if HAVE_FEATURE_OPENCL -#ifndef DISABLE_DYNLOADING - osl::Module* pModule = getOpenCLModule(); - if (!pModule) - return; - - oslGenericFunction fn = pModule->getFunctionSymbol("compileOpenCLKernels"); - if (!fn) - return; - - reinterpret_cast<__compileOpenCLKernels>(fn)(&rConfig.maOpenCLDevice); -#else - ::compileOpenCLKernels(&rConfig.maOpenCLDevice); -#endif -#endif -} - void FormulaGroupInterpreter::enableOpenCL(bool bEnable) { ScCalcConfig aConfig = ScInterpreter::GetGlobalConfig(); diff --git a/sc/source/filter/ftools/clkernelthread.cxx b/sc/source/filter/ftools/clkernelthread.cxx index dcb1e74d6a8d..c307793c3b41 100644 --- a/sc/source/filter/ftools/clkernelthread.cxx +++ b/sc/source/filter/ftools/clkernelthread.cxx @@ -8,7 +8,6 @@ */ #include "clkernelthread.hxx" -#include "formulagroup.hxx" using namespace std; @@ -20,7 +19,6 @@ CLBuildKernelThread::~CLBuildKernelThread() {} void CLBuildKernelThread::execute() { - sc::FormulaGroupInterpreter::compileOpenCLKernels(); } } |