diff options
author | Michael Meeks <michael.meeks@suse.com> | 2013-07-08 10:49:05 +0100 |
---|---|---|
committer | Kohei Yoshida <kohei.yoshida@gmail.com> | 2013-07-11 00:25:35 -0400 |
commit | d05ec5563621f0b51757dd42737565d29fbadd2b (patch) | |
tree | f0867b723054e865e1adeb3334549874d443a434 /sc | |
parent | a344684f0864f070d5ad1ffd9f2f844ffbd29fde (diff) |
Latest cleanup and improvements of opencl backend.
Conflicts:
sc/source/core/opencl/openclwrapper.cxx
Change-Id: I3fdc90570e90a156ccecb511fc04b473752018bd
Diffstat (limited to 'sc')
-rwxr-xr-x[-rw-r--r--] | sc/source/core/opencl/formulagroupcl.cxx | 140 | ||||
-rwxr-xr-x[-rw-r--r--] | sc/source/core/opencl/oclkernels.hxx | 181 | ||||
-rwxr-xr-x[-rw-r--r--] | sc/source/core/opencl/openclwrapper.cxx | 1996 | ||||
-rwxr-xr-x[-rw-r--r--] | sc/source/core/opencl/openclwrapper.hxx | 137 |
4 files changed, 1585 insertions, 869 deletions
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 6a96129d5f9d..857f045cd6c5 100644..100755 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -65,35 +65,32 @@ ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& /* rMat bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& rTopPos, const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode) { - size_t rowSize = xGroup->mnLength; //, srcSize = 0; + size_t rowSize = xGroup->mnLength; fprintf(stderr,"rowSize at begin is ...%ld.\n",(long)rowSize); - int *rangeStart =NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100)) - int *rangeEnd = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100)) // The row quantity can be gotten from p2->GetArrayLength() - int count1 =0,count2 =0,count3=0; - int oclOp=0; - double *srcData = NULL; // Point to the input data from CPU - double *rResult=NULL; // Point to the output data from GPU - double *leftData=NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData) - double *rightData=NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData) - // The rightData can't be zero for "/" - - leftData = (double *)malloc(sizeof(double) * rowSize); - rightData = (double *)malloc(sizeof(double) * rowSize); - rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C) - srcData = (double *)calloc(rowSize,sizeof(double)); - - rangeStart =(int *)malloc(sizeof(int) * rowSize); - rangeEnd =(int *)malloc(sizeof(int) * rowSize); - - memset(rResult,0,rowSize); - if(NULL==leftData||NULL==rightData|| - NULL==rResult||NULL==rangeStart||NULL==rangeEnd) + int nCount1 = 0, nCount2 = 0, nCount3 = 0; + int nOclOp = 0; + double *rResult = NULL; // Point to the output data from GPU + rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C) + if(NULL==rResult) { printf("malloc err\n"); return false; } - // printf("rowSize is %d.\n",rowsize); + memset(rResult,0,rowSize); + float * fpOclSrcData = NULL; // Point to the input data from CPU + uint * npOclStartPos = NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100)) + uint * npOclEndPos = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100)) + float * fpLeftData = NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData) + float * fpRightData = NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData) + // The rightData can't be zero for "/" + static OclCalc ocl_calc; + // Don't know how large the size will be applied previously, so create them as the rowSize or 65536 + // Don't know which formulae will be used previously, so create buffers for different formulae used probably + ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize); + ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize); + //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos); +/////////////////////////////////////////////////////////////////////////////////////////// // Until we implement group calculation for real, decompose the group into // individual formula token arrays for individual calculation. @@ -125,26 +122,23 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& size_t nRowSize = nRowEnd - nRowStart + 1; ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0)); - //srcSize = rowSize+nRowSize-rowSize%nRowSize;//align as nRowSize - //srcData = (double *)calloc(srcSize,sizeof(double)); - rangeStart[i] = nRowStart;//record the start position - rangeEnd[i] = nRowEnd;//record the end position + npOclStartPos[i] = nRowStart; // record the start position + npOclEndPos[i] = nRowEnd; // record the end position for (size_t nCol = 0; nCol < nColSize; ++nCol) { const double* pArray = rArrays[nCol]; - - //printf("pArray is %p.\n",pArray); if( NULL==pArray ) { fprintf(stderr,"Error: pArray is NULL!\n"); return false; } - //fprintf(stderr,"(rowSize+nRowSize-1) is %d.\n",rowSize+nRowSize-1); + for( size_t u=0; u<rowSize; u++ ) { - srcData[u] = pArray[u];// note:rowSize<=srcSize - //fprintf(stderr,"srcData[%d] is %f.\n",u,srcData[u]); + // Many video cards can't support double type in kernel, so need transfer the double to float + fpOclSrcData[u] = (float)pArray[u]; + //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]); } for (size_t nRow = 0; nRow < nRowSize; ++nRow) @@ -177,26 +171,26 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& OpCode eOp = pCur->GetOpCode(); if(eOp==0) { - if(count3%2==0) - leftData[count1++] = pCur->GetDouble(); - else - rightData[count2++] = pCur->GetDouble(); - count3++; - } - else if( eOp!=ocOpen && eOp!=ocClose ) - oclOp = eOp; - -// if(count1>0){//dbg -// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]); -// count1--; -// } -// if(count2>0){//dbg -// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]); -// count2--; -// } + if(nCount3%2==0) + fpLeftData[nCount1++] = (float)pCur->GetDouble(); + else + fpRightData[nCount2++] = (float)pCur->GetDouble(); + nCount3++; + } + else if( eOp!=ocOpen && eOp!=ocClose ) + nOclOp = eOp; + +// if(count1>0){//dbg +// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]); +// count1--; +// } +// if(count2>0){//dbg +// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]); +// count2--; +// } } - if(!getenv("SC_GPU")) + if(!getenv("SC_GPU")||!ocl_calc.GetOpenclState()) { fprintf(stderr,"ccCPU flow...\n\n"); ScCompiler aComp(&rDoc, aTmpPos, aCode2); @@ -211,34 +205,42 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& } // for loop end (xGroup->mnLength) // For GPU calculation - if(getenv("SC_GPU")) + if(getenv("SC_GPU")&&ocl_calc.GetOpenclState()) { fprintf(stderr,"ggGPU flow...\n\n"); - printf(" oclOp is... %d\n",oclOp); + printf(" oclOp is... %d\n",nOclOp); osl_getSystemTime(&aTimeBefore); //timer - static OclCalc ocl_calc; - switch(oclOp) + switch(nOclOp) { case ocAdd: - ocl_calc.OclHostSignedAdd(leftData,rightData,rResult,count1); + ocl_calc.OclHostSignedAdd32Bits(fpLeftData,fpRightData,rResult,nCount1); break; case ocSub: - ocl_calc.OclHostSignedSub(leftData,rightData,rResult,count1); + ocl_calc.OclHostSignedSub32Bits(fpLeftData,fpRightData,rResult,nCount1); break; case ocMul: - ocl_calc.OclHostSignedMul(leftData,rightData,rResult,count1); + ocl_calc.OclHostSignedMul32Bits(fpLeftData,fpRightData,rResult,nCount1); break; case ocDiv: - ocl_calc.OclHostSignedDiv(leftData,rightData,rResult,count1); + ocl_calc.OclHostSignedDiv32Bits(fpLeftData,fpRightData,rResult,nCount1); break; case ocMax: - ocl_calc.OclHostFormulaMax(srcData,rangeStart,rangeEnd,rResult,rowSize); + ocl_calc.OclHostFormulaMax32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize); break; case ocMin: - ocl_calc.OclHostFormulaMin(srcData,rangeStart,rangeEnd,rResult,rowSize); + ocl_calc.OclHostFormulaMin32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize); break; case ocAverage: - ocl_calc.OclHostFormulaAverage(srcData,rangeStart,rangeEnd,rResult,rowSize); + ocl_calc.OclHostFormulaAverage32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize); + break; + case ocSum: + //ocl_calc.OclHostFormulaSum(srcData,rangeStart,rangeEnd,rResult,rowSize); + break; + case ocCount: + //ocl_calc.OclHostFormulaCount(rangeStart,rangeEnd,rResult,rowSize); + break; + case ocSumProduct: + //ocl_calc.OclHostFormulaSumProduct(srcData,rangeStart,rangeEnd,rResult,rowSize); break; default: fprintf(stderr,"No OpenCL function for this calculation.\n"); @@ -254,26 +256,16 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& ///////////////////////////////////////////////////// //rResult[i]; -// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results -// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]); -// } +// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results +// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]); +// } // Insert the double data, in rResult[i] back into the document rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength); } - if(leftData) - free(leftData); - if(rightData) - free(rightData); - if(rangeStart) - free(rangeStart); - if(rangeEnd) - free(rangeEnd); if(rResult) free(rResult); - if(srcData) - free(srcData); if(getenv("SC_GPUSAMPLE")){ //fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState()); diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx index 3269f3a3eba2..6c9012641b54 100644..100755 --- a/sc/source/core/opencl/oclkernels.hxx +++ b/sc/source/core/opencl/oclkernels.hxx @@ -6,153 +6,158 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + #ifndef _OCL_KERNEL_H_ #define _OCL_KERNEL_H_ #ifndef USE_EXTERNAL_KERNEL #define KERNEL( ... )# __VA_ARGS__ - ///////////////////////////////////////////// const char *kernel_src = KERNEL( __kernel void hello(__global uint *buffer) { -size_t idx = get_global_id(0); - -buffer[idx]=idx; - + size_t idx = get_global_id(0); + buffer[idx]=idx; } __kernel void oclformula(__global float *data, - const uint type) + const uint type) { - const unsigned int i = get_global_id(0); - - switch (type) - { - case 0: //MAX - { - //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]); - if(data[2*i]>data[2*i+1]) - data[i] = data[2*i]; - else - data[i] = data[2*i+1]; - break; - } - case 1: //MIN - { - //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]); - if(data[2*i]<data[2*i+1]) - data[i] = data[2*i]; - else - data[i] = data[2*i+1]; - break; - } - case 2: //SUM - case 3: //AVG - { - //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]); - data[i] = data[2*i] + data[2*i+1]; - break; - } - default: - break; - - } + const unsigned int i = get_global_id(0); + + switch (type) + { + case 0: //MAX + { + //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]); + if(data[2*i]>data[2*i+1]) + data[i] = data[2*i]; + else + data[i] = data[2*i+1]; + break; + } + case 1: //MIN + { + //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]); + if(data[2*i]<data[2*i+1]) + data[i] = data[2*i]; + else + data[i] = data[2*i+1]; + break; + } + case 2: //SUM + case 3: //AVG + { + //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]); + data[i] = data[2*i] + data[2*i+1]; + break; + } + default: + break; + + } } __kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData) { - const unsigned int id = get_global_id(0); - otData[id] = ltData[id] + rtData[id]; + const unsigned int id = get_global_id(0); + otData[id] = ltData[id] + rtData[id]; } __kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData) { - const unsigned int id = get_global_id(0); - otData[id] = ltData[id] - rtData[id]; + const unsigned int id = get_global_id(0); + otData[id] = ltData[id] - rtData[id]; } __kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData) { - int id = get_global_id(0); - otData[id] =ltData[id] * rtData[id]; + int id = get_global_id(0); + otData[id] =ltData[id] * rtData[id]; } __kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData) { - const unsigned int id = get_global_id(0); - otData[id] = ltData[id] / rtData[id]; + const unsigned int id = get_global_id(0); + otData[id] = ltData[id] / rtData[id]; } __kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output) { - const unsigned int id = get_global_id(0); - int i=0; - unsigned int startFlag = start[id]; - unsigned int endFlag = end[id]; - float min = input[startFlag]; - for(i=startFlag;i<=endFlag;i++) - { - if(input[i]<min) - min = input[i]; - } - output[id] = min; + const unsigned int id = get_global_id(0); + int i=0; + unsigned int startFlag = start[id]; + unsigned int endFlag = end[id]; + float min = input[startFlag]; + for(i=startFlag;i<=endFlag;i++) + { + if(input[i]<min) + min = input[i]; + } + output[id] = min; } __kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output) { - const unsigned int id = get_global_id(0); - int i=0; - unsigned int startFlag = start[id]; - unsigned int endFlag = end[id]; - float max = input[startFlag]; - for(i=startFlag;i<=endFlag;i++) - { - if(input[i]>max) - max = input[i]; - } - output[id] = max; + const unsigned int id = get_global_id(0); + int i=0; + unsigned int startFlag = start[id]; + unsigned int endFlag = end[id]; + float max = input[startFlag]; + for(i=startFlag;i<=endFlag;i++) + { + if(input[i]>max) + max = input[i]; + } + output[id] = max; } - -__kernel void oclFormulaSum(__global float *data, - const uint type) +//Sum +__kernel void oclFormulaSum(__global float *input,__global int *start,__global int *end,__global float *output) { - + const unsigned int nId = get_global_id(0); + float fSum = 0.0f; + for(int i = start[nId]; i<=end[nId]; i++) + fSum += input[i]; + output[nId] = fSum ; } - -__kernel void oclFormulaCount(__global float *data, - const uint type) +//Count +__kernel void oclFormulaCount(__global int *start,__global int *end,__global float *output) { - + const unsigned int nId = get_global_id(0); + output[nId] = end[nId] - start[nId] + 1; } __kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output) { - const unsigned int id = get_global_id(0); - int i=0; - float sum=0; - for(i = start[id];i<=end[id];i++) - sum += input[i]; - output[id] = sum / (end[id]-start[id]+1); + const unsigned int id = get_global_id(0); + int i=0; + float sum=0; + for(i = start[id];i<=end[id];i++) + sum += input[i]; + output[id] = sum / (end[id]-start[id]+1); } - -__kernel void oclFormulaSumproduct(__global float *data, - const uint type) +//Sumproduct +__kernel void oclFormulaSumproduct(__global float *firstCol,__global float *secondCol,__global int *start,__global int *end,__global float *output) { - + const int nId = get_global_id(0); + int nCount = start[nId] - end[nId] + 1; + int nStartA = start[nId*2]; + int nStartB = start[nId*2+1]; + for(int i = 0; i<nCount; i++) + output[nId] += firstCol[nStartA+i]*secondCol[nStartB+i]; } __kernel void oclFormulaMinverse(__global float *data, - const uint type) + const uint type) { } diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx index aa3ce698452f..597f37097ce8 100644..100755 --- a/sc/source/core/opencl/openclwrapper.cxx +++ b/sc/source/core/opencl/openclwrapper.cxx @@ -22,6 +22,7 @@ GPUEnv OpenclDevice::gpuEnv; int OpenclDevice::isInited =0; + #ifdef SAL_WIN32 #define OPENCL_DLL_NAME "opencllo.dll" @@ -32,62 +33,62 @@ HINSTANCE HOpenclDll = NULL; int OpenclDevice::LoadOpencl() { - //fprintf(stderr, " LoadOpenclDllxx... \n"); - OpenclDll = static_cast<HINSTANCE>(HOpenclDll); - OpenclDll = LoadLibrary(OPENCL_DLL_NAME); - if (!static_cast<HINSTANCE>(OpenclDll)) - { - fprintf(stderr, " Load opencllo.dll failed! \n"); - FreeLibrary(static_cast<HINSTANCE>(OpenclDll)); - return OCLERR; - } - fprintf(stderr, " Load opencllo.dll successfully!\n"); - return OCLSUCCESS; + //fprintf(stderr, " LoadOpenclDllxx... \n"); + OpenclDll = static_cast<HINSTANCE>(HOpenclDll); + OpenclDll = LoadLibrary(OPENCL_DLL_NAME); + if (!static_cast<HINSTANCE>(OpenclDll)) + { + fprintf(stderr, " Load opencllo.dll failed! \n"); + FreeLibrary(static_cast<HINSTANCE>(OpenclDll)); + return OCLERR; + } + fprintf(stderr, " Load opencllo.dll successfully!\n"); + return OCLSUCCESS; } void OpenclDevice::FreeOpenclDll() { - fprintf(stderr, " Free opencllo.dll ... \n"); - if(!static_cast<HINSTANCE>(OpenclDll)) - FreeLibrary(static_cast<HINSTANCE>(OpenclDll)); + fprintf(stderr, " Free opencllo.dll ... \n"); + if(!static_cast<HINSTANCE>(OpenclDll)) + FreeLibrary(static_cast<HINSTANCE>(OpenclDll)); } #endif int OpenclDevice::InitEnv() { #ifdef SAL_WIN32 - while(1) + while(1) { - if(1==LoadOpencl()) - break; - } + if(1==LoadOpencl()) + break; + } #endif - InitOpenclRunEnv(0,NULL); - return 1; + InitOpenclRunEnv(0,NULL); + return 1; } int OpenclDevice::ReleaseOpenclRunEnv() { - ReleaseOpenclEnv(&gpuEnv); + ReleaseOpenclEnv(&gpuEnv); #ifdef SAL_WIN32 - FreeOpenclDll(); + FreeOpenclDll(); #endif return 1; } /////////////////////////////////////////////////////// /////////////////////////////////////////////////////// inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) { - strcpy(gpuEnv.kernelNames[kCount], kName); - gpuEnv.kernelCount++; + strcpy(gpuEnv.mArrykernelNames[kCount], kName); + gpuEnv.mnKernelCount++; return 0; } int OpenclDevice::RegistOpenclKernel() { - if (!gpuEnv.isUserCreated) { + if (!gpuEnv.mnIsUserCreated) { memset(&gpuEnv, 0, sizeof(gpuEnv)); } - gpuEnv.fileCount = 0; //argc; - gpuEnv.kernelCount = 0UL; + gpuEnv.mnFileCount = 0; //argc; + gpuEnv.mnKernelCount = 0UL; AddKernelConfig(0, (const char*) "hello"); AddKernelConfig(1, (const char*) "oclformula"); @@ -99,34 +100,39 @@ int OpenclDevice::RegistOpenclKernel() { AddKernelConfig(7, (const char*) "oclFormulaSumproduct"); AddKernelConfig(8, (const char*) "oclFormulaMinverse"); - AddKernelConfig(9, (const char*) "oclSignedAdd"); + AddKernelConfig(9, (const char*) "oclSignedAdd"); AddKernelConfig(10, (const char*) "oclSignedSub"); AddKernelConfig(11, (const char*) "oclSignedMul"); AddKernelConfig(12, (const char*) "oclSignedDiv"); - return 0; + return 0; } OpenclDevice::OpenclDevice(){ - //InitEnv(); + //InitEnv(); } OpenclDevice::~OpenclDevice() { - //ReleaseOpenclRunEnv(); + //ReleaseOpenclRunEnv(); } +int OpenclDevice::SetKernelEnv(KernelEnv *envInfo) +{ + envInfo->mpkContext = gpuEnv.mpContext; + envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; + envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; + + return 1; +} int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){ //printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount); int kCount; - for(kCount=0; kCount < gpuEnv.kernelCount; kCount++) { - if(strcasecmp(kernelName, gpuEnv.kernelNames[kCount]) == 0) { - printf("match %s kernel right\n",kernelName); - break; + for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) { + if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) { + printf("match %s kernel right\n",kernelName); + break; } } - envInfo->context = gpuEnv.context; - envInfo->commandQueue = gpuEnv.commandQueue; - envInfo->program = gpuEnv.programs[0]; - envInfo->kernel = gpuEnv.kernels[kCount]; - strcpy(envInfo->kernelName, kernelName); + envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount]; + strcpy(envInfo->mckKernelName, kernelName); if (envInfo == (KernelEnv *) NULL) { printf("get err func and env\n"); @@ -145,7 +151,7 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) { printf("open kernel file %s.\n",filename); if (file != NULL) { - printf("Open ok!\n"); + printf("Open ok!\n"); fseek(file, 0, SEEK_END); file_size = ftell(file); @@ -169,35 +175,35 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) { } int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) { - unsigned int i = 0; - cl_int status; - char *str = NULL; - FILE *fd = NULL; - cl_uint numDevices=0; - status = clGetDeviceIDs(gpuEnv.platform, // platform - CL_DEVICE_TYPE_ALL, // device_type - 0, // num_entries - NULL, // devices - &numDevices); - for (i = 0; i <numDevices; i++) { - char fileName[256] = { 0 }, cl_name[128] = { 0 }; - if (gpuEnv.devices[i] != 0) { - char deviceName[1024]; - status = clGetDeviceInfo(gpuEnv.devices[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL); - CHECK_OPENCL(status); - str = (char*) strstr(clFileName, (char*) ".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - sprintf(fileName, "./%s-%s.bin", cl_name, deviceName); - fd = fopen(fileName, "rb"); - status = (fd != NULL) ? 1 : 0; - } - } - if (fd != NULL) { - *fhandle = fd; - } - - return status; + unsigned int i = 0; + cl_int status; + char *str = NULL; + FILE *fd = NULL; + cl_uint numDevices=0; + status = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform + CL_DEVICE_TYPE_GPU, // device_type + 0, // num_entries + NULL, // devices ID + &numDevices); + for (i = 0; i <numDevices; i++) { + char fileName[256] = { 0 }, cl_name[128] = { 0 }; + if (gpuEnv.mpArryDevsID[i] != 0) { + char deviceName[1024]; + status = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL); + CHECK_OPENCL(status); + str = (char*) strstr(clFileName, (char*) ".cl"); + memcpy(cl_name, clFileName, str - clFileName); + cl_name[str - clFileName] = '\0'; + sprintf(fileName, "./%s-%s.bin", cl_name, deviceName); + fd = fopen(fileName, "rb"); + status = (fd != NULL) ? 1 : 0; + } + } + if (fd != NULL) { + *fhandle = fd; + } + + return status; } @@ -220,22 +226,21 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char * clFileName) { unsigned int i = 0; cl_int status; - size_t *binarySizes; - cl_uint numDevices; - cl_device_id *devices; + size_t *binarySizes, numDevices; + cl_device_id *mpArryDevsID; char **binaries, *str = NULL; status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, NULL); CHECK_OPENCL(status) - devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices); - if (devices == NULL) { + mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices); + if (mpArryDevsID == NULL) { return 0; } /* grab the handles to all of the devices in the program. */ status = clGetProgramInfo(program, CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, devices, NULL); + sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL); CHECK_OPENCL(status) /* figure out the sizes of each of the binaries. */ @@ -272,7 +277,7 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, if (binarySizes[i] != 0) { char deviceName[1024]; - status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, + status = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); CHECK_OPENCL(status) @@ -307,24 +312,24 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, binarySizes = NULL; } - if (devices != NULL) { - free(devices); - devices = NULL; + if (mpArryDevsID != NULL) { + free(mpArryDevsID); + mpArryDevsID = NULL; } return 1; } int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) { - if (gpuEnv.isUserCreated) { + if (gpuEnv.mnIsUserCreated) { return 1; } - gpuEnv.context = env->context; - gpuEnv.platform = env->platform; - gpuEnv.dev = env->devices; - gpuEnv.commandQueue = env->commandQueue; + gpuEnv.mpContext = env->mpOclContext; + gpuEnv.mpPlatformID = env->mpOclPlatformID; + gpuEnv.mpDevID = env->mpOclDevsID; + gpuEnv.mpCmdQueue = env->mpOclCmdQueue; - gpuEnv.isUserCreated = 1; + gpuEnv.mnIsUserCreated = 1; return 0; } @@ -332,14 +337,14 @@ int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) { int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) { int status; - env->kernel = clCreateKernel(gpuEnv.programs[0], kernelname, &status); - env->context = gpuEnv.context; - env->commandQueue = gpuEnv.commandQueue; + env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status); + env->mpkContext = gpuEnv.mpContext; + env->mpkCmdQueue = gpuEnv.mpCmdQueue; return status != CL_SUCCESS ? 1 : 0; } int OpenclDevice::ReleaseKernel(KernelEnv * env) { - int status = clReleaseKernel(env->kernel); + int status = clReleaseKernel(env->mpkKernel); return status != CL_SUCCESS ? 1 : 0; } @@ -351,24 +356,24 @@ int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) { return 1; } - for (i = 0; i < gpuEnv.fileCount; i++) { - if (gpuEnv.programs[i]) { - status = clReleaseProgram(gpuEnv.programs[i]); + for (i = 0; i < gpuEnv.mnFileCount; i++) { + if (gpuEnv.mpArryPrograms[i]) { + status = clReleaseProgram(gpuEnv.mpArryPrograms[i]); CHECK_OPENCL(status) - gpuEnv.programs[i] = NULL; + gpuEnv.mpArryPrograms[i] = NULL; } } - if (gpuEnv.commandQueue) { - clReleaseCommandQueue(gpuEnv.commandQueue); - gpuEnv.commandQueue = NULL; + if (gpuEnv.mpCmdQueue) { + clReleaseCommandQueue(gpuEnv.mpCmdQueue); + gpuEnv.mpCmdQueue = NULL; } - if (gpuEnv.context) { - clReleaseContext(gpuEnv.context); - gpuEnv.context = NULL; + if (gpuEnv.mpContext) { + clReleaseContext(gpuEnv.mpContext); + gpuEnv.mpContext = NULL; } isInited = 0; - gpuInfo->isUserCreated = 0; - free(gpuInfo->devices); + gpuInfo->mnIsUserCreated = 0; + free(gpuInfo->mpArryDevsID); return 1; } @@ -386,9 +391,9 @@ int OpenclDevice::RunKernelWrapper(cl_kernel_function function, int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char * clFileName) { int i; - for (i = 0; i < gpuEnvCached->fileCount; i++) { - if (strcasecmp(gpuEnvCached->kernelSrcFile[i], clFileName) == 0) { - if (gpuEnvCached->programs[i] != NULL) { + for (i = 0; i < gpuEnvCached->mnFileCount; i++) { + if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) { + if (gpuEnvCached->mpArryPrograms[i] != NULL) { return 1; } } @@ -404,31 +409,28 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { const char *source; size_t source_size[1]; int b_error, binary_status, binaryExisted, idx; - cl_uint numDevices; - cl_device_id *devices; + size_t numDevices; + cl_device_id *mpArryDevsID; FILE *fd, *fd1; const char* filename = "kernel.cl"; - fprintf(stderr, "CompileKernelFile ... \n"); + fprintf(stderr, "CompileKernelFile ... \n"); if (CachedOfKernerPrg(gpuInfo, filename) == 1) { return 1; } - idx = gpuInfo->fileCount; + idx = gpuInfo->mnFileCount; source = kernel_src; source_size[0] = strlen(source); binaryExisted = 0; if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) { -#ifdef CL_CONTEXT_NUM_DEVICES - status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_NUM_DEVICES, + status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, sizeof(numDevices), &numDevices, NULL); CHECK_OPENCL(status) -#else - numDevices = 1; // ??? -#endif - devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices); - if (devices == NULL) { + + mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices); + if (mpArryDevsID == NULL) { return 0; } @@ -452,50 +454,50 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { fclose(fd); fd = NULL; // grab the handles to all of the devices in the context. - status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, devices, NULL); + status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL); CHECK_OPENCL(status) - gpuInfo->programs[idx] = clCreateProgramWithBinary(gpuInfo->context, - numDevices, devices, &length, (const unsigned char**) &binary, + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(gpuInfo->mpContext, + numDevices, mpArryDevsID, &length, (const unsigned char**) &binary, &binary_status, &status); CHECK_OPENCL(status) free(binary); - free(devices); - devices = NULL; + free(mpArryDevsID); + mpArryDevsID = NULL; } else { // create a CL program using the kernel source - gpuEnv.programs[idx] = clCreateProgramWithSource(gpuEnv.context, + gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource(gpuEnv.mpContext, 1, &source, source_size, &status); CHECK_OPENCL(status); } - if (gpuInfo->programs[idx] == (cl_program) NULL) { + if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) { return 0; } //char options[512]; // create a cl program executable for all the devices specified - if (!gpuInfo->isUserCreated) { - status = clBuildProgram(gpuInfo->programs[idx], 1, gpuInfo->devices, + if (!gpuInfo->mnIsUserCreated) { + status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, buildOption, NULL, NULL); CHECK_OPENCL(status) } else { - status = clBuildProgram(gpuInfo->programs[idx], 1, &(gpuInfo->dev), + status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), buildOption, NULL, NULL); CHECK_OPENCL(status) } printf("BuildProgram.\n"); if (status != CL_SUCCESS) { - if (!gpuInfo->isUserCreated) { - status = clGetProgramBuildInfo(gpuInfo->programs[idx], - gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, + if (!gpuInfo->mnIsUserCreated) { + status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], + gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &length); } else { - status = clGetProgramBuildInfo(gpuInfo->programs[idx], - gpuInfo->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], + gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length); } if (status != CL_SUCCESS) { printf("opencl create build log fail\n"); @@ -505,13 +507,13 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { if (buildLog == (char*) NULL) { return 0; } - if (!gpuInfo->isUserCreated) { - status = clGetProgramBuildInfo(gpuInfo->programs[idx], - gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, length, + if (!gpuInfo->mnIsUserCreated) { + status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], + gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, length, buildLog, &length); } else { - status = clGetProgramBuildInfo(gpuInfo->programs[idx], - gpuInfo->dev, CL_PROGRAM_BUILD_LOG, length, buildLog, + status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], + gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length); } @@ -525,12 +527,12 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { return 0; } - strcpy(gpuEnv.kernelSrcFile[idx], filename); + strcpy(gpuEnv.mArryKnelSrcFile[idx], filename); if (binaryExisted == 0) - GeneratBinFromKernelSource(gpuEnv.programs[idx], filename); + GeneratBinFromKernelSource(gpuEnv.mpArryPrograms[idx], filename); - gpuInfo->fileCount += 1; + gpuInfo->mnFileCount += 1; return 1; @@ -539,14 +541,14 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName, KernelEnv *env, cl_kernel_function *function) { int i; //,program_idx ; - printf("----------------OpenclDevice::GetKernelEnvAndFunc\n"); - for (i = 0; i < gpuEnv.kernelCount; i++) { - if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) { - env->context = gpuEnv.context; - env->commandQueue = gpuEnv.commandQueue; - env->program = gpuEnv.programs[0]; - env->kernel = gpuEnv.kernels[i]; - *function = gpuEnv.kernelFunctions[i]; + //printf("----------------OpenclDevice::GetKernelEnvAndFunc\n"); + for (i = 0; i < gpuEnv.mnKernelCount; i++) { + if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) { + env->mpkContext = gpuEnv.mpContext; + env->mpkCmdQueue = gpuEnv.mpCmdQueue; + env->mpkProgram = gpuEnv.mpArryPrograms[0]; + env->mpkKernel = gpuEnv.mpArryKernels[i]; + *function = gpuEnv.mpArryKnelFuncs[i]; return 1; } } @@ -554,21 +556,21 @@ int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName, } int OpenclDevice::RunKernel(const char *kernelName, void **userdata) { - KernelEnv env; + KernelEnv kEnv; cl_kernel_function function; int status; - memset(&env, 0, sizeof(KernelEnv)); - status = GetKernelEnvAndFunc(kernelName, &env, &function); - strcpy(env.kernelName, kernelName); + memset(&kEnv, 0, sizeof(KernelEnv)); + status = GetKernelEnvAndFunc(kernelName, &kEnv, &function); + strcpy(kEnv.mckKernelName, kernelName); if (status == 1) { - if (&env == (KernelEnv *) NULL + if (&kEnv == (KernelEnv *) NULL || &function == (cl_kernel_function *) NULL) { return 0; } - return (function(userdata, &env)); + return (function(userdata, &kEnv)); } return 0; } @@ -593,7 +595,7 @@ int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles) printf("init_opencl_env successed.\n"); //initialize program, kernelName, kernelCount status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles); - if (status == 0 || gpuEnv.kernelCount == 0) { + if (status == 0 || gpuEnv.mnKernelCount == 0) { printf("CompileKernelFile failed.\n"); return 1; } @@ -615,12 +617,12 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo) // Have a look at the available platforms. - if (!gpuInfo->isUserCreated) { + if (!gpuInfo->mnIsUserCreated) { status = clGetPlatformIDs(0, NULL, &numPlatforms); if (status != CL_SUCCESS) { return 1; } - gpuInfo->platform = NULL; + gpuInfo->mpPlatformID = NULL; if (0 < numPlatforms) { platforms = (cl_platform_id*) malloc( @@ -641,18 +643,18 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo) if (status != CL_SUCCESS) { return 1; } - gpuInfo->platform = platforms[i]; + gpuInfo->mpPlatformID = platforms[i]; //if (!strcmp(platformName, "Intel(R) Coporation")) //if( !strcmp( platformName, "Advanced Micro Devices, Inc." )) { - gpuInfo->platform = platforms[i]; + gpuInfo->mpPlatformID = platforms[i]; - status = clGetDeviceIDs(gpuInfo->platform, // platform - CL_DEVICE_TYPE_ALL, // device_type - 0, // num_entries - NULL, // devices - &numDevices); + status = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform + CL_DEVICE_TYPE_GPU, // device_type + 0, // num_entries + NULL, // devices + &numDevices); if (status != CL_SUCCESS) { return 1; @@ -665,82 +667,82 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo) } free(platforms); } - if (NULL == gpuInfo->platform) { + if (NULL == gpuInfo->mpPlatformID) { return 1; } // Use available platform. cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties) gpuInfo->platform; + cps[1] = (cl_context_properties) gpuInfo->mpPlatformID; cps[2] = 0; // Check for GPU. - gpuInfo->dType = CL_DEVICE_TYPE_GPU; - gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, NULL, + gpuInfo->mDevType = CL_DEVICE_TYPE_GPU; + gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL, NULL, &status); - // If no GPU, check for CPU. - if ((gpuInfo->context == (cl_context) NULL) + if ((gpuInfo->mpContext == (cl_context) NULL) || (status != CL_SUCCESS)) { - gpuInfo->dType = CL_DEVICE_TYPE_CPU; - gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, + gpuInfo->mDevType = CL_DEVICE_TYPE_CPU; + gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL, NULL, &status); } - - // If no GPU or CPU, check for a "default" type. - if ((gpuInfo->context == (cl_context) NULL) + if ((gpuInfo->mpContext == (cl_context) NULL) || (status != CL_SUCCESS)) { - gpuInfo->dType = CL_DEVICE_TYPE_DEFAULT; - gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, + gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT; + gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL, NULL, &status); } - if ((gpuInfo->context == (cl_context) NULL) + if ((gpuInfo->mpContext == (cl_context) NULL) || (status != CL_SUCCESS)) { return 1; } // Detect OpenCL devices. // First, get the size of device list data - status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, 0, + status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length); if ((status != CL_SUCCESS) || (length == 0)) { return 1; } // Now allocate memory for device list based on the size we got earlier - gpuInfo->devices = (cl_device_id*) malloc(length); - if (gpuInfo->devices == (cl_device_id*) NULL) { + gpuInfo->mpArryDevsID = (cl_device_id*) malloc(length); + if (gpuInfo->mpArryDevsID == (cl_device_id*) NULL) { return 1; } // Now, get the device list data - status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, length, - gpuInfo->devices, NULL); + status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, length, + gpuInfo->mpArryDevsID, NULL); if (status != CL_SUCCESS) { return 1; } // Create OpenCL command queue. - gpuInfo->commandQueue = clCreateCommandQueue(gpuInfo->context, - gpuInfo->devices[0], 0, &status); + gpuInfo->mpCmdQueue = clCreateCommandQueue(gpuInfo->mpContext, + gpuInfo->mpArryDevsID[0], 0, &status); if (status != CL_SUCCESS) { return 1; } } + status = clGetCommandQueueInfo(gpuInfo->mpCmdQueue, + CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL); + return 0; } int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function) { - int i; - printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.kernelCount); - for (i = 0; i < gpuEnv.kernelCount; i++) - { - if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) - { - gpuEnv.kernelFunctions[i] = function; - return 1; - } - } + int i; + //printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.mnKernelCount); + for (i = 0; i < gpuEnv.mnKernelCount; i++) + { + if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) + { + gpuEnv.mpArryKnelFuncs[i] = function; + return 1; + } + } return 0; } @@ -772,20 +774,20 @@ int OclFormulax(void ** usrdata, KernelEnv *env) { tdata[i] = (float) data[i]; } - env->kernel = clCreateKernel(env->program, "oclformula", &clStatus); + env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus); //printf("ScInterpreter::IterateParameters...after clCreateKernel.\n"); //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n"); int size = NUM; - cl_mem formula_data = clCreateBuffer(env->context, + cl_mem formula_data = clCreateBuffer(env->mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR), size * sizeof(float), (void *) tdata, &clStatus); //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n"); - status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem), + status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem), (void *) &formula_data); CHECK_OPENCL(status) - status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int), + status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int), (void *) &type); CHECK_OPENCL(status) @@ -795,21 +797,21 @@ int OclFormulax(void ** usrdata, KernelEnv *env) { while (global_work_size[0] != 1) { global_work_size[0] = global_work_size[0] / 2; - status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1, + status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); CHECK_OPENCL(status) } //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n"); - status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0, + status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0, sizeof(float), (void *) &tdata, 0, NULL, NULL); CHECK_OPENCL(status) - status = clFinish(env->commandQueue); + status = clFinish(env->mpkCmdQueue); CHECK_OPENCL(status) //PPAStopCpuEvent(ppa_proc); //fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n"); - status = clReleaseKernel(env->kernel); + status = clReleaseKernel(env->mpkKernel); CHECK_OPENCL(status) status = clReleaseMemObject(formula_data); CHECK_OPENCL(status) @@ -840,20 +842,20 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) { tdata[i] = (float) data[i]; } - env->kernel = clCreateKernel(env->program, "oclformula", &clStatus); + env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus); //printf("ScInterpreter::IterateParameters...after clCreateKernel.\n"); //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n"); int size = NUM; - cl_mem formula_data = clCreateBuffer(env->context, + cl_mem formula_data = clCreateBuffer(env->mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR), size * sizeof(float), (void *) tdata, &clStatus); //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n"); - status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem), + status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem), (void *) &formula_data); CHECK_OPENCL(status) - status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int), + status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int), (void *) &type); CHECK_OPENCL(status) @@ -863,21 +865,21 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) { while (global_work_size[0] != 1) { global_work_size[0] = global_work_size[0] / 2; - status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1, + status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); CHECK_OPENCL(status) } //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n"); - status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0, + status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0, sizeof(float), (void *) &tdata, 0, NULL, NULL); CHECK_OPENCL(status) - status = clFinish(env->commandQueue); + status = clFinish(env->mpkCmdQueue); CHECK_OPENCL(status) //PPAStopCpuEvent(ppa_proc); //fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n"); - status = clReleaseKernel(env->kernel); + status = clReleaseKernel(env->mpkKernel); CHECK_OPENCL(status) status = clReleaseMemObject(formula_data); CHECK_OPENCL(status) @@ -894,13 +896,13 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) { double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type) { - fprintf(stderr, "\n OpenclDevice, proc...begin\n"); - double ret = 0; - void *usrdata[2]; - usrdata[0] = (void *) data; - usrdata[1] = (void *) &type; - RunKernelWrapper(function, "oclformula", usrdata); - return ret; + fprintf(stderr, "\n OpenclDevice, proc...begin\n"); + double ret = 0; + void *usrdata[2]; + usrdata[0] = (void *) data; + usrdata[1] = (void *) &type; + RunKernelWrapper(function, "oclformula", usrdata); + return ret; } double OclCalc::OclTest() { @@ -927,467 +929,1141 @@ double OclCalc::OclTestDll() { OclCalc::OclCalc() { - OpenclDevice::SetOpenclState(1); - fprintf(stderr,"OclCalc:: init opencl ok.\n"); + fprintf(stderr,"OclCalc:: init opencl ...\n"); } OclCalc::~OclCalc() { - OpenclDevice::SetOpenclState(0); - fprintf(stderr,"OclCalc:: opencl end ok.\n"); + fprintf(stderr,"OclCalc:: opencl end ...\n"); } ///////////////////////////////////////////////////////////////////////////// -int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) { - KernelEnv env; - const char *kernelName = "oclFormulaMax"; - CheckKernelName(&env,kernelName); - cl_int clStatus; - size_t global_work_size[1]; - int alignSize = size + end[0]-start[0]; - - env.kernel = clCreateKernel(env.program,kernelName, &clStatus); - cl_int ret=0; - cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE), - alignSize * sizeof(float), NULL, &clStatus); - cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size* sizeof(float), NULL, &ret); - - float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL); - int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - { - hostMapStart[i] = start[i]; - hostMapEnd[i] = end[i]; - } - for(int i=0;i<alignSize;i++) - hostMapSrc[i] = (float)srcData[i]; - clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL); - - clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&inputCl); - clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&startCl); - clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&endCl); - clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem), - (void *)&outputCl); - CHECK_OPENCL(clStatus); - - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(clStatus) - - float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - output[i]=outPutMap[i]; - - clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL); - clStatus = clFinish(env.commandQueue); - - CHECK_OPENCL(clStatus); - clStatus = clReleaseKernel(env.kernel); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(inputCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(startCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(endCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(outputCl); - CHECK_OPENCL(clStatus); - return 0; +#ifdef GPU_64BITS +int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) +{ + KernelEnv kEnv; + const char *kernelName = "oclFormulaMax"; + CheckKernelName(&kEnv,kernelName); + cl_int clStatus; + size_t global_work_size[1]; + int alignSize = size + end[0]-start[0]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_int ret=0; + cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE), + alignSize * sizeof(float), NULL, &clStatus); + cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size * sizeof(unsigned int), NULL, &ret); + cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size * sizeof(unsigned int), NULL, &ret); + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size* sizeof(float), NULL, &ret); + + float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL); + int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); + int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); + for(int i=0;i<size;i++) + { + hostMapStart[i] = start[i]; + hostMapEnd[i] = end[i]; + } + for(int i=0;i<alignSize;i++) + hostMapSrc[i] = (float)srcData[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&inputCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&startCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&endCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<size;i++) + output[i]=outPutMap[i]; + + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL); + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(inputCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(startCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(endCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; } -int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) { - KernelEnv env; - const char *kernelName = "oclFormulaMin"; - CheckKernelName(&env,kernelName); - - cl_int clStatus; - size_t global_work_size[1]; - int alignSize = size + end[0]-start[0]; - - env.kernel = clCreateKernel(env.program,kernelName, &clStatus); - cl_int ret=0; - cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE), - alignSize * sizeof(float), NULL, &clStatus); - cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size* sizeof(float), NULL, &ret); - - float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL); - int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - { - hostMapStart[i] = start[i]; - hostMapEnd[i] = end[i]; - } - for(int i=0;i<alignSize;i++) - hostMapSrc[i] = (float)srcData[i]; - clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL); - - clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&inputCl); - clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&startCl); - clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&endCl); - clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem), - (void *)&outputCl); - CHECK_OPENCL(clStatus); - - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(clStatus) - - float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - output[i]=outPutMap[i]; - - clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL); - clStatus = clFinish(env.commandQueue); - - CHECK_OPENCL(clStatus); - clStatus = clReleaseKernel(env.kernel); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(inputCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(startCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(endCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(outputCl); - CHECK_OPENCL(clStatus); - return 0; +int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) +{ + KernelEnv kEnv; + const char *kernelName = "oclFormulaMin"; + CheckKernelName(&kEnv,kernelName); + cl_int clStatus; + size_t global_work_size[1]; + int alignSize = size + end[0]-start[0]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_int ret=0; + cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE), + alignSize * sizeof(float), NULL, &clStatus); + cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size * sizeof(unsigned int), NULL, &ret); + cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size * sizeof(unsigned int), NULL, &ret); + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size* sizeof(float), NULL, &ret); + + float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL); + int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); + int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); + for(int i=0;i<size;i++) + { + hostMapStart[i] = start[i]; + hostMapEnd[i] = end[i]; + } + for(int i=0;i<alignSize;i++) + hostMapSrc[i] = (float)srcData[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&inputCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&startCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&endCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<size;i++) + output[i]=outPutMap[i]; + + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL); + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(inputCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(startCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(endCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; } -int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) { - KernelEnv env; - const char *kernelName = "oclFormulaAverage"; - CheckKernelName(&env,kernelName); - - cl_int clStatus; - size_t global_work_size[1]; - int alignSize = size + end[0]-start[0]; - - env.kernel = clCreateKernel(env.program, kernelName, &clStatus); - cl_int ret=0; - cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE), - alignSize * sizeof(float), NULL, &clStatus); - cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size * sizeof(unsigned int), NULL, &ret); - cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE), - size* sizeof(float), NULL, &ret); - - float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL); - int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - { - hostMapStart[i] = start[i]; - hostMapEnd[i] = end[i]; - } - for(int i=0;i<alignSize;i++) - hostMapSrc[i] = (float)srcData[i]; - clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL); - - clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&inputCl); - clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&startCl); - clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&endCl); - clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem), - (void *)&outputCl); - CHECK_OPENCL(clStatus); - - global_work_size[0] = size; - clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(clStatus) - - float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<size;i++) - output[i]=outPutMap[i]; - - clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL); - clStatus = clFinish(env.commandQueue); - - CHECK_OPENCL(clStatus); - clStatus = clReleaseKernel(env.kernel); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(inputCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(startCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(endCl); - CHECK_OPENCL(clStatus); - clStatus = clReleaseMemObject(outputCl); - CHECK_OPENCL(clStatus); - return 0; +int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) +{ + KernelEnv kEnv; + const char *kernelName = "oclFormulaAverage"; + CheckKernelName(&kEnv,kernelName); + cl_int clStatus; + size_t global_work_size[1]; + int alignSize = size + end[0]-start[0]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_int ret=0; + cl_mem inputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + alignSize * sizeof(float), NULL, &clStatus); + cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + size * sizeof(unsigned int), NULL, &ret); + cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + size * sizeof(unsigned int), NULL, &ret); + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), + size* sizeof(float), NULL, &ret); + + float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,alignSize * sizeof(float),0,NULL,NULL,NULL); + int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); + int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL); +#if 1 + for(int i=0;i<size;i++) + { + hostMapStart[i] = start[i]; + hostMapEnd[i] = end[i]; + } + for(int i=0;i<alignSize;i++) + hostMapSrc[i] = (float)srcData[i]; + //memcpy(hostMapSrc,srcData,alignSize * sizeof(float)); +#endif + for(sal_Int32 i = 0; i < alignSize; ++i){//dbg + fprintf(stderr,"In avg host,hostMapSrc[%d] is ...%f\n",i,hostMapSrc[i]); + } -} + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc, 0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl, hostMapEnd, 0,NULL,NULL); -int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) { - KernelEnv env; - int status; - const char *kernelName = "oclSignedAdd"; - CheckKernelName(&env,kernelName); - - - cl_int clStatus; - size_t global_work_size[1]; - - env.kernel = clCreateKernel(env.program,kernelName, &clStatus); - cl_mem clLiftData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clRightData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clResult = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - - float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - { - hostMapLeftData[i] = (float)lData[i]; - hostMapRightData[i] = (float)rData[i]; - } - clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL); - - status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&clLiftData); - status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&clRightData); - status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&clResult); - CHECK_OPENCL(status) - global_work_size[0] = dSize; - status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(status); - - float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - rResult[i]=hostMapResult[i]; - clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL); - - CHECK_OPENCL(status); - status = clFinish(env.commandQueue); - CHECK_OPENCL(status); - status = clReleaseKernel(env.kernel); - CHECK_OPENCL(status); - status = clReleaseMemObject(clLiftData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clRightData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clResult); - CHECK_OPENCL(status); - return 0; + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&inputCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&startCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&endCl); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL); + + for(int i=0;i<size;i++){ + //fprintf(stderr,"In avg host,outPutMap[%d] is ...%f\n",i,outPutMap[i]); + output[i]=outPutMap[i]; + } + + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL); + clStatus = clFinish(kEnv.mpkCmdQueue); + + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(inputCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(startCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(endCl); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; + + } -int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) { - KernelEnv env; - int status; - const char *kernelName = "oclSignedMul"; - CheckKernelName(&env,kernelName); - - - size_t global_work_size[1]; - cl_int clStatus; - env.kernel = clCreateKernel(env.program, kernelName, &clStatus); - cl_mem clLiftData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clRightData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clResult = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - - float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - { - hostMapLeftData[i] = (float)lData[i]; - hostMapRightData[i] = (float)rData[i]; - } - clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL); - - status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&clLiftData); - status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&clRightData); - status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&clResult); - CHECK_OPENCL(status) - global_work_size[0] = dSize; - status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(status); - - float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - rResult[i]=hostMapResult[i]; - clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL); - - CHECK_OPENCL(status); - status = clFinish(env.commandQueue); - CHECK_OPENCL(status); - status = clReleaseKernel(env.kernel); - CHECK_OPENCL(status); - status = clReleaseMemObject(clLiftData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clRightData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clResult); - CHECK_OPENCL(status); - return 0; + +int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) { + + KernelEnv kEnv; + int status; + const char *kernelName = "oclSignedAdd"; + CheckKernelName(&kEnv,kernelName); + + + cl_int clStatus; + size_t global_work_size[1]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clRightData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + + float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + { + hostMapLeftData[i] = (float)lData[i]; + hostMapRightData[i] = (float)rData[i]; + } + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL); + + status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clLiftData); + status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clRightData); + status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clResult); + CHECK_OPENCL(status) + global_work_size[0] = dSize; + status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(status); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + rResult[i]=hostMapResult[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + + CHECK_OPENCL(status); + status = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(status); + status = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(status); + status = clReleaseMemObject(clLiftData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clRightData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clResult); + CHECK_OPENCL(status); + return 0; } + int OclCalc::OclHostSignedSub(double *lData,double *rData,double *rResult,int dSize) { - KernelEnv env; - int status; - const char *kernelName = "oclSignedSub"; - CheckKernelName(&env,kernelName); - - cl_int clStatus; - size_t global_work_size[1]; - env.kernel = clCreateKernel(env.program,kernelName, &clStatus); - cl_mem clLiftData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clRightData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clResult = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - - float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - { - hostMapLeftData[i] = (float)lData[i]; - hostMapRightData[i] = (float)rData[i]; - } - clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL); - - status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&clLiftData); - status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&clRightData); - status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&clResult); - CHECK_OPENCL(status) - global_work_size[0] = dSize; - status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(status); - - float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - rResult[i]=hostMapResult[i]; - clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL); - - CHECK_OPENCL(status); - status = clFinish(env.commandQueue); - CHECK_OPENCL(status); - status = clReleaseKernel(env.kernel); - CHECK_OPENCL(status); - status = clReleaseMemObject(clLiftData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clRightData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clResult); - CHECK_OPENCL(status); - return 0; + KernelEnv kEnv; + int status; + const char *kernelName = "oclSignedSub"; + CheckKernelName(&kEnv,kernelName); + + cl_int clStatus; + size_t global_work_size[1]; + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clRightData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + + float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + { + hostMapLeftData[i] = (float)lData[i]; + hostMapRightData[i] = (float)rData[i]; + } + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL); + + status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clLiftData); + status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clRightData); + status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clResult); + CHECK_OPENCL(status) + global_work_size[0] = dSize; + status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(status); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + rResult[i]=hostMapResult[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + + CHECK_OPENCL(status); + status = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(status); + status = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(status); + status = clReleaseMemObject(clLiftData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clRightData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clResult); + CHECK_OPENCL(status); + return 0; } + +int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) { + KernelEnv kEnv; + int status; + const char *kernelName = "oclSignedMul"; + CheckKernelName(&kEnv,kernelName); + + + size_t global_work_size[1]; + cl_int clStatus; + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus); + cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clRightData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + + float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + { + hostMapLeftData[i] = (float)lData[i]; + hostMapRightData[i] = (float)rData[i]; + } + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL); + + status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clLiftData); + status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clRightData); + status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clResult); + CHECK_OPENCL(status) + global_work_size[0] = dSize; + status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(status); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + rResult[i]=hostMapResult[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + + CHECK_OPENCL(status); + status = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(status); + status = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(status); + status = clReleaseMemObject(clLiftData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clRightData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clResult); + CHECK_OPENCL(status); + return 0; +} + int OclCalc::OclHostSignedDiv(double *lData,double *rData,double *rResult,int dSize) { - KernelEnv env; - int status; - const char *kernelName = "oclSignedDiv"; - CheckKernelName(&env,kernelName); - - - size_t global_work_size[1]; - cl_int clStatus; - env.kernel = clCreateKernel(env.program,kernelName, &clStatus); - cl_mem clLiftData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clRightData = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - cl_mem clResult = clCreateBuffer(env.context, - (cl_mem_flags) (CL_MEM_READ_WRITE), - dSize * sizeof(float), NULL, &clStatus); - - float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - { - hostMapLeftData[i] = (float)lData[i]; - hostMapRightData[i] = (float)rData[i]; - } - clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL); - clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL); - - status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem), - (void *)&clLiftData); - status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem), - (void *)&clRightData); - status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem), - (void *)&clResult); - CHECK_OPENCL(status) - global_work_size[0] = dSize; - status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1, - NULL, global_work_size, NULL, 0, NULL, NULL); - CHECK_OPENCL(status); - - float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); - for(int i=0;i<dSize;i++) - rResult[i]=hostMapResult[i]; - clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL); - - CHECK_OPENCL(status); - status = clFinish(env.commandQueue); - CHECK_OPENCL(status); - status = clReleaseKernel(env.kernel); - CHECK_OPENCL(status); - status = clReleaseMemObject(clLiftData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clRightData); - CHECK_OPENCL(status); - status = clReleaseMemObject(clResult); - CHECK_OPENCL(status); - return 0; + KernelEnv kEnv; + int status; + const char *kernelName = "oclSignedDiv"; + CheckKernelName(&kEnv,kernelName); + + + cl_int clStatus; + size_t global_work_size[1]; + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clRightData = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + (cl_mem_flags) (CL_MEM_READ_WRITE), + dSize * sizeof(float), NULL, &clStatus); + + float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + { + hostMapLeftData[i] = (float)lData[i]; + hostMapRightData[i] = (float)rData[i]; + } + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL); + + status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clLiftData); + status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clRightData); + status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clResult); + CHECK_OPENCL(status); + global_work_size[0] = dSize; + status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(status); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<dSize;i++) + rResult[i]=hostMapResult[i]; + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + + CHECK_OPENCL(status); + status = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(status); + status = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(status); + status = clReleaseMemObject(clLiftData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clRightData); + CHECK_OPENCL(status); + status = clReleaseMemObject(clResult); + CHECK_OPENCL(status); + return 0; +} +#endif // GPU_64BITS +int OclCalc::CreateBuffer(float *&fpSrcData,uint *&npStartPos,uint *&npEndPos,int nBufferSize) +{ + cl_int clStatus = 0; + SetKernelEnv(&kEnv); + + mpClmemSrcData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + nBufferSize * sizeof(float), NULL, &clStatus); + CHECK_OPENCL(clStatus); + mpClmemStartPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + nBufferSize * sizeof(unsigned int), NULL, &clStatus); + CHECK_OPENCL(clStatus); + mpClmemEndPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + nBufferSize * sizeof(unsigned int), NULL, &clStatus); + CHECK_OPENCL(clStatus); + + fpSrcData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemSrcData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + npStartPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemStartPos,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + npEndPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemEndPos,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos); + return 0; +} + +int OclCalc::CreateBuffer(float *&fpLeftData,float *&fpRightData,int nBufferSize) +{ + cl_int clStatus = 0; + SetKernelEnv(&kEnv); + + mpClmemLeftData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + nBufferSize * sizeof(float), NULL, &clStatus); + CHECK_OPENCL(clStatus); + mpClmemRightData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), + nBufferSize * sizeof(unsigned int), NULL, &clStatus); + CHECK_OPENCL(clStatus); + fpLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + fpRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos); + return 0; +} + +int OclCalc::OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size) +{ + cl_int clStatus; + size_t global_work_size[1]; + //int alignSize = size + end[0]-start[0]; + //for(int u=0;u < size;u++) + //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]); + const char *kernelName = "oclFormulaMax"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + size* sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, + kEnv.mpkKernel, + 1, + NULL, + global_work_size, + NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue, + outputCl, + CL_TRUE,CL_MAP_READ, + 0, + size*sizeof(float), + 0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<size;i++) + output[i]=outputMap[i];// from gpu float type to cpu double type + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; + +} + +int OclCalc::OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size) +{ + cl_int clStatus; + size_t global_work_size[1]; + //int alignSize = size + end[0]-start[0]; + //for(int u=0;u < size;u++) + //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]); + const char *kernelName = "oclFormulaMin"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + size* sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, + kEnv.mpkKernel, + 1, + NULL, + global_work_size, + NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue, + outputCl, + CL_TRUE,CL_MAP_READ, + 0, + size*sizeof(float), + 0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<size;i++) + output[i]=outputMap[i];// from gpu float type to cpu double type + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; + } +int OclCalc::OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size) +{ + cl_int clStatus; + size_t global_work_size[1]; + //int alignSize = size + end[0]-start[0]; + //for(int u=0;u < size;u++) + //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]); + const char *kernelName = "oclFormulaAverage"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + size* sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl); + CHECK_OPENCL(clStatus); + + global_work_size[0] = size; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, + kEnv.mpkKernel, + 1, + NULL, + global_work_size, + NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue, + outputCl, + CL_TRUE,CL_MAP_READ, + 0, + size*sizeof(float), + 0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<size;i++) + output[i]=outputMap[i];// from gpu float type to cpu double type + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemSrcData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemStartPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemEndPos); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(outputCl); + CHECK_OPENCL(clStatus); + return 0; +} + + +int OclCalc::OclHostSignedAdd32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize) +{ + cl_int clStatus; + size_t global_work_size[1]; + const char *kernelName = "oclSignedAdd"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + nRowSize * sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nRowSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<nRowSize;i++) + rResult[i]=hostMapResult[i]; + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clResult); + CHECK_OPENCL(clStatus); + return 0; +} +int OclCalc::OclHostSignedSub32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize) +{ + cl_int clStatus; + size_t global_work_size[1]; + const char *kernelName = "oclSignedSub"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + nRowSize * sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nRowSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<nRowSize;i++) + rResult[i]=hostMapResult[i]; + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clResult); + CHECK_OPENCL(clStatus); + return 0; +} +int OclCalc::OclHostSignedMul32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize) +{ + cl_int clStatus; + size_t global_work_size[1]; + const char *kernelName = "oclSignedMul"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + nRowSize * sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nRowSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<nRowSize;i++) + rResult[i]=hostMapResult[i]; + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clResult); + CHECK_OPENCL(clStatus); + return 0; +} +int OclCalc::OclHostSignedDiv32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize) +{ + cl_int clStatus; + size_t global_work_size[1]; + const char *kernelName = "oclSignedDiv"; + CheckKernelName(&kEnv,kernelName); + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemLeftData,fpLeftData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemRightData,fpRightData,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + cl_mem clResult = clCreateBuffer(kEnv.mpkContext, + CL_MEM_READ_WRITE, + nRowSize * sizeof(float), + NULL, + &clStatus); + CHECK_OPENCL(clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&clResult); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nRowSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + + float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,nRowSize*sizeof(float),0,NULL,NULL,&clStatus); + CHECK_OPENCL(clStatus); + for(int i=0;i<nRowSize;i++) + rResult[i]=hostMapResult[i]; + clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL); + CHECK_OPENCL(clStatus); + + clStatus = clFinish(kEnv.mpkCmdQueue); + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemLeftData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(mpClmemRightData); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clResult); + CHECK_OPENCL(clStatus); + return 0; +} + +#ifdef FORMULAE_DEBUG +int OclCalc::OclHostFormulaSum(float *dpsrcData,int *npStart,int *npEnd,float *dpOutput,int nSize) { + KernelEnv kEnv; + const char *cpKernelName = "oclFormulaSum"; + CheckKernelName(&kEnv,cpKernelName); + + cl_int clStatus; + size_t global_work_size[1]; + if(nSize < 1 ) + { + printf("The nSize should be bigger than one\n"); + return -1; + } + int nAlignSize = npEnd[nSize-1]-npStart[0]+1; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus); + cl_mem clpInput = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR), + nAlignSize * sizeof(float), (void *)dpsrcData, &clStatus); + cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR ), + nSize * sizeof(unsigned int), (void *)npStart, &clStatus); + cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR), + nSize * sizeof(unsigned int), (void *)npEnd, &clStatus); + cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR), + nSize* sizeof(float), NULL, &clStatus); + + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clpInput); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clpStart); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clpEnd); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&clpOutput); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus); + float * fpOutPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,CL_MAP_READ,0,nSize*sizeof(float),0,NULL,NULL,NULL); + for(int i=0;i<nSize;i++) + dpOutput[i]=fpOutPutMap[i]; + + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpOutput,fpOutPutMap,0,NULL,NULL); + clStatus = clFinish(kEnv.mpkCmdQueue); + + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpInput); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpStart); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpEnd); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpOutput); + CHECK_OPENCL(clStatus); + return 0; +} + +int OclCalc::OclHostFormulaCount(int *npStart,int *npEnd,float *dpOutput,int nSize) { + KernelEnv kEnv; + const char *cpKernelName = "oclFormulaCount"; + CheckKernelName(&kEnv,cpKernelName); + + cl_int clStatus; + size_t global_work_size[1]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus); + cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ), + nSize * sizeof(unsigned int), (void *)npEnd, &clStatus); + cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR), + nSize * sizeof(unsigned int), (void *)dpOutput, &clStatus); + cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_ONLY), + nSize * sizeof(float), (void *)npStart, &clStatus); + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&clpStart); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&clpEnd); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clpOutput); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus) + + clStatus=clEnqueueReadBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,0,nSize*sizeof(float),(void *)dpOutput,0,NULL,NULL); + if(clStatus!=0) + { + printf("clEnqueueReadBuffer err\n"); + return -1; + } + + clStatus = clFinish(kEnv.mpkCmdQueue); + + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpStart); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpEnd); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpOutput); + CHECK_OPENCL(clStatus); + return 0; +} + +/* + * The dpsrcData is double rows,one col is the first column data,second is the second column data.if get a cell data range,the range + *save the npStart array eg:a4-a8;b10-b14,the npStart will store a4,b10,and the npEnd will store a8,b14 range.So it can if(i +1)%2 to judge + * the a cloumn or b cloumn npStart range.so as b bolumn. + */ +int OclCalc::OclHostFormulaSumProduct(float *dpSrcData,int *npStart,int *npEnd,float *dpOutput,int nSize) { + KernelEnv kEnv; + const char *cpKernelName = "oclFormulaSumproduct"; + CheckKernelName(&kEnv,cpKernelName); + + cl_int clStatus; + size_t global_work_size[1]; + + kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus); + cl_mem clpFirstCol = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE ), + nSize * sizeof(float),NULL, &clStatus); + cl_mem clpSecondCol = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE), + nSize * sizeof(float),NULL, &clStatus); + cl_mem clpStart = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ), + nSize *2 * sizeof(unsigned int), (void *)npStart, &clStatus); + cl_mem clpEnd = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR ), + nSize *2 * sizeof(unsigned int),(void *)npEnd , &clStatus); + cl_mem clpOutput = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY), + nSize* sizeof(float), (void *)dpOutput, &clStatus); + + float * fpHostMapFirCol = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpFirstCol,CL_TRUE,CL_MAP_WRITE,0,nSize * sizeof(float),0,NULL,NULL,NULL); + //checkUpPoint("hostMapSrc",hostMapSrc); + float * fpHostMapSecCol = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clpSecondCol,CL_TRUE,CL_MAP_WRITE,0,nSize * sizeof(float),0,NULL,NULL,NULL); + //checkUpPoint("hostMapSrc",hostMapSrc); + if(NULL==fpHostMapFirCol||NULL==fpHostMapSecCol) + { + printf("In File %s at %d line alloc err\n",__FILE__,__LINE__); + return -1; + } + for(int i=0;i<nSize;i++) + { + fpHostMapFirCol[i] = dpSrcData[2*i]; + fpHostMapSecCol[i] = dpSrcData[2*i+1]; + } + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpFirstCol,fpHostMapFirCol,0,NULL,NULL); + clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clpSecondCol,fpHostMapSecCol,0,NULL,NULL); + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), + (void *)&fpHostMapFirCol); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), + (void *)&fpHostMapSecCol); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem), + (void *)&clpStart); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem), + (void *)&clpEnd); + clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem), + (void *)&clpOutput); + CHECK_OPENCL(clStatus); + + global_work_size[0] = nSize; + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + CHECK_OPENCL(clStatus) + + clStatus=clEnqueueReadBuffer(kEnv.mpkCmdQueue,clpOutput,CL_TRUE,0,nSize*sizeof(float),(void *)dpOutput,0,NULL,NULL); + if(clStatus!=0) + { + printf("clEnqueueReadBuffer err\n"); + return -1; + } + clStatus = clFinish(kEnv.mpkCmdQueue); + + CHECK_OPENCL(clStatus); + clStatus = clReleaseKernel(kEnv.mpkKernel); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpFirstCol); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpSecondCol); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpStart); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpEnd); + CHECK_OPENCL(clStatus); + clStatus = clReleaseMemObject(clpOutput); + CHECK_OPENCL(clStatus); + return 0; +} +#endif + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx index 349809bda7b1..3e87f8445e7d 100644..100755 --- a/sc/source/core/opencl/openclwrapper.hxx +++ b/sc/source/core/opencl/openclwrapper.hxx @@ -24,19 +24,21 @@ #endif #endif +typedef unsigned int uint; + typedef struct _KernelEnv { - cl_context context; - cl_command_queue commandQueue; - cl_program program; - cl_kernel kernel; - char kernelName[150]; + cl_context mpkContext; + cl_command_queue mpkCmdQueue; + cl_program mpkProgram; + cl_kernel mpkKernel; + char mckKernelName[150]; } KernelEnv; typedef struct _OpenCLEnv { - cl_platform_id platform; - cl_context context; - cl_device_id devices; - cl_command_queue commandQueue; + cl_platform_id mpOclPlatformID; + cl_context mpOclContext; + cl_device_id mpOclDevsID; + cl_command_queue mpOclCmdQueue; } OpenCLEnv; #if defined __cplusplus @@ -54,32 +56,32 @@ typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv); #define CHECK_OPENCL(status) \ if(status != CL_SUCCESS) \ -{ \ - printf ("error code is %d.\n",status); \ - return (0); \ +{ \ + printf ("error code is %d.\n",status); \ + return 0; \ } -#define MAX_KERNEL_STRING_LEN 64 +#define MAX_KERNEL_STRING_LEN 64 #define MAX_CLFILE_NUM 50 #define MAX_CLKERNEL_NUM 200 #define MAX_KERNEL_NAME_LEN 64 typedef struct _GPUEnv { //share vb in all modules in hb library - cl_platform_id platform; - cl_device_type dType; - cl_context context; - cl_device_id *devices; - cl_device_id dev; - cl_command_queue commandQueue; - cl_kernel kernels[MAX_CLFILE_NUM]; - cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file - char kernelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256 - kernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; - cl_kernel_function kernelFunctions[MAX_CLKERNEL_NUM]; - int kernelCount, fileCount, // only one kernel file - isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper + cl_platform_id mpPlatformID; + cl_device_type mDevType; + cl_context mpContext; + cl_device_id *mpArryDevsID; + cl_device_id mpDevID; + cl_command_queue mpCmdQueue; + cl_kernel mpArryKernels[MAX_CLFILE_NUM]; + cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file + char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256 + mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; + cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; + int mnKernelCount, mnFileCount, // only one kernel file + mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper } GPUEnv; @@ -92,6 +94,7 @@ class OpenclCalcBase{ public: OpenclCalcBase(){}; virtual ~OpenclCalcBase(){}; +#ifdef GPU_64BITS virtual int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize)=0; virtual int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize)=0; virtual int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize)=0; @@ -99,6 +102,19 @@ public: virtual int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0; virtual int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0; virtual int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0; +#endif + virtual int OclHostSignedAdd32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)=0; + virtual int OclHostSignedSub32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)=0; + virtual int OclHostSignedMul32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)=0; + virtual int OclHostSignedDiv32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize)=0; + virtual int OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize)=0; + virtual int OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize)=0; + virtual int OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize)=0; + + + //virtual int OclHostFormulaCount(int *start,int *end,float *output,int size)=0; + //virtual int OclHostFormulaSum(float *srcData,int *startPos,int *endPos,float *output,int outputSize)=0; + //virtual int OclHostFormulaSumProduct(float *pdSrcData,int *pnStart,int *pnEnd,float *pdOutput,int nSize)=0; }; @@ -123,8 +139,9 @@ public: static int BinaryGenerated(const char * clFileName, FILE ** fhandle); static int CompileKernelFile(const char *filename, GPUEnv *gpuInfo, const char *buildOption); - int ReleaseKernel(KernelEnv * env); int InitOpenclAttr(OpenCLEnv * env); + int ReleaseKernel(KernelEnv * env); + int SetKernelEnv(KernelEnv *envInfo); int CreateKernel(char * kernelname, KernelEnv * env); int RunKernel(const char *kernelName, void **userdata); int ConvertToString(const char *filename, char **source); @@ -148,35 +165,61 @@ public: #define NUM 4//(16*16*16) typedef enum _formulax_ { - MIN, - MAX, - SUM, - AVG, - COUNT, - SUMPRODUCT, - MINVERSE, - SIGNEDADD, - SIGNEDNUL, - SIGNEDDIV, - SIGNEDSUB + MIN, + MAX, + SUM, + AVG, + COUNT, + SUMPRODUCT, + MINVERSE, + SIGNEDADD, + SIGNEDNUL, + SIGNEDDIV, + SIGNEDSUB } formulax; class OclCalc: public OpenclDevice,OpenclCalcBase { public: + KernelEnv kEnv; + cl_mem mpClmemSrcData; + cl_mem mpClmemStartPos; + cl_mem mpClmemEndPos; + cl_mem mpClmemLeftData; + cl_mem mpClmemRightData; + + OclCalc(); ~OclCalc(); double OclTest(); - double OclTestDll(); + double OclTestDll(); double OclMin(); - double OclProcess(cl_kernel_function function, double *data, formulax type); - int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize); - int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize); - int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize); - int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize); - int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize); - int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize); - int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize); + double OclProcess(cl_kernel_function function, double *data, formulax type); + +#ifdef GPU_64BITS + int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize); + int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize); + int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize); + int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize); + int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize); + int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize); + int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize); +#endif + int OclHostSignedAdd32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize); + int OclHostSignedSub32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize); + int OclHostSignedMul32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize); + int OclHostSignedDiv32Bits(float *fpLeftData,float *fpRightData,double *rResult,int nRowSize); + int OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); + int OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); + int OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize); + + //int OclHostFormulaCount(int *startPos,int *endPos,float *output,int outputSize); + //int OclHostFormulaSum(float *srcData,int *startPos,int *endPos,float *output,int outputSize); + //int OclHostFormulaSumProduct(float *pdSrcData,int *pnStart,int *pnEnd,float *pdOutput,int nSize); + + /////////////////////////////////////////////////////////////// + int CreateBuffer(float *&fpSrcData,uint *&npStartPos,uint *&npEndPos,int nBufferSize); + int CreateBuffer(float *&fpLeftData,float *&fpRightData,int nBufferSize); }; #endif |