diff options
author | Tor Lillqvist <tml@collabora.com> | 2014-11-27 15:13:12 +0200 |
---|---|---|
committer | Tor Lillqvist <tml@collabora.com> | 2014-11-27 15:32:58 +0200 |
commit | a70b717ef872c0ac652883ecd2a82c4cc29763e2 (patch) | |
tree | 0975349b1b8798eb9d444d7c91d97d52a2f6fa4e /opencl | |
parent | d83b031346799bff0a3298387f76b16baad2e5cf (diff) |
Move more Calc-independent OpenCL stuff from the sc to the opencl module
No cleanups yet. Just removed the "sc" namespace parts now when this stuff is
no longer Calc-specific. There is still horribly confusing use of the same
OpenCLDevice name for both a class and as a namespace, for instance. And the
OpenCLDevice class has only public static members even, so effectively it acts
as just a namespace anyway... Etc.
Change-Id: Idc5f30a721df0101426c676f04a85e02c5dc8443
Diffstat (limited to 'opencl')
-rw-r--r-- | opencl/Library_opencl.mk | 7 | ||||
-rw-r--r-- | opencl/inc/opencl_device.hxx | 25 | ||||
-rw-r--r-- | opencl/inc/opencl_device_selection.h | 641 | ||||
-rw-r--r-- | opencl/source/opencl_device.cxx | 598 | ||||
-rw-r--r-- | opencl/source/openclwrapper.cxx | 804 |
5 files changed, 2075 insertions, 0 deletions
diff --git a/opencl/Library_opencl.mk b/opencl/Library_opencl.mk index 51ca62c71302..ad1af3761b08 100644 --- a/opencl/Library_opencl.mk +++ b/opencl/Library_opencl.mk @@ -9,6 +9,11 @@ $(eval $(call gb_Library_Library,opencl)) +$(eval $(call gb_Library_set_include,opencl,\ + -I$(SRCDIR)/opencl/inc \ + $$(INCLUDE) \ +)) + $(eval $(call gb_Library_add_defs,opencl,\ -DOPENCL_DLLIMPLEMENTATION \ )) @@ -36,6 +41,8 @@ $(eval $(call gb_Library_use_libraries,opencl,\ $(eval $(call gb_Library_add_exception_objects,opencl,\ opencl/source/openclconfig \ + opencl/source/openclwrapper \ + opencl/source/opencl_device \ opencl/source/platforminfo \ )) diff --git a/opencl/inc/opencl_device.hxx b/opencl/inc/opencl_device.hxx new file mode 100644 index 000000000000..7435879ecc83 --- /dev/null +++ b/opencl/inc/opencl_device.hxx @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX +#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX + +#include "opencl_device_selection.h" + +namespace OpenCLDevice { + +ds_device getDeviceSelection(const char* pFileName, bool bForceSelection = false); +bool selectedDeviceIsOpenCL(ds_device device); +bool selectedDeviceIsNativeCPU(ds_device device); + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/inc/opencl_device_selection.h b/opencl/inc/opencl_device_selection.h new file mode 100644 index 000000000000..03373f4a3d0f --- /dev/null +++ b/opencl/inc/opencl_device_selection.h @@ -0,0 +1,641 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H +#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <clew.h> + +#define DS_DEVICE_NAME_LENGTH 256 + +enum ds_status +{ + DS_SUCCESS = 0 + ,DS_INVALID_PROFILE = 1000 + ,DS_MEMORY_ERROR + , DS_INVALID_PERF_EVALUATOR_TYPE + , DS_INVALID_PERF_EVALUATOR + , DS_PERF_EVALUATOR_ERROR + , DS_FILE_ERROR + , DS_UNKNOWN_DEVICE_TYPE + , DS_PROFILE_FILE_ERROR + , DS_SCORE_SERIALIZER_ERROR + , DS_SCORE_DESERIALIZER_ERROR +}; + +// device type +enum ds_device_type +{ + DS_DEVICE_NATIVE_CPU = 0 + ,DS_DEVICE_OPENCL_DEVICE +}; + + +struct ds_device +{ + ds_device_type type; + cl_device_id oclDeviceID; + char* oclPlatformVendor; + char* oclDeviceName; + char* oclDriverVersion; + void* score; // a pointer to the score data, the content/format is application defined +}; + +struct ds_profile +{ + unsigned int numDevices; + ds_device* devices; + const char* version; +}; + +// deallocate memory used by score +typedef ds_status(* ds_score_release)(void* score); +inline ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) +{ + ds_status status = DS_SUCCESS; + if (profile != NULL) + { + if (profile->devices != NULL && sr != NULL) + { + unsigned int i; + for (i = 0; i < profile->numDevices; i++) + { + free(profile->devices[i].oclPlatformVendor); + free(profile->devices[i].oclDeviceName); + free(profile->devices[i].oclDriverVersion); + status = sr(profile->devices[i].score); + if (status != DS_SUCCESS) break; + } + free(profile->devices); + } + free(profile); + } + return status; +} + + +inline ds_status initDSProfile(ds_profile** p, const char* version) +{ + int numDevices; + cl_uint numPlatforms; + cl_platform_id* platforms = NULL; + cl_device_id* devices = NULL; + ds_status status = DS_SUCCESS; + ds_profile* profile = NULL; + unsigned int next; + unsigned int i; + + if (p == NULL) return DS_INVALID_PROFILE; + + profile = (ds_profile*)malloc(sizeof(ds_profile)); + if (profile == NULL) return DS_MEMORY_ERROR; + + memset(profile, 0, sizeof(ds_profile)); + + clGetPlatformIDs(0, NULL, &numPlatforms); + if (numPlatforms != 0) + { + platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); + if (platforms == NULL) + { + status = DS_MEMORY_ERROR; + goto cleanup; + } + clGetPlatformIDs(numPlatforms, platforms, NULL); + } + + numDevices = 0; + for (i = 0; i < (unsigned int)numPlatforms; i++) + { + cl_uint num; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num); + numDevices += num; + } + if (numDevices != 0) + { + devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); + if (devices == NULL) + { + status = DS_MEMORY_ERROR; + goto cleanup; + } + } + + profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU + profile->devices = (ds_device*)malloc(profile->numDevices * sizeof(ds_device)); + if (profile->devices == NULL) + { + profile->numDevices = 0; + status = DS_MEMORY_ERROR; + goto cleanup; + } + memset(profile->devices, 0, profile->numDevices * sizeof(ds_device)); + + next = 0; + for (i = 0; i < (unsigned int)numPlatforms; i++) + { + cl_uint num; + unsigned j; + char vendor[256]; + if (clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL) != CL_SUCCESS) + vendor[0] = '\0'; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, devices, &num); + for (j = 0; j < num; j++, next++) + { + char buffer[DS_DEVICE_NAME_LENGTH]; + size_t length; + + profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; + profile->devices[next].oclDeviceID = devices[j]; + + profile->devices[next].oclPlatformVendor = strdup(vendor); + + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME + , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + length = strlen(buffer); + profile->devices[next].oclDeviceName = (char*)malloc(length + 1); + memcpy(profile->devices[next].oclDeviceName, buffer, length + 1); + + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION + , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + length = strlen(buffer); + profile->devices[next].oclDriverVersion = (char*)malloc(length + 1); + memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1); + } + } + profile->devices[next].type = DS_DEVICE_NATIVE_CPU; + profile->version = version; + +cleanup: + if (platforms) free(platforms); + if (devices) free(devices); + if (status == DS_SUCCESS) + { + *p = profile; + } + else + { + if (profile) + { + if (profile->devices) free(profile->devices); + free(profile); + } + } + return status; +} + +// Pointer to a function that calculates the score of a device (ex: device->score) +// update the data size of score. The encoding and the format of the score data +// is implementation defined. The function should return DS_SUCCESS if there's no error to be reported. +typedef ds_status(* ds_perf_evaluator)(ds_device* device, void* data); + +typedef enum { + DS_EVALUATE_ALL + , DS_EVALUATE_NEW_ONLY +} ds_evaluation_type; + +inline ds_status profileDevices(ds_profile* profile, const ds_evaluation_type type, + ds_perf_evaluator evaluator, void* evaluatorData, unsigned int* numUpdates) +{ + ds_status status = DS_SUCCESS; + unsigned int i; + unsigned int updates = 0; + + if (profile == NULL) + { + return DS_INVALID_PROFILE; + } + if (evaluator == NULL) + { + return DS_INVALID_PERF_EVALUATOR; + } + + for (i = 0; i < profile->numDevices; i++) + { + ds_status evaluatorStatus; + + switch (type) + { + case DS_EVALUATE_NEW_ONLY: + if (profile->devices[i].score != NULL) break; + // else fall through + case DS_EVALUATE_ALL: + evaluatorStatus = evaluator(profile->devices + i, evaluatorData); + if (evaluatorStatus != DS_SUCCESS) + { + status = evaluatorStatus; + return status; + } + updates++; + break; + default: + return DS_INVALID_PERF_EVALUATOR_TYPE; + break; + }; + } + if (numUpdates) *numUpdates = updates; + return status; +} + + +#define DS_TAG_VERSION "<version>" +#define DS_TAG_VERSION_END "</version>" +#define DS_TAG_DEVICE "<device>" +#define DS_TAG_DEVICE_END "</device>" +#define DS_TAG_SCORE "<score>" +#define DS_TAG_SCORE_END "</score>" +#define DS_TAG_DEVICE_TYPE "<type>" +#define DS_TAG_DEVICE_TYPE_END "</type>" +#define DS_TAG_DEVICE_NAME "<name>" +#define DS_TAG_DEVICE_NAME_END "</name>" +#define DS_TAG_DEVICE_DRIVER_VERSION "<driver>" +#define DS_TAG_DEVICE_DRIVER_VERSION_END "</driver>" + +#define DS_DEVICE_NATIVE_CPU_STRING "native_cpu" + +typedef ds_status(* ds_score_serializer)(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize); +inline ds_status writeProfileToFile(ds_profile* profile, ds_score_serializer serializer, const char* file) +{ + ds_status status = DS_SUCCESS; + FILE* profileFile = NULL; + + + if (profile == NULL) return DS_INVALID_PROFILE; + + profileFile = fopen(file, "wb"); + if (profileFile == NULL) + { + status = DS_FILE_ERROR; + } + else + { + unsigned int i; + + // write version string + fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), profileFile); + fwrite(profile->version, sizeof(char), strlen(profile->version), profileFile); + fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END), profileFile); + fwrite("\n", sizeof(char), 1, profileFile); + + for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) + { + void* serializedScore; + unsigned int serializedScoreSize; + + fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile); + + fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE), profileFile); + fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile); + fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char), strlen(DS_TAG_DEVICE_TYPE_END), profileFile); + + switch (profile->devices[i].type) + { + case DS_DEVICE_NATIVE_CPU: + { + // There's no need to emit a device name for the native CPU device. + /* + fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile); + fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char),strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile); + fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile); + */ + } + break; + case DS_DEVICE_OPENCL_DEVICE: + { + fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile); + fwrite(profile->devices[i].oclDeviceName, sizeof(char), strlen(profile->devices[i].oclDeviceName), profileFile); + fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile); + + fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile); + fwrite(profile->devices[i].oclDriverVersion, sizeof(char), strlen(profile->devices[i].oclDriverVersion), profileFile); + fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile); + } + break; + default: + break; + }; + + fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile); + status = serializer(profile->devices + i, &serializedScore, &serializedScoreSize); + if (status == DS_SUCCESS && serializedScore != NULL && serializedScoreSize > 0) + { + fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile); + free(serializedScore); + } + fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END), profileFile); + fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END), profileFile); + fwrite("\n", sizeof(char), 1, profileFile); + } + fclose(profileFile); + } + return status; +} + + +inline ds_status readProFile(const char* fileName, char** content, size_t* contentSize) +{ + FILE* input = NULL; + size_t size = 0; + char* binary = NULL; + long pos = -1; + + *contentSize = 0; + *content = NULL; + + input = fopen(fileName, "rb"); + if (input == NULL) + { + return DS_FILE_ERROR; + } + + fseek(input, 0L, SEEK_END); + pos = ftell(input); + if (pos < 0) + { + fclose(input); + return DS_FILE_ERROR; + } + + size = pos; + rewind(input); + binary = (char*)malloc(size); + if (binary == NULL) + { + fclose(input); + return DS_FILE_ERROR; + } + size_t bytesRead = fread(binary, sizeof(char), size, input); + (void) bytesRead; // avoid warning + fclose(input); + + *contentSize = size; + *content = binary; + return DS_SUCCESS; +} + + +inline const char* findString(const char* contentStart, const char* contentEnd, const char* string) +{ + size_t stringLength; + const char* currentPosition; + const char* found; + found = NULL; + stringLength = strlen(string); + currentPosition = contentStart; + for (currentPosition = contentStart; currentPosition < contentEnd; currentPosition++) + { + if (*currentPosition == string[0]) + { + if (currentPosition + stringLength < contentEnd) + { + if (strncmp(currentPosition, string, stringLength) == 0) + { + found = currentPosition; + break; + } + } + } + } + return found; +} + + +typedef ds_status(* ds_score_deserializer)(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize); +inline ds_status readProfileFromFile(ds_profile* profile, ds_score_deserializer deserializer, const char* file) +{ + + ds_status status = DS_SUCCESS; + char* contentStart = NULL; + const char* contentEnd = NULL; + size_t contentSize; + + if (profile == NULL) return DS_INVALID_PROFILE; + + status = readProFile(file, &contentStart, &contentSize); + if (status == DS_SUCCESS) + { + const char* currentPosition; + const char* dataStart; + const char* dataEnd; + size_t versionStringLength; + + contentEnd = contentStart + contentSize; + currentPosition = contentStart; + + + // parse the version string + dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION); + if (dataStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + dataStart += strlen(DS_TAG_VERSION); + + dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END); + if (dataEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + versionStringLength = strlen(profile->version); + if (versionStringLength != static_cast<size_t>(dataEnd - dataStart) + || strncmp(profile->version, dataStart, versionStringLength) != 0) + { + // version mismatch + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + currentPosition = dataEnd + strlen(DS_TAG_VERSION_END); + + // parse the device information + while (true) + { + unsigned int i; + + const char* deviceTypeStart; + const char* deviceTypeEnd; + ds_device_type deviceType; + + const char* deviceNameStart; + const char* deviceNameEnd; + + const char* deviceScoreStart; + const char* deviceScoreEnd; + + const char* deviceDriverStart; + const char* deviceDriverEnd; + + dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); + if (dataStart == NULL) + { + // nothing useful remain, quit... + break; + } + dataStart += strlen(DS_TAG_DEVICE); + dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END); + if (dataEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + // parse the device type + deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE); + if (deviceTypeStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE); + deviceTypeEnd = findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END); + if (deviceTypeEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type)); + + + // parse the device name + if (deviceType == DS_DEVICE_OPENCL_DEVICE) + { + + deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME); + if (deviceNameStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceNameStart += strlen(DS_TAG_DEVICE_NAME); + deviceNameEnd = findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END); + if (deviceNameEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + + deviceDriverStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION); + if (deviceDriverStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION); + deviceDriverEnd = findString(deviceDriverStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION_END); + if (deviceDriverEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + + // check if this device is on the system + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) + { + size_t actualDeviceNameLength; + size_t driverVersionLength; + + actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName); + driverVersionLength = strlen(profile->devices[i].oclDriverVersion); + if (actualDeviceNameLength == static_cast<size_t>(deviceNameEnd - deviceNameStart) + && driverVersionLength == static_cast<size_t>(deviceDriverEnd - deviceDriverStart) + && strncmp(profile->devices[i].oclDeviceName, deviceNameStart, actualDeviceNameLength) == 0 + && strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, driverVersionLength) == 0) + { + + deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); + if (deviceScoreStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceScoreStart += strlen(DS_TAG_SCORE); + deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); + status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); + if (status != DS_SUCCESS) + { + goto cleanup; + } + } + } + } + + } + else if (deviceType == DS_DEVICE_NATIVE_CPU) + { + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) + { + deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); + if (deviceScoreStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceScoreStart += strlen(DS_TAG_SCORE); + deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); + status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); + if (status != DS_SUCCESS) + { + goto cleanup; + } + } + } + } + + // skip over the current one to find the next device + currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END); + } + } +cleanup: + if (contentStart != NULL) free(contentStart); + if (status != DS_SUCCESS) + return status; + + // Check that all the devices present had valid cached scores. If + // not, return DS_INVALID_PROFILE and let the caller re-evaluate + // scores for present devices, and write a new profile file. + for (unsigned int i = 0; i < profile->numDevices; i++) + if (profile->devices[i].score == NULL) + return DS_INVALID_PROFILE; + + return DS_SUCCESS; +} + +inline ds_status getNumDeviceWithEmptyScore(ds_profile* profile, unsigned int* num) +{ + unsigned int i; + if (profile == NULL || num == NULL) return DS_MEMORY_ERROR; + *num = 0; + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].score == NULL) + { + (*num)++; + } + } + return DS_SUCCESS; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx new file mode 100644 index 000000000000..204ab345b5e0 --- /dev/null +++ b/opencl/source/opencl_device.cxx @@ -0,0 +1,598 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifdef _WIN32 +#include <prewin.h> +#include <postwin.h> +#elif defined __MACH__ +#include <mach/mach_time.h> +#else +#include <sys/time.h> +#endif + +#include <time.h> +#include <math.h> +#include <float.h> +#include <iostream> +#include <sstream> +#include <vector> + +#include <boost/scoped_ptr.hpp> + +#include <comphelper/random.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/openclwrapper.hxx> +#include <opencl/platforminfo.hxx> +#include <sal/log.hxx> + +#include "opencl_device.hxx" + +#define INPUTSIZE 15360 +#define OUTPUTSIZE 15360 + +#define STRINGIFY(...) #__VA_ARGS__"\n" + +#define DS_CHECK_STATUS(status, name) \ + if (CL_SUCCESS != status) \ + { \ + SAL_INFO("opencl.device", "Error code is " << status << " at " name); \ + } + +namespace OpenCLDevice { + +bool bIsInited = false; +bool bIsDeviceSelected = false; +ds_device selectedDevice; + +struct LibreOfficeDeviceScore +{ + double fTime; // small time means faster device + bool bNoCLErrors; // were there any opencl errors +}; + +struct LibreOfficeDeviceEvaluationIO +{ + std::vector<double> input0; + std::vector<double> input1; + std::vector<double> input2; + std::vector<double> input3; + std::vector<double> output; + unsigned long inputSize; + unsigned long outputSize; +}; + +struct timer +{ +#ifdef _WIN32 + LARGE_INTEGER start; +#else + long long start; +#endif +}; + +const char* source = STRINGIFY( +\n#if defined(KHR_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable +\n#elif defined(AMD_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable +\n#endif + \n + int isNan(fp_t a) { return a != a; } + fp_t fsum(fp_t a, fp_t b) { return a + b; } + + fp_t fAverage(__global fp_t* input) +{ + fp_t sum = 0; + int count = 0; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + sum = fsum(input[i], sum); + count += 1; + } + } + return sum / (fp_t)count; +} + fp_t fMin(__global fp_t* input) +{ + fp_t min = MAXFLOAT; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + min = fmin(input[i], min); + } + } + return min; +} + fp_t fSoP(__global fp_t* input0, __global fp_t* input1) +{ + fp_t sop = 0.0; + for (int i = 0; i < INPUTSIZE; i++) + { + sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]); + } + return sop; +} + __kernel void DynamicKernel( + __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3) +{ + int gid0 = get_global_id(0); + fp_t tmp0 = fAverage(input0); + fp_t tmp1 = fMin(input1) * fSoP(input2, input3); + result[gid0] = fsum(tmp0, tmp1); +} + ); + +size_t sourceSize[] = { strlen(source) }; + +/*************************************************************************/ +/* INTERNAL FUNCTIONS */ +/*************************************************************************/ +/* Timer functions - start timer */ +void timerStart(timer* mytimer) +{ +#ifdef _WIN32 + QueryPerformanceCounter(&mytimer->start); +#elif defined __MACH__ + mytimer->start = mach_absolute_time(); +#else + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; +#endif +} + +/* Timer functions - get current value */ +double timerCurrent(timer* mytimer) +{ +#ifdef _WIN32 + LARGE_INTEGER stop, frequency; + QueryPerformanceCounter(&stop); + QueryPerformanceFrequency(&frequency); + double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart); +#elif defined __MACH__ + static mach_timebase_info_data_t info = { 0, 0 }; + if (info.numer == 0) + mach_timebase_info(&info); + long long stop = mach_absolute_time(); + double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9; +#else + struct timespec s; + long long stop; + clock_gettime(CLOCK_MONOTONIC, &s); + stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; + double time = ((double)(stop - mytimer->start) / 1.0E6); +#endif + return time; +} + +/* Random number generator */ +double random(double min, double max) +{ + if (min == max) + return min; + return comphelper::rng::uniform_real_distribution(min, max); +} + +/* Populate input */ +void populateInput(LibreOfficeDeviceEvaluationIO* testData) +{ + double* input0 = &testData->input0[0]; + double* input1 = &testData->input1[0]; + double* input2 = &testData->input2[0]; + double* input3 = &testData->input3[0]; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + input0[i] = random(0, i); + input1[i] = random(0, i); + input2[i] = random(0, i); + input3[i] = random(0, i); + } +} +/* Encode score object as byte string */ +ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize) +{ + *serializedScoreSize = sizeof(LibreOfficeDeviceScore); + *serializedScore = (void*)new unsigned char[*serializedScoreSize]; + memcpy(*serializedScore, device->score, *serializedScoreSize); + return DS_SUCCESS; +} + +/* Parses byte string and stores in score object */ +ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize) +{ + // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore); + device->score = new LibreOfficeDeviceScore; + memcpy(device->score, serializedScore, serializedScoreSize); + return DS_SUCCESS; +} + +/* Releases memory held by score */ +ds_status releaseScore(void* score) +{ + if (NULL != score) + { + delete (LibreOfficeDeviceScore*)score; + } + return DS_SUCCESS; +} + +/* Evaluate devices */ +ds_status evaluateScoreForDevice(ds_device* device, void* evalData) +{ + if (DS_DEVICE_OPENCL_DEVICE == device->type) + { + /* Evaluating an OpenCL device */ + SAL_INFO("opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation..."); + cl_int clStatus; + /* Check for 64-bit float extensions */ + size_t aDevExtInfoSize = 0; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + + char* aExtInfo = new char[aDevExtInfoSize]; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + bool bKhrFp64Flag = false; + bool bAmdFp64Flag = false; + const char* buildOption = NULL; + std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE="); + std::ostringstream tmpOStrStr; + tmpOStrStr << std::dec << INPUTSIZE; + tmpStr.append(tmpOStrStr.str()); + + if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos) + { + bKhrFp64Flag = true; + //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DKHR_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_khr_fp64"); + } + else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos) + { + bAmdFp64Flag = true; + //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DAMD_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_amd_fp64"); + } + delete[] aExtInfo; + + if (!bKhrFp64Flag && !bAmdFp64Flag) + { + /* No 64-bit float support */ + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + SAL_INFO("opencl.device", "... no fp64 support"); + } + else + { + /* 64-bit float support present */ + + /* Create context and command queue */ + cl_context clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext"); + cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue"); + + /* Build program */ + cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource"); + clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram"); + if (CL_SUCCESS != clStatus) + { + /* Build program failed */ + size_t length; + char* buildLog; + clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + buildLog = (char*)malloc(length); + clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length); + SAL_INFO("opencl.device", "Build Errors:\n" << buildLog); + free(buildLog); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = false; + } + else + { + /* Build program succeeded */ + timer kernelTime; + timerStart(&kernelTime); + + /* Run kernel */ + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel"); + cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult"); + cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0"); + cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1"); + cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2"); + cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3"); + clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clResult); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult"); + clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&clInput0); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0"); + clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void*)&clInput1); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1"); + clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), (void*)&clInput2); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2"); + clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), (void*)&clInput3); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3"); + size_t globalWS[1] = { testData->outputSize }; + size_t localSize[1] = { 64 }; + clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel"); + clFinish(clQueue); + clReleaseMemObject(clInput3); + clReleaseMemObject(clInput2); + clReleaseMemObject(clInput1); + clReleaseMemObject(clInput0); + clReleaseMemObject(clResult); + clReleaseKernel(clKernel); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + } + + clReleaseProgram(clProgram); + clReleaseCommandQueue(clQueue); + clReleaseContext(clContext); + } + } + else + { + /* Evaluating an Native CPU device */ + SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation..."); + timer kernelTime; + timerStart(&kernelTime); + + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + for (unsigned long j = 0; j < testData->outputSize; j++) + { + double fAverage = 0.0f; + double fMin = DBL_MAX; + double fSoP = 0.0f; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + fAverage += testData->input0[i]; + fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]); + fSoP += testData->input2[i] * testData->input3[i]; + } + fAverage /= testData->inputSize; + testData->output[j] = fAverage + (fMin * fSoP); + } + + // InterpretTail - the S/W fallback is nothing like as efficient + // as any good openCL implementation: no SIMD, tons of branching + // in the inner loops etc. Generously characterise it as only 10x + // slower than the above. + float fInterpretTailFactor = 10.0; + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + + ((LibreOfficeDeviceScore*)device->score)->fTime *= fInterpretTailFactor; + } + return DS_SUCCESS; +} + +/* Pick best device */ +ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx) +{ + double bestScore = DBL_MAX; + *bestDeviceIdx = -1; + + for (unsigned int d = 0; d < profile->numDevices; d++) + { + ds_device device = profile->devices[d]; + LibreOfficeDeviceScore *pScore = (LibreOfficeDeviceScore*)device.score; + + // Check blacklist and whitelist for actual devices + if (device.type == DS_DEVICE_OPENCL_DEVICE) + { + // There is a silly impedance mismatch here. Why do we + // need two different ways to describe an OpenCL platform + // and an OpenCL device driver? + + OpenCLPlatformInfo aPlatform; + OpenCLDeviceInfo aDevice; + + // We know that only the below fields are used by checkForKnownBadCompilers() + aPlatform.maVendor = OUString(device.oclPlatformVendor, strlen(device.oclPlatformVendor), RTL_TEXTENCODING_UTF8); + aDevice.maName = OUString(device.oclDeviceName, strlen(device.oclDeviceName), RTL_TEXTENCODING_UTF8); + aDevice.maDriver = OUString(device.oclDriverVersion, strlen(device.oclDriverVersion), RTL_TEXTENCODING_UTF8); + + // If blacklisted or not whitelisted, ignore it + if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice)) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " is blacklisted or not whitelisted"); + pScore->fTime = DBL_MAX; + pScore->bNoCLErrors = true; + } + } + + double fScore = DBL_MAX; + if (pScore) + { + fScore = pScore->fTime; + } + else + { + SAL_INFO("opencl.device", "Unusual null score"); + } + + if (DS_DEVICE_OPENCL_DEVICE == device.type) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore); + } + else + { + SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore); + } + if (fScore < bestScore) + { + bestScore = fScore; + *bestDeviceIdx = d; + } + } + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type) + { + SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native)."); + } + + return DS_SUCCESS; +} + +/* Return device ID for matching device name */ +int matchDevice(ds_profile* profile, char* deviceName) +{ + int deviceMatch = -1; + for (unsigned int d = 0; d < profile->numDevices - 1; d++) + { + if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d; + } + if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1; + return deviceMatch; +} + +/*************************************************************************/ +/* EXTERNAL FUNCTIONS */ +/*************************************************************************/ +ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection) +{ + /* Run only if device is not yet selected */ + if (!bIsDeviceSelected || bForceSelection) + { + /* Setup */ + ds_status status; + ds_profile* profile = NULL; + status = initDSProfile(&profile, "LibreOffice v0.1"); + + if (!profile) + { + // failed to initialize profile. + selectedDevice.type = DS_DEVICE_NATIVE_CPU; + return selectedDevice; + } + + /* Try reading scores from file */ + std::string tmpStr(sProfilePath); + const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str(); + if (!bForceSelection) + { + status = readProfileFromFile(profile, deserializeScore, fileName); + } + else + { + status = DS_INVALID_PROFILE; + SAL_INFO("opencl.device", "Performing forced profiling."); + } + if (DS_SUCCESS != status) + { + if (!bForceSelection) + { + SAL_INFO("opencl.device", "Profile file not available (" << fileName << "); performing profiling."); + } + + /* Populate input data for micro-benchmark */ + boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO); + testData->inputSize = INPUTSIZE; + testData->outputSize = OUTPUTSIZE; + testData->input0.resize(testData->inputSize); + testData->input1.resize(testData->inputSize); + testData->input2.resize(testData->inputSize); + testData->input3.resize(testData->inputSize); + testData->output.resize(testData->outputSize); + populateInput(testData.get()); + + /* Perform evaluations */ + unsigned int numUpdates; + status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void*)testData.get(), &numUpdates); + + if (DS_SUCCESS == status) + { + /* Write scores to file */ + status = writeProfileToFile(profile, serializeScore, fileName); + if (DS_SUCCESS == status) + { + SAL_INFO("opencl.device", "Scores written to file (" << fileName << ")."); + } + else + { + SAL_INFO("opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Profile read from file (" << fileName << ")."); + } + + /* Pick best device */ + int bestDeviceIdx; + pickBestDevice(profile, &bestDeviceIdx); + + /* Overide if necessary */ + char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE"); + if (NULL != overrideDeviceStr) + { + int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr); + if (-1 != overrideDeviceIdx) + { + SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + bestDeviceIdx = overrideDeviceIdx; + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type) + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native)."); + } + } + else + { + SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + } + } + + /* Final device selection */ + selectedDevice = profile->devices[bestDeviceIdx]; + bIsDeviceSelected = true; + + /* Release profile */ + releaseDSProfile(profile, releaseScore); + } + return selectedDevice; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx new file mode 100644 index 000000000000..86ba6cd3a4e3 --- /dev/null +++ b/opencl/source/openclwrapper.cxx @@ -0,0 +1,804 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <config_folders.h> + +#include "opencl_device.hxx" + +#include <comphelper/string.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/openclwrapper.hxx> +#include <osl/file.hxx> +#include <rtl/bootstrap.hxx> +#include <rtl/digest.h> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.hxx> +#include <sal/config.h> + +#include <boost/scoped_array.hpp> +#include <unicode/regex.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <cmath> + +#ifdef _WIN32 +#include <prewin.h> +#include <postwin.h> +#define OPENCL_DLL_NAME "OpenCL.dll" +#elif defined(MACOSX) +#define OPENCL_DLL_NAME NULL +#else +#define OPENCL_DLL_NAME "libOpenCL.so" +#endif + +#define DEVICE_NAME_LENGTH 1024 +#define DRIVER_VERSION_LENGTH 1024 +#define PLATFORM_VERSION_LENGTH 1024 + +using namespace std; + +namespace opencl { + +GPUEnv OpenCLDevice::gpuEnv; +bool OpenCLDevice::bIsInited = false; + +namespace { + +OString generateMD5(const void* pData, size_t length) +{ + sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5]; + rtlDigestError aError = rtl_digest_MD5(pData, length, + pBuffer, RTL_DIGEST_LENGTH_MD5); + SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed"); + + OStringBuffer aBuffer; + const char* pString = "0123456789ABCDEF"; + for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i) + { + sal_uInt8 val = pBuffer[i]; + aBuffer.append(pString[val/16]); + aBuffer.append(pString[val%16]); + } + return aBuffer.makeStringAndClear(); +} + +OString getCacheFolder() +{ + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + + osl::Directory::create(url); + + return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8); +} + +} + +OString OpenCLDevice::maCacheFolder = getCacheFolder(); + +void OpenCLDevice::setKernelEnv( KernelEnv *envInfo ) +{ + envInfo->mpkContext = gpuEnv.mpContext; + envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; + envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; +} + +namespace { + +OString createFileName(cl_device_id deviceId, const char* clFileName) +{ + OString fileName(clFileName); + sal_Int32 nIndex = fileName.lastIndexOf(".cl"); + if(nIndex > 0) + fileName = fileName.copy(0, nIndex); + + char deviceName[DEVICE_NAME_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + + char driverVersion[DRIVER_VERSION_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DRIVER_VERSION, + sizeof(driverVersion), driverVersion, NULL); + + cl_platform_id platformId; + clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + char platformVersion[PLATFORM_VERSION_LENGTH] = {0}; + clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion), + platformVersion, NULL); + + // create hash for deviceName + driver version + platform version + OString aString = OString(deviceName) + driverVersion + platformVersion; + OString aHash = generateMD5(aString.getStr(), aString.getLength()); + + return OpenCLDevice::maCacheFolder + fileName + "-" + + aHash + ".bin"; +} + +} + +std::vector<boost::shared_ptr<osl::File> > OpenCLDevice::binaryGenerated( const char * clFileName, cl_context context ) +{ + size_t numDevices=0; + + std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles; + cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + 0, NULL, &numDevices ); + numDevices /= sizeof(numDevices); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + // grab the handles to all of the devices in the context. + boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]); + clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL ); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + for ( size_t i = 0; i < numDevices; i++ ) + { + if ( pArryDevsID[i] != 0 ) + { + OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName); + osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8)); + if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None) + { + aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile)); + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success"); + } + else + { + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL"); + delete pNewFile; + break; + } + } + } + + return aGeneratedFiles; +} + +bool OpenCLDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes ) +{ + osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = file.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + file.write( binary, numBytes, nBytesWritten ); + + assert(numBytes == nBytesWritten); + + return true; +} + +bool OpenCLDevice::generatBinFromKernelSource( cl_program program, const char * clFileName ) +{ + cl_uint numDevices; + + cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + std::vector<cl_device_id> pArryDevsID(numDevices); + /* grab the handles to all of the devices in the program. */ + clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* figure out the sizes of each of the binaries. */ + std::vector<size_t> binarySizes(numDevices); + + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, &binarySizes[0], NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* copy over all of the generated binaries. */ + boost::scoped_array<char*> binaries(new char*[numDevices]); + + for ( size_t i = 0; i < numDevices; i++ ) + { + if ( binarySizes[i] != 0 ) + { + binaries[i] = new char[binarySizes[i]]; + } + else + { + binaries[i] = NULL; + } + } + + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, binaries.get(), NULL ); + CHECK_OPENCL(clStatus,"clGetProgramInfo"); + + /* dump out each binary into its own separate file. */ + for ( size_t i = 0; i < numDevices; i++ ) + { + + if ( binarySizes[i] != 0 ) + { + OString fileName = createFileName(pArryDevsID[i], clFileName); + if ( !writeBinaryToFile( fileName, + binaries[i], binarySizes[i] ) ) + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL"); + else + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success"); + } + } + + // Release all resouces and memory + for ( size_t i = 0; i < numDevices; i++ ) + { + delete[] binaries[i]; + } + + return true; +} + +bool OpenCLDevice::initOpenCLAttr( OpenCLEnv * env ) +{ + if ( gpuEnv.mnIsUserCreated ) + return true; + + gpuEnv.mpContext = env->mpOclContext; + gpuEnv.mpPlatformID = env->mpOclPlatformID; + gpuEnv.mpDevID = env->mpOclDevsID; + gpuEnv.mpCmdQueue = env->mpOclCmdQueue; + + gpuEnv.mnIsUserCreated = 1; + + return false; +} + +void OpenCLDevice::releaseOpenCLEnv( GPUEnv *gpuInfo ) +{ + if ( !bIsInited ) + { + return; + } + + if ( gpuEnv.mpCmdQueue ) + { + clReleaseCommandQueue( gpuEnv.mpCmdQueue ); + gpuEnv.mpCmdQueue = NULL; + } + if ( gpuEnv.mpContext ) + { + clReleaseContext( gpuEnv.mpContext ); + gpuEnv.mpContext = NULL; + } + bIsInited = false; + gpuInfo->mnIsUserCreated = 0; + free( gpuInfo->mpArryDevsID ); + + return; +} + +namespace { + +bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx) +{ + cl_int clStatus; + //char options[512]; + // create a cl program executable for all the devices specified + if (!gpuInfo->mnIsUserCreated) + { + clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, + buildOption, NULL, NULL); + } + else + { + clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), + buildOption, NULL, NULL); + } + + if ( clStatus != CL_SUCCESS ) + { + size_t length; + if ( !gpuInfo->mnIsUserCreated ) + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + } + else + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + } + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + boost::scoped_array<char> buildLog(new char[length]); + if ( !gpuInfo->mnIsUserCreated ) + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length ); + } + else + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length ); + } + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + OString aBuildLogFileURL = OpenCLDevice::maCacheFolder + "kernel-build.log"; + osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = aBuildLogFile.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + aBuildLogFile.write( buildLog.get(), length, nBytesWritten ); + + return false; + } + + return true; +} + +} + +bool OpenCLDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx) +{ + size_t numDevices; + cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + 0, NULL, &numDevices ); + numDevices /= sizeof(numDevices); + CHECK_OPENCL( clStatus, "clGetContextInfo" ); + + std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated( + filename, gpuInfo->mpContext ); + + if (aGeneratedFiles.size() == numDevices) + { + boost::scoped_array<size_t> length(new size_t[numDevices]); + boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]); + for(size_t i = 0; i < numDevices; ++i) + { + sal_uInt64 nSize; + aGeneratedFiles[i]->getSize(nSize); + unsigned char* binary = new unsigned char[nSize]; + sal_uInt64 nBytesRead; + aGeneratedFiles[i]->read(binary, nSize, nBytesRead); + if(nSize != nBytesRead) + assert(false); + + length[i] = nBytesRead; + + pBinary[i] = binary; + } + + // grab the handles to all of the devices in the context. + boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]); + clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL ); + + if(clStatus != CL_SUCCESS) + { + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + return false; + } + + cl_int binary_status; + + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices, + pArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(), + &binary_status, &clStatus ); + if(clStatus != CL_SUCCESS) + { + // something went wrong, fall back to compiling from source + return false; + } + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + } + + if ( !gpuInfo->mpArryPrograms[idx] ) + { + return false; + } + return buildProgram(buildOption, gpuInfo, idx); +} + +bool OpenCLDevice::initOpenCLRunEnv( int argc ) +{ + if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) ) + return true; + + if ( !bIsInited ) + { + if ( !gpuEnv.mnIsUserCreated ) + memset( &gpuEnv, 0, sizeof(gpuEnv) ); + + //initialize devices, context, command_queue + bool status = initOpenCLRunEnv( &gpuEnv ); + if ( status ) + { + return true; + } + //initialize program, kernelName, kernelCount + if( getenv( "SC_FLOAT" ) ) + { + gpuEnv.mnKhrFp64Flag = false; + gpuEnv.mnAmdFp64Flag = false; + } + if( gpuEnv.mnKhrFp64Flag ) + { + SAL_INFO("opencl", "Use Khr double"); + } + else if( gpuEnv.mnAmdFp64Flag ) + { + SAL_INFO("opencl", "Use AMD double type"); + } + else + { + SAL_INFO("opencl", "USE float type"); + } + bIsInited = true; + } + return false; +} + +namespace { + +void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64) +{ + bKhrFp64 = false; + bAmdFp64 = false; + + // Check device extensions for double type + size_t aDevExtInfoSize = 0; + + cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize ); + if( clStatus != CL_SUCCESS ) + return; + + boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]); + + clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, + sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL); + + if( clStatus != CL_SUCCESS ) + return; + + if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) ) + { + bKhrFp64 = true; + } + else + { + // Check if cl_amd_fp64 extension is supported + if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) ) + bAmdFp64 = true; + } +} + +} + +bool OpenCLDevice::initOpenCLRunEnv( GPUEnv *gpuInfo ) +{ + bool bKhrFp64 = false; + bool bAmdFp64 = false; + + checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64); + + gpuInfo->mnKhrFp64Flag = bKhrFp64; + gpuInfo->mnAmdFp64Flag = bAmdFp64; + + return false; +} + +namespace { + +// based on crashes and hanging during kernel compilation +void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo) +{ + OpenCLDeviceInfo aDeviceInfo; + aDeviceInfo.device = aDeviceId; + + char pName[DEVICE_NAME_LENGTH]; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maName = OUString::createFromAscii(pName); + + char pVendor[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_ulong nMemSize; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnMemory = nMemSize; + + cl_uint nClockFrequency; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnFrequency = nClockFrequency; + + cl_uint nComputeUnits; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL); + if(nState != CL_SUCCESS) + return; + + char pDriver[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL); + + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maDriver = OUString::createFromAscii(pDriver); + + bool bKhrFp64 = false; + bool bAmdFp64 = false; + checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64); + + // only list devices that support double + if(!bKhrFp64 && !bAmdFp64) + return; + + aDeviceInfo.mnComputeUnits = nComputeUnits; + + if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo)) + rPlatformInfo.maDevices.push_back(aDeviceInfo); +} + +bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo) +{ + rPlatformInfo.platform = nPlatformId; + char pName[64]; + cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64, + pName, NULL); + if(nState != CL_SUCCESS) + return false; + rPlatformInfo.maName = OUString::createFromAscii(pName); + + char pVendor[64]; + nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64, + pVendor, NULL); + if(nState != CL_SUCCESS) + return false; + + rPlatformInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_uint nDevices; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices); + if(nState != CL_SUCCESS) + return false; + + // memory leak that does not matter + // memory is stored in static variable that lives through the whole program + cl_device_id* pDevices = new cl_device_id[nDevices]; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL); + if(nState != CL_SUCCESS) + return false; + + for(size_t i = 0; i < nDevices; ++i) + { + createDeviceInfo(pDevices[i], rPlatformInfo); + } + + return true; +} + +} + +const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo() +{ + static std::vector<OpenCLPlatformInfo> aPlatforms; + if(!aPlatforms.empty()) + return aPlatforms; + + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return aPlatforms; + + cl_uint nPlatforms; + cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms); + + if(nState != CL_SUCCESS) + return aPlatforms; + + // memory leak that does not matter, + // memory is stored in static instance aPlatforms + cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms]; + nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL); + + if(nState != CL_SUCCESS) + return aPlatforms; + + for(size_t i = 0; i < nPlatforms; ++i) + { + OpenCLPlatformInfo aPlatformInfo; + if(createPlatformInfo(pPlatforms[i], aPlatformInfo)) + aPlatforms.push_back(aPlatformInfo); + } + + return aPlatforms; +} + +namespace { + +cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms) +{ + std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end(); + for(; it != itEnd; ++it) + { + std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end(); + for(; itr != itrEnd; ++itr) + { + OUString aDeviceId = it->maVendor + " " + itr->maName; + if(rString == aDeviceId) + { + return static_cast<cl_device_id>(itr->device); + } + } + } + + return NULL; +} + +void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId) +{ + cl_platform_id platformId; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + if(nState != CL_SUCCESS) + return; + + const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo(); + for(size_t i = 0; i < rPlatforms.size(); ++i) + { + cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform); + if(platId != platformId) + continue; + + for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j) + { + cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device); + if(id == aDeviceId) + { + rDeviceId = j; + rPlatformId = i; + return; + } + } + } +} + +} + +bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation) +{ + if(fillOpenCLInfo().empty()) + return false; + + cl_device_id pDeviceId = NULL; + if(pDevice) + pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo()); + + if(!pDeviceId || bAutoSelect) + { + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return false; + + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + OUString path; + osl::FileBase::getSystemPathFromFileURL(url,path); + OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8); + ds_device pSelectedDevice = ::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation); + pDeviceId = pSelectedDevice.oclDeviceID; + + } + + if(OpenCLDevice::gpuEnv.mpDevID == pDeviceId) + { + // we don't need to change anything + // still the same device + return pDeviceId != NULL; + } + + cl_platform_id platformId; + cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + cl_context_properties cps[3]; + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = reinterpret_cast<cl_context_properties>(platformId); + cps[2] = 0; + cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState ); + + if(nState != CL_SUCCESS || context == NULL) + { + if(context != NULL) + clReleaseContext(context); + + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + + cl_command_queue command_queue = clCreateCommandQueue( + context, pDeviceId, 0, &nState); + + if(command_queue == NULL || nState != CL_SUCCESS) + { + if(command_queue != NULL) + clReleaseCommandQueue(command_queue); + + clReleaseContext(context); + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + + OpenCLDevice::releaseOpenCLEnv(&OpenCLDevice::gpuEnv); + OpenCLEnv env; + env.mpOclPlatformID = platformId; + env.mpOclContext = context; + env.mpOclDevsID = pDeviceId; + env.mpOclCmdQueue = command_queue; + OpenCLDevice::initOpenCLAttr(&env); + + // why do we need this at all? + + // (Assuming the above question refers to the mpArryDevsID + // initialisation below.) Because otherwise the code crashes in + // initOpenCLRunEnv(). Confused? You should be. + + OpenCLDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) ); + OpenCLDevice::gpuEnv.mpArryDevsID[0] = pDeviceId; + + return !OpenCLDevice::initOpenCLRunEnv(0); +} + +void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId) +{ + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return; + + cl_device_id id = OpenCLDevice::gpuEnv.mpDevID; + findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |