From cb309928872735c577d2b59fe9dd8690424410b5 Mon Sep 17 00:00:00 2001 From: Laszlo Nemeth Date: Mon, 25 Jul 2016 16:28:39 +0300 Subject: Need to try to avoid TDR also with NVIDIA cards on Windows 7 or earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (TDR is Timeout detection and recovery, was introduced in Vista.) Change-Id: If88f8e9e2aff2a5ffd633607ee6aebb5614c5caf Reviewed-on: https://gerrit.libreoffice.org/27523 Reviewed-by: Caolán McNamara Reviewed-by: Tor Lillqvist Reviewed-by: Jan Holesovsky Tested-by: Jan Holesovsky --- include/opencl/openclwrapper.hxx | 2 +- opencl/source/openclwrapper.cxx | 37 +++++++++++++++++++++++++++++++++++-- sc/source/core/data/formulacell.cxx | 5 ++--- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/include/opencl/openclwrapper.hxx b/include/opencl/openclwrapper.hxx index 05834004b12c..ac86beaf9908 100644 --- a/include/opencl/openclwrapper.hxx +++ b/include/opencl/openclwrapper.hxx @@ -49,7 +49,7 @@ struct OPENCL_DLLPUBLIC GPUEnv int mnCmdQueuePos; bool mnKhrFp64Flag; bool mnAmdFp64Flag; - cl_uint mnPreferredVectorWidthFloat; + bool mbNeedsTDRAvoidance; static bool isOpenCLEnabled(); }; diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx index 2551b057b1db..de1ed2649fd2 100644 --- a/opencl/source/openclwrapper.cxx +++ b/opencl/source/openclwrapper.cxx @@ -42,6 +42,10 @@ #define OPENCL_DLL_NAME "libOpenCL.so" #endif +#ifdef _WIN32_WINNT_WINBLUE +#include <VersionHelpers.h> +#endif + #define DEVICE_NAME_LENGTH 1024 #define DRIVER_VERSION_LENGTH 1024 #define PLATFORM_VERSION_LENGTH 1024 @@ -456,6 +460,8 @@ void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bA bool initOpenCLRunEnv( GPUEnv *gpuInfo ) { OpenCLZone zone; + cl_uint nPreferredVectorWidthFloat; + char pName[64]; bool bKhrFp64 = false; bool bAmdFp64 = false; @@ -465,11 +471,38 @@ bool initOpenCLRunEnv( GPUEnv *gpuInfo ) gpuInfo->mnKhrFp64Flag = bKhrFp64; gpuInfo->mnAmdFp64Flag 
= bAmdFp64; - gpuInfo->mnPreferredVectorWidthFloat = 0; + gpuInfo->mbNeedsTDRAvoidance = false; clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), - &gpuInfo->mnPreferredVectorWidthFloat, nullptr); + &nPreferredVectorWidthFloat, nullptr); + clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64, + pName, nullptr); + + bool bIsNotWinOrIsWin8OrGreater = true; + +// the Win32 SDK 8.1 deprecates GetVersionEx() +#ifdef _WIN32_WINNT_WINBLUE + bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater(); +#elif defined (_WIN32) + OSVERSIONINFO aVersionInfo; + memset( &aVersionInfo, 0, sizeof(aVersionInfo) ); + aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo ); + if (GetVersionEx( &aVersionInfo )) + { + // Windows 7 or lower? + if (aVersionInfo.dwMajorVersion < 6 || + (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2)) + bIsNotWinOrIsWin8OrGreater = false; + } +#endif + // Heuristic: Certain old low-end OpenCL implementations don't + // work for us with too large group lengths. Looking at the preferred + // float vector width seems to be a way to detect these devices, except + // the non-working NVIDIA cards on Windows older than version 8. + gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) || + ( !bIsNotWinOrIsWin8OrGreater && + OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 ); return false; } diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx index daf8f65de44f..147c097ae6bd 100644 --- a/sc/source/core/data/formulacell.cxx +++ b/sc/source/core/data/formulacell.cxx @@ -4039,9 +4039,8 @@ bool ScFormulaCell::InterpretFormulaGroup() #ifdef WNT // Heuristic: Certain old low-end OpenCL implementations don't // work for us with too large group lengths. 1000 was determined - // empirically to be a good compromise. Looking at the preferred - // float vector width seems to be a way to detect these devices. 
- if (opencl::gpuEnv.mnPreferredVectorWidthFloat == 4) + // empirically to be a good compromise. + if (opencl::gpuEnv.mbNeedsTDRAvoidance) nMaxGroupLength = 1000; #endif -- cgit