From d115a235bf3ff5366d992d01fb418a3eacb9d125 Mon Sep 17 00:00:00 2001 From: Tomaž Vajngerl Date: Thu, 3 Nov 2016 23:11:18 +0100 Subject: tdf#103804 reduced register areaScale shader and detection for intel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some intel drivers crash when areaScale shader with "large" array is used. This adds a "reduced register" version of the areaScale shader. We still use the first version of the shader for other drivers and switch between the 2 implementations with a runtime detection. Change-Id: I1860f898c03b40a600eb1b41f7262719382a7171 Reviewed-on: https://gerrit.libreoffice.org/30571 Reviewed-by: Tomaž Vajngerl Tested-by: Tomaž Vajngerl --- include/vcl/opengl/OpenGLContext.hxx | 16 ++++ vcl/opengl/gdiimpl.cxx | 16 +++- vcl/opengl/scale.cxx | 10 ++- vcl/opengl/shaders/areaScaleFragmentShader.glsl | 113 ++++++++++++++++++++---- vcl/opengl/win/gdiimpl.cxx | 5 ++ 5 files changed, 138 insertions(+), 22 deletions(-) diff --git a/include/vcl/opengl/OpenGLContext.hxx b/include/vcl/opengl/OpenGLContext.hxx index dbe0ec66c280..e3e9ce0d2d83 100644 --- a/include/vcl/opengl/OpenGLContext.hxx +++ b/include/vcl/opengl/OpenGLContext.hxx @@ -52,6 +52,15 @@ struct VCL_DLLPUBLIC GLWindow virtual ~GLWindow(); }; +struct VCL_DLLPUBLIC OpenGLCapabilitySwitch +{ + bool mbLimitedShaderRegisters; + + OpenGLCapabilitySwitch() + : mbLimitedShaderRegisters(false) + {} +}; + class VCL_DLLPUBLIC OpenGLContext { friend class OpenGLTests; @@ -94,6 +103,11 @@ public: return mpRenderState; } + OpenGLCapabilitySwitch& getOpenGLCapabilitySwitch() + { + return maOpenGLCapabilitySwitch; + } + /// Is this GL context the current context ? virtual bool isCurrent(); /// Is any GL context the current context ? 
@@ -165,6 +179,8 @@ protected: OpenGLFramebuffer* mpFirstFramebuffer; OpenGLFramebuffer* mpLastFramebuffer; + OpenGLCapabilitySwitch maOpenGLCapabilitySwitch; + private: struct ProgramHash { diff --git a/vcl/opengl/gdiimpl.cxx b/vcl/opengl/gdiimpl.cxx index b1898df3e5c8..b487468f10eb 100644 --- a/vcl/opengl/gdiimpl.cxx +++ b/vcl/opengl/gdiimpl.cxx @@ -993,7 +993,11 @@ bool scaleTexture(const rtl::Reference< OpenGLContext > &xContext, int nNewWidth = nWidth / ixscale; int nNewHeight = nHeight / iyscale; - OpenGLProgram* pProgram = xContext->UseProgram("textureVertexShader", "areaScaleFragmentShader"); + OString sUseReducedRegisterVariantDefine; + if (xContext->getOpenGLCapabilitySwitch().mbLimitedShaderRegisters) + sUseReducedRegisterVariantDefine = OString("#define USE_REDUCED_REGISTER_VARIANT\n"); + + OpenGLProgram* pProgram = xContext->UseProgram("textureVertexShader", "areaScaleFragmentShader", sUseReducedRegisterVariantDefine); if (pProgram == nullptr) return false; @@ -1058,6 +1062,11 @@ void OpenGLSalGraphicsImpl::DrawTransformedTexture( // See OpenGLSalBitmap::ImplScaleArea(). bool areaScaling = false; bool fastAreaScaling = false; + + OString sUseReducedRegisterVariantDefine; + if (mpContext->getOpenGLCapabilitySwitch().mbLimitedShaderRegisters) + sUseReducedRegisterVariantDefine = OString("#define USE_REDUCED_REGISTER_VARIANT\n"); + OUString textureFragmentShader; if( ixscale >= 2 && iyscale >= 2 ) // scale ratio less than 50% { @@ -1122,7 +1131,7 @@ void OpenGLSalGraphicsImpl::DrawTransformedTexture( { if( !UseProgram( "transformedTextureVertexShader", textureFragmentShader.isEmpty() ? "maskedTextureFragmentShader" : textureFragmentShader, - "#define MASKED" ) ) + "#define MASKED\n" + sUseReducedRegisterVariantDefine)) return; mpProgram->SetTexture( "mask", aInMask ); GLfloat aMaskCoord[8]; @@ -1134,7 +1143,8 @@ void OpenGLSalGraphicsImpl::DrawTransformedTexture( else { if( !UseProgram( "transformedTextureVertexShader", - textureFragmentShader.isEmpty() ? 
"textureFragmentShader" : textureFragmentShader ) ) + textureFragmentShader.isEmpty() ? "textureFragmentShader" : textureFragmentShader, + sUseReducedRegisterVariantDefine)) return; } diff --git a/vcl/opengl/scale.cxx b/vcl/opengl/scale.cxx index 9feb933d0ed4..3e2b9d4127fc 100644 --- a/vcl/opengl/scale.cxx +++ b/vcl/opengl/scale.cxx @@ -231,8 +231,14 @@ bool OpenGLSalBitmap::ImplScaleArea( const rtl::Reference< OpenGLContext > &xCon // TODO Make sure the framebuffer is alright + OString sUseReducedRegisterVariantDefine; + if (xContext->getOpenGLCapabilitySwitch().mbLimitedShaderRegisters) + sUseReducedRegisterVariantDefine = OString("#define USE_REDUCED_REGISTER_VARIANT\n"); + OpenGLProgram* pProgram = xContext->UseProgram( "textureVertexShader", - fast ? OUString( "areaScaleFastFragmentShader" ) : OUString( "areaScaleFragmentShader" )); + fast ? OUString( "areaScaleFastFragmentShader" ) : OUString( "areaScaleFragmentShader" ), + sUseReducedRegisterVariantDefine); + if( pProgram == nullptr ) return false; @@ -281,7 +287,7 @@ bool OpenGLSalBitmap::ImplScaleArea( const rtl::Reference< OpenGLContext > &xCon ixscale = 1 / rScaleX; iyscale = 1 / rScaleY; - pProgram = xContext->UseProgram("textureVertexShader", "areaScaleFragmentShader"); + pProgram = xContext->UseProgram("textureVertexShader", "areaScaleFragmentShader", sUseReducedRegisterVariantDefine); if (pProgram == nullptr) return false; diff --git a/vcl/opengl/shaders/areaScaleFragmentShader.glsl b/vcl/opengl/shaders/areaScaleFragmentShader.glsl index c83c5e0699f2..e16133682ba2 100644 --- a/vcl/opengl/shaders/areaScaleFragmentShader.glsl +++ b/vcl/opengl/shaders/areaScaleFragmentShader.glsl @@ -7,13 +7,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#version 120 -#if __VERSION__ < 130 -int min( int a, int b ) { return a < b ? a : b; } -float min( float a, float b ) { return a < b ? 
a : b; } -#endif - -/* TODO Use textureOffset for newest version of GLSL */ +#version 130 uniform sampler2D sampler; uniform int swidth; @@ -34,23 +28,108 @@ varying vec2 mask_coord; uniform sampler2D mask; #endif +vec4 getTexel(int x, int y) +{ + vec2 offset = vec2(x * xsrcconvert, y * ysrcconvert); + vec4 texel = texture2D(sampler, offset); +#ifdef MASKED + texel.a = 1.0 - texture2D(mask, offset).r; +#endif + return texel; +} + +#ifdef USE_REDUCED_REGISTER_VARIANT + +void main(void) +{ + // Convert to pixel coordinates again. + int dx = int(tex_coord.s * xdestconvert); + int dy = int(tex_coord.t * ydestconvert); + + // Compute the range of source pixels which will make up this destination pixel. + float fsx1 = min(dx * xscale, float(swidth - 1)); + float fsx2 = min(fsx1 + xscale, float(swidth - 1)); + + float fsy1 = min(dy * yscale, float(sheight - 1)); + float fsy2 = min(fsy1 + yscale, float(sheight - 1)); + + // To whole pixel coordinates. + int xstart = int(floor(fsx1)); + int xend = int(floor(fsx2)); + + int ystart = int(floor(fsy1)); + int yend = int(floor(fsy2)); + + float xlength = fsx2 - fsx1; + float ylength = fsy2 - fsy1; + + float xContribution[3]; + xContribution[0] = (1.0 - max(0.0, fsx1 - xstart)) / xlength; + xContribution[1] = 1.0 / xlength; + xContribution[2] = (1.0 - max(0.0, (xend + 1) - fsx2)) / xlength; + + float yContribution[3]; + yContribution[0] = (1.0 - max(0.0, fsy1 - ystart)) / ylength; + yContribution[1] = 1.0 / ylength; + yContribution[2] = (1.0 - max(0.0, (yend + 1) - fsy2)) / ylength; + + vec4 sumAll = vec4(0.0, 0.0, 0.0, 0.0); + vec4 texel; + // First Y pass + { + vec4 sumX = vec4(0.0, 0.0, 0.0, 0.0); + + sumX += getTexel(xstart, ystart) * xContribution[0]; + for (int x = xstart + 1; x < xend; ++x) + { + sumX += getTexel(x, ystart) * xContribution[1]; + } + sumX += getTexel(xend, ystart) * xContribution[2]; + + sumAll += sumX * yContribution[0]; + } + + // Middle Y Passes + for (int y = ystart + 1; y < yend; ++y) + { + vec4 sumX = 
vec4(0.0, 0.0, 0.0, 0.0); + + sumX += getTexel(xstart, y) * xContribution[0]; + for (int x = xstart + 1; x < xend; ++x) + { + sumX += getTexel(x, y) * xContribution[1]; + } + sumX += getTexel(xend, y) * xContribution[2]; + + sumAll += sumX * yContribution[1]; + } + + // Last Y pass + { + vec4 sumX = vec4(0.0, 0.0, 0.0, 0.0); + + sumX += getTexel(xstart, yend) * xContribution[0]; + for (int x = xstart + 1; x < xend; ++x) + { + sumX += getTexel(x, yend) * xContribution[1]; + } + sumX += getTexel(xend, yend) * xContribution[2]; + + sumAll += sumX * yContribution[2]; + } + + gl_FragColor = sumAll; +} +#else void main(void) { // Convert to pixel coordinates again. int dx = int( tex_coord.s * xdestconvert ); int dy = int( tex_coord.t * ydestconvert ); - // Note: These values are always the same for the same X (or Y), - // so they could be precalculated in C++ and passed to the shader, - // but GLSL has limits on the size of uniforms passed to it, - // so it'd need something like texture buffer objects from newer - // GLSL versions, and it seems the hassle is not really worth it. - - // How much each column/row will contribute to the resulting pixel. - // assert( xscale <= 100 ); assert( yscale <= 100 ); float xratio[ 16 + 2 ]; float yratio[ 16 + 2 ]; - // For finding the first and last source pixel. + int xpixel[ 16 + 2 ]; int ypixel[ 16 + 2 ]; @@ -147,5 +226,5 @@ void main(void) gl_FragColor = sum; } - +#endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/opengl/win/gdiimpl.cxx b/vcl/opengl/win/gdiimpl.cxx index 6470bd093cff..93e7cb904293 100644 --- a/vcl/opengl/win/gdiimpl.cxx +++ b/vcl/opengl/win/gdiimpl.cxx @@ -684,6 +684,11 @@ bool WinOpenGLContext::ImplInit() bFirstCall = false; + static OString aVendor(reinterpret_cast<const char*>(glGetString(GL_VENDOR))); + + if (aVendor.equalsIgnoreAsciiCase("intel")) + maOpenGLCapabilitySwitch.mbLimitedShaderRegisters = true; + return true; } -- cgit