diff options
author | Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> | 2019-04-12 22:38:13 +0900 |
---|---|---|
committer | Tomaž Vajngerl <quikee@gmail.com> | 2019-04-13 04:23:01 +0200 |
commit | 9695896d8f0e3d4b2961c7a753c279a70f5bbaf2 (patch) | |
tree | 1d7286c0a0d66a727ef464fff6f8cc16493617cf /vcl/source | |
parent | 7a092e254111a2d98446e7140ef24c652c245bfa (diff) |
Improve multi-threaded scaling in BitmapScaleSuperFilter
The old approach was to calculate the number of stripes of the
bitmap per thread and later create the exact number tasks
(ScaleTask) as there are threads, where each task would process
stripes it had been given. This is needlesly complicated as the
job of a thread pool is to properly delegate the tasks between
threads. This was now changed so that we create one stripe
per ScaleTask and let the threadpool delegate the tasks to its
threads (that are available).
It also wanted to be clever and use the main thread to do the
work also, but it had a major flaw. The threadpool started to
process the tasks only when "waitUntilDone" method was called,
but the code first processed its slices and then called the
threadpool method to start processing. Because of this the
performance of scaling wasn't as good as it could be.
This behaviour was now changed so that the main thread isn't
involved in processing. It just creates the task, runs the
threadpool and waits until the tasks are finished.
Change-Id: I1e8c733bdbced8867d0a7f1190f0421a0cc3e067
Reviewed-on: https://gerrit.libreoffice.org/70668
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'vcl/source')
-rw-r--r-- | vcl/source/bitmap/BitmapScaleSuperFilter.cxx | 72 |
1 files changed, 35 insertions, 37 deletions
diff --git a/vcl/source/bitmap/BitmapScaleSuperFilter.cxx b/vcl/source/bitmap/BitmapScaleSuperFilter.cxx index ea73f3b10a04..a10bd8ccc17c 100644 --- a/vcl/source/bitmap/BitmapScaleSuperFilter.cxx +++ b/vcl/source/bitmap/BitmapScaleSuperFilter.cxx @@ -77,29 +77,32 @@ struct ScaleContext { } }; -#define SCALE_THREAD_STRIP 32 -struct ScaleRangeContext { - ScaleContext *mrCtx; - long mnStartY, mnEndY; - ScaleRangeContext( ScaleContext *rCtx, long nStartY ) - : mrCtx( rCtx ), mnStartY( nStartY ), - mnEndY( nStartY + SCALE_THREAD_STRIP ) {} -}; +constexpr long constScaleThreadStrip = 32; -typedef void (*ScaleRangeFn)(ScaleContext &rCtx, long nStartY, long nEndY); +typedef void (*ScaleRangeFn)(ScaleContext &rContext, long nStartY, long nEndY); class ScaleTask : public comphelper::ThreadTask { - ScaleRangeFn const mpFn; - std::vector< ScaleRangeContext > maStrips; + ScaleRangeFn const mpScaleRangeFunction; + ScaleContext& mrContext; + const long mnStartY; + const long mnEndY; + public: - explicit ScaleTask( const std::shared_ptr<comphelper::ThreadTaskTag>& pTag, ScaleRangeFn pFn ) - : comphelper::ThreadTask(pTag), mpFn( pFn ) {} - void push( ScaleRangeContext const &aRC ) { maStrips.push_back( aRC ); } + explicit ScaleTask(const std::shared_ptr<comphelper::ThreadTaskTag>& pTag, + ScaleRangeFn pScaleRangeFunction, + ScaleContext& rContext, + long nStartY, long nEndY) + : comphelper::ThreadTask(pTag) + , mpScaleRangeFunction(pScaleRangeFunction) + , mrContext(rContext) + , mnStartY(nStartY) + , mnEndY(nEndY) + {} + virtual void doWork() override { - for (auto const& strip : maStrips) - mpFn( *(strip.mrCtx), strip.mnStartY, strip.mnEndY ); + mpScaleRangeFunction(mrContext, mnStartY, mnEndY); } }; @@ -1026,12 +1029,11 @@ BitmapEx BitmapScaleSuperFilter::execute(BitmapEx const& rBitmap) const // We want to thread - only if there is a lot of work to do: // We work hard when there is a large destination image, or // A large source image. - bool bHorizontalWork = pReadAccess->Width() > 512 || pWriteAccess->Width() > 512; + bool bHorizontalWork = pReadAccess->Height() >= 512 && pReadAccess->Width() >= 512; bool bUseThreads = true; static bool bDisableThreadedScaling = getenv ("VCL_NO_THREAD_SCALE"); - if ( bDisableThreadedScaling || !bHorizontalWork || - nEndY - nStartY < SCALE_THREAD_STRIP ) + if (bDisableThreadedScaling || !bHorizontalWork) { SAL_INFO("vcl.gdi", "Scale in main thread"); bUseThreads = false; @@ -1044,26 +1046,22 @@ BitmapEx BitmapScaleSuperFilter::execute(BitmapEx const& rBitmap) const // partition and queue work comphelper::ThreadPool &rShared = comphelper::ThreadPool::getSharedOptimalPool(); std::shared_ptr<comphelper::ThreadTaskTag> pTag = comphelper::ThreadPool::createThreadTaskTag(); - sal_uInt32 nThreads = rShared.getWorkerCount(); - assert( nThreads > 0 ); - sal_uInt32 nStrips = ((nEndY - nStartY) + SCALE_THREAD_STRIP - 1) / SCALE_THREAD_STRIP; - sal_uInt32 nStripsPerThread = nStrips / nThreads; - SAL_INFO("vcl.gdi", "Scale in " << nStrips << " strips " << nStripsPerThread << " per thread we have " << nThreads << " CPU threads "); - long nStripY = nStartY; - for ( sal_uInt32 t = 0; t < nThreads - 1; t++ ) + + long nStripYStart = nStartY; + long nStripYEnd = nStripYStart + constScaleThreadStrip - 1; + + while (nStripYEnd < nEndY) { - std::unique_ptr<ScaleTask> pTask(new ScaleTask( pTag, pScaleRangeFn )); - for ( sal_uInt32 j = 0; j < nStripsPerThread; j++ ) - { - ScaleRangeContext aRC( &aContext, nStripY ); - pTask->push( aRC ); - nStripY += SCALE_THREAD_STRIP; - } - rShared.pushTask( std::move(pTask) ); + std::unique_ptr<ScaleTask> pTask(new ScaleTask(pTag, pScaleRangeFn, aContext, nStripYStart, nStripYEnd)); + rShared.pushTask(std::move(pTask)); + nStripYStart += constScaleThreadStrip; + nStripYEnd += constScaleThreadStrip; + } + if (nStripYStart <= nEndY) + { + std::unique_ptr<ScaleTask> pTask(new ScaleTask(pTag, pScaleRangeFn, aContext, nStripYStart, nEndY)); + rShared.pushTask(std::move(pTask)); } - // finish any remaining bits here - pScaleRangeFn( aContext, nStripY, nEndY ); - rShared.waitUntilDone(pTag); SAL_INFO("vcl.gdi", "All threaded scaling tasks complete"); } |