diff options
Diffstat (limited to 'sc')
-rw-r--r-- | sc/inc/arraysumfunctor.hxx | 25 | ||||
-rw-r--r-- | sc/inc/arraysumfunctorinternal.hxx | 34 | ||||
-rw-r--r-- | sc/inc/kahan.hxx | 8 | ||||
-rw-r--r-- | sc/qa/unit/functions_statistical.cxx | 6 | ||||
-rw-r--r-- | sc/source/core/tool/arraysum.hxx | 52 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumAVX.cxx | 22 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumAVX512.cxx | 33 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumSSE2.cxx | 23 |
8 files changed, 144 insertions, 59 deletions
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx index ecd428e9f037..d251b4a6f9fb 100644 --- a/sc/inc/arraysumfunctor.hxx +++ b/sc/inc/arraysumfunctor.hxx @@ -12,35 +12,17 @@ #include <cmath> #include "kahan.hxx" -#include "scdllapi.h" +#include "arraysumfunctorinternal.hxx" #include <tools/cpuid.hxx> #include <formula/errorcodes.hxx> namespace sc::op { /* Checkout available optimization options */ -SC_DLLPUBLIC extern const bool hasAVX512F; const bool hasAVX = cpuid::hasAVX(); const bool hasSSE2 = cpuid::hasSSE2(); /** - * Performs one step of the Neumanier sum between doubles - * Overwrites the summand and error - * @parma sum - * @param err - * @param value - */ -inline void sumNeumanierNormal(double& sum, double& err, const double& value) -{ - double t = sum + value; - if (std::abs(sum) >= std::abs(value)) - err += (sum - t) + value; - else - err += (value - t) + sum; - sum = t; -} - -/** * If no boosts available, Unrolled KahanSum. * Most likely to use on android. */ @@ -69,11 +51,6 @@ static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pC return 0.0; } -/* Available methods */ -SC_DLLPUBLIC KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent); -SC_DLLPUBLIC KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent); -SC_DLLPUBLIC KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent); - /** * This function task is to choose the fastest method available to perform the sum. * @param i diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx new file mode 100644 index 000000000000..a06e3fc17439 --- /dev/null +++ b/sc/inc/arraysumfunctorinternal.hxx @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include "scdllapi.h" + +namespace sc::op +{ +SC_DLLPUBLIC extern const bool hasAVX512F; + +// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any +// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy +// when merging duplicates). +struct KahanSumSimple +{ + double m_fSum; + double m_fError; +}; + +/* Available methods */ +SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent); +SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent); +SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent); + +} // namespace + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sc/inc/kahan.hxx b/sc/inc/kahan.hxx index 3404fb6d14a6..ded7bd78d70e 100644 --- a/sc/inc/kahan.hxx +++ b/sc/inc/kahan.hxx @@ -11,6 +11,8 @@ #include <cmath> +#include "arraysumfunctorinternal.hxx" + /** * This class provides LO with Kahan summation algorithm * About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm @@ -34,6 +36,12 @@ public: { } + constexpr KahanSum(const sc::op::KahanSumSimple& sum) + : m_fSum(sum.m_fSum) + , m_fError(sum.m_fError) + { + } + constexpr KahanSum(const KahanSum& fSum) = default; public: diff --git a/sc/qa/unit/functions_statistical.cxx b/sc/qa/unit/functions_statistical.cxx index 2e489d26dd0d..a034964f4923 100644 --- a/sc/qa/unit/functions_statistical.cxx +++ b/sc/qa/unit/functions_statistical.cxx @@ -37,13 +37,13 @@ void StatisticalFunctionsTest::testIntrinsicSums() double* pCurrent = summands; size_t i = 0; if (sc::op::hasAVX512F) - CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX512F(i, 9, pCurrent).get()); + CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX512F(i, 9, pCurrent)).get()); i = 0; if (sc::op::hasAVX) - CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX(i, 9, pCurrent).get()); + CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX(i, 9, pCurrent)).get()); i = 0; if (sc::op::hasSSE2) - CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeSSE2(i, 9, pCurrent).get()); + CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeSSE2(i, 9, pCurrent)).get()); i = 0; CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeUnrolled(i, 9, pCurrent).get()); } diff --git a/sc/source/core/tool/arraysum.hxx b/sc/source/core/tool/arraysum.hxx new file mode 100644 index 000000000000..62c2514182b7 --- /dev/null +++ b/sc/source/core/tool/arraysum.hxx @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#pragma once + +#include <cmath> + +namespace sc::op +{ +// Code must not be shared between different CPU instrinsics flags (e.g. in debug mode the compiler would not +// inline them, and merge the copies, keeping only the one with the most demanding CPU set that's not available otherwise). +// Put everything in a different namespace and additionally try to force inlining. +namespace LO_ARRAYSUM_SPACE +{ +#if defined _MSC_VER +#define INLINE __forceinline static +#elif defined __GNUC__ +#define INLINE __attribute__((always_inline)) static inline +#else +#define static inline +#endif + +/** + * Performs one step of the Neumanier sum between doubles + * Overwrites the summand and error + * @parma sum + * @param err + * @param value + */ +INLINE void sumNeumanierNormal(double& sum, double& err, const double& value) +{ + double t = sum + value; + if (std::abs(sum) >= std::abs(value)) + err += (sum - t) + value; + else + err += (value - t) + sum; + sum = t; +} + +#undef INLINE + +} // end namespace LO_ARRAYSUM_SPACE +} // end namespace sc::op + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx index 49407b95dfb6..54c47780f63c 100644 --- a/sc/source/core/tool/arraysumAVX.cxx +++ b/sc/source/core/tool/arraysumAVX.cxx @@ -8,21 +8,28 @@ * */ -#include <arraysumfunctor.hxx> -#include <sal/log.hxx> +#define LO_ARRAYSUM_SPACE AVX +#include "arraysum.hxx" + +#include <arraysumfunctorinternal.hxx> + #include <tools/simd.hxx> #include <tools/simdsupport.hxx> +#include <cstdlib> + namespace sc::op { #ifdef LO_AVX_AVAILABLE // Old processors -const __m256d ANNULATE_SIGN_BIT = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF)); +using namespace AVX; /** Kahan sum with AVX. */ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value) { + const __m256d ANNULATE_SIGN_BIT + = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF)); // Temporal parameter __m256d t = _mm256_add_pd(sum, value); // Absolute value of the total sum @@ -45,7 +52,7 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value) /** Execute Kahan sum with AVX. */ -KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent) +KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent) { #ifdef LO_AVX_AVAILABLE // Make sure we don't fall out of bounds. @@ -97,15 +104,14 @@ KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent) sumNeumanierNormal(sums[0], errs[0], errs[2]); // Store result - return KahanSum(sums[0], errs[0]); + return { sums[0], errs[0] }; } - return 0.0; + return { 0.0, 0.0 }; #else - SAL_WARN("sc", "Failed to use AVX"); (void)i; (void)nSize; (void)pCurrent; - return 0.0; + abort(); #endif } diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx index 0fa49c6bccc8..f8e8de729279 100644 --- a/sc/source/core/tool/arraysumAVX512.cxx +++ b/sc/source/core/tool/arraysumAVX512.cxx @@ -8,11 +8,16 @@ * */ -#include <arraysumfunctor.hxx> -#include <sal/log.hxx> +#define LO_ARRAYSUM_SPACE AVX512 +#include "arraysum.hxx" + +#include <arraysumfunctorinternal.hxx> + #include <tools/simd.hxx> #include <tools/simdsupport.hxx> +#include <cstdlib> + /* TODO Remove this once GCC updated and AVX512 can work. */ #ifdef __GNUC__ #if __GNUC__ < 9 @@ -23,16 +28,18 @@ #endif #endif +namespace sc::op +{ #ifdef LO_AVX512F_AVAILABLE -const bool sc::op::hasAVX512F = cpuid::hasAVX512F(); +const bool hasAVX512F = cpuid::hasAVX512F(); #else -const bool sc::op::hasAVX512F = false; +const bool hasAVX512F = false; #endif -namespace sc::op -{ #ifdef LO_AVX512F_AVAILABLE // New processors +using namespace AVX512; + /** Kahan sum with AVX512. */ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value) @@ -59,7 +66,7 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value) /** Execute Kahan sum with AVX512. */ -KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) +KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) { #ifdef LO_AVX512F_AVAILABLE // New processors // Make sure we don't fall out of bounds. @@ -85,8 +92,8 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) static_assert(sizeof(double) == 8); double sums[8]; double errs[8]; - _mm512_storeu_pd(static_cast<void*>(&sums[0]), sum); - _mm512_storeu_pd(static_cast<void*>(&errs[0]), err); + _mm512_storeu_pd(&sums[0], sum); + _mm512_storeu_pd(&errs[0], err); // First Kahan & pairwise summation // 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6 @@ -112,16 +119,14 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) sumNeumanierNormal(sums[0], errs[0], errs[4]); // Return final result - return KahanSum(sums[0], errs[0]); + return { sums[0], errs[0] }; } - else - return 0.0; + return { 0.0, 0.0 }; #else - SAL_WARN("sc", "Failed to use AVX 512"); (void)i; (void)nSize; (void)pCurrent; - return 0.0; + abort(); #endif } diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx index e2ab945acc4a..95b42f868461 100644 --- a/sc/source/core/tool/arraysumSSE2.cxx +++ b/sc/source/core/tool/arraysumSSE2.cxx @@ -8,23 +8,27 @@ * */ -#include <arraysumfunctor.hxx> -#include <sal/log.hxx> +#define LO_ARRAYSUM_SPACE SSE2 +#include "arraysum.hxx" + +#include <arraysumfunctorinternal.hxx> + #include <tools/simd.hxx> #include <tools/simdsupport.hxx> -//AVX512VL + AVX512F + KNCNI +#include <cstdlib> namespace sc::op { #ifdef LO_SSE2_AVAILABLE // Old processors -const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF)); +using namespace SSE2; -/** Kahan sum with SSE4.2. +/** Kahan sum with SSE2. */ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value) { + const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF)); // Temporal parameter __m128d t = _mm_add_pd(sum, value); // Absolute value of the total sum @@ -47,7 +51,7 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value) /** Execute Kahan sum with SSE2. */ -KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent) +KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent) { #ifdef LO_SSE2_AVAILABLE // Make sure we don't fall out of bounds. @@ -113,15 +117,14 @@ KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent) sumNeumanierNormal(sums[0], errs[0], errs[1]); // Store result - return KahanSum(sums[0], errs[0]); + return { sums[0], errs[0] }; } - return 0.0; + return { 0.0, 0.0 }; #else - SAL_WARN("sc", "Failed to use SSE2"); (void)i; (void)nSize; (void)pCurrent; - return 0.0; + abort(); #endif } } |