8 files changed, 144 insertions, 59 deletions
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index ecd428e9f037..d251b4a6f9fb 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -12,35 +12,17 @@
 
 #include <cmath>
 #include "kahan.hxx"
-#include "scdllapi.h"
+#include "arraysumfunctorinternal.hxx"
 #include <tools/cpuid.hxx>
 #include <formula/errorcodes.hxx>
 
 namespace sc::op
 {
 /* Checkout available optimization options */
-SC_DLLPUBLIC extern const bool hasAVX512F;
 const bool hasAVX = cpuid::hasAVX();
 const bool hasSSE2 = cpuid::hasSSE2();
 
 /**
-  * Performs one step of the Neumanier sum between doubles
-  * Overwrites the summand and error
-  * @parma sum
-  * @param err
-  * @param value
-  */
-inline void sumNeumanierNormal(double& sum, double& err, const double& value)
-{
-    double t = sum + value;
-    if (std::abs(sum) >= std::abs(value))
-        err += (sum - t) + value;
-    else
-        err += (value - t) + sum;
-    sum = t;
-}
-
-/**
   * If no boosts available, Unrolled KahanSum.
   * Most likely to use on android.
   */
@@ -69,11 +51,6 @@ static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pC
     return 0.0;
 }
 
-/* Available methods */
-SC_DLLPUBLIC KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
-
 /**
   * This function task is to choose the fastest method available to perform the sum.
   * @param i
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
new file mode 100644
index 000000000000..a06e3fc17439
--- /dev/null
+++ b/sc/inc/arraysumfunctorinternal.hxx
@@ -0,0 +1,34 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "scdllapi.h"
+
+namespace sc::op
+{
+SC_DLLPUBLIC extern const bool hasAVX512F;
+
+// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
+// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
+// when merging duplicates).
+struct KahanSumSimple
+{
+    double m_fSum; // the sum; copied into KahanSum::m_fSum on conversion
+    double m_fError; // the error term; copied into KahanSum::m_fError on conversion
+};
+
+/* Available methods */
+SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
+SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
+SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
+
+} // namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/inc/kahan.hxx b/sc/inc/kahan.hxx
index 3404fb6d14a6..ded7bd78d70e 100644
--- a/sc/inc/kahan.hxx
+++ b/sc/inc/kahan.hxx
@@ -11,6 +11,8 @@
 
 #include <cmath>
 
+#include "arraysumfunctorinternal.hxx"
+
 /**
   * This class provides LO with Kahan summation algorithm
   * About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
@@ -34,6 +36,12 @@ public:
     {
     }
 
+    constexpr KahanSum(const sc::op::KahanSumSimple& sum) // adopts the POD sum/error pair as-is
+        : m_fSum(sum.m_fSum)
+        , m_fError(sum.m_fError)
+    {
+    }
+
     constexpr KahanSum(const KahanSum& fSum) = default;
 
 public:
diff --git a/sc/qa/unit/functions_statistical.cxx b/sc/qa/unit/functions_statistical.cxx
index 2e489d26dd0d..a034964f4923 100644
--- a/sc/qa/unit/functions_statistical.cxx
+++ b/sc/qa/unit/functions_statistical.cxx
@@ -37,13 +37,13 @@ void StatisticalFunctionsTest::testIntrinsicSums()
     double* pCurrent = summands;
     size_t i = 0;
     if (sc::op::hasAVX512F)
-        CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX512F(i, 9, pCurrent).get());
+        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX512F(i, 9, pCurrent)).get());
     i = 0;
     if (sc::op::hasAVX)
-        CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX(i, 9, pCurrent).get());
+        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX(i, 9, pCurrent)).get());
     i = 0;
     if (sc::op::hasSSE2)
-        CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeSSE2(i, 9, pCurrent).get());
+        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeSSE2(i, 9, pCurrent)).get());
     i = 0;
     CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeUnrolled(i, 9, pCurrent).get());
 }
diff --git a/sc/source/core/tool/arraysum.hxx b/sc/source/core/tool/arraysum.hxx
new file mode 100644
index 000000000000..62c2514182b7
--- /dev/null
+++ b/sc/source/core/tool/arraysum.hxx
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <cmath>
+
+namespace sc::op
+{
+// Code must not be shared between different CPU intrinsics flags (e.g. in debug mode the compiler would not
+// inline them, and merge the copies, keeping only the one with the most demanding CPU set that's not available otherwise).
+// Put everything in a different namespace and additionally try to force inlining.
+namespace LO_ARRAYSUM_SPACE
+{
+#if defined _MSC_VER
+#define INLINE __forceinline static
+#elif defined __GNUC__
+#define INLINE __attribute__((always_inline)) static inline
+#else // no known force-inline attribute: fall back to a plain internal-linkage inline hint
+#define INLINE static inline
+#endif
+
+/**
+  * Performs one step of the Neumaier ("Neumanier") compensated sum between doubles.
+  * Overwrites the running sum and the accumulated error.
+  * @param sum   running sum; replaced by sum + value
+  * @param err   accumulated rounding error; updated in place
+  * @param value summand to add
+  */
+INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
+{
+    double t = sum + value;
+    if (std::abs(sum) >= std::abs(value))
+        err += (sum - t) + value; // sum dominates: recover what rounding dropped from value
+    else
+        err += (value - t) + sum; // value dominates: recover what rounding dropped from sum
+    sum = t;
+}
+
+#undef INLINE
+
+} // end namespace LO_ARRAYSUM_SPACE
+} // end namespace sc::op
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
index 49407b95dfb6..54c47780f63c 100644
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ b/sc/source/core/tool/arraysumAVX.cxx
@@ -8,21 +8,28 @@
  *
  */
 
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE AVX
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
 #include <tools/simd.hxx>
 #include <tools/simdsupport.hxx>
 
+#include <cstdlib>
+
 namespace sc::op
 {
 #ifdef LO_AVX_AVAILABLE // Old processors
 
-const __m256d ANNULATE_SIGN_BIT = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
+using namespace AVX;
 
 /** Kahan sum with AVX.
   */
 static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
 {
+    const __m256d ANNULATE_SIGN_BIT
+        = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
     // Temporal parameter
     __m256d t = _mm256_add_pd(sum, value);
     // Absolute value of the total sum
@@ -45,7 +52,7 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
 
 /** Execute Kahan sum with AVX.
   */
-KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
 {
 #ifdef LO_AVX_AVAILABLE
     // Make sure we don't fall out of bounds.
@@ -97,15 +104,14 @@ KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent)
         sumNeumanierNormal(sums[0], errs[0], errs[2]);
 
         // Store result
-        return KahanSum(sums[0], errs[0]);
+        return { sums[0], errs[0] };
     }
-    return 0.0;
+    return { 0.0, 0.0 };
 #else
-    SAL_WARN("sc", "Failed to use AVX");
     (void)i;
     (void)nSize;
     (void)pCurrent;
-    return 0.0;
+    abort();
 #endif
 }
 
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
index 0fa49c6bccc8..f8e8de729279 100644
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ b/sc/source/core/tool/arraysumAVX512.cxx
@@ -8,11 +8,16 @@
  *
  */
 
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE AVX512
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
 #include <tools/simd.hxx>
 #include <tools/simdsupport.hxx>
 
+#include <cstdlib>
+
 /* TODO Remove this once GCC updated and AVX512 can work. */
 #ifdef __GNUC__
 #if __GNUC__ < 9
@@ -23,16 +28,18 @@
 #endif
 #endif
 
+namespace sc::op
+{
 #ifdef LO_AVX512F_AVAILABLE
-const bool sc::op::hasAVX512F = cpuid::hasAVX512F();
+const bool hasAVX512F = cpuid::hasAVX512F();
 #else
-const bool sc::op::hasAVX512F = false;
+const bool hasAVX512F = false;
 #endif
 
-namespace sc::op
-{
 #ifdef LO_AVX512F_AVAILABLE // New processors
 
+using namespace AVX512;
+
 /** Kahan sum with AVX512.
   */
 static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
@@ -59,7 +66,7 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
 
 /** Execute Kahan sum with AVX512.
   */
-KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
 {
 #ifdef LO_AVX512F_AVAILABLE // New processors
     // Make sure we don't fall out of bounds.
@@ -85,8 +92,8 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
         static_assert(sizeof(double) == 8);
         double sums[8];
         double errs[8];
-        _mm512_storeu_pd(static_cast<void*>(&sums[0]), sum);
-        _mm512_storeu_pd(static_cast<void*>(&errs[0]), err);
+        _mm512_storeu_pd(&sums[0], sum);
+        _mm512_storeu_pd(&errs[0], err);
 
         // First Kahan & pairwise summation
         // 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6
@@ -112,16 +119,14 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
         sumNeumanierNormal(sums[0], errs[0], errs[4]);
 
         // Return final result
-        return KahanSum(sums[0], errs[0]);
+        return { sums[0], errs[0] };
     }
-    else
-        return 0.0;
+    return { 0.0, 0.0 };
 #else
-    SAL_WARN("sc", "Failed to use AVX 512");
     (void)i;
     (void)nSize;
     (void)pCurrent;
-    return 0.0;
+    abort();
 #endif
 }
 
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index e2ab945acc4a..95b42f868461 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -8,23 +8,27 @@
  *
  */
 
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE SSE2
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
 #include <tools/simd.hxx>
 #include <tools/simdsupport.hxx>
 
-//AVX512VL + AVX512F + KNCNI
+#include <cstdlib>
 
 namespace sc::op
 {
 #ifdef LO_SSE2_AVAILABLE // Old processors
 
-const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
+using namespace SSE2;
 
-/** Kahan sum with SSE4.2.
+/** Kahan sum with SSE2.
   */
 static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
 {
+    const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
     // Temporal parameter
     __m128d t = _mm_add_pd(sum, value);
     // Absolute value of the total sum
@@ -47,7 +51,7 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
 
 /** Execute Kahan sum with SSE2.
   */
-KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
 {
 #ifdef LO_SSE2_AVAILABLE
     // Make sure we don't fall out of bounds.
@@ -113,15 +117,14 @@ KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
         sumNeumanierNormal(sums[0], errs[0], errs[1]);
 
         // Store result
-        return KahanSum(sums[0], errs[0]);
+        return { sums[0], errs[0] };
     }
-    return 0.0;
+    return { 0.0, 0.0 };
 #else
-    SAL_WARN("sc", "Failed to use SSE2");
     (void)i;
     (void)nSize;
     (void)pCurrent;
-    return 0.0;
+    abort();
 #endif
 }
 }