11 files changed, 37 insertions, 363 deletions
diff --git a/include/tools/cpuid.hxx b/include/tools/cpuid.hxx
index 1be897e84a4e..4f309ff11e96 100644
--- a/include/tools/cpuid.hxx
+++ b/include/tools/cpuid.hxx
@@ -23,6 +23,8 @@ or inline functions, otherwise their possibly emitted copies compiled
 with the CPU-specific instructions might be chosen by the linker as the copy
 to keep.
 
+Also see the note at the top of simdsupport.hxx.
+
 */
 
 namespace cpuid {
diff --git a/include/tools/simdsupport.hxx b/include/tools/simdsupport.hxx
index bc9227223da0..738b34e072db 100644
--- a/include/tools/simdsupport.hxx
+++ b/include/tools/simdsupport.hxx
@@ -8,6 +8,16 @@
  *
  */
 
+// IMPORTANT: Having CPU-specific routines turned out to be a maintenance
+// burden, because of issues such as compilers moving CPU-specific code
+// out of #ifdef-guarded code into static initialization, or our code using
+// C++ features that caused the compiler to emit CPU-specific instructions
+// (even cpuid.hxx isn't safe, see the comment there).
+// The only safe usage is CPU-specific code that's always available,
+// such as SSE2-specific code for x86_64. Do not use it for anything else
+// unless you really know what you are doing (and you have checked the git
+// history to learn from past problems).
+
 // Determine the compiler support for SIMD compiler intrinsics.
 // This changes from one compiled unit to the other, depending if
 // the support has been detected and if the compiled unit contains
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index 936ca33901ee..3c8dcb3e5085 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -206,6 +206,7 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
     sc/source/core/tool/address \
     sc/source/core/tool/adiasync \
     sc/source/core/tool/appoptio \
+    sc/source/core/tool/arraysumSSE2 \
     sc/source/core/tool/autoform \
     sc/source/core/tool/calcconfig \
     sc/source/core/tool/callform \
@@ -680,18 +681,6 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
     sc/source/ui/xmlsource/xmlsourcedlg \
 ))
 
-$(eval $(call gb_Library_add_exception_objects,sc,\
-    sc/source/core/tool/arraysumAVX512, $(CXXFLAGS_INTRINSICS_AVX512F) \
-))
-
-$(eval $(call gb_Library_add_exception_objects,sc,\
-    sc/source/core/tool/arraysumAVX, $(CXXFLAGS_INTRINSICS_AVX) \
-))
-
-$(eval $(call gb_Library_add_exception_objects,sc,\
-    sc/source/core/tool/arraysumSSE2, $(CXXFLAGS_INTRINSICS_SSE2) \
-))
-
 ifeq ($(ENABLE_FORMULA_LOGGER),TRUE)
 $(eval $(call gb_Library_add_exception_objects,sc,\
     sc/source/core/tool/formulalogger \
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index b727f5893a8c..eecfa59c3f65 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -12,16 +12,24 @@
 
 #include <cmath>
 #include "kahan.hxx"
-#include "arraysumfunctorinternal.hxx"
-#include <tools/cpuid.hxx>
 #include <formula/errorcodes.hxx>
 
 namespace sc::op
 {
-/* Checkout available optimization options */
-const bool hasAVX512F = hasAVX512FCode() && cpuid::hasAVX512F();
-const bool hasAVX = hasAVXCode() && cpuid::hasAVX();
-const bool hasSSE2 = hasSSE2Code() && cpuid::hasSSE2();
+// Check which optimization options are available.
+// Note that it turned out to be problematic to support CPU-specific code
+// that's not guaranteed to be available on that specific platform (see
+// git history). SSE2 is guaranteed on x86_64 and it is our baseline requirement
+// for x86 on Windows, so SSE2 use is hardcoded on those platforms.
+// Whenever we raise baseline to e.g. AVX, this may get
+// replaced with AVX code (get it from git history).
+// Do it similarly with other platforms.
+#if defined(X86_64) || (defined(X86) && defined(_WIN32))
+#define SC_USE_SSE2 1
+KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
+#else
+#define SC_USE_SSE2 0
+#endif
 
 /**
   * If no boosts available, Unrolled KahanSum.
@@ -60,12 +69,11 @@ static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pC
   */
 static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
 {
-    if (hasAVX512F)
-        return executeAVX512F(i, nSize, pCurrent);
-    if (hasAVX)
-        return executeAVX(i, nSize, pCurrent);
-    if (hasSSE2)
-        return executeSSE2(i, nSize, pCurrent);
+    // The implementation is selected at compile time; exactly one return is compiled in.
+#if SC_USE_SSE2
+    return executeSSE2(i, nSize, pCurrent);
+#else
     return executeUnrolled(i, nSize, pCurrent);
+#endif
 }
 
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
deleted file mode 100644
index e939dbd3037d..000000000000
--- a/sc/inc/arraysumfunctorinternal.hxx
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#pragma once
-
-#include "scdllapi.h"
-
-namespace sc::op
-{
-// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
-// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
-// when merging duplicates).
-struct KahanSumSimple
-{
-    double m_fSum;
-    double m_fError;
-};
-
-/* Available methods */
-SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
-
-SC_DLLPUBLIC bool hasAVX512FCode();
-SC_DLLPUBLIC bool hasAVXCode();
-SC_DLLPUBLIC bool hasSSE2Code();
-
-} // namespace
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/inc/kahan.hxx b/sc/inc/kahan.hxx
index 5418c8d23b35..6c84f6eeef2e 100644
--- a/sc/inc/kahan.hxx
+++ b/sc/inc/kahan.hxx
@@ -12,8 +12,6 @@
 #include <rtl/math.hxx>
 #include <cmath>
 
-#include "arraysumfunctorinternal.hxx"
-
 /**
   * This class provides LO with Kahan summation algorithm
   * About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
@@ -40,12 +38,6 @@ public:
     {
     }
 
-    constexpr KahanSum(const sc::op::KahanSumSimple& sum)
-        : m_fSum(sum.m_fSum)
-        , m_fError(sum.m_fError)
-    {
-    }
-
     constexpr KahanSum(const KahanSum& fSum) = default;
 
 public:
diff --git a/sc/qa/unit/functions_statistical.cxx b/sc/qa/unit/functions_statistical.cxx
index a034964f4923..4d97d4cc1689 100644
--- a/sc/qa/unit/functions_statistical.cxx
+++ b/sc/qa/unit/functions_statistical.cxx
@@ -1,5 +1,4 @@
 #include "functions_test.hxx"
-#include <arraysumfunctor.hxx>
 
 class StatisticalFunctionsTest : public FunctionsTest
 {
@@ -7,11 +6,9 @@ public:
     StatisticalFunctionsTest();
 
     void testStatisticalFormulasFODS();
-    void testIntrinsicSums();
 
     CPPUNIT_TEST_SUITE(StatisticalFunctionsTest);
     CPPUNIT_TEST(testStatisticalFormulasFODS);
-    CPPUNIT_TEST(testIntrinsicSums);
     CPPUNIT_TEST_SUITE_END();
 
 };
@@ -29,25 +26,6 @@ StatisticalFunctionsTest::StatisticalFunctionsTest():
 {
 }
 
-void StatisticalFunctionsTest::testIntrinsicSums()
-{
-    // Checkout SSE2, AVX and AVX512 operations
-        // Needs exactly 9 terms
-    double summands[9] = { 0, 1, 2, 3, 4, 10, 20, 2, -1 };
-    double* pCurrent = summands;
-    size_t i = 0;
-    if (sc::op::hasAVX512F)
-        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX512F(i, 9, pCurrent)).get());
-    i = 0;
-    if (sc::op::hasAVX)
-        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX(i, 9, pCurrent)).get());
-    i = 0;
-    if (sc::op::hasSSE2)
-        CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeSSE2(i, 9, pCurrent)).get());
-    i = 0;
-    CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeUnrolled(i, 9, pCurrent).get());
-}
-
 CPPUNIT_TEST_SUITE_REGISTRATION(StatisticalFunctionsTest);
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/core/tool/arraysum.hxx b/sc/source/core/tool/arraysum.hxx
index 5d227bd85a48..ce8a7f30f4dc 100644
--- a/sc/source/core/tool/arraysum.hxx
+++ b/sc/source/core/tool/arraysum.hxx
@@ -14,19 +14,6 @@
 
 namespace sc::op
 {
-// Code must not be shared between different CPU instrinsics flags (e.g. in debug mode the compiler would not
-// inline them, and merge the copies, keeping only the one with the most demanding CPU set that's not available otherwise).
-// Put everything in a different namespace and additionally try to force inlining.
-namespace LO_ARRAYSUM_SPACE
-{
-#if defined _MSC_VER
-#define INLINE __forceinline static
-#elif defined __GNUC__
-#define INLINE __attribute__((always_inline)) static inline
-#else
-#define static inline
-#endif
-
 /**
   * Performs one step of the Neumanier sum between doubles
   * Overwrites the summand and error
@@ -34,7 +21,7 @@ namespace LO_ARRAYSUM_SPACE
   * @param err
   * @param value
   */
-INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
+inline void sumNeumanierNormal(double& sum, double& err, const double& value)
 {
     double t = sum + value;
     if (fabs(sum) >= fabs(value))
@@ -44,9 +31,6 @@ INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
     sum = t;
 }
 
-#undef INLINE
-
-} // end namespace LO_ARRAYSUM_SPACE
 } // end namespace sc::op
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
deleted file mode 100644
index 4d9ee02285e7..000000000000
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ /dev/null
@@ -1,121 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- */
-
-#define LO_ARRAYSUM_SPACE AVX
-#include "arraysum.hxx"
-
-#include <arraysumfunctorinternal.hxx>
-
-#include <tools/simd.hxx>
-#include <tools/simdsupport.hxx>
-
-#include <stdlib.h>
-
-namespace sc::op
-{
-#ifdef LO_AVX_AVAILABLE
-
-bool hasAVXCode() { return true; }
-
-using namespace AVX;
-
-/** Kahan sum with AVX.
-  */
-static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
-{
-    static const __m256d ANNULATE_SIGN_BIT
-        = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
-    // Temporal parameter
-    __m256d t = _mm256_add_pd(sum, value);
-    // Absolute value of the total sum
-    __m256d asum = _mm256_and_pd(sum, ANNULATE_SIGN_BIT);
-    // Absolute value of the value to add
-    __m256d avalue = _mm256_and_pd(value, ANNULATE_SIGN_BIT);
-    // Compare the absolute values sum >= value
-    __m256d mask = _mm256_cmp_pd(asum, avalue, _CMP_GE_OQ);
-    // The following code has this form ( a - t + b)
-    // Case 1: a = sum b = value
-    // Case 2: a = value b = sum
-    __m256d a = _mm256_add_pd(_mm256_and_pd(mask, sum), _mm256_andnot_pd(mask, value));
-    __m256d b = _mm256_add_pd(_mm256_and_pd(mask, value), _mm256_andnot_pd(mask, sum));
-    err = _mm256_add_pd(err, _mm256_add_pd(_mm256_sub_pd(a, t), b));
-    // Store result
-    sum = t;
-}
-
-/** Execute Kahan sum with AVX.
-  */
-KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
-{
-    // Make sure we don't fall out of bounds.
-    // This works by sums of 8 terms.
-    // So the 8'th term is i+7
-    // If we iterate until nSize won't fall out of bounds
-    if (nSize > i + 7)
-    {
-        // Setup sums and errors as 0
-        __m256d sum1 = _mm256_setzero_pd();
-        __m256d err1 = _mm256_setzero_pd();
-        __m256d sum2 = _mm256_setzero_pd();
-        __m256d err2 = _mm256_setzero_pd();
-
-        for (; i + 7 < nSize; i += 8)
-        {
-            // Kahan sum 1
-            __m256d load1 = _mm256_loadu_pd(pCurrent);
-            sumAVX(sum1, err1, load1);
-            pCurrent += 4;
-
-            // Kahan sum 2
-            __m256d load2 = _mm256_loadu_pd(pCurrent);
-            sumAVX(sum2, err2, load2);
-            pCurrent += 4;
-        }
-
-        // Now we combine pairwise summation with Kahan summation
-
-        // sum 1 + sum 2 -> sum 1
-        sumAVX(sum1, err1, sum2);
-        sumAVX(sum1, err1, err2);
-
-        // Store results
-        double sums[4];
-        double errs[4];
-        _mm256_storeu_pd(&sums[0], sum1);
-        _mm256_storeu_pd(&errs[0], err1);
-
-        // First Kahan & pairwise summation
-        // 0+1 1+2 -> 0, 2
-        sumNeumanierNormal(sums[0], errs[0], sums[1]);
-        sumNeumanierNormal(sums[2], errs[2], sums[3]);
-        sumNeumanierNormal(sums[0], errs[0], errs[1]);
-        sumNeumanierNormal(sums[2], errs[2], errs[3]);
-
-        // 0+2 -> 0
-        sumNeumanierNormal(sums[0], errs[0], sums[2]);
-        sumNeumanierNormal(sums[0], errs[0], errs[2]);
-
-        // Store result
-        return { sums[0], errs[0] };
-    }
-    return { 0.0, 0.0 };
-}
-
-#else // LO_AVX_AVAILABLE
-
-bool hasAVXCode() { return false; }
-
-KahanSumSimple executeAVX(size_t&, size_t, const double*) { abort(); }
-
-#endif
-
-} // end namespace sc::op
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
deleted file mode 100644
index 6a3235a58e2e..000000000000
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ /dev/null
@@ -1,120 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- */
-
-#define LO_ARRAYSUM_SPACE AVX512
-#include "arraysum.hxx"
-
-#include <arraysumfunctorinternal.hxx>
-
-#include <tools/simd.hxx>
-#include <tools/simdsupport.hxx>
-
-#include <stdlib.h>
-
-namespace sc::op
-{
-#ifdef LO_AVX512F_AVAILABLE
-
-bool hasAVX512FCode() { return true; }
-
-using namespace AVX512;
-
-/** Kahan sum with AVX512.
-  */
-static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
-{
-    // Temporal parameter
-    __m512d t = _mm512_add_pd(sum, value);
-    // Absolute value of the total sum
-    __m512d asum = _mm512_abs_pd(sum);
-    // Absolute value of the value to add
-    __m512d avalue = _mm512_abs_pd(value);
-    // Compare the absolute values sum >= value
-    __mmask8 mask = _mm512_cmp_pd_mask(avalue, asum, _CMP_GE_OQ);
-    // The following code has this form ( a - t + b)
-    // Case 1: a = sum b = value
-    // Case 2: a = value b = sum
-    __m512d a = _mm512_mask_blend_pd(mask, sum, value);
-    __m512d b = _mm512_mask_blend_pd(mask, value, sum);
-    err = _mm512_add_pd(err, _mm512_add_pd(_mm512_sub_pd(a, t), b));
-    // Store result
-    sum = t;
-}
-
-/** Execute Kahan sum with AVX512.
-  */
-KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
-{
-    // Make sure we don't fall out of bounds.
-    // This works by sums of 8 terms.
-    // So the 8'th term is i+7
-    // If we iterate until nSize won't fall out of bounds
-    if (nSize > i + 7)
-    {
-        // Setup sums and errors as 0
-        __m512d sum = _mm512_setzero_pd();
-        __m512d err = _mm512_setzero_pd();
-
-        // Sum the stuff
-        for (; i + 7 < nSize; i += 8)
-        {
-            // Kahan sum
-            __m512d load = _mm512_loadu_pd(pCurrent);
-            sumAVX512(sum, err, load);
-            pCurrent += 8;
-        }
-
-        // Store result
-        static_assert(sizeof(double) == 8);
-        double sums[8];
-        double errs[8];
-        _mm512_storeu_pd(&sums[0], sum);
-        _mm512_storeu_pd(&errs[0], err);
-
-        // First Kahan & pairwise summation
-        // 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6
-        sumNeumanierNormal(sums[0], errs[0], sums[1]);
-        sumNeumanierNormal(sums[2], errs[2], sums[3]);
-        sumNeumanierNormal(sums[4], errs[4], sums[5]);
-        sumNeumanierNormal(sums[6], errs[6], sums[7]);
-        sumNeumanierNormal(sums[0], errs[0], errs[1]);
-        sumNeumanierNormal(sums[2], errs[2], errs[3]);
-        sumNeumanierNormal(sums[4], errs[4], errs[5]);
-        sumNeumanierNormal(sums[6], errs[6], errs[7]);
-
-        // Second Kahan & pairwise summation
-        // 0+2 4+6 -> 0, 4
-        sumNeumanierNormal(sums[0], errs[0], sums[2]);
-        sumNeumanierNormal(sums[4], errs[4], sums[6]);
-        sumNeumanierNormal(sums[0], errs[0], errs[2]);
-        sumNeumanierNormal(sums[4], errs[4], errs[6]);
-
-        // Third Kahan & pairwise summation
-        // 0+4 -> 0
-        sumNeumanierNormal(sums[0], errs[0], sums[4]);
-        sumNeumanierNormal(sums[0], errs[0], errs[4]);
-
-        // Return final result
-        return { sums[0], errs[0] };
-    }
-    return { 0.0, 0.0 };
-}
-
-#else // LO_AVX512F_AVAILABLE
-
-bool hasAVX512FCode() { return false; }
-
-KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); }
-
-#endif
-
-} // end namespace sc::op
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index 1a5cc2f00dfe..8542e2e2335d 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -8,24 +8,19 @@
  *
  */
 
-#define LO_ARRAYSUM_SPACE SSE2
 #include "arraysum.hxx"
 
-#include <arraysumfunctorinternal.hxx>
+#include <arraysumfunctor.hxx>
 
 #include <tools/simd.hxx>
 #include <tools/simdsupport.hxx>
 
 #include <stdlib.h>
 
+#if SC_USE_SSE2
+
 namespace sc::op
 {
-#ifdef LO_SSE2_AVAILABLE
-
-bool hasSSE2Code() { return true; }
-
-using namespace SSE2;
-
 /** Kahan sum with SSE2.
   */
 static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
@@ -51,7 +46,7 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
 
 /** Execute Kahan sum with SSE2.
   */
-KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
+KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
 {
     // Make sure we don't fall out of bounds.
     // This works by sums of 8 terms.
@@ -121,13 +116,8 @@ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
     return { 0.0, 0.0 };
 }
 
-#else // LO_SSE2_AVAILABLE
-
-bool hasSSE2Code() { return false; }
-
-KahanSumSimple executeSSE2(size_t&, size_t, const double*) { abort(); }
+} // namespace
 
 #endif
-}
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */