summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/tools/cpuid.hxx2
-rw-r--r--include/tools/simdsupport.hxx10
-rw-r--r--sc/Library_sc.mk13
-rw-r--r--sc/inc/arraysumfunctor.hxx30
-rw-r--r--sc/inc/arraysumfunctorinternal.hxx36
-rw-r--r--sc/inc/kahan.hxx8
-rw-r--r--sc/qa/unit/functions_statistical.cxx22
-rw-r--r--sc/source/core/tool/arraysum.hxx18
-rw-r--r--sc/source/core/tool/arraysumAVX.cxx121
-rw-r--r--sc/source/core/tool/arraysumAVX512.cxx120
-rw-r--r--sc/source/core/tool/arraysumSSE2.cxx20
11 files changed, 37 insertions, 363 deletions
diff --git a/include/tools/cpuid.hxx b/include/tools/cpuid.hxx
index 1be897e84a4e..4f309ff11e96 100644
--- a/include/tools/cpuid.hxx
+++ b/include/tools/cpuid.hxx
@@ -23,6 +23,8 @@ or inline functions, otherwise their possibly emitted copies compiled
with the CPU-specific instructions might be chosen by the linker as the copy
to keep.
+Also see the note at the top of simdsupport.hxx .
+
*/
namespace cpuid {
diff --git a/include/tools/simdsupport.hxx b/include/tools/simdsupport.hxx
index bc9227223da0..738b34e072db 100644
--- a/include/tools/simdsupport.hxx
+++ b/include/tools/simdsupport.hxx
@@ -8,6 +8,16 @@
*
*/
+// IMPORTANT: Having CPU-specific routines turned out to be a maintenance
+// problem, because of various problems such as compilers moving CPU-specific
+// code out of #ifdef code into static initialization or our code using C++
+// features that caused the compiler to emit code that used CPU-specific
+// instructions (even cpuid.hxx isn't safe, see the comment there).
+// The only safe usage is using CPU-specific code that's always available,
+// such as SSE2-specific code for x86_64. Do not use for anything else
+// unless you really know what you are doing (and you check git history
+// to learn from past problems).
+
// Determine the compiler support for SIMD compiler intrinsics.
// This changes from one compiled unit to the other, depending if
// the support has been detected and if the compiled unit contains
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index 936ca33901ee..3c8dcb3e5085 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -206,6 +206,7 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
sc/source/core/tool/address \
sc/source/core/tool/adiasync \
sc/source/core/tool/appoptio \
+ sc/source/core/tool/arraysumSSE2 \
sc/source/core/tool/autoform \
sc/source/core/tool/calcconfig \
sc/source/core/tool/callform \
@@ -680,18 +681,6 @@ $(eval $(call gb_Library_add_exception_objects,sc,\
sc/source/ui/xmlsource/xmlsourcedlg \
))
-$(eval $(call gb_Library_add_exception_objects,sc,\
- sc/source/core/tool/arraysumAVX512, $(CXXFLAGS_INTRINSICS_AVX512F) \
-))
-
-$(eval $(call gb_Library_add_exception_objects,sc,\
- sc/source/core/tool/arraysumAVX, $(CXXFLAGS_INTRINSICS_AVX) \
-))
-
-$(eval $(call gb_Library_add_exception_objects,sc,\
- sc/source/core/tool/arraysumSSE2, $(CXXFLAGS_INTRINSICS_SSE2) \
-))
-
ifeq ($(ENABLE_FORMULA_LOGGER),TRUE)
$(eval $(call gb_Library_add_exception_objects,sc,\
sc/source/core/tool/formulalogger \
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index b727f5893a8c..eecfa59c3f65 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -12,16 +12,25 @@
#include <cmath>
#include "kahan.hxx"
-#include "arraysumfunctorinternal.hxx"
-#include <tools/cpuid.hxx>
+#include "arraysumfunctor.hxx"
#include <formula/errorcodes.hxx>
namespace sc::op
{
-/* Checkout available optimization options */
-const bool hasAVX512F = hasAVX512FCode() && cpuid::hasAVX512F();
-const bool hasAVX = hasAVXCode() && cpuid::hasAVX();
-const bool hasSSE2 = hasSSE2Code() && cpuid::hasSSE2();
+// Checkout available optimization options.
+// Note that it turned out to be problematic to support CPU-specific code
+// that's not guaranteed to be available on that specific platform (see
+// git history). SSE2 is guaranteed on x86_64 and it is our baseline requirement
+// for x86 on Windows, so SSE2 use is hardcoded on those platforms.
+// Whenever we raise baseline to e.g. AVX, this may get
+// replaced with AVX code (get it from git history).
+// Do it similarly with other platforms.
+#if defined(X86_64) || (defined(X86) && defined(_WIN32))
+#define SC_USE_SSE2 1
+KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
+#else
+#define SC_USE_SSE2 0
+#endif
/**
* If no boosts available, Unrolled KahanSum.
@@ -60,12 +69,9 @@ static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pC
*/
static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
{
- if (hasAVX512F)
- return executeAVX512F(i, nSize, pCurrent);
- if (hasAVX)
- return executeAVX(i, nSize, pCurrent);
- if (hasSSE2)
- return executeSSE2(i, nSize, pCurrent);
+#if SC_USE_SSE2
+ return executeSSE2(i, nSize, pCurrent);
+#endif
return executeUnrolled(i, nSize, pCurrent);
}
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
deleted file mode 100644
index e939dbd3037d..000000000000
--- a/sc/inc/arraysumfunctorinternal.hxx
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#pragma once
-
-#include "scdllapi.h"
-
-namespace sc::op
-{
-// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
-// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
-// when merging duplicates).
-struct KahanSumSimple
-{
- double m_fSum;
- double m_fError;
-};
-
-/* Available methods */
-SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
-
-SC_DLLPUBLIC bool hasAVX512FCode();
-SC_DLLPUBLIC bool hasAVXCode();
-SC_DLLPUBLIC bool hasSSE2Code();
-
-} // namespace
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/inc/kahan.hxx b/sc/inc/kahan.hxx
index 5418c8d23b35..6c84f6eeef2e 100644
--- a/sc/inc/kahan.hxx
+++ b/sc/inc/kahan.hxx
@@ -12,8 +12,6 @@
#include <rtl/math.hxx>
#include <cmath>
-#include "arraysumfunctorinternal.hxx"
-
/**
* This class provides LO with Kahan summation algorithm
* About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
@@ -40,12 +38,6 @@ public:
{
}
- constexpr KahanSum(const sc::op::KahanSumSimple& sum)
- : m_fSum(sum.m_fSum)
- , m_fError(sum.m_fError)
- {
- }
-
constexpr KahanSum(const KahanSum& fSum) = default;
public:
diff --git a/sc/qa/unit/functions_statistical.cxx b/sc/qa/unit/functions_statistical.cxx
index a034964f4923..4d97d4cc1689 100644
--- a/sc/qa/unit/functions_statistical.cxx
+++ b/sc/qa/unit/functions_statistical.cxx
@@ -1,5 +1,4 @@
#include "functions_test.hxx"
-#include <arraysumfunctor.hxx>
class StatisticalFunctionsTest : public FunctionsTest
{
@@ -7,11 +6,9 @@ public:
StatisticalFunctionsTest();
void testStatisticalFormulasFODS();
- void testIntrinsicSums();
CPPUNIT_TEST_SUITE(StatisticalFunctionsTest);
CPPUNIT_TEST(testStatisticalFormulasFODS);
- CPPUNIT_TEST(testIntrinsicSums);
CPPUNIT_TEST_SUITE_END();
};
@@ -29,25 +26,6 @@ StatisticalFunctionsTest::StatisticalFunctionsTest():
{
}
-void StatisticalFunctionsTest::testIntrinsicSums()
-{
- // Checkout SSE2, AVX and AVX512 operations
- // Needs exactly 9 terms
- double summands[9] = { 0, 1, 2, 3, 4, 10, 20, 2, -1 };
- double* pCurrent = summands;
- size_t i = 0;
- if (sc::op::hasAVX512F)
- CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX512F(i, 9, pCurrent)).get());
- i = 0;
- if (sc::op::hasAVX)
- CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX(i, 9, pCurrent)).get());
- i = 0;
- if (sc::op::hasSSE2)
- CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeSSE2(i, 9, pCurrent)).get());
- i = 0;
- CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeUnrolled(i, 9, pCurrent).get());
-}
-
CPPUNIT_TEST_SUITE_REGISTRATION(StatisticalFunctionsTest);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/core/tool/arraysum.hxx b/sc/source/core/tool/arraysum.hxx
index 5d227bd85a48..ce8a7f30f4dc 100644
--- a/sc/source/core/tool/arraysum.hxx
+++ b/sc/source/core/tool/arraysum.hxx
@@ -14,19 +14,6 @@
namespace sc::op
{
-// Code must not be shared between different CPU instrinsics flags (e.g. in debug mode the compiler would not
-// inline them, and merge the copies, keeping only the one with the most demanding CPU set that's not available otherwise).
-// Put everything in a different namespace and additionally try to force inlining.
-namespace LO_ARRAYSUM_SPACE
-{
-#if defined _MSC_VER
-#define INLINE __forceinline static
-#elif defined __GNUC__
-#define INLINE __attribute__((always_inline)) static inline
-#else
-#define static inline
-#endif
-
/**
* Performs one step of the Neumanier sum between doubles
* Overwrites the summand and error
@@ -34,7 +21,7 @@ namespace LO_ARRAYSUM_SPACE
* @param err
* @param value
*/
-INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
+inline void sumNeumanierNormal(double& sum, double& err, const double& value)
{
double t = sum + value;
if (fabs(sum) >= fabs(value))
@@ -44,9 +31,6 @@ INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
sum = t;
}
-#undef INLINE
-
-} // end namespace LO_ARRAYSUM_SPACE
} // end namespace sc::op
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
deleted file mode 100644
index 4d9ee02285e7..000000000000
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ /dev/null
@@ -1,121 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- */
-
-#define LO_ARRAYSUM_SPACE AVX
-#include "arraysum.hxx"
-
-#include <arraysumfunctorinternal.hxx>
-
-#include <tools/simd.hxx>
-#include <tools/simdsupport.hxx>
-
-#include <stdlib.h>
-
-namespace sc::op
-{
-#ifdef LO_AVX_AVAILABLE
-
-bool hasAVXCode() { return true; }
-
-using namespace AVX;
-
-/** Kahan sum with AVX.
- */
-static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
-{
- static const __m256d ANNULATE_SIGN_BIT
- = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
- // Temporal parameter
- __m256d t = _mm256_add_pd(sum, value);
- // Absolute value of the total sum
- __m256d asum = _mm256_and_pd(sum, ANNULATE_SIGN_BIT);
- // Absolute value of the value to add
- __m256d avalue = _mm256_and_pd(value, ANNULATE_SIGN_BIT);
- // Compare the absolute values sum >= value
- __m256d mask = _mm256_cmp_pd(asum, avalue, _CMP_GE_OQ);
- // The following code has this form ( a - t + b)
- // Case 1: a = sum b = value
- // Case 2: a = value b = sum
- __m256d a = _mm256_add_pd(_mm256_and_pd(mask, sum), _mm256_andnot_pd(mask, value));
- __m256d b = _mm256_add_pd(_mm256_and_pd(mask, value), _mm256_andnot_pd(mask, sum));
- err = _mm256_add_pd(err, _mm256_add_pd(_mm256_sub_pd(a, t), b));
- // Store result
- sum = t;
-}
-
-/** Execute Kahan sum with AVX.
- */
-KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
-{
- // Make sure we don't fall out of bounds.
- // This works by sums of 8 terms.
- // So the 8'th term is i+7
- // If we iterate until nSize won't fall out of bounds
- if (nSize > i + 7)
- {
- // Setup sums and errors as 0
- __m256d sum1 = _mm256_setzero_pd();
- __m256d err1 = _mm256_setzero_pd();
- __m256d sum2 = _mm256_setzero_pd();
- __m256d err2 = _mm256_setzero_pd();
-
- for (; i + 7 < nSize; i += 8)
- {
- // Kahan sum 1
- __m256d load1 = _mm256_loadu_pd(pCurrent);
- sumAVX(sum1, err1, load1);
- pCurrent += 4;
-
- // Kahan sum 2
- __m256d load2 = _mm256_loadu_pd(pCurrent);
- sumAVX(sum2, err2, load2);
- pCurrent += 4;
- }
-
- // Now we combine pairwise summation with Kahan summation
-
- // sum 1 + sum 2 -> sum 1
- sumAVX(sum1, err1, sum2);
- sumAVX(sum1, err1, err2);
-
- // Store results
- double sums[4];
- double errs[4];
- _mm256_storeu_pd(&sums[0], sum1);
- _mm256_storeu_pd(&errs[0], err1);
-
- // First Kahan & pairwise summation
- // 0+1 1+2 -> 0, 2
- sumNeumanierNormal(sums[0], errs[0], sums[1]);
- sumNeumanierNormal(sums[2], errs[2], sums[3]);
- sumNeumanierNormal(sums[0], errs[0], errs[1]);
- sumNeumanierNormal(sums[2], errs[2], errs[3]);
-
- // 0+2 -> 0
- sumNeumanierNormal(sums[0], errs[0], sums[2]);
- sumNeumanierNormal(sums[0], errs[0], errs[2]);
-
- // Store result
- return { sums[0], errs[0] };
- }
- return { 0.0, 0.0 };
-}
-
-#else // LO_AVX_AVAILABLE
-
-bool hasAVXCode() { return false; }
-
-KahanSumSimple executeAVX(size_t&, size_t, const double*) { abort(); }
-
-#endif
-
-} // end namespace sc::op
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
deleted file mode 100644
index 6a3235a58e2e..000000000000
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ /dev/null
@@ -1,120 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- */
-
-#define LO_ARRAYSUM_SPACE AVX512
-#include "arraysum.hxx"
-
-#include <arraysumfunctorinternal.hxx>
-
-#include <tools/simd.hxx>
-#include <tools/simdsupport.hxx>
-
-#include <stdlib.h>
-
-namespace sc::op
-{
-#ifdef LO_AVX512F_AVAILABLE
-
-bool hasAVX512FCode() { return true; }
-
-using namespace AVX512;
-
-/** Kahan sum with AVX512.
- */
-static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
-{
- // Temporal parameter
- __m512d t = _mm512_add_pd(sum, value);
- // Absolute value of the total sum
- __m512d asum = _mm512_abs_pd(sum);
- // Absolute value of the value to add
- __m512d avalue = _mm512_abs_pd(value);
- // Compare the absolute values sum >= value
- __mmask8 mask = _mm512_cmp_pd_mask(avalue, asum, _CMP_GE_OQ);
- // The following code has this form ( a - t + b)
- // Case 1: a = sum b = value
- // Case 2: a = value b = sum
- __m512d a = _mm512_mask_blend_pd(mask, sum, value);
- __m512d b = _mm512_mask_blend_pd(mask, value, sum);
- err = _mm512_add_pd(err, _mm512_add_pd(_mm512_sub_pd(a, t), b));
- // Store result
- sum = t;
-}
-
-/** Execute Kahan sum with AVX512.
- */
-KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
-{
- // Make sure we don't fall out of bounds.
- // This works by sums of 8 terms.
- // So the 8'th term is i+7
- // If we iterate until nSize won't fall out of bounds
- if (nSize > i + 7)
- {
- // Setup sums and errors as 0
- __m512d sum = _mm512_setzero_pd();
- __m512d err = _mm512_setzero_pd();
-
- // Sum the stuff
- for (; i + 7 < nSize; i += 8)
- {
- // Kahan sum
- __m512d load = _mm512_loadu_pd(pCurrent);
- sumAVX512(sum, err, load);
- pCurrent += 8;
- }
-
- // Store result
- static_assert(sizeof(double) == 8);
- double sums[8];
- double errs[8];
- _mm512_storeu_pd(&sums[0], sum);
- _mm512_storeu_pd(&errs[0], err);
-
- // First Kahan & pairwise summation
- // 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6
- sumNeumanierNormal(sums[0], errs[0], sums[1]);
- sumNeumanierNormal(sums[2], errs[2], sums[3]);
- sumNeumanierNormal(sums[4], errs[4], sums[5]);
- sumNeumanierNormal(sums[6], errs[6], sums[7]);
- sumNeumanierNormal(sums[0], errs[0], errs[1]);
- sumNeumanierNormal(sums[2], errs[2], errs[3]);
- sumNeumanierNormal(sums[4], errs[4], errs[5]);
- sumNeumanierNormal(sums[6], errs[6], errs[7]);
-
- // Second Kahan & pairwise summation
- // 0+2 4+6 -> 0, 4
- sumNeumanierNormal(sums[0], errs[0], sums[2]);
- sumNeumanierNormal(sums[4], errs[4], sums[6]);
- sumNeumanierNormal(sums[0], errs[0], errs[2]);
- sumNeumanierNormal(sums[4], errs[4], errs[6]);
-
- // Third Kahan & pairwise summation
- // 0+4 -> 0
- sumNeumanierNormal(sums[0], errs[0], sums[4]);
- sumNeumanierNormal(sums[0], errs[0], errs[4]);
-
- // Return final result
- return { sums[0], errs[0] };
- }
- return { 0.0, 0.0 };
-}
-
-#else // LO_AVX512F_AVAILABLE
-
-bool hasAVX512FCode() { return false; }
-
-KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); }
-
-#endif
-
-} // end namespace sc::op
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index 1a5cc2f00dfe..8542e2e2335d 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -8,24 +8,19 @@
*
*/
-#define LO_ARRAYSUM_SPACE SSE2
#include "arraysum.hxx"
-#include <arraysumfunctorinternal.hxx>
+#include <arraysumfunctor.hxx>
#include <tools/simd.hxx>
#include <tools/simdsupport.hxx>
#include <stdlib.h>
+#if SC_USE_SSE2
+
namespace sc::op
{
-#ifdef LO_SSE2_AVAILABLE
-
-bool hasSSE2Code() { return true; }
-
-using namespace SSE2;
-
/** Kahan sum with SSE2.
*/
static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
@@ -51,7 +46,7 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
/** Execute Kahan sum with SSE2.
*/
-KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
+KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
{
// Make sure we don't fall out of bounds.
// This works by sums of 8 terms.
@@ -121,13 +116,8 @@ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
return { 0.0, 0.0 };
}
-#else // LO_SSE2_AVAILABLE
-
-bool hasSSE2Code() { return false; }
-
-KahanSumSimple executeSSE2(size_t&, size_t, const double*) { abort(); }
+} // namespace
#endif
-}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */