summaryrefslogtreecommitdiff
path: root/sc
diff options
context:
space:
mode:
Diffstat (limited to 'sc')
-rw-r--r--sc/inc/arraysumfunctor.hxx25
-rw-r--r--sc/inc/arraysumfunctorinternal.hxx34
-rw-r--r--sc/inc/kahan.hxx8
-rw-r--r--sc/qa/unit/functions_statistical.cxx6
-rw-r--r--sc/source/core/tool/arraysum.hxx52
-rw-r--r--sc/source/core/tool/arraysumAVX.cxx22
-rw-r--r--sc/source/core/tool/arraysumAVX512.cxx33
-rw-r--r--sc/source/core/tool/arraysumSSE2.cxx23
8 files changed, 144 insertions, 59 deletions
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index ecd428e9f037..d251b4a6f9fb 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -12,35 +12,17 @@
#include <cmath>
#include "kahan.hxx"
-#include "scdllapi.h"
+#include "arraysumfunctorinternal.hxx"
#include <tools/cpuid.hxx>
#include <formula/errorcodes.hxx>
namespace sc::op
{
/* Checkout available optimization options */
-SC_DLLPUBLIC extern const bool hasAVX512F;
const bool hasAVX = cpuid::hasAVX();
const bool hasSSE2 = cpuid::hasSSE2();
/**
- * Performs one step of the Neumanier sum between doubles
- * Overwrites the summand and error
- * @parma sum
- * @param err
- * @param value
- */
-inline void sumNeumanierNormal(double& sum, double& err, const double& value)
-{
- double t = sum + value;
- if (std::abs(sum) >= std::abs(value))
- err += (sum - t) + value;
- else
- err += (value - t) + sum;
- sum = t;
-}
-
-/**
* If no boosts available, Unrolled KahanSum.
* Most likely to use on android.
*/
@@ -69,11 +51,6 @@ static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pC
return 0.0;
}
-/* Available methods */
-SC_DLLPUBLIC KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent);
-SC_DLLPUBLIC KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
-
/**
* This function task is to choose the fastest method available to perform the sum.
* @param i
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
new file mode 100644
index 000000000000..a06e3fc17439
--- /dev/null
+++ b/sc/inc/arraysumfunctorinternal.hxx
@@ -0,0 +1,34 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "scdllapi.h"
+
+namespace sc::op
+{
+SC_DLLPUBLIC extern const bool hasAVX512F;
+
+// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
+// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
+// when merging duplicates).
+struct KahanSumSimple
+{
+ double m_fSum;
+ double m_fError;
+};
+
+/* Available methods */
+SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent);
+SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
+SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
+
+} // namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/inc/kahan.hxx b/sc/inc/kahan.hxx
index 3404fb6d14a6..ded7bd78d70e 100644
--- a/sc/inc/kahan.hxx
+++ b/sc/inc/kahan.hxx
@@ -11,6 +11,8 @@
#include <cmath>
+#include "arraysumfunctorinternal.hxx"
+
/**
* This class provides LO with Kahan summation algorithm
* About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
@@ -34,6 +36,12 @@ public:
{
}
+ constexpr KahanSum(const sc::op::KahanSumSimple& sum)
+ : m_fSum(sum.m_fSum)
+ , m_fError(sum.m_fError)
+ {
+ }
+
constexpr KahanSum(const KahanSum& fSum) = default;
public:
diff --git a/sc/qa/unit/functions_statistical.cxx b/sc/qa/unit/functions_statistical.cxx
index 2e489d26dd0d..a034964f4923 100644
--- a/sc/qa/unit/functions_statistical.cxx
+++ b/sc/qa/unit/functions_statistical.cxx
@@ -37,13 +37,13 @@ void StatisticalFunctionsTest::testIntrinsicSums()
double* pCurrent = summands;
size_t i = 0;
if (sc::op::hasAVX512F)
- CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX512F(i, 9, pCurrent).get());
+ CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX512F(i, 9, pCurrent)).get());
i = 0;
if (sc::op::hasAVX)
- CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeAVX(i, 9, pCurrent).get());
+ CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeAVX(i, 9, pCurrent)).get());
i = 0;
if (sc::op::hasSSE2)
- CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeSSE2(i, 9, pCurrent).get());
+ CPPUNIT_ASSERT_EQUAL(42.0, KahanSum(sc::op::executeSSE2(i, 9, pCurrent)).get());
i = 0;
CPPUNIT_ASSERT_EQUAL(42.0, sc::op::executeUnrolled(i, 9, pCurrent).get());
}
diff --git a/sc/source/core/tool/arraysum.hxx b/sc/source/core/tool/arraysum.hxx
new file mode 100644
index 000000000000..62c2514182b7
--- /dev/null
+++ b/sc/source/core/tool/arraysum.hxx
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <cmath>
+
+namespace sc::op
+{
+// Code must not be shared between different CPU instrinsics flags (e.g. in debug mode the compiler would not
+// inline them, and merge the copies, keeping only the one with the most demanding CPU set that's not available otherwise).
+// Put everything in a different namespace and additionally try to force inlining.
+namespace LO_ARRAYSUM_SPACE
+{
+#if defined _MSC_VER
+#define INLINE __forceinline static
+#elif defined __GNUC__
+#define INLINE __attribute__((always_inline)) static inline
+#else
+#define static inline
+#endif
+
+/**
+ * Performs one step of the Neumanier sum between doubles
+ * Overwrites the summand and error
+ * @parma sum
+ * @param err
+ * @param value
+ */
+INLINE void sumNeumanierNormal(double& sum, double& err, const double& value)
+{
+ double t = sum + value;
+ if (std::abs(sum) >= std::abs(value))
+ err += (sum - t) + value;
+ else
+ err += (value - t) + sum;
+ sum = t;
+}
+
+#undef INLINE
+
+} // end namespace LO_ARRAYSUM_SPACE
+} // end namespace sc::op
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
index 49407b95dfb6..54c47780f63c 100644
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ b/sc/source/core/tool/arraysumAVX.cxx
@@ -8,21 +8,28 @@
*
*/
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE AVX
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
#include <tools/simd.hxx>
#include <tools/simdsupport.hxx>
+#include <cstdlib>
+
namespace sc::op
{
#ifdef LO_AVX_AVAILABLE // Old processors
-const __m256d ANNULATE_SIGN_BIT = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
+using namespace AVX;
/** Kahan sum with AVX.
*/
static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
{
+ const __m256d ANNULATE_SIGN_BIT
+ = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
// Temporal parameter
__m256d t = _mm256_add_pd(sum, value);
// Absolute value of the total sum
@@ -45,7 +52,7 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
/** Execute Kahan sum with AVX.
*/
-KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
{
#ifdef LO_AVX_AVAILABLE
// Make sure we don't fall out of bounds.
@@ -97,15 +104,14 @@ KahanSum executeAVX(size_t& i, size_t nSize, const double* pCurrent)
sumNeumanierNormal(sums[0], errs[0], errs[2]);
// Store result
- return KahanSum(sums[0], errs[0]);
+ return { sums[0], errs[0] };
}
- return 0.0;
+ return { 0.0, 0.0 };
#else
- SAL_WARN("sc", "Failed to use AVX");
(void)i;
(void)nSize;
(void)pCurrent;
- return 0.0;
+ abort();
#endif
}
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
index 0fa49c6bccc8..f8e8de729279 100644
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ b/sc/source/core/tool/arraysumAVX512.cxx
@@ -8,11 +8,16 @@
*
*/
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE AVX512
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
#include <tools/simd.hxx>
#include <tools/simdsupport.hxx>
+#include <cstdlib>
+
/* TODO Remove this once GCC updated and AVX512 can work. */
#ifdef __GNUC__
#if __GNUC__ < 9
@@ -23,16 +28,18 @@
#endif
#endif
+namespace sc::op
+{
#ifdef LO_AVX512F_AVAILABLE
-const bool sc::op::hasAVX512F = cpuid::hasAVX512F();
+const bool hasAVX512F = cpuid::hasAVX512F();
#else
-const bool sc::op::hasAVX512F = false;
+const bool hasAVX512F = false;
#endif
-namespace sc::op
-{
#ifdef LO_AVX512F_AVAILABLE // New processors
+using namespace AVX512;
+
/** Kahan sum with AVX512.
*/
static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
@@ -59,7 +66,7 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
/** Execute Kahan sum with AVX512.
*/
-KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
{
#ifdef LO_AVX512F_AVAILABLE // New processors
// Make sure we don't fall out of bounds.
@@ -85,8 +92,8 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
static_assert(sizeof(double) == 8);
double sums[8];
double errs[8];
- _mm512_storeu_pd(static_cast<void*>(&sums[0]), sum);
- _mm512_storeu_pd(static_cast<void*>(&errs[0]), err);
+ _mm512_storeu_pd(&sums[0], sum);
+ _mm512_storeu_pd(&errs[0], err);
// First Kahan & pairwise summation
// 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6
@@ -112,16 +119,14 @@ KahanSum executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
sumNeumanierNormal(sums[0], errs[0], errs[4]);
// Return final result
- return KahanSum(sums[0], errs[0]);
+ return { sums[0], errs[0] };
}
- else
- return 0.0;
+ return { 0.0, 0.0 };
#else
- SAL_WARN("sc", "Failed to use AVX 512");
(void)i;
(void)nSize;
(void)pCurrent;
- return 0.0;
+ abort();
#endif
}
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index e2ab945acc4a..95b42f868461 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -8,23 +8,27 @@
*
*/
-#include <arraysumfunctor.hxx>
-#include <sal/log.hxx>
+#define LO_ARRAYSUM_SPACE SSE2
+#include "arraysum.hxx"
+
+#include <arraysumfunctorinternal.hxx>
+
#include <tools/simd.hxx>
#include <tools/simdsupport.hxx>
-//AVX512VL + AVX512F + KNCNI
+#include <cstdlib>
namespace sc::op
{
#ifdef LO_SSE2_AVAILABLE // Old processors
-const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
+using namespace SSE2;
-/** Kahan sum with SSE4.2.
+/** Kahan sum with SSE2.
*/
static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
{
+ const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
// Temporal parameter
__m128d t = _mm_add_pd(sum, value);
// Absolute value of the total sum
@@ -47,7 +51,7 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
/** Execute Kahan sum with SSE2.
*/
-KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
+KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
{
#ifdef LO_SSE2_AVAILABLE
// Make sure we don't fall out of bounds.
@@ -113,15 +117,14 @@ KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
sumNeumanierNormal(sums[0], errs[0], errs[1]);
// Store result
- return KahanSum(sums[0], errs[0]);
+ return { sums[0], errs[0] };
}
- return 0.0;
+ return { 0.0, 0.0 };
#else
- SAL_WARN("sc", "Failed to use SSE2");
(void)i;
(void)nSize;
(void)pCurrent;
- return 0.0;
+ abort();
#endif
}
}