summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dennis Francis <dennis.francis@collabora.com> 2019-10-07 18:02:04 +0530
committer Dennis Francis <dennis.francis@collabora.com> 2019-10-17 08:07:32 +0200
commit 3c2587a152476cbb0ca4a83138a4c34ec8065b32 (patch)
tree a43ba7aea0b91c58aa896bf2a82449ca6822e81c
parent a5b394a4c6ce5aa93654ff6d57fc497bcea93001 (diff)
move SSE2sum code to separate cxx file...
and compile it with -arch:SSE2 on Windows. This code, however, is called only when cpuid::hasSSE2() is true, so it does not cause problems on machines without SSE2 support. Change-Id: Ice23ac71d4c577b8811b08c74a3ca500a94fdc09 Reviewed-on: https://gerrit.libreoffice.org/80847 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
-rw-r--r-- sc/Library_sc.mk 11
-rw-r--r-- sc/source/core/inc/arraysumfunctor.hxx 51
-rw-r--r-- sc/source/core/tool/arraysumSSE2.cxx 65
3 files changed, 77 insertions, 50 deletions
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index d594ca110f6d..9c057eaf6a6d 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -98,6 +98,17 @@ $(eval $(call gb_Library_use_libraries,sc,\
xo \
))
+ifeq ($(OS),WNT) # Windows build: register arraysumSSE2 with -arch:SSE2 as the extra argument
+$(eval $(call gb_Library_add_exception_objects,sc,\
+ sc/source/core/tool/arraysumSSE2, -arch:SSE2 \
+))
+
+else # every other OS: register the same object with no extra argument
+$(eval $(call gb_Library_add_exception_objects,sc,\
+ sc/source/core/tool/arraysumSSE2 \
+))
+endif
+
$(eval $(call gb_Library_add_exception_objects,sc,\
sc/source/core/data/attarray \
sc/source/core/data/attrib \
diff --git a/sc/source/core/inc/arraysumfunctor.hxx b/sc/source/core/inc/arraysumfunctor.hxx
index 7ef8a7face05..f1182874b1a1 100644
--- a/sc/source/core/inc/arraysumfunctor.hxx
+++ b/sc/source/core/inc/arraysumfunctor.hxx
@@ -14,7 +14,6 @@
#include <cstdint>
#include <rtl/math.hxx>
-#include <tools/simdsupport.hxx>
#include <tools/simd.hxx>
#include <tools/cpuid.hxx>
@@ -87,56 +86,8 @@ public:
}
private:
- double executeSSE2(size_t& i, const double* pCurrent) const
- {
-#if defined(LO_SSE2_AVAILABLE)
- double fSum = 0.0;
- size_t nRealSize = mnSize - i;
- size_t nUnrolledSize = nRealSize - (nRealSize % 8);
-
- if (nUnrolledSize > 0)
- {
- __m128d sum1 = _mm_setzero_pd();
- __m128d sum2 = _mm_setzero_pd();
- __m128d sum3 = _mm_setzero_pd();
- __m128d sum4 = _mm_setzero_pd();
-
- for (; i < nUnrolledSize; i += 8)
- {
- __m128d load1 = _mm_load_pd(pCurrent);
- sum1 = _mm_add_pd(sum1, load1);
- pCurrent += 2;
-
- __m128d load2 = _mm_load_pd(pCurrent);
- sum2 = _mm_add_pd(sum2, load2);
- pCurrent += 2;
-
- __m128d load3 = _mm_load_pd(pCurrent);
- sum3 = _mm_add_pd(sum3, load3);
- pCurrent += 2;
-
- __m128d load4 = _mm_load_pd(pCurrent);
- sum4 = _mm_add_pd(sum4, load4);
- pCurrent += 2;
- }
- sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
-
- double temp;
-
- _mm_storel_pd(&temp, sum1);
- fSum += temp;
-
- _mm_storeh_pd(&temp, sum1);
- fSum += temp;
- }
- return fSum;
-#else
- (void) i;
- (void) pCurrent;
- return 0.0;
-#endif
- }
+ double executeSSE2(size_t& i, const double* pCurrent) const; // defined out of line in sc/source/core/tool/arraysumSSE2.cxx
double executeUnrolled(size_t& i, const double* pCurrent) const
{
size_t nRealSize = mnSize - i;
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
new file mode 100644
index 000000000000..894675335834
--- /dev/null
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <arraysumfunctor.hxx>
+#include <tools/simdsupport.hxx>
+
+namespace sc
+{
+double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const // SSE2 partial sum: adds doubles starting at pCurrent in groups of 8, advancing i by 8 per group, and returns that partial sum
+{
+#if defined(LO_SSE2_AVAILABLE) // presumably provided by tools/simdsupport.hxx - confirm
+ double fSum = 0.0;
+ size_t nRealSize = mnSize - i; // elements from index i up to mnSize
+ size_t nUnrolledSize = nRealSize - (nRealSize % 8); // nRealSize rounded down to a multiple of 8
+
+ if (nUnrolledSize > 0) // skip the SIMD path when fewer than 8 elements remain
+ {
+ __m128d sum1 = _mm_setzero_pd(); // four independent accumulators, two double lanes each
+ __m128d sum2 = _mm_setzero_pd();
+ __m128d sum3 = _mm_setzero_pd();
+ __m128d sum4 = _mm_setzero_pd();
+
+ for (; i < nUnrolledSize; i += 8) // NOTE(review): i is an absolute index but the bound is a count taken from i's entry value; they coincide when i enters as 0 - confirm for other callers
+ {
+ __m128d load1 = _mm_load_pd(pCurrent); // aligned load of two doubles; presumably the caller guarantees 16-byte alignment - verify
+ sum1 = _mm_add_pd(sum1, load1);
+ pCurrent += 2; // step past the pair just loaded
+
+ __m128d load2 = _mm_load_pd(pCurrent);
+ sum2 = _mm_add_pd(sum2, load2);
+ pCurrent += 2;
+
+ __m128d load3 = _mm_load_pd(pCurrent);
+ sum3 = _mm_add_pd(sum3, load3);
+ pCurrent += 2;
+
+ __m128d load4 = _mm_load_pd(pCurrent);
+ sum4 = _mm_add_pd(sum4, load4);
+ pCurrent += 2;
+ }
+ sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4)); // fold the four accumulators into one
+
+ double temp;
+
+ _mm_storel_pd(&temp, sum1); // extract the low lane
+ fSum += temp;
+
+ _mm_storeh_pd(&temp, sum1); // extract the high lane
+ fSum += temp;
+ }
+ return fSum;
+#else // SSE2 not available at build time: leave i untouched and contribute nothing to the sum
+ (void)i; // parameters are unused in this branch
+ (void)pCurrent;
+ return 0.0;
+#endif
+}
+} \ No newline at end of file