diff options
author | Luboš Luňák <l.lunak@collabora.com> | 2021-10-26 23:40:47 +0200 |
---|---|---|
committer | Luboš Luňák <l.lunak@collabora.com> | 2021-10-27 15:02:11 +0200 |
commit | ef42ce579f0e4e4c436f70615f3adeb9f0f68217 (patch) | |
tree | b688259454d3b571831bc3dd706ec3c974dd1f45 | |
parent | 56b0d05991391d7a885e6928138d5512cbbdfb47 (diff) |
fix AVX512 detection
The AVX512 intrinsics flags (CXXFLAGS_INTRINSICS_AVX512 and its AVX512F / LO_CLANG variants) weren't exported in config_host.mk.in, so they have never been used.
Also fix the CPU-specific Calc Kahan summation code yet again :(
Change-Id: Iacfd500e5a662b2b4b96a009d129a012d278a3ad
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/124248
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
-rw-r--r-- | config_host.mk.in | 4 | ||||
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | sc/inc/arraysumfunctor.hxx | 5 | ||||
-rw-r--r-- | sc/inc/arraysumfunctorinternal.hxx | 6 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumAVX.cxx | 21 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumAVX512.cxx | 41 | ||||
-rw-r--r-- | sc/source/core/tool/arraysumSSE2.cxx | 21 |
7 files changed, 47 insertions, 57 deletions
diff --git a/config_host.mk.in b/config_host.mk.in index f9d4e2f0d05d..3970cb6a61c9 100644 --- a/config_host.mk.in +++ b/config_host.mk.in @@ -110,6 +110,8 @@ export CXXFLAGS_INTRINSICS_SSE41=@CXXFLAGS_INTRINSICS_SSE41@ export CXXFLAGS_INTRINSICS_SSE42=@CXXFLAGS_INTRINSICS_SSE42@ export CXXFLAGS_INTRINSICS_AVX=@CXXFLAGS_INTRINSICS_AVX@ export CXXFLAGS_INTRINSICS_AVX2=@CXXFLAGS_INTRINSICS_AVX2@ +export CXXFLAGS_INTRINSICS_AVX512=@CXXFLAGS_INTRINSICS_AVX512@ +export CXXFLAGS_INTRINSICS_AVX512F=@CXXFLAGS_INTRINSICS_AVX512F@ export CXXFLAGS_INTRINSICS_F16C=@CXXFLAGS_INTRINSICS_F16C@ export CXXFLAGS_INTRINSICS_FMA=@CXXFLAGS_INTRINSICS_FMA@ export DATADIR=@DATADIR@ @@ -407,6 +409,8 @@ export LO_CLANG_CXXFLAGS_INTRINSICS_SSE41=@LO_CLANG_CXXFLAGS_INTRINSICS_SSE41@ export LO_CLANG_CXXFLAGS_INTRINSICS_SSE42=@LO_CLANG_CXXFLAGS_INTRINSICS_SSE42@ export LO_CLANG_CXXFLAGS_INTRINSICS_AVX=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX@ export LO_CLANG_CXXFLAGS_INTRINSICS_AVX2=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX2@ +export LO_CLANG_CXXFLAGS_INTRINSICS_AVX512=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX512@ +export LO_CLANG_CXXFLAGS_INTRINSICS_AVX512F=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX512F@ export LO_CLANG_CXXFLAGS_INTRINSICS_F16C=@LO_CLANG_CXXFLAGS_INTRINSICS_F16C@ export LO_CLANG_CXXFLAGS_INTRINSICS_FMA=@LO_CLANG_CXXFLAGS_INTRINSICS_FMA@ @x_LO_ELFCHECK_ALLOWLIST@ export LO_ELFCHECK_ALLOWLIST=@LO_ELFCHECK_ALLOWLIST@ diff --git a/configure.ac b/configure.ac index adf6da19b1f9..a810ba0b88af 100644 --- a/configure.ac +++ b/configure.ac @@ -7808,6 +7808,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ #include <immintrin.h> int main () { __m512i a = _mm512_loadu_si512(0); + __m512d v1 = _mm512_load_pd(0); + // https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/config/i386/avx512fintrin.h;h=23bce99cbe7016a04e14c2163ed3fe6a5a64f4e2 + __m512d v2 = _mm512_abs_pd(v1); return 0; } ])], @@ -12281,6 +12284,9 @@ if test "$ENABLE_SKIA" = TRUE -a "$COM_IS_CLANG" != TRUE; then #include <immintrin.h> int main () { __m512i a = 
_mm512_loadu_si512(0); + __m512d v1 = _mm512_load_pd(0); + // https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/config/i386/avx512fintrin.h;h=23bce99cbe7016a04e14c2163ed3fe6a5a64f4e2 + __m512d v2 = _mm512_abs_pd(v1); return 0; } ])], diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx index d251b4a6f9fb..b727f5893a8c 100644 --- a/sc/inc/arraysumfunctor.hxx +++ b/sc/inc/arraysumfunctor.hxx @@ -19,8 +19,9 @@ namespace sc::op { /* Checkout available optimization options */ -const bool hasAVX = cpuid::hasAVX(); -const bool hasSSE2 = cpuid::hasSSE2(); +const bool hasAVX512F = hasAVX512FCode() && cpuid::hasAVX512F(); +const bool hasAVX = hasAVXCode() && cpuid::hasAVX(); +const bool hasSSE2 = hasSSE2Code() && cpuid::hasSSE2(); /** * If no boosts available, Unrolled KahanSum. diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx index a06e3fc17439..e939dbd3037d 100644 --- a/sc/inc/arraysumfunctorinternal.hxx +++ b/sc/inc/arraysumfunctorinternal.hxx @@ -13,8 +13,6 @@ namespace sc::op { -SC_DLLPUBLIC extern const bool hasAVX512F; - // Plain old data structure, to be used by code compiled with CPU intrinsics without generating any // code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy // when merging duplicates). 
@@ -29,6 +27,10 @@ SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent); SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent); +SC_DLLPUBLIC bool hasAVX512FCode(); +SC_DLLPUBLIC bool hasAVXCode(); +SC_DLLPUBLIC bool hasSSE2Code(); + } // namespace /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx index c55d71f22983..e256248047d0 100644 --- a/sc/source/core/tool/arraysumAVX.cxx +++ b/sc/source/core/tool/arraysumAVX.cxx @@ -20,7 +20,9 @@ namespace sc::op { -#ifdef LO_AVX_AVAILABLE // Old processors +#ifdef LO_AVX_AVAILABLE + +bool hasAVXCode() { return true; } using namespace AVX; @@ -48,13 +50,10 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value) sum = t; } -#endif - /** Execute Kahan sum with AVX. */ KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent) { -#ifdef LO_AVX_AVAILABLE // Make sure we don't fall out of bounds. // This works by sums of 8 terms. // So the 8'th term is i+7 @@ -107,14 +106,16 @@ KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent) return { sums[0], errs[0] }; } return { 0.0, 0.0 }; -#else - (void)i; - (void)nSize; - (void)pCurrent; - abort(); -#endif } +#else // LO_AVX_AVAILABLE + +bool hasAVXCode() { return false; } + +KahanSumSimple executeAVX(size_t&, size_t, const double*) { abort(); } + +#endif + } // end namespace sc::op /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx index 987e5a3e6ff6..6a3235a58e2e 100644 --- a/sc/source/core/tool/arraysumAVX512.cxx +++ b/sc/source/core/tool/arraysumAVX512.cxx @@ -18,25 +18,11 @@ #include <stdlib.h> -/* TODO Remove this once GCC updated and AVX512 can work. 
*/ -#ifdef __GNUC__ -#if __GNUC__ < 9 -#ifdef LO_AVX512F_AVAILABLE -#define HAS_LO_AVX512F_AVAILABLE -#undef LO_AVX512F_AVAILABLE -#endif -#endif -#endif - namespace sc::op { #ifdef LO_AVX512F_AVAILABLE -const bool hasAVX512F = cpuid::hasAVX512F(); -#else -const bool hasAVX512F = false; -#endif -#ifdef LO_AVX512F_AVAILABLE // New processors +bool hasAVX512FCode() { return true; } using namespace AVX512; @@ -62,13 +48,10 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value) sum = t; } -#endif - /** Execute Kahan sum with AVX512. */ KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) { -#ifdef LO_AVX512F_AVAILABLE // New processors // Make sure we don't fall out of bounds. // This works by sums of 8 terms. // So the 8'th term is i+7 @@ -122,24 +105,16 @@ KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent) return { sums[0], errs[0] }; } return { 0.0, 0.0 }; -#else - (void)i; - (void)nSize; - (void)pCurrent; - abort(); -#endif } -} // end namespace sc::op +#else // LO_AVX512F_AVAILABLE + +bool hasAVX512FCode() { return false; } + +KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); } -/* TODO Remove this once GCC updated and AVX512 can work. 
*/ -#ifdef __GNUC__ -#if __GNUC__ < 9 -#ifdef HAS_LO_AVX512F_AVAILABLE -#define LO_AVX512F_AVAILABLE -#undef HAS_LO_AVX512F_AVAILABLE -#endif -#endif #endif +} // end namespace sc::op + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx index b4edb98286f9..1a5cc2f00dfe 100644 --- a/sc/source/core/tool/arraysumSSE2.cxx +++ b/sc/source/core/tool/arraysumSSE2.cxx @@ -20,7 +20,9 @@ namespace sc::op { -#ifdef LO_SSE2_AVAILABLE // Old processors +#ifdef LO_SSE2_AVAILABLE + +bool hasSSE2Code() { return true; } using namespace SSE2; @@ -47,13 +49,10 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value) sum = t; } -#endif - /** Execute Kahan sum with SSE2. */ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent) { -#ifdef LO_SSE2_AVAILABLE // Make sure we don't fall out of bounds. // This works by sums of 8 terms. // So the 8'th term is i+7 @@ -120,13 +119,15 @@ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent) return { sums[0], errs[0] }; } return { 0.0, 0.0 }; -#else - (void)i; - (void)nSize; - (void)pCurrent; - abort(); -#endif } + +#else // LO_SSE2_AVAILABLE + +bool hasSSE2Code() { return false; } + +KahanSumSimple executeSSE2(size_t&, size_t, const double*) { abort(); } + +#endif } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |