diff options
-rw-r--r-- | config_host.mk.in | 1 | ||||
-rw-r--r-- | configure.ac | 102 | ||||
-rw-r--r-- | include/tools/cpuid.hxx | 71 | ||||
-rw-r--r-- | include/tools/simd.hxx | 30 | ||||
-rw-r--r-- | include/tools/simdsupport.hxx | 62 | ||||
-rw-r--r-- | sc/source/core/data/formulacell.cxx | 2 | ||||
-rw-r--r-- | sc/source/core/inc/arraysumfunctor.hxx | 15 | ||||
-rw-r--r-- | tools/CppunitTest_tools_test.mk | 4 | ||||
-rw-r--r-- | tools/qa/cppunit/test_cpuid.cxx | 74 | ||||
-rw-r--r-- | tools/source/misc/cpuid.cxx | 119 |
10 files changed, 434 insertions, 46 deletions
diff --git a/config_host.mk.in b/config_host.mk.in index 5c7038211a11..9bde2ff2a214 100644 --- a/config_host.mk.in +++ b/config_host.mk.in @@ -648,6 +648,7 @@ export ZLIB_LIBS=$(gb_SPACE)@ZLIB_LIBS@ export ZMF_CFLAGS=$(gb_SPACE)@ZMF_CFLAGS@ export ZMF_LIBS=$(gb_SPACE)@ZMF_LIBS@ export USE_AVMEDIA_DUMMY=@USE_AVMEDIA_DUMMY@ +export INTRINSICS_CXXFLAGS=@INTRINSICS_CXXFLAGS@ # lang-related stuff include $(BUILDDIR)/config_$(gb_Side)_lang.mk diff --git a/configure.ac b/configure.ac index ba079753729f..b9785d895366 100644 --- a/configure.ac +++ b/configure.ac @@ -6748,6 +6748,108 @@ fi AC_SUBST([HAVE_BROKEN_GCC_WMAYBE_UNINITIALIZED]) dnl =================================================================== +dnl CPU Intrinsincs support - SSE, AVX +dnl =================================================================== + +INTRINSICS_CXXFLAGS="" + +if test "$GCC" = "yes"; then + AC_MSG_CHECKING([whether $CXX can compile SSE2 intrinsics]) + AC_LANG_PUSH([C++]) + save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse2" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include <x86intrin.h> + int main () { + volatile __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_xor_si128 (a, b); + return 0; + } + ])], + [can_compile_sse2=yes], + [can_compile_sse2=no]) + AC_LANG_POP([C++]) + CXXFLAGS=$save_CXXFLAGS + AC_MSG_RESULT([${can_compile_sse2}]) + if test "${can_compile_sse2}" = "yes" ; then + INTRINSICS_CXXFLAGS="-msse2" + else + AC_MSG_WARN([cannot compile SSE2 intrinsics]) + fi + + AC_MSG_CHECKING([whether $CXX can compile SSSE3 intrinsics]) + AC_LANG_PUSH([C++]) + save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mssse3" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include <x86intrin.h> + int main () { + volatile __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_maddubs_epi16 (a, b); + return 0; + } + ])], + [can_compile_ssse3=yes], + [can_compile_ssse3=no]) + AC_LANG_POP([C++]) + CXXFLAGS=$save_CXXFLAGS + AC_MSG_RESULT([${can_compile_ssse3}]) + if test "${can_compile_ssse3}" = "yes" ; then + INTRINSICS_CXXFLAGS="-mssse3" + else + AC_MSG_WARN([cannot compile SSSE3 intrinsics]) + fi + + AC_MSG_CHECKING([whether $CXX can compile AVX intrinsics]) + AC_LANG_PUSH([C++]) + save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include <x86intrin.h> + int main () { + volatile __m256 a = _mm256_set1_ps (0.0f), b = _mm256_set1_ps (0.0f), c; + c = _mm256_xor_ps(a, b); + return 0; + } + ])], + [can_compile_avx=yes], + [can_compile_avx=no]) + AC_LANG_POP([C++]) + CXXFLAGS=$save_CXXFLAGS + AC_MSG_RESULT([${can_compile_avx}]) + if test "${can_compile_avx}" = "yes" ; then + INTRINSICS_CXXFLAGS="-mavx" + else + AC_MSG_WARN([cannot compile AVX intrinsics]) + fi + + AC_MSG_CHECKING([whether $CXX can compile AVX2 intrinsics]) + AC_LANG_PUSH([C++]) + save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx2" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include <x86intrin.h> + int main () { + volatile __m256i a = _mm256_set1_epi32 (0), b = _mm256_set1_epi32 (0), c; + c = _mm256_maddubs_epi16(a, b); + return 0; + } + ])], + [can_compile_avx2=yes], + [can_compile_avx2=no]) + AC_LANG_POP([C++]) + CXXFLAGS=$save_CXXFLAGS + AC_MSG_RESULT([${can_compile_avx2}]) + if test "${can_compile_avx2}" = "yes" ; then + INTRINSICS_CXXFLAGS="-mavx2" + else + AC_MSG_WARN([cannot compile AVX2 intrinsics]) + fi +fi + +AC_SUBST([INTRINSICS_CXXFLAGS]) + +dnl =================================================================== dnl system stl sanity tests dnl =================================================================== if test "$_os" != "WINNT"; then diff --git a/include/tools/cpuid.hxx b/include/tools/cpuid.hxx index 419d05714ae4..27e9987398e4 100644 --- a/include/tools/cpuid.hxx +++ b/include/tools/cpuid.hxx @@ -13,22 +13,73 @@ #include <sal/config.h> #include <tools/toolsdllapi.h> +#include <o3tl/typed_flags_set.hxx> +#include <rtl/ustring.hxx> -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__) -#define LO_SSE2_AVAILABLE 1 -#elif defined(_MSC_VER) -#define LO_SSE2_AVAILABLE 1 -#endif +namespace cpuid { -namespace tools +enum class InstructionSetFlags { -namespace cpuid + NONE = 0x00, + HYPER = 0x01, + SSE2 = 0x02, + SSSE3 = 0x04, + SSE41 = 0x08, + SSE42 = 0x10, + AVX = 0x20, + AVX2 = 0x40 +}; + +} // end cpuid + +namespace o3tl { + template<> struct typed_flags<cpuid::InstructionSetFlags> : is_typed_flags<cpuid::InstructionSetFlags, 0x07f> {}; +} + +namespace cpuid { + +/** Get supported instruction set flags determined at runtime by probing the CPU. + */ +TOOLS_DLLPUBLIC InstructionSetFlags getCpuInstructionSetFlags(); + +/** Check if a certain instruction set is supported by the CPU at runtime. + */ +TOOLS_DLLPUBLIC bool isCpuInstructionSetSupported(InstructionSetFlags eInstructions); + +/** Returns a string of supported instructions. + */ +TOOLS_DLLPUBLIC OUString instructionSetSupportedString(); + +/** Check if SSE2 is supported by the CPU + */ +inline bool hasSSE2() { - TOOLS_DLLPUBLIC bool hasSSE2(); - TOOLS_DLLPUBLIC bool hasHyperThreading(); + return isCpuInstructionSetSupported(InstructionSetFlags::SSE2); } + +/** Check if SSSE3 is supported by the CPU + */ +inline bool hasSSSE3() +{ + return isCpuInstructionSetSupported(InstructionSetFlags::SSSE3); +} + +/** Check if AVX2 is supported by the CPU + */ +inline bool hasAVX2() +{ + return isCpuInstructionSetSupported(InstructionSetFlags::AVX2); +} + +/** Check if Hyper Threading is supported + */ +inline bool hasHyperThreading() +{ + return isCpuInstructionSetSupported(InstructionSetFlags::HYPER); } -#endif +} // end cpuid + +#endif // INCLUDED_TOOLS_CPUID_HXX /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/tools/simd.hxx b/include/tools/simd.hxx new file mode 100644 index 000000000000..bdfdb8928271 --- /dev/null +++ b/include/tools/simd.hxx @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#ifndef INCLUDED_TOOLS_SIMD_HXX +#define INCLUDED_TOOLS_SIMD_HXX + +namespace simd +{ +template <typename T, unsigned int N> inline bool isAligned(const T* pointer) +{ + return 0 == (uintptr_t(pointer) % N); +} + +template <typename T> inline T roundDown(T value, unsigned int multiple) +{ + return value & ~(multiple - 1); +} + +} // end namespace simd + +#endif // INCLUDED_TOOLS_SIMD_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/tools/simdsupport.hxx b/include/tools/simdsupport.hxx new file mode 100644 index 000000000000..74afc9300b1a --- /dev/null +++ b/include/tools/simdsupport.hxx @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +// Determine the compiler support for SIMD compiler intrinsics. +// This changes from one compiled unit to the other, depending if +// the support has been detected and if the compiled unit contains +// code using intrinsics or not. So we have to (re)set them again +// every time this file has been included. + +#undef LO_SSE2_AVAILABLE +#undef LO_SSSE3_AVAILABLE +#undef LO_AVX_AVAILABLE +#undef LO_AVX2_AVAILABLE + +#if defined(_MSC_VER) // VISUAL STUDIO COMPILER + +// SSE2 is required for X64 +#if (defined(_M_X64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2) +#define LO_SSE2_AVAILABLE +#endif + +// compiled with /arch:AVX +#if defined(__AVX__) +#ifndef LO_SSE2_AVAILABLE +#define LO_SSE2_AVAILABLE +#endif +#define LO_SSSE3_AVAILABLE +#define LO_AVX_AVAILABLE +#endif + +// compiled with /arch:AVX2 +#if defined(__AVX2__) +#define LO_AVX2_AVAILABLE +#endif + +#else // Clang and GCC + +#if defined(__SSE2__) || defined(__x86_64__) // SSE2 is required for X64 +#define LO_SSE2_AVAILABLE +#endif + +#if defined(__SSSE3__) +#define LO_SSSE3_AVAILABLE +#endif +#if defined(__AVX__) +#define LO_AVX_AVAILABLE + +#endif +#if defined(__AVX2__) +#define LO_AVX2_AVAILABLE +#endif + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx index 48cb55fae27a..365f4151aca6 100644 --- a/sc/source/core/data/formulacell.cxx +++ b/sc/source/core/data/formulacell.cxx @@ -4673,7 +4673,7 @@ bool ScFormulaCell::InterpretFormulaGroupThreading(sc::FormulaLogger::GroupScope bDependencyComputed = true; - const static bool bHyperThreadingActive = tools::cpuid::hasHyperThreading(); + const static bool bHyperThreadingActive = cpuid::hasHyperThreading(); // Then do the threaded calculation diff --git a/sc/source/core/inc/arraysumfunctor.hxx b/sc/source/core/inc/arraysumfunctor.hxx index 226962cf1f0f..05977c026361 100644 --- a/sc/source/core/inc/arraysumfunctor.hxx +++ b/sc/source/core/inc/arraysumfunctor.hxx @@ -13,21 +13,18 @@ #include <cstdint> #include <rtl/math.hxx> + +#include <tools/simdsupport.hxx> +#include <tools/simd.hxx> #include <tools/cpuid.hxx> #if defined(LO_SSE2_AVAILABLE) -#include <emmintrin.h> +#include <x86intrin.h> #endif namespace sc { -template<typename T, unsigned int N> -inline bool isAligned(const T* pointer) -{ - return 0 == (uintptr_t(pointer) % N); -} - struct ArraySumFunctor { private: @@ -43,7 +40,7 @@ public: double operator() () { - const static bool hasSSE2 = tools::cpuid::hasSSE2(); + const static bool hasSSE2 = cpuid::hasSSE2(); double fSum = 0.0; size_t i = 0; @@ -51,7 +48,7 @@ public: if (hasSSE2) { - while ( i < mnSize && !isAligned<double, 16>(pCurrent)) + while ( i < mnSize && !simd::isAligned<double, 16>(pCurrent)) { fSum += *pCurrent++; i++; diff --git a/tools/CppunitTest_tools_test.mk b/tools/CppunitTest_tools_test.mk index ad56d893ae80..a4cdf8626f3a 100644 --- a/tools/CppunitTest_tools_test.mk +++ b/tools/CppunitTest_tools_test.mk @@ -31,6 +31,10 @@ $(eval $(call gb_CppunitTest_add_exception_objects,tools_test, \ tools/qa/cppunit/test_xmlwalker \ )) +$(eval $(call gb_CppunitTest_add_cxxobjects,tools_test,\ + tools/qa/cppunit/test_cpuid, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS) \ +)) + $(eval $(call gb_CppunitTest_use_sdk_api,tools_test)) $(eval $(call gb_CppunitTest_use_libraries,tools_test, \ diff --git a/tools/qa/cppunit/test_cpuid.cxx b/tools/qa/cppunit/test_cpuid.cxx new file mode 100644 index 000000000000..fdb19d0ec133 --- /dev/null +++ b/tools/qa/cppunit/test_cpuid.cxx @@ -0,0 +1,74 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <cppunit/TestAssert.h> +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> +#include <tools/cpuid.hxx> +#include <tools/simd.hxx> +#include <rtl/ustring.hxx> + +namespace +{ +class CpuInstructionSetSupport : public CppUnit::TestFixture +{ +public: + void testCpuInstructionSetSupport(); + + CPPUNIT_TEST_SUITE(CpuInstructionSetSupport); + CPPUNIT_TEST(testCpuInstructionSetSupport); + CPPUNIT_TEST_SUITE_END(); +}; + +void CpuInstructionSetSupport::testCpuInstructionSetSupport() +{ + OUString aString = cpuid::instructionSetSupportedString(); + + if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSE2)) + { + CPPUNIT_ASSERT(aString.indexOf("SSE2") >= 0); + } + + if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSSE3)) + { + CPPUNIT_ASSERT(aString.indexOf("SSSE3") >= 0); + } + + if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX)) + { + CPPUNIT_ASSERT(aString.indexOf("AVX") > 0); + } + + if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX2)) + { + CPPUNIT_ASSERT(aString.indexOf("AVX2") > 0); + } + +#ifdef LO_SSE2_AVAILABLE + CPPUNIT_ASSERT_EQUAL(cpuid::hasSSE2(), + cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSE2)); +#endif + +#ifdef LO_SSSE3_AVAILABLE + CPPUNIT_ASSERT_EQUAL(cpuid::hasSSSE3(), + cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSE2)); +#endif + +#ifdef LO_AVX2_AVAILABLE + CPPUNIT_ASSERT_EQUAL(cpuid::hasAVX2(), + cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX2)); +#endif +} + +CPPUNIT_TEST_SUITE_REGISTRATION(CpuInstructionSetSupport); + +} // end anonymous namespace + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/tools/source/misc/cpuid.cxx b/tools/source/misc/cpuid.cxx index ee5093ce1892..e8699cbdf51c 100644 --- a/tools/source/misc/cpuid.cxx +++ b/tools/source/misc/cpuid.cxx @@ -11,25 +11,21 @@ #include <tools/cpuid.hxx> #include <cstdint> -namespace tools -{ -namespace cpuid -{ +namespace cpuid { + +namespace { -namespace -{ #if defined(_MSC_VER) #include <intrin.h> -void getCpuId(uint32_t array[4]) +void getCpuId(uint32_t array[4], uint32_t nInfoType) { - __cpuid(reinterpret_cast<int*>(array), 1); + __cpuid(reinterpret_cast<int*>(array), nInfoType); } -#else -#if (defined(__i386__) || defined(__x86_64__)) +#elif (defined(__i386__) || defined(__x86_64__)) #include <cpuid.h> -void getCpuId(uint32_t array[4]) +void getCpuId(uint32_t array[4], uint32_t nInfoType) { - __get_cpuid(1, array + 0, array + 1, array + 2, array + 3); + __cpuid_count(nInfoType, 0, *(array + 0), *(array + 1), *(array + 2), *(array + 3)); } #else void getCpuId(uint32_t array[4]) @@ -37,33 +33,104 @@ void getCpuId(uint32_t array[4]) array[0] = array[1] = array[2] = array[3] = 0; } #endif + +// For AVX we need to check if OS has support for ymm registers +bool checkAVXSupportInOS() +{ + uint32_t xcr0 = 0; +#if defined(_MSC_VER) + xcr0 = uint32_t(_xgetbv(0)); +#elif (defined(__i386__) || defined(__x86_64__)) + __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); #endif + return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */ } -#if defined(LO_SSE2_AVAILABLE) +} // end anonymous namespace -bool hasSSE2() +#define HYPER_bit (1 << 28) +#define SSE2_bit (1 << 26) +#define SSSE3_bit (1 << 9) +#define SSE41_bit (1 << 19) +#define SSE42_bit (1 << 20) +#define XSAVE_bit (1 << 27) +#define AVX_bit (1 << 28) +#define AVX2_bit (1 << 5) + +InstructionSetFlags getCpuInstructionSetFlags() { - uint32_t cpuInfoArray[] = {0, 0, 0, 0}; - getCpuId(cpuInfoArray); - return (cpuInfoArray[3] & (1 << 26)) != 0; -} + InstructionSetFlags eInstructions = InstructionSetFlags::NONE; -#else + uint32_t info[] = {0, 0, 0, 0}; + getCpuId(info, 0); + int nLevel = info[0]; -bool hasSSE2() { return false; } + if (nLevel >= 1) + { + uint32_t aCpuInfoArray[] = {0, 0, 0, 0}; + getCpuId(aCpuInfoArray, 1); -#endif + if ((aCpuInfoArray[3] & HYPER_bit) != 0) + eInstructions |= InstructionSetFlags::HYPER; -bool hasHyperThreading() -{ - uint32_t cpuInfoArray[] = {0, 0, 0, 0}; - getCpuId(cpuInfoArray); - return (cpuInfoArray[3] & (1 << 28)) != 0; + if ((aCpuInfoArray[3] & SSE2_bit) != 0) + eInstructions |= InstructionSetFlags::SSE2; + + if ((aCpuInfoArray[2] & SSSE3_bit) != 0) + eInstructions |= InstructionSetFlags::SSSE3; + + if ((aCpuInfoArray[2] & SSE41_bit ) != 0) + eInstructions |= InstructionSetFlags::SSE41; + + if ((aCpuInfoArray[2] & SSE42_bit) != 0) + eInstructions |= InstructionSetFlags::SSE42; + + if (((aCpuInfoArray[2] & AVX_bit) != 0) && + ((aCpuInfoArray[2] & XSAVE_bit) != 0)) + { + if (checkAVXSupportInOS()) + { + eInstructions |= InstructionSetFlags::AVX; + + if (nLevel >= 7) + { + uint32_t aExtendedInfo[] = {0, 0, 0, 0}; + getCpuId(aExtendedInfo, 7); + + if ((aExtendedInfo[1] & AVX2_bit) != 0) + eInstructions |= InstructionSetFlags::AVX2; + } + } + } + } + + return eInstructions; } +bool isCpuInstructionSetSupported(InstructionSetFlags eInstructions) +{ + static InstructionSetFlags eCPUFlags = getCpuInstructionSetFlags(); + return (eCPUFlags & eInstructions) == eInstructions; } + +OUString instructionSetSupportedString() +{ + OUString aString; + if (isCpuInstructionSetSupported(InstructionSetFlags::SSE2)) + aString += "SSE2 "; + if (isCpuInstructionSetSupported(InstructionSetFlags::SSSE3)) + aString += "SSSE3 "; + if (isCpuInstructionSetSupported(InstructionSetFlags::SSE41)) + aString += "SSE4.1 "; + if (isCpuInstructionSetSupported(InstructionSetFlags::SSE42)) + aString += "SSE4.2 "; + if (isCpuInstructionSetSupported(InstructionSetFlags::AVX)) + aString += "AVX "; + if (isCpuInstructionSetSupported(InstructionSetFlags::AVX2)) + aString += "AVX2 "; + return aString; } +} // end cpuid /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |