diff options
author | Xisco Fauli <xiscofauli@libreoffice.org> | 2022-08-17 12:59:51 +0200 |
---|---|---|
committer | Andras Timar <andras.timar@collabora.com> | 2022-08-28 14:00:18 +0200 |
commit | 612fc2f6e7065473833b4aac8ea76ec2bffdfd62 (patch) | |
tree | 8d1f45d41872d3ab94779db8a507538c660d69a5 /svl | |
parent | 9662729a1ea4ca7fe8bbbcb5193a6eeb97ff8ac6 (diff) |
tdf#150452: Revert "tdf#130795 use concurrent hashmap in SharedStringPool"
This commit reverts 3749d9af3745c0eaff7239e379578e4e2af89e9d
which removes the dependency on the external library cuckoo
Without using cuckoo the same file in tdf#130795 takes
real 0m4,892s
user 0m5,298s
sys 0m0,449s
With it, it takes
real 0m4,914s
user 0m5,276s
sys 0m0,444s
pretty much the same time
Change-Id: I4cc9000ac5bf26de22bb9835283ae8d5b3230196
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138435
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138446
Diffstat (limited to 'svl')
-rw-r--r-- | svl/Library_svl.mk | 1 | ||||
-rw-r--r-- | svl/source/misc/sharedstringpool.cxx | 161 |
2 files changed, 72 insertions, 90 deletions
diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk index 4e388ca0d471..6e79be4fcf06 100644 --- a/svl/Library_svl.mk +++ b/svl/Library_svl.mk @@ -21,7 +21,6 @@ $(eval $(call gb_Library_Library,svl)) $(eval $(call gb_Library_use_externals,svl,\ boost_headers \ - cuckoo_headers \ $(if $(filter LINUX MACOSX ANDROID iOS %BSD SOLARIS HAIKU,$(OS)), \ curl) \ dtoa \ diff --git a/svl/source/misc/sharedstringpool.cxx b/svl/source/misc/sharedstringpool.cxx index d88b0a2c22ef..2fe8fd8a13ff 100644 --- a/svl/source/misc/sharedstringpool.cxx +++ b/svl/source/misc/sharedstringpool.cxx @@ -15,64 +15,50 @@ #include <unordered_map> #include <unordered_set> -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" -#endif -#if defined __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-parameter" -#endif -#if defined _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) // structure was padded due to alignment specifier -#endif -#include <libcuckoo/cuckoohash_map.hh> -#if defined _MSC_VER -#pragma warning(pop) -#endif -#if defined __clang__ -#pragma clang diagnostic pop -#endif -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -namespace svl -{ +/** create a key class that caches the hashcode */ namespace { -sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); } - -// we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use -typedef std::pair<OUString, OUString> Mapped; - -struct HashFunction +struct StringWithHash { - size_t operator()(rtl_uString* const key) const + OUString str; + sal_Int32 hashCode; + StringWithHash(OUString s) + : str(s) + , hashCode(s.hashCode()) { - return rtl_ustr_hashCode_WithLength(key->buffer, key->length); } -}; -struct EqualsFunction -{ - bool operator()(rtl_uString* const lhs, rtl_uString* const rhs) const + bool operator==(StringWithHash const& rhs) const { - return OUString::unacquired(&lhs) == OUString::unacquired(&rhs); + if (hashCode != rhs.hashCode) + return false; + return str == rhs.str; } }; } +namespace std +{ +template <> struct hash<StringWithHash> +{ + std::size_t operator()(const StringWithHash& k) const { return k.hashCode; } +}; +} + +namespace svl +{ +namespace +{ +sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); } +} + struct SharedStringPool::Impl { + mutable std::mutex maMutex; // We use this map for two purposes - to store lower->upper case mappings - // and to store an upper->upper mapping. - // The second mapping is used so that we can - // share the same rtl_uString object between different keys which map to the same uppercase string to save memory. - // - // Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html - libcuckoo::cuckoohash_map<rtl_uString*, Mapped, HashFunction, EqualsFunction> maStrMap; + // and to retrieve a shared uppercase object, so the management logic + // is quite complex. + std::unordered_map<StringWithHash, OUString> maStrMap; const CharClass& mrCharClass; explicit Impl(const CharClass& rCharClass) @@ -90,50 +76,43 @@ SharedStringPool::~SharedStringPool() {} SharedString SharedStringPool::intern(const OUString& rStr) { - auto& rMap = mpImpl->maStrMap; + StringWithHash aStrWithHash(rStr); + std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex); - rtl_uString *pResultLower = {}, *pResultUpper = {}; // bogus GCC 12 -Werror=maybe-uninitialized - if (rMap.find_fn(rStr.pData, [&](const Mapped& rMapped) { - pResultLower = rMapped.first.pData; - pResultUpper = rMapped.second.pData; - })) + auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr); + if (!bInserted) // there is already a mapping - return SharedString(pResultLower, pResultUpper); + return SharedString(mapIt->first.str.pData, mapIt->second.pData); // This is a new string insertion. Establish mapping to upper-case variant. OUString aUpper = mpImpl->mrCharClass.uppercase(rStr); - - // either insert a new upper->upper mapping, or write the existing mapping into aUpper - mpImpl->maStrMap.uprase_fn(aUpper.pData, - [&](Mapped& mapped) -> bool { - aUpper = mapped.second; - return false; - }, - aUpper, aUpper); - if (aUpper == rStr) - // no need to do anything more, because the key is already uppercase - return SharedString(aUpper.pData, aUpper.pData); - - // either insert a new lower->upper mapping, or write the existing mapping into aLower - if (mpImpl->maStrMap.uprase_fn(rStr.pData, - [&](Mapped& mapped) -> bool { - pResultLower = mapped.first.pData; - pResultUpper = mapped.second.pData; - return false; - }, - rStr, aUpper)) + // no need to do anything more, because we inserted an upper->upper mapping + return SharedString(mapIt->first.str.pData, mapIt->second.pData); + + // We need to insert a lower->upper mapping, so also insert + // an upper->upper mapping, which we can use both for when an upper string + // is interned, and to look up a shared upper string. + StringWithHash aUpperWithHash(aUpper); + auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash); + if (mapIt2 != mpImpl->maStrMap.end()) { - pResultLower = rStr.pData; - pResultUpper = aUpper.pData; + // there is an already existing upper string + mapIt->second = mapIt2->first.str; + return SharedString(mapIt->first.str.pData, mapIt->second.pData); } - return SharedString(pResultLower, pResultUpper); + // There is no already existing upper string. + // First, update using the iterator, can't do this later because + // the iterator will be invalid. + mapIt->second = aUpper; + mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper); + return SharedString(rStr.pData, aUpper.pData); } void SharedStringPool::purge() { - auto locked_table = mpImpl->maStrMap.lock_table(); + std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex); // Because we can have an uppercase entry mapped to itself, // and then a bunch of lowercase entries mapped to that same @@ -141,12 +120,12 @@ void SharedStringPool::purge() // time to remove lowercase entries, and then only can we // check for unused uppercase entries. - auto it = locked_table.begin(); - auto itEnd = locked_table.end(); + auto it = mpImpl->maStrMap.begin(); + auto itEnd = mpImpl->maStrMap.end(); while (it != itEnd) { - rtl_uString* p1 = it->second.first.pData; - rtl_uString* p2 = it->second.second.pData; + rtl_uString* p1 = it->first.str.pData; + rtl_uString* p2 = it->second.pData; if (p1 != p2) { // normal case - lowercase mapped to uppercase, which @@ -154,19 +133,19 @@ void SharedStringPool::purge() // entry as the key in the map if (getRefCount(p1) == 1) { - it = locked_table.erase(it); + it = mpImpl->maStrMap.erase(it); continue; } } ++it; } - it = locked_table.begin(); - itEnd = locked_table.end(); + it = mpImpl->maStrMap.begin(); + itEnd = mpImpl->maStrMap.end(); while (it != itEnd) { - rtl_uString* p1 = it->second.first.pData; - rtl_uString* p2 = it->second.second.pData; + rtl_uString* p1 = it->first.str.pData; + rtl_uString* p2 = it->second.pData; if (p1 == p2) { // uppercase which is mapped to itself, which means @@ -174,7 +153,7 @@ void SharedStringPool::purge() // one ref-counted entry in the value in the map if (getRefCount(p1) == 2) { - it = locked_table.erase(it); + it = mpImpl->maStrMap.erase(it); continue; } } @@ -182,15 +161,19 @@ void SharedStringPool::purge() } } -size_t SharedStringPool::getCount() const { return mpImpl->maStrMap.size(); } +size_t SharedStringPool::getCount() const +{ + std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex); + return mpImpl->maStrMap.size(); +} size_t SharedStringPool::getCountIgnoreCase() const { + std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex); // this is only called from unit tests, so no need to be efficient std::unordered_set<OUString> aUpperSet; - auto locked_table = mpImpl->maStrMap.lock_table(); - for (auto const& pair : locked_table) - aUpperSet.insert(pair.second.second); + for (auto const& pair : mpImpl->maStrMap) + aUpperSet.insert(pair.second); return aUpperSet.size(); } } |