summaryrefslogtreecommitdiff
path: root/svl
diff options
context:
space:
mode:
authorNoel Grandin <noel@peralex.com>2021-09-06 12:46:51 +0200
committerNoel Grandin <noel.grandin@collabora.co.uk>2021-09-09 09:06:45 +0200
commit3749d9af3745c0eaff7239e379578e4e2af89e9d (patch)
treeb769195ba13d277ce1d9d6f2a97ecfe95a6f13b9 /svl
parent4bf266233daa7d9ed030a20fa4f487f9f5a82379 (diff)
tdf#130795 use concurrent hashmap in SharedStringPool
we are loading a spreadsheet in parallel here, but the parallel threads achievei very little actual concurrency because of heavy contention in the SharedStringPool mutex. So switch to a concurrent hash map. I looked at a couple of different ones (including the Folly one), and this was the one with the simplest resulting code. This takes my load time from 12.5s to 8s Change-Id: I04d6d8e11d613b510eb3bc981f3338819b7ac813 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/121717 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Diffstat (limited to 'svl')
-rw-r--r--svl/Library_svl.mk1
-rw-r--r--svl/source/misc/sharedstringpool.cxx154
2 files changed, 83 insertions, 72 deletions
diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk
index 17d64fe971fd..b81660089ea0 100644
--- a/svl/Library_svl.mk
+++ b/svl/Library_svl.mk
@@ -21,6 +21,7 @@ $(eval $(call gb_Library_Library,svl))
$(eval $(call gb_Library_use_externals,svl,\
boost_headers \
+ cuckoo_headers \
$(if $(filter LINUX MACOSX ANDROID iOS %BSD SOLARIS HAIKU,$(OS)), \
curl) \
dtoa \
diff --git a/svl/source/misc/sharedstringpool.cxx b/svl/source/misc/sharedstringpool.cxx
index 2fe8fd8a13ff..bf1f5554b957 100644
--- a/svl/source/misc/sharedstringpool.cxx
+++ b/svl/source/misc/sharedstringpool.cxx
@@ -15,50 +15,57 @@
#include <unordered_map>
#include <unordered_set>
-/** create a key class that caches the hashcode */
-namespace
+#if defined __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+#if defined _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4324) // structure was padded due to alignment specifier
+#endif
+#include <libcuckoo/cuckoohash_map.hh>
+#if defined _MSC_VER
+#pragma warning(pop)
+#endif
+#if defined __clang__
+#pragma clang diagnostic pop
+#endif
+
+namespace svl
{
-struct StringWithHash
+namespace
{
- OUString str;
- sal_Int32 hashCode;
- StringWithHash(OUString s)
- : str(s)
- , hashCode(s.hashCode())
- {
- }
+sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
+
+// we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use
+typedef std::pair<OUString, OUString> Mapped;
- bool operator==(StringWithHash const& rhs) const
+struct HashFunction
+{
+ size_t operator()(rtl_uString* const key) const
{
- if (hashCode != rhs.hashCode)
- return false;
- return str == rhs.str;
+ return rtl_ustr_hashCode_WithLength(key->buffer, key->length);
}
};
-}
-namespace std
-{
-template <> struct hash<StringWithHash>
+struct EqualsFunction
{
- std::size_t operator()(const StringWithHash& k) const { return k.hashCode; }
+ bool operator()(rtl_uString* const lhs, rtl_uString* const rhs) const
+ {
+ return OUString::unacquired(&lhs) == OUString::unacquired(&rhs);
+ }
};
}
-namespace svl
-{
-namespace
-{
-sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
-}
-
struct SharedStringPool::Impl
{
- mutable std::mutex maMutex;
// We use this map for two purposes - to store lower->upper case mappings
- // and to retrieve a shared uppercase object, so the management logic
- // is quite complex.
- std::unordered_map<StringWithHash, OUString> maStrMap;
+ // and to store an upper->upper mapping.
+ // The second mapping is used so that we can
+ // share the same rtl_uString object between different keys which map to the same uppercase string to save memory.
+ //
+ // Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html
+ libcuckoo::cuckoohash_map<rtl_uString*, Mapped, HashFunction, EqualsFunction> maStrMap;
const CharClass& mrCharClass;
explicit Impl(const CharClass& rCharClass)
@@ -76,43 +83,50 @@ SharedStringPool::~SharedStringPool() {}
SharedString SharedStringPool::intern(const OUString& rStr)
{
- StringWithHash aStrWithHash(rStr);
- std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
+ auto& rMap = mpImpl->maStrMap;
- auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr);
- if (!bInserted)
+ rtl_uString *pResultLower, *pResultUpper;
+ if (rMap.find_fn(rStr.pData, [&](const Mapped& rMapped) {
+ pResultLower = rMapped.first.pData;
+ pResultUpper = rMapped.second.pData;
+ }))
// there is already a mapping
- return SharedString(mapIt->first.str.pData, mapIt->second.pData);
+ return SharedString(pResultLower, pResultUpper);
// This is a new string insertion. Establish mapping to upper-case variant.
OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
+
+ // either insert a new upper->upper mapping, or write the existing mapping into aUpper
+ mpImpl->maStrMap.uprase_fn(aUpper.pData,
+ [&](Mapped& mapped) -> bool {
+ aUpper = mapped.second;
+ return false;
+ },
+ aUpper, aUpper);
+
if (aUpper == rStr)
- // no need to do anything more, because we inserted an upper->upper mapping
- return SharedString(mapIt->first.str.pData, mapIt->second.pData);
-
- // We need to insert a lower->upper mapping, so also insert
- // an upper->upper mapping, which we can use both for when an upper string
- // is interned, and to look up a shared upper string.
- StringWithHash aUpperWithHash(aUpper);
- auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash);
- if (mapIt2 != mpImpl->maStrMap.end())
+ // no need to do anything more, because the key is already uppercase
+ return SharedString(aUpper.pData, aUpper.pData);
+
+ // either insert a new lower->upper mapping, or write the existing mapping into aLower
+ if (mpImpl->maStrMap.uprase_fn(rStr.pData,
+ [&](Mapped& mapped) -> bool {
+ pResultLower = mapped.first.pData;
+ pResultUpper = mapped.second.pData;
+ return false;
+ },
+ rStr, aUpper))
{
- // there is an already existing upper string
- mapIt->second = mapIt2->first.str;
- return SharedString(mapIt->first.str.pData, mapIt->second.pData);
+ pResultLower = rStr.pData;
+ pResultUpper = aUpper.pData;
}
- // There is no already existing upper string.
- // First, update using the iterator, can't do this later because
- // the iterator will be invalid.
- mapIt->second = aUpper;
- mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper);
- return SharedString(rStr.pData, aUpper.pData);
+ return SharedString(pResultLower, pResultUpper);
}
void SharedStringPool::purge()
{
- std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
+ auto locked_table = mpImpl->maStrMap.lock_table();
// Because we can have an uppercase entry mapped to itself,
// and then a bunch of lowercase entries mapped to that same
@@ -120,12 +134,12 @@ void SharedStringPool::purge()
// time to remove lowercase entries, and then only can we
// check for unused uppercase entries.
- auto it = mpImpl->maStrMap.begin();
- auto itEnd = mpImpl->maStrMap.end();
+ auto it = locked_table.begin();
+ auto itEnd = locked_table.end();
while (it != itEnd)
{
- rtl_uString* p1 = it->first.str.pData;
- rtl_uString* p2 = it->second.pData;
+ rtl_uString* p1 = it->second.first.pData;
+ rtl_uString* p2 = it->second.second.pData;
if (p1 != p2)
{
// normal case - lowercase mapped to uppercase, which
@@ -133,19 +147,19 @@ void SharedStringPool::purge()
// entry as the key in the map
if (getRefCount(p1) == 1)
{
- it = mpImpl->maStrMap.erase(it);
+ it = locked_table.erase(it);
continue;
}
}
++it;
}
- it = mpImpl->maStrMap.begin();
- itEnd = mpImpl->maStrMap.end();
+ it = locked_table.begin();
+ itEnd = locked_table.end();
while (it != itEnd)
{
- rtl_uString* p1 = it->first.str.pData;
- rtl_uString* p2 = it->second.pData;
+ rtl_uString* p1 = it->second.first.pData;
+ rtl_uString* p2 = it->second.second.pData;
if (p1 == p2)
{
// uppercase which is mapped to itself, which means
@@ -153,7 +167,7 @@ void SharedStringPool::purge()
// one ref-counted entry in the value in the map
if (getRefCount(p1) == 2)
{
- it = mpImpl->maStrMap.erase(it);
+ it = locked_table.erase(it);
continue;
}
}
@@ -161,19 +175,15 @@ void SharedStringPool::purge()
}
}
-size_t SharedStringPool::getCount() const
-{
- std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
- return mpImpl->maStrMap.size();
-}
+size_t SharedStringPool::getCount() const { return mpImpl->maStrMap.size(); }
size_t SharedStringPool::getCountIgnoreCase() const
{
- std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
// this is only called from unit tests, so no need to be efficient
std::unordered_set<OUString> aUpperSet;
- for (auto const& pair : mpImpl->maStrMap)
- aUpperSet.insert(pair.second);
+ auto locked_table = mpImpl->maStrMap.lock_table();
+ for (auto const& pair : locked_table)
+ aUpperSet.insert(pair.second.second);
return aUpperSet.size();
}
}