diff options
author | Michael Meeks <michael.meeks@novell.com> | 2010-09-17 13:13:04 +0200 |
---|---|---|
committer | Cédric Bosdonnat <cedricbosdo@openoffice.org> | 2010-09-17 13:16:27 +0200 |
commit | d02318df60b8ab8abe0006bacc5ea15b896be553 (patch) | |
tree | b0ae6659b770ec601556babff7749acc6d60da30 /sal/rtl | |
parent | 2d2ef785745a258659ccb3c262265ae2cd1990a9 (diff) |
sal-strintern-speed.diff: sal-strintern - speedup
i#78496
Diffstat (limited to 'sal/rtl')
-rw-r--r-- | sal/rtl/source/hash.cxx | 202 |
1 files changed, 165 insertions, 37 deletions
diff --git a/sal/rtl/source/hash.cxx b/sal/rtl/source/hash.cxx index 7caa2341ca11..86454545ccd5 100644 --- a/sal/rtl/source/hash.cxx +++ b/sal/rtl/source/hash.cxx @@ -27,67 +27,164 @@ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_sal.hxx" -#include "rtl/allocator.hxx" #include "hash.h" #include "strimp.h" +#include <osl/diagnose.h> +struct StringHashTableImpl { + sal_uInt32 nEntries; + sal_uInt32 nSize; + rtl_uString **pData; +}; + +typedef StringHashTableImpl StringHashTable; -#include <hash_set> +// Only for use in the implementation +static StringHashTable *rtl_str_hash_new (sal_uInt32 nSize); +static void rtl_str_hash_free (StringHashTable *pHash); -namespace { +} -struct UStringHash +StringHashTable * +getHashTable () { - size_t operator()(rtl_uString * const &rString) const - { return (size_t)rtl_ustr_hashCode_WithLength( rString->buffer, rString->length ); } -}; + static StringHashTable *pInternPool = NULL; + if (pInternPool == NULL) { + static StringHashTable* pHash = rtl_str_hash_new(1024); + pInternPool = pHash; + } + return pInternPool; +} + +// Better / smaller / faster hash set .... + +// TODO: add bottom bit-set list terminator to string list -struct UStringEqual +static sal_uInt32 +getNextSize (sal_uInt32 nSize) { - sal_Bool operator() ( rtl_uString * const &pStringA, - rtl_uString * const &pStringB) const + // Sedgewick - Algorithms in C P577. + static const sal_uInt32 nPrimes[] = { 1021, 2039, 4093, 8191, 16381, 32749, + 65521, 131071,262139, 524287, 1048573, + 2097143, 4194301, 8388593, 16777213, + 33554393, 67108859, 134217689 }; + #define NUM_PRIMES (sizeof (nPrimes)/ sizeof (nPrimes[0])) + for (sal_uInt32 i = 0; i < NUM_PRIMES; i++) { - if (pStringA == pStringB) - return true; - if (pStringA->length != pStringB->length) - return false; - return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length, - pStringB->buffer, pStringB->length); + if (nPrimes[i] > nSize) + return nPrimes[i]; } -}; + return nSize * 2; +} -typedef std::hash_set< rtl_uString *, UStringHash, UStringEqual, - rtl::Allocator<rtl_uString *> > StringHashTable; +static sal_uInt32 +hashString (rtl_uString *pString) +{ + return (sal_uInt32) rtl_ustr_hashCode_WithLength (pString->buffer, + pString->length); +} -StringHashTable * -getHashTable () +static StringHashTable * +rtl_str_hash_new (sal_uInt32 nSize) { - static StringHashTable *pInternPool = NULL; - if (pInternPool == NULL) { - static StringHashTable aImpl(1024); - pInternPool = &aImpl; + StringHashTable *pHash = (StringHashTable *)malloc (sizeof (StringHashTable)); + + pHash->nEntries = 0; + pHash->nSize = getNextSize (nSize); + pHash->pData = (rtl_uString **) calloc (sizeof (rtl_uString *), pHash->nSize); + + return pHash; +} + +static void +rtl_str_hash_free (StringHashTable *pHash) +{ + if (!pHash) + return; + if (pHash->pData) + free (pHash->pData); + free (pHash); +} + +static void +rtl_str_hash_insert_nonequal (StringHashTable *pHash, + rtl_uString *pString) +{ + sal_uInt32 nHash = hashString (pString); + sal_uInt32 n; + rtl_uString *pHashStr; + + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + n++; + if (n >= pHash->nSize) + n = 0; } - return pInternPool; + pHash->pData[n] = pString; +} + +static void +rtl_str_hash_resize (sal_uInt32 nNewSize) +{ + sal_uInt32 i; + StringHashTable *pNewHash; + StringHashTable *pHash = getHashTable(); + + OSL_ASSERT (nNewSize > pHash->nEntries); + + pNewHash = rtl_str_hash_new (nNewSize); + + for (i = 0; i < pHash->nSize; i++) + { + if (pHash->pData[i] != NULL) + rtl_str_hash_insert_nonequal (pNewHash, pHash->pData[i]); + } + pNewHash->nEntries = pHash->nEntries; + free (pHash->pData); + *pHash = *pNewHash; + pNewHash->pData = NULL; + rtl_str_hash_free (pNewHash); } +static int +compareEqual (rtl_uString *pStringA, rtl_uString *pStringB) +{ + if (pStringA == pStringB) + return 1; + if (pStringA->length != pStringB->length) + return 0; + return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length, + pStringB->buffer, pStringB->length); } -extern "C" { rtl_uString * rtl_str_hash_intern (rtl_uString *pString, int can_return) { + sal_uInt32 nHash = hashString (pString); + sal_uInt32 n; + rtl_uString *pHashStr; + StringHashTable *pHash = getHashTable(); - StringHashTable::iterator aIter; - aIter = pHash->find(pString); - if (aIter != pHash->end()) - { - rtl_uString *pHashStr = *aIter; - rtl_uString_acquire (pHashStr); - return pHashStr; + + // Should we resize ? + if (pHash->nEntries >= pHash->nSize/2) + rtl_str_hash_resize (getNextSize(pHash->nSize)); + + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + if (compareEqual (pHashStr, pString)) + { + rtl_uString_acquire (pHashStr); + return pHashStr; + } + n++; + if (n >= pHash->nSize) + n = 0; } + if (!can_return) { rtl_uString *pCopy = NULL; @@ -99,7 +196,8 @@ rtl_str_hash_intern (rtl_uString *pString, if (!SAL_STRING_IS_STATIC (pString)) pString->refCount |= SAL_STRING_INTERN_FLAG; - pHash->insert(pString); + pHash->pData[n] = pString; + pHash->nEntries++; return pString; } @@ -107,7 +205,37 @@ rtl_str_hash_intern (rtl_uString *pString, void rtl_str_hash_remove (rtl_uString *pString) { - getHashTable()->erase(pString); -} + sal_uInt32 n; + sal_uInt32 nHash = hashString (pString); + rtl_uString *pHashStr; + + StringHashTable *pHash = getHashTable(); + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + if (compareEqual (pHashStr, pString)) + break; + n++; + if (n >= pHash->nSize) + n = 0; + } + OSL_ASSERT (pHash->pData[n] != 0); + if (pHash->pData[n] == NULL) + return; + + pHash->pData[n++] = NULL; + pHash->nEntries--; + + if (n >= pHash->nSize) + n = 0; + + while ((pHashStr = pHash->pData[n]) != NULL) { + pHash->pData[n] = NULL; + // FIXME: rather unsophisticated and N^2 in chain-length, but robust. + rtl_str_hash_insert_nonequal (pHash, pHashStr); + n++; + if (n >= pHash->nSize) + n = 0; + } + // FIXME: Should we down-size ? } |