diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2022-02-16 09:45:40 +0100 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2022-02-16 18:15:27 +0100 |
commit | 4e4a01302a140d75a49055821b3197a2eda81db5 (patch) | |
tree | 56d4d125b79c24397d651d83540e52cc16ce19f2 | |
parent | acdf524e12d302b4bf9367a8b25408746737e123 (diff) |
Related: tdf#147421: optimize O(U)String's replaceAll*
... and underlying low-level C functions.
Change-Id: I78c2dd75e272e6d3cdd2cfae007ca0a0ec1635e1
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129942
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r-- | sal/rtl/string.cxx | 9 | ||||
-rw-r--r-- | sal/rtl/strtmpl.hxx | 71 | ||||
-rw-r--r-- | sal/rtl/ustring.cxx | 41 |
3 files changed, 76 insertions, 45 deletions
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index d67246dc159c..a2020561a7c7 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -361,14 +361,7 @@ void rtl_string_newReplaceAll(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    rtl_string_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_string_newReplaceFirst(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 sal_Int32 SAL_CALL rtl_str_getLength(const char* pStr) SAL_THROW_EXTERN_C()
diff --git a/sal/rtl/strtmpl.hxx b/sal/rtl/strtmpl.hxx
index 7a909bb5efe3..4f04cc404ee5 100644
--- a/sal/rtl/strtmpl.hxx
+++ b/sal/rtl/strtmpl.hxx
@@ -35,6 +35,9 @@
 #include <wchar.h>
 #include <sal/log.hxx>
 #include <rtl/character.hxx>
+#include <rtl/strbuf.h>
+#include <rtl/ustrbuf.h>
+#include <rtl/ustring.hxx>
 
 namespace rtl::str
 {
@@ -1808,6 +1811,74 @@ sal_Int32 getToken ( IMPL_RTL_STRINGDATA** ppThis
     }
 }
 
+namespace detail
+{
+template <typename CharType1, typename CharType2>
+sal_Int32 indexOf(const CharType1* s, sal_Int32 len, const CharType2* subStr, sal_Int32 subLen)
+{
+    if constexpr (std::is_same_v<CharType1, CharType2>)
+        return indexOfStr_WithLength(s, len, subStr, subLen);
+    else if constexpr (std::is_same_v<CharType1, sal_Unicode> && std::is_same_v<CharType2, char>)
+        return rtl_ustr_indexOfAscii_WithLength(s, len, subStr, subLen);
+}
+
+template <class S, typename CharType1>
+void append(S** s, sal_Int32* capacity, const CharType1* s1, sal_Int32 len)
+{
+    if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, sal_Unicode>)
+        return rtl_uStringbuffer_insert(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, char>)
+        return rtl_uStringbuffer_insert_ascii(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_String> && std::is_same_v<CharType1, char>)
+        return rtl_stringbuffer_insert(s, capacity, (*s)->length, s1, len);
+}
+}
+
+template <class S, typename CharTypeFrom, typename CharTypeTo>
+void newReplaceAllFromIndex(S** s, S* s1, CharTypeFrom const* from, sal_Int32 fromLength,
+                            CharTypeTo const* to, sal_Int32 toLength, sal_Int32 fromIndex)
+{
+    assert(s != nullptr);
+    assert(s1 != nullptr);
+    assert(fromLength >= 0);
+    assert(from != nullptr || fromLength == 0);
+    assert(toLength >= 0);
+    assert(to != nullptr || toLength == 0);
+    assert(fromIndex >= 0 && fromIndex <= s1->length);
+    sal_Int32 i = detail::indexOf(s1->buffer + fromIndex, s1->length - fromIndex, from, fromLength);
+    if (i >= 0)
+    {
+        if (s1->length - fromLength > SAL_MAX_INT32 - toLength)
+            std::abort();
+        acquire(s1); // in case *s == s1
+        sal_Int32 nCapacity = s1->length + (toLength - fromLength);
+        if (fromLength < toLength)
+        {
+            // Pre-allocate up to 16 replacements more
+            const sal_Int32 nMaxMoreFinds = (s1->length - fromIndex - i - fromLength) / fromLength;
+            const sal_Int32 nIncrease = toLength - fromLength;
+            const sal_Int32 nMoreReplacements = std::min(
+                { nMaxMoreFinds, (SAL_MAX_INT32 - nCapacity) / nIncrease, sal_Int32(16) });
+            nCapacity += nMoreReplacements * nIncrease;
+        }
+        new_WithLength(s, nCapacity);
+        i += fromIndex;
+        fromIndex = 0;
+        do
+        {
+            detail::append(s, &nCapacity, s1->buffer + fromIndex, i);
+            detail::append(s, &nCapacity, to, toLength);
+            fromIndex += i + fromLength;
+            i = detail::indexOf(s1->buffer + fromIndex, s1->length - fromIndex, from, fromLength);
+        } while (i >= 0);
+        // the rest
+        detail::append(s, &nCapacity, s1->buffer + fromIndex, s1->length - fromIndex);
+        release(s1);
+    }
+    else
+        assign(s, s1);
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index c550c99c419e..70048db58f6e 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -1367,15 +1367,7 @@ void rtl_uString_newReplaceAllAsciiLAsciiL(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstAsciiLAsciiL(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllAsciiLUtf16L(
@@ -1383,15 +1375,7 @@ void rtl_uString_newReplaceAllAsciiLUtf16L(
     sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstAsciiLUtf16L(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllUtf16LAsciiL(
@@ -1399,15 +1383,7 @@ void rtl_uString_newReplaceAllUtf16LAsciiL(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstUtf16LAsciiL(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllUtf16LUtf16L(
@@ -1423,16 +1399,7 @@ void rtl_uString_newReplaceAllFromIndexUtf16LUtf16L(
     sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, sal_Int32 fromIndex)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    assert(fromIndex >= 0 && fromIndex <= str->length);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = fromIndex;; i += toLength) {
-        rtl_uString_newReplaceFirstUtf16LUtf16L(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, fromIndex);
 }
 
 sal_Int32 SAL_CALL rtl_ustr_getLength(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()