summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Kaganski <mike.kaganski@collabora.com> 2022-02-16 09:45:40 +0100
committer: Mike Kaganski <mike.kaganski@collabora.com> 2022-02-16 18:15:27 +0100
commit: 4e4a01302a140d75a49055821b3197a2eda81db5 (patch)
tree: 56d4d125b79c24397d651d83540e52cc16ce19f2
parent: acdf524e12d302b4bf9367a8b25408746737e123 (diff)
Related: tdf#147421: optimize O(U)String's replaceAll*

... and underlying low-level C functions.

Change-Id: I78c2dd75e272e6d3cdd2cfae007ca0a0ec1635e1
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129942
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r-- sal/rtl/string.cxx 9
-rw-r--r-- sal/rtl/strtmpl.hxx 71
-rw-r--r-- sal/rtl/ustring.cxx 41
3 files changed, 76 insertions, 45 deletions
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index d67246dc159c..a2020561a7c7 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -361,14 +361,7 @@ void rtl_string_newReplaceAll(
sal_Int32 fromLength, char const * to, sal_Int32 toLength)
SAL_THROW_EXTERN_C()
{
- rtl_string_assign(newStr, str);
- for (sal_Int32 i = 0;; i += toLength) {
- rtl_string_newReplaceFirst(
- newStr, *newStr, from, fromLength, to, toLength, &i);
- if (i == -1) {
- break;
- }
- }
+ rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
}
sal_Int32 SAL_CALL rtl_str_getLength(const char* pStr) SAL_THROW_EXTERN_C()
diff --git a/sal/rtl/strtmpl.hxx b/sal/rtl/strtmpl.hxx
index 7a909bb5efe3..4f04cc404ee5 100644
--- a/sal/rtl/strtmpl.hxx
+++ b/sal/rtl/strtmpl.hxx
@@ -35,6 +35,9 @@
#include <wchar.h>
#include <sal/log.hxx>
#include <rtl/character.hxx>
+#include <rtl/strbuf.h>
+#include <rtl/ustrbuf.h>
+#include <rtl/ustring.hxx>
namespace rtl::str
{
@@ -1808,6 +1811,74 @@ sal_Int32 getToken ( IMPL_RTL_STRINGDATA** ppThis
}
}
+namespace detail
+{
+// Always false, but dependent on its template arguments, so a static_assert on
+// it only fires when the enclosing template branch is actually instantiated.
+template <typename...> inline constexpr bool always_false_v = false;
+
+// Searches the buffer s (len characters) for subStr (subLen characters) by
+// forwarding to the search helper that matches the two character types, and
+// returns that helper's result unchanged. Callers in this file treat a
+// negative result as "not found".
+//
+// Supported combinations: identical character types, or a sal_Unicode buffer
+// searched for a char substring. Anything else is rejected at compile time
+// rather than flowing off the end of this non-void function.
+template <typename CharType1, typename CharType2>
+sal_Int32 indexOf(const CharType1* s, sal_Int32 len, const CharType2* subStr, sal_Int32 subLen)
+{
+    if constexpr (std::is_same_v<CharType1, CharType2>)
+        return indexOfStr_WithLength(s, len, subStr, subLen);
+    else if constexpr (std::is_same_v<CharType1, sal_Unicode> && std::is_same_v<CharType2, char>)
+        return rtl_ustr_indexOfAscii_WithLength(s, len, subStr, subLen);
+    else
+        static_assert(always_false_v<CharType1, CharType2>,
+                      "indexOf: unsupported combination of character types");
+}
+
+// Appends len characters starting at s1 to the end of the string buffer *s
+// (i.e. inserts them at offset (*s)->length), forwarding s and capacity to the
+// rtl string-buffer insert function that matches the string and character
+// types.
+//
+// Supported combinations: rtl_uString with sal_Unicode or char, and rtl_String
+// with char. Anything else is rejected at compile time; without that, the call
+// would compile to nothing and the characters would be silently dropped.
+template <class S, typename CharType1>
+void append(S** s, sal_Int32* capacity, const CharType1* s1, sal_Int32 len)
+{
+    if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, sal_Unicode>)
+        rtl_uStringbuffer_insert(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, char>)
+        rtl_uStringbuffer_insert_ascii(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_String> && std::is_same_v<CharType1, char>)
+        rtl_stringbuffer_insert(s, capacity, (*s)->length, s1, len);
+    else
+        static_assert(always_false_v<S, CharType1>,
+                      "append: unsupported combination of string and character types");
+}
+}
+
+// Stores in *s a copy of s1 in which every occurrence of from[0..fromLength)
+// located at or after fromIndex has been replaced by to[0..toLength).
+//
+// Searching always happens on s1 and resumes directly behind the previous
+// match, so matches never overlap. If there is no match, *s is just assigned
+// s1. The process is aborted if a single replacement would already make the
+// length exceed SAL_MAX_INT32.
+//
+// s     - receives the result; *s is allowed to be s1
+// s1    - source string, must not be null
+// from  - text to search for (may be null only if fromLength is 0)
+// to    - replacement text (may be null only if toLength is 0)
+// fromIndex - offset in s1 where searching starts, in [0, s1->length]
+template <class S, typename CharTypeFrom, typename CharTypeTo>
+void newReplaceAllFromIndex(S** s, S* s1, CharTypeFrom const* from, sal_Int32 fromLength,
+                            CharTypeTo const* to, sal_Int32 toLength, sal_Int32 fromIndex)
+{
+    assert(s != nullptr);
+    assert(s1 != nullptr);
+    assert(fromLength >= 0);
+    assert(from != nullptr || fromLength == 0);
+    assert(toLength >= 0);
+    assert(to != nullptr || toLength == 0);
+    assert(fromIndex >= 0 && fromIndex <= s1->length);
+
+    // Offset of the first match, relative to fromIndex
+    const sal_Int32 nFirstHit
+        = detail::indexOf(s1->buffer + fromIndex, s1->length - fromIndex, from, fromLength);
+    if (nFirstHit < 0)
+    {
+        // Nothing to replace: the result is the source string itself
+        assign(s, s1);
+        return;
+    }
+
+    if (s1->length - fromLength > SAL_MAX_INT32 - toLength)
+        std::abort();
+    acquire(s1); // keep s1 alive in case *s == s1
+
+    // Start with room for exactly one replacement ...
+    sal_Int32 nBufCapacity = s1->length + (toLength - fromLength);
+    const sal_Int32 nGrowthPerHit = toLength - fromLength;
+    if (nGrowthPerHit > 0)
+    {
+        // ... and, when replacing makes the string longer, reserve space for up
+        // to 16 further replacements, limited by how many matches could still
+        // fit behind the first one and by what stays within SAL_MAX_INT32.
+        const sal_Int32 nTailLen = s1->length - fromIndex - nFirstHit - fromLength;
+        const sal_Int32 nPossibleHits = nTailLen / fromLength;
+        const sal_Int32 nFittingHits = (SAL_MAX_INT32 - nBufCapacity) / nGrowthPerHit;
+        nBufCapacity += std::min({ nPossibleHits, nFittingHits, sal_Int32(16) }) * nGrowthPerHit;
+    }
+    new_WithLength(s, nBufCapacity);
+
+    // nConsumed: offset in s1 up to which the input has been handled.
+    // nRun: number of unchanged characters between nConsumed and the next match;
+    // for the first round this includes the untouched prefix before fromIndex.
+    sal_Int32 nConsumed = 0;
+    for (sal_Int32 nRun = fromIndex + nFirstHit; nRun >= 0;
+         nRun = detail::indexOf(s1->buffer + nConsumed, s1->length - nConsumed, from, fromLength))
+    {
+        detail::append(s, &nBufCapacity, s1->buffer + nConsumed, nRun);
+        detail::append(s, &nBufCapacity, to, toLength);
+        nConsumed += nRun + fromLength;
+    }
+
+    // Whatever follows the last match is copied unchanged
+    detail::append(s, &nBufCapacity, s1->buffer + nConsumed, s1->length - nConsumed);
+    release(s1);
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index c550c99c419e..70048db58f6e 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -1367,15 +1367,7 @@ void rtl_uString_newReplaceAllAsciiLAsciiL(
sal_Int32 fromLength, char const * to, sal_Int32 toLength)
SAL_THROW_EXTERN_C()
{
- assert(toLength >= 0);
- rtl_uString_assign(newStr, str);
- for (sal_Int32 i = 0;; i += toLength) {
- rtl_uString_newReplaceFirstAsciiLAsciiL(
- newStr, *newStr, from, fromLength, to, toLength, &i);
- if (i == -1) {
- break;
- }
- }
+ rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
}
void rtl_uString_newReplaceAllAsciiLUtf16L(
@@ -1383,15 +1375,7 @@ void rtl_uString_newReplaceAllAsciiLUtf16L(
sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength)
SAL_THROW_EXTERN_C()
{
- assert(toLength >= 0);
- rtl_uString_assign(newStr, str);
- for (sal_Int32 i = 0;; i += toLength) {
- rtl_uString_newReplaceFirstAsciiLUtf16L(
- newStr, *newStr, from, fromLength, to, toLength, &i);
- if (i == -1 || *newStr == nullptr) {
- break;
- }
- }
+ rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
}
void rtl_uString_newReplaceAllUtf16LAsciiL(
@@ -1399,15 +1383,7 @@ void rtl_uString_newReplaceAllUtf16LAsciiL(
sal_Int32 fromLength, char const * to, sal_Int32 toLength)
SAL_THROW_EXTERN_C()
{
- assert(toLength >= 0);
- rtl_uString_assign(newStr, str);
- for (sal_Int32 i = 0;; i += toLength) {
- rtl_uString_newReplaceFirstUtf16LAsciiL(
- newStr, *newStr, from, fromLength, to, toLength, &i);
- if (i == -1 || *newStr == nullptr) {
- break;
- }
- }
+ rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
}
void rtl_uString_newReplaceAllUtf16LUtf16L(
@@ -1423,16 +1399,7 @@ void rtl_uString_newReplaceAllFromIndexUtf16LUtf16L(
sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, sal_Int32 fromIndex)
SAL_THROW_EXTERN_C()
{
- assert(toLength >= 0);
- assert(fromIndex >= 0 && fromIndex <= str->length);
- rtl_uString_assign(newStr, str);
- for (sal_Int32 i = fromIndex;; i += toLength) {
- rtl_uString_newReplaceFirstUtf16LUtf16L(
- newStr, *newStr, from, fromLength, to, toLength, &i);
- if (i == -1 || *newStr == nullptr) {
- break;
- }
- }
+ rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, fromIndex);
}
sal_Int32 SAL_CALL rtl_ustr_getLength(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()