summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stephan Bergmann <sbergman@redhat.com> 2023-05-05 09:46:52 +0200
committer Stephan Bergmann <sbergman@redhat.com> 2023-05-05 11:30:12 +0200
commit 28cc0bff10f5dcec0c7b698ae7ba275845b2cad1 (patch)
tree cdcccaa2356b8bea6c5b919faa6cd497a72e2336
parent c1bd421eae5449a005f2ee0f01b3b4e72002296e (diff)
Break comphelper::adjustIndexToStartOfSurrogate out of o3tl::iterateCodePoints
...as what they do is orthogonal (and it turned out that the use case that motivated the addition of o3tl::iterateCodePoints in the first place needs them independently, anyway)

Change-Id: Id33901a2f7ac627253654ee6d883305dcf5a456f
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151415
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
-rw-r--r--comphelper/qa/string/test_string.cxx20
-rw-r--r--comphelper/source/misc/string.cxx9
-rw-r--r--include/comphelper/string.hxx13
-rw-r--r--include/o3tl/string_view.hxx11
-rw-r--r--o3tl/qa/test-string_view.cxx16
5 files changed, 46 insertions, 23 deletions
diff --git a/comphelper/qa/string/test_string.cxx b/comphelper/qa/string/test_string.cxx
index 58f9c3f63c16..974673ca2940 100644
--- a/comphelper/qa/string/test_string.cxx
+++ b/comphelper/qa/string/test_string.cxx
@@ -50,6 +50,7 @@ public:
void testReverseCodePoints();
void testSplit();
void testRemoveAny();
+ void testAdjustIndexToStartOfSurrogate();
CPPUNIT_TEST_SUITE(TestString);
CPPUNIT_TEST(testStripStart);
@@ -63,6 +64,7 @@ public:
CPPUNIT_TEST(testReverseCodePoints);
CPPUNIT_TEST(testSplit);
CPPUNIT_TEST(testRemoveAny);
+ CPPUNIT_TEST(testAdjustIndexToStartOfSurrogate);
CPPUNIT_TEST_SUITE_END();
};
@@ -237,6 +239,24 @@ void TestString::testRemoveAny()
CPPUNIT_ASSERT_EQUAL(OUString(), removeAny(in, test7));
}
+void TestString::testAdjustIndexToStartOfSurrogate() { // Checks adjustIndexToStartOfSurrogate at every index of small inputs.
+ CPPUNIT_ASSERT_EQUAL( // Empty string: the only valid index (0) is returned unchanged.
+ sal_Int32(0),
+ comphelper::string::adjustIndexToStartOfSurrogate("", 0));
+ CPPUNIT_ASSERT_EQUAL( // Index at the start of a surrogate pair is returned unchanged.
+ sal_Int32(0),
+ comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 0));
+ CPPUNIT_ASSERT_EQUAL( // Index between the two halves of the pair is moved back by one.
+ sal_Int32(0),
+ comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 1));
+ CPPUNIT_ASSERT_EQUAL( // Index at the end of the string (just past the pair) is returned unchanged.
+ sal_Int32(2),
+ comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 2));
+ CPPUNIT_ASSERT_EQUAL( // Lone high surrogate followed by the end of the string: no adjustment.
+ sal_Int32(1),
+ comphelper::string::adjustIndexToStartOfSurrogate(u"\xD800", 1));
+}
+
CPPUNIT_TEST_SUITE_REGISTRATION(TestString);
}
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index da5c8b92c05c..0fdd24c83d7e 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -19,6 +19,7 @@
#include <sal/config.h>
+#include <cassert>
#include <cstddef>
#include <string_view>
#include <utility>
@@ -679,6 +680,14 @@ OUString sanitizeStringSurrogates(const OUString& rString)
return rString;
}
+sal_Int32 adjustIndexToStartOfSurrogate(OUString const & string, sal_Int32 index) { // Returns index - 1 if index lies between the halves of a surrogate pair, else index.
+ assert(index >= 0 && index <= string.getLength()); // Precondition: index may equal the length (end of string).
+ return
+ (index > 0 && rtl::isHighSurrogate(string[index - 1]) // index > 0 guards the read of string[index - 1]
+ && index < string.getLength() && rtl::isLowSurrogate(string[index])) // length check guards the read of string[index]
+ ? index - 1 : index; // step back onto the high surrogate, otherwise leave index untouched
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/comphelper/string.hxx b/include/comphelper/string.hxx
index 38564bc16673..8144a7e95e28 100644
--- a/include/comphelper/string.hxx
+++ b/include/comphelper/string.hxx
@@ -384,6 +384,19 @@ COMPHELPER_DLLPUBLIC bool isdigitAsciiString(std::u16string_view rString);
*/
COMPHELPER_DLLPUBLIC OUString sanitizeStringSurrogates(const OUString& rString);
+/** Adjust an index in case it points into the middle of a surrogate pair (i.e., at a low surrogate directly preceded by a high surrogate).
+
+ @param string The OUString that index refers into
+
+ @param index A valid index into the string or to its end (i.e., must be in the range from zero
+ to the length of string, inclusive); this precondition is only checked by an assert
+
+ @return index - 1 (the position of the pair's high surrogate) if the original index pointed into
+ the middle of a surrogate pair; otherwise the unchanged index
+ */
+COMPHELPER_DLLPUBLIC sal_Int32 adjustIndexToStartOfSurrogate(
+ OUString const & string, sal_Int32 index);
+
} // namespace comphelper::string
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/o3tl/string_view.hxx b/include/o3tl/string_view.hxx
index b5b64d5991dd..5d03b2cfba87 100644
--- a/include/o3tl/string_view.hxx
+++ b/include/o3tl/string_view.hxx
@@ -508,11 +508,9 @@ inline double toDouble(std::string_view str)
return rtl_math_stringToDouble(str.data(), str.data() + str.size(), '.', 0, nullptr, nullptr);
}
-// Similar to OUString::iterateCodePoints, but for std::string_view.
-// If preAdjustIndex is true: prior to any other operation, *indexUtf16 is adjusted by -1 if it
-// originally pointed into the middle of a surrogate pair.
+// Like OUString::iterateCodePoints, but for std::string_view:
inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* indexUtf16,
- sal_Int32 incrementCodePoints = 1, bool preAdjustIndex = false)
+ sal_Int32 incrementCodePoints = 1)
{
std::size_t n;
char16_t cu;
@@ -520,11 +518,6 @@ inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* ind
assert(indexUtf16 != nullptr);
n = *indexUtf16;
assert(n <= string.length());
- if (preAdjustIndex && n != 0 && rtl::isHighSurrogate(string[n - 1])
- && rtl::isLowSurrogate(string[n]))
- {
- --n;
- }
while (incrementCodePoints < 0)
{
assert(n > 0);
diff --git a/o3tl/qa/test-string_view.cxx b/o3tl/qa/test-string_view.cxx
index 5378ce9415ed..c658da16b5dc 100644
--- a/o3tl/qa/test-string_view.cxx
+++ b/o3tl/qa/test-string_view.cxx
@@ -737,28 +737,16 @@ private:
{
{
std::size_t i = 1;
- auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, false);
+ auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1);
CPPUNIT_ASSERT_EQUAL(std::size_t(2), i);
CPPUNIT_ASSERT_EQUAL(sal_uInt32(0xDC00), c);
}
{
- std::size_t i = 1;
- auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, true);
- CPPUNIT_ASSERT_EQUAL(std::size_t(2), i);
- CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c);
- }
- {
std::size_t i = 2;
- auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, false);
+ auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1);
CPPUNIT_ASSERT_EQUAL(std::size_t(1), i);
CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c);
}
- {
- std::size_t i = 2;
- auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, true);
- CPPUNIT_ASSERT_EQUAL(std::size_t(0), i);
- CPPUNIT_ASSERT_EQUAL(sal_uInt32('a'), c);
- }
}
};