diff options
author | Stephan Bergmann <sbergman@redhat.com> | 2023-05-05 09:46:52 +0200 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2023-05-05 11:30:12 +0200 |
commit | 28cc0bff10f5dcec0c7b698ae7ba275845b2cad1 (patch) | |
tree | cdcccaa2356b8bea6c5b919faa6cd497a72e2336 | |
parent | c1bd421eae5449a005f2ee0f01b3b4e72002296e (diff) |
Break comphelper::string::adjustIndexToStartOfSurrogate out of o3tl::iterateCodePoints
...as adjusting an index to the start of a surrogate pair and iterating over code
points are orthogonal operations (and it turned out that the use case that
motivated the addition of o3tl::iterateCodePoints in the first place needs them
independently, anyway)
Change-Id: Id33901a2f7ac627253654ee6d883305dcf5a456f
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151415
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
-rw-r--r-- | comphelper/qa/string/test_string.cxx | 20 | ||||
-rw-r--r-- | comphelper/source/misc/string.cxx | 9 | ||||
-rw-r--r-- | include/comphelper/string.hxx | 13 | ||||
-rw-r--r-- | include/o3tl/string_view.hxx | 11 | ||||
-rw-r--r-- | o3tl/qa/test-string_view.cxx | 16 |
5 files changed, 46 insertions, 23 deletions
diff --git a/comphelper/qa/string/test_string.cxx b/comphelper/qa/string/test_string.cxx index 58f9c3f63c16..974673ca2940 100644 --- a/comphelper/qa/string/test_string.cxx +++ b/comphelper/qa/string/test_string.cxx @@ -50,6 +50,7 @@ public: void testReverseCodePoints(); void testSplit(); void testRemoveAny(); + void testAdjustIndexToStartOfSurrogate(); CPPUNIT_TEST_SUITE(TestString); CPPUNIT_TEST(testStripStart); @@ -63,6 +64,7 @@ public: CPPUNIT_TEST(testReverseCodePoints); CPPUNIT_TEST(testSplit); CPPUNIT_TEST(testRemoveAny); + CPPUNIT_TEST(testAdjustIndexToStartOfSurrogate); CPPUNIT_TEST_SUITE_END(); }; @@ -237,6 +239,24 @@ void TestString::testRemoveAny() CPPUNIT_ASSERT_EQUAL(OUString(), removeAny(in, test7)); } +void TestString::testAdjustIndexToStartOfSurrogate() { + CPPUNIT_ASSERT_EQUAL( + sal_Int32(0), + comphelper::string::adjustIndexToStartOfSurrogate("", 0)); + CPPUNIT_ASSERT_EQUAL( + sal_Int32(0), + comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 0)); + CPPUNIT_ASSERT_EQUAL( + sal_Int32(0), + comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 1)); + CPPUNIT_ASSERT_EQUAL( + sal_Int32(2), + comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 2)); + CPPUNIT_ASSERT_EQUAL( + sal_Int32(1), + comphelper::string::adjustIndexToStartOfSurrogate(u"\xD800", 1)); +} + CPPUNIT_TEST_SUITE_REGISTRATION(TestString); } diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx index da5c8b92c05c..0fdd24c83d7e 100644 --- a/comphelper/source/misc/string.cxx +++ b/comphelper/source/misc/string.cxx @@ -19,6 +19,7 @@ #include <sal/config.h> +#include <cassert> #include <cstddef> #include <string_view> #include <utility> @@ -679,6 +680,14 @@ OUString sanitizeStringSurrogates(const OUString& rString) return rString; } +sal_Int32 adjustIndexToStartOfSurrogate(OUString const & string, sal_Int32 index) { + assert(index >= 0 && index <= string.getLength()); + return + (index > 0 && rtl::isHighSurrogate(string[index 
- 1]) + && index < string.getLength() && rtl::isLowSurrogate(string[index])) + ? index - 1 : index; +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/comphelper/string.hxx b/include/comphelper/string.hxx index 38564bc16673..8144a7e95e28 100644 --- a/include/comphelper/string.hxx +++ b/include/comphelper/string.hxx @@ -384,6 +384,19 @@ COMPHELPER_DLLPUBLIC bool isdigitAsciiString(std::u16string_view rString); */ COMPHELPER_DLLPUBLIC OUString sanitizeStringSurrogates(const OUString& rString); +/** Adjust an index in case it points into the middle of a surrogate pair. + + @param string An OUString + + @param index A valid index into the string or to its end (i.e., must be in the range from zero + to the length of string, inclusive) + + @return index - 1 if the original index pointed into the middle of a surrogate pair; otherwise + the unchanged index + */ +COMPHELPER_DLLPUBLIC sal_Int32 adjustIndexToStartOfSurrogate( + OUString const & string, sal_Int32 index); + } // namespace comphelper::string /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/o3tl/string_view.hxx b/include/o3tl/string_view.hxx index b5b64d5991dd..5d03b2cfba87 100644 --- a/include/o3tl/string_view.hxx +++ b/include/o3tl/string_view.hxx @@ -508,11 +508,9 @@ inline double toDouble(std::string_view str) return rtl_math_stringToDouble(str.data(), str.data() + str.size(), '.', 0, nullptr, nullptr); } -// Similar to OUString::iterateCodePoints, but for std::string_view. -// If preAdjustIndex is true: prior to any other operation, *indexUtf16 is adjusted by -1 if it -// originally pointed into the middle of a surrogate pair. 
+// Like OUString::iterateCodePoints, but for std::string_view: inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* indexUtf16, - sal_Int32 incrementCodePoints = 1, bool preAdjustIndex = false) + sal_Int32 incrementCodePoints = 1) { std::size_t n; char16_t cu; @@ -520,11 +518,6 @@ inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* ind assert(indexUtf16 != nullptr); n = *indexUtf16; assert(n <= string.length()); - if (preAdjustIndex && n != 0 && rtl::isHighSurrogate(string[n - 1]) - && rtl::isLowSurrogate(string[n])) - { - --n; - } while (incrementCodePoints < 0) { assert(n > 0); diff --git a/o3tl/qa/test-string_view.cxx b/o3tl/qa/test-string_view.cxx index 5378ce9415ed..c658da16b5dc 100644 --- a/o3tl/qa/test-string_view.cxx +++ b/o3tl/qa/test-string_view.cxx @@ -737,28 +737,16 @@ private: { { std::size_t i = 1; - auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, false); + auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1); CPPUNIT_ASSERT_EQUAL(std::size_t(2), i); CPPUNIT_ASSERT_EQUAL(sal_uInt32(0xDC00), c); } { - std::size_t i = 1; - auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, true); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), i); - CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c); - } - { std::size_t i = 2; - auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, false); + auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1); CPPUNIT_ASSERT_EQUAL(std::size_t(1), i); CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c); } - { - std::size_t i = 2; - auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, true); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), i); - CPPUNIT_ASSERT_EQUAL(sal_uInt32('a'), c); - } } }; |