From fa0c012d6c06e9a92093dacf997fe3151272648e Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Thu, 4 May 2023 14:09:53 +0200 Subject: Provide std::u16string_view based o3tl::iterateCodePoints ...as requested in the comments of "a11y: Fix returning unpaired surrogates when retrieving characters" (incl. the additional preAdjustIndex parameter). The type of the indexUtf16 parameter obviously needed to be adapted to std::u16string_view's std::size_t. But there is no obvious best choice for the type of the incrementCodePoints parameter (int? std::ssize_t?), so let's leave it as sal_Int32. For simplicity of avoiding a Library_o3tl, and to allow o3tl::iterateCodePoints to be used in the implementation of rtl_uString_iterateCodePoints now, o3tl::iterateCodePoints is provided as an inline function defined in the include file. Change-Id: I8280ca11d2a943bd2b7150a266807b358f321a72 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151366 Tested-by: Jenkins Reviewed-by: Stephan Bergmann --- sal/rtl/ustring.cxx | 49 +++++++++++++------------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'sal/rtl') diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index 45ab6e166871..fc23cf37a338 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -26,6 +26,9 @@ #include #include +#include +#include +#include #include #include #include @@ -769,43 +772,17 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( rtl_uString const * string, sal_Int32 * indexUtf16, sal_Int32 incrementCodePoints) { - sal_Int32 n; - sal_Unicode cu; - sal_uInt32 cp; assert(string != nullptr && indexUtf16 != nullptr); - n = *indexUtf16; - assert(n >= 0 && n <= string->length); - while (incrementCodePoints < 0) { - assert(n > 0); - cu = string->buffer[--n]; - if (rtl::isLowSurrogate(cu) && n != 0 && - rtl::isHighSurrogate(string->buffer[n - 1])) - { - --n; - } - ++incrementCodePoints; - } - assert(n >= 0 && n < string->length); - cu = string->buffer[n]; - if 
(rtl::isHighSurrogate(cu) && string->length - n >= 2 && - rtl::isLowSurrogate(string->buffer[n + 1])) - { - cp = rtl::combineSurrogates(cu, string->buffer[n + 1]); - } else { - cp = cu; - } - while (incrementCodePoints > 0) { - assert(n < string->length); - cu = string->buffer[n++]; - if (rtl::isHighSurrogate(cu) && n != string->length && - rtl::isLowSurrogate(string->buffer[n])) - { - ++n; - } - --incrementCodePoints; - } - assert(n >= 0 && n <= string->length); - *indexUtf16 = n; + assert( + *indexUtf16 >= 0 + && o3tl::cmp_less_equal(*indexUtf16, std::numeric_limits<std::size_t>::max())); + // using o3tl::cmp_less_equal nicely avoids potential + // -Wtautological-constant-out-of-range-compare + std::size_t i = *indexUtf16; + auto const cp = o3tl::iterateCodePoints( + std::u16string_view(string->buffer, string->length), &i, incrementCodePoints); + assert(i <= o3tl::make_unsigned(std::numeric_limits<sal_Int32>::max())); + *indexUtf16 = i; return cp; } -- cgit