From 0c8fa58a2d73702770687ed15b98822d09f96ac3 Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Sat, 27 Aug 2016 21:27:38 +0200 Subject: Support ConstCharArrayDetector also for UTF-16 arrays The long-term benefit will be support of C++11 char16_t string literals (for cases of string literals with non-ASCII content) once we drop any compilers that don't support those yet. The short-term benefit is support for an improved OUStringLiteral1 that accepts any sal_Unicode value, not just ASCII ones (see next commit). Change-Id: I3f8f6697d7eb62b5176b7e812b5a5113c53b83a4 Reviewed-on: https://gerrit.libreoffice.org/28445 Tested-by: Jenkins Reviewed-by: Stephan Bergmann --- .../rtl/strings/test_oustring_stringliterals.cxx | 73 ++++++++ sal/rtl/ustring.cxx | 203 +++++++++++++++++++++ sal/util/sal.map | 11 ++ 3 files changed, 287 insertions(+) (limited to 'sal') diff --git a/sal/qa/rtl/strings/test_oustring_stringliterals.cxx b/sal/qa/rtl/strings/test_oustring_stringliterals.cxx index df9aecd4fb21..aa496a4779f1 100644 --- a/sal/qa/rtl/strings/test_oustring_stringliterals.cxx +++ b/sal/qa/rtl/strings/test_oustring_stringliterals.cxx @@ -19,6 +19,7 @@ extern bool rtl_string_unittest_non_const_literal_function; #include #include +#include #include #include #include "rtl/string.h" @@ -37,6 +38,7 @@ private: void checkBuffer(); void checkOUStringLiteral(); void checkOUStringLiteral1(); + void checkUtf16(); void testcall( const char str[] ); @@ -48,6 +50,7 @@ CPPUNIT_TEST(checkNonconstChar); CPPUNIT_TEST(checkBuffer); CPPUNIT_TEST(checkOUStringLiteral); CPPUNIT_TEST(checkOUStringLiteral1); +CPPUNIT_TEST(checkUtf16); CPPUNIT_TEST_SUITE_END(); }; @@ -247,6 +250,76 @@ void test::oustring::StringLiterals::checkOUStringLiteral1() CPPUNIT_ASSERT_EQUAL(sal_Unicode('b'), s2[1]); } +void test::oustring::StringLiterals::checkUtf16() { +#if HAVE_CXX11_UTF16_STRING_LITERAL + rtl::OUString s1(u"abc"); + CPPUNIT_ASSERT_EQUAL(rtl::OUString("abc"), s1); + s1 = u"de"; + 
CPPUNIT_ASSERT_EQUAL(rtl::OUString("de"), s1); + s1 += u"fde"; + CPPUNIT_ASSERT_EQUAL(rtl::OUString("defde"), s1); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), s1.reverseCompareTo(u"defde")); + CPPUNIT_ASSERT(s1.equalIgnoreAsciiCase(u"DEFDE")); + CPPUNIT_ASSERT(s1.match(u"fde", 2)); + CPPUNIT_ASSERT(s1.matchIgnoreAsciiCase(u"FDE", 2)); + rtl::OUString s2; + CPPUNIT_ASSERT(s1.startsWith(u"de", &s2)); + CPPUNIT_ASSERT_EQUAL(rtl::OUString(u"fde"), s2); + CPPUNIT_ASSERT(s1.startsWithIgnoreAsciiCase(u"DEFD", &s2)); + CPPUNIT_ASSERT_EQUAL(rtl::OUString(u"e"), s2); + CPPUNIT_ASSERT(s1.endsWith(u"de", &s2)); + CPPUNIT_ASSERT_EQUAL(rtl::OUString(u"def"), s2); + CPPUNIT_ASSERT(s1.endsWithIgnoreAsciiCase(u"EFDE", &s2)); + CPPUNIT_ASSERT_EQUAL(rtl::OUString(u"d"), s2); + CPPUNIT_ASSERT(s1 == u"defde"); + CPPUNIT_ASSERT(u"defde" == s1); + CPPUNIT_ASSERT(s1 != u"abc"); + CPPUNIT_ASSERT(u"abc" != s1); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), s1.indexOf(u"de", 1)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), s1.lastIndexOf(u"de")); + sal_Int32 i = 0; + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfde"), + s1.replaceFirst(u"de", rtl::OUString("abc"), &i)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), i); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfde"), + s1.replaceFirst(rtl::OUString("de"), u"abc", &i)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), i); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfde"), s1.replaceFirst(u"de", u"abc", &i)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), i); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfde"), s1.replaceFirst(u"de", "abc", &i)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), i); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfde"), s1.replaceFirst("de", u"abc", &i)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), i); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfabc"), s1.replaceAll(u"de", rtl::OUString("abc"))); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfabc"), s1.replaceAll(rtl::OUString("de"), u"abc")); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfabc"), s1.replaceAll(u"de", u"abc")); + 
CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfabc"), s1.replaceAll(u"de", "abc")); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString(u"abcfabc"), s1.replaceAll("de", u"abc")); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString("abcdef"), rtl::OUString(rtl::OUString("abc") + u"def")); + CPPUNIT_ASSERT_EQUAL( + rtl::OUString("abcdef"), rtl::OUString(u"abc" + rtl::OUString("def"))); + rtl::OUStringBuffer b(u"abc"); + CPPUNIT_ASSERT_EQUAL(rtl::OUString("abc"), b.toString()); + b.append(u"def"); + CPPUNIT_ASSERT_EQUAL(rtl::OUString("abcdef"), b.toString()); + b.insert(2, u"gabab"); + CPPUNIT_ASSERT_EQUAL(rtl::OUString("abgababcdef"), b.toString()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), b.indexOf(u"ab", 1)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), b.lastIndexOf(u"ab")); +#endif +} + }} // namespace CPPUNIT_TEST_SUITE_REGISTRATION(test::oustring::StringLiterals); diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index b00c9c80ef76..b1fb82a375dd 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -631,6 +631,27 @@ void rtl_uString_newConcatAsciiL( (*newString)->length = n; } +void rtl_uString_newConcatUtf16L( + rtl_uString ** newString, rtl_uString * left, sal_Unicode const * right, + sal_Int32 rightLength) +{ + assert(newString != nullptr); + assert(left != nullptr); + assert(right != nullptr); + assert(rightLength >= 0); + if (left->length > std::numeric_limits<sal_Int32>::max() - rightLength) { + throw std::length_error("rtl_uString_newConcatUtf16L"); + } + sal_Int32 n = left->length + rightLength; + rtl_uString_assign(newString, left); + rtl_uString_ensureCapacity(newString, n); + memcpy( + (*newString)->buffer + (*newString)->length, right, + rightLength * sizeof (sal_Unicode)); + (*newString)->buffer[n] = 0; + (*newString)->length = n; +} + /* ======================================================================= */ static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen, bool * ascii ) @@ -1296,6 +1317,140 @@ void rtl_uString_newReplaceFirstAsciiLAsciiL( *index = 
i; } +void rtl_uString_newReplaceFirstAsciiLUtf16L( + rtl_uString ** newStr, rtl_uString * str, char const * from, + sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, + sal_Int32 * index) SAL_THROW_EXTERN_C() +{ + assert(str != nullptr); + assert(index != nullptr); + assert(*index >= 0 && *index <= str->length); + assert(fromLength >= 0); + assert(to != nullptr); + assert(toLength >= 0); + sal_Int32 i = rtl_ustr_indexOfAscii_WithLength( + str->buffer + *index, str->length - *index, from, fromLength); + if (i == -1) { + rtl_uString_assign(newStr, str); + } else { + assert(i <= str->length - *index); + i += *index; + assert(fromLength <= str->length); + if (str->length - fromLength > SAL_MAX_INT32 - toLength) { + rtl_uString_release(*newStr); + *newStr = nullptr; + } else { + sal_Int32 n = str->length - fromLength + toLength; + rtl_uString_acquire(str); // in case *newStr == str + rtl_uString_new_WithLength(newStr, n); + if (n != 0 && /*TODO:*/ *newStr != nullptr) { + (*newStr)->length = n; + assert(i >= 0 && i < str->length); + memcpy( + (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode)); + memcpy( + (*newStr)->buffer + i, to, toLength * sizeof (sal_Unicode)); + memcpy( + (*newStr)->buffer + i + toLength, + str->buffer + i + fromLength, + (str->length - i - fromLength) * sizeof (sal_Unicode)); + } + rtl_uString_release(str); + } + } + *index = i; +} + +void rtl_uString_newReplaceFirstUtf16LAsciiL( + rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from, + sal_Int32 fromLength, char const * to, sal_Int32 toLength, + sal_Int32 * index) SAL_THROW_EXTERN_C() +{ + assert(str != nullptr); + assert(index != nullptr); + assert(*index >= 0 && *index <= str->length); + assert(fromLength >= 0); + assert(to != nullptr); + assert(toLength >= 0); + sal_Int32 i = rtl_ustr_indexOfStr_WithLength( + str->buffer + *index, str->length - *index, from, fromLength); + if (i == -1) { + rtl_uString_assign(newStr, str); + } else { + assert(i <= str->length 
- *index); + i += *index; + assert(fromLength <= str->length); + if (str->length - fromLength > SAL_MAX_INT32 - toLength) { + rtl_uString_release(*newStr); + *newStr = nullptr; + } else { + sal_Int32 n = str->length - fromLength + toLength; + rtl_uString_acquire(str); // in case *newStr == str + rtl_uString_new_WithLength(newStr, n); + if (n != 0 && /*TODO:*/ *newStr != nullptr) { + (*newStr)->length = n; + assert(i >= 0 && i < str->length); + memcpy( + (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode)); + for (sal_Int32 j = 0; j != toLength; ++j) { + assert(static_cast< unsigned char >(to[j]) <= 0x7F); + (*newStr)->buffer[i + j] = to[j]; + } + memcpy( + (*newStr)->buffer + i + toLength, + str->buffer + i + fromLength, + (str->length - i - fromLength) * sizeof (sal_Unicode)); + } + rtl_uString_release(str); + } + } + *index = i; +} + +void rtl_uString_newReplaceFirstUtf16LUtf16L( + rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from, + sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, + sal_Int32 * index) SAL_THROW_EXTERN_C() +{ + assert(str != nullptr); + assert(index != nullptr); + assert(*index >= 0 && *index <= str->length); + assert(fromLength >= 0); + assert(to != nullptr); + assert(toLength >= 0); + sal_Int32 i = rtl_ustr_indexOfStr_WithLength( + str->buffer + *index, str->length - *index, from, fromLength); + if (i == -1) { + rtl_uString_assign(newStr, str); + } else { + assert(i <= str->length - *index); + i += *index; + assert(fromLength <= str->length); + if (str->length - fromLength > SAL_MAX_INT32 - toLength) { + rtl_uString_release(*newStr); + *newStr = nullptr; + } else { + sal_Int32 n = str->length - fromLength + toLength; + rtl_uString_acquire(str); // in case *newStr == str + rtl_uString_new_WithLength(newStr, n); + if (n != 0 && /*TODO:*/ *newStr != nullptr) { + (*newStr)->length = n; + assert(i >= 0 && i < str->length); + memcpy( + (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode)); + memcpy( + 
(*newStr)->buffer + i, to, toLength * sizeof (sal_Unicode)); + memcpy( + (*newStr)->buffer + i + toLength, + str->buffer + i + fromLength, + (str->length - i - fromLength) * sizeof (sal_Unicode)); + } + rtl_uString_release(str); + } + } + *index = i; +} + void rtl_uString_newReplaceAll( rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from, rtl_uString const * to) SAL_THROW_EXTERN_C() @@ -1364,4 +1519,52 @@ void rtl_uString_newReplaceAllAsciiLAsciiL( } } +void rtl_uString_newReplaceAllAsciiLUtf16L( + rtl_uString ** newStr, rtl_uString * str, char const * from, + sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength) + SAL_THROW_EXTERN_C() +{ + assert(toLength >= 0); + rtl_uString_assign(newStr, str); + for (sal_Int32 i = 0;; i += toLength) { + rtl_uString_newReplaceFirstAsciiLUtf16L( + newStr, *newStr, from, fromLength, to, toLength, &i); + if (i == -1 || *newStr == nullptr) { + break; + } + } +} + +void rtl_uString_newReplaceAllUtf16LAsciiL( + rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from, + sal_Int32 fromLength, char const * to, sal_Int32 toLength) + SAL_THROW_EXTERN_C() +{ + assert(toLength >= 0); + rtl_uString_assign(newStr, str); + for (sal_Int32 i = 0;; i += toLength) { + rtl_uString_newReplaceFirstUtf16LAsciiL( + newStr, *newStr, from, fromLength, to, toLength, &i); + if (i == -1 || *newStr == nullptr) { + break; + } + } +} + +void rtl_uString_newReplaceAllUtf16LUtf16L( + rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from, + sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength) + SAL_THROW_EXTERN_C() +{ + assert(toLength >= 0); + rtl_uString_assign(newStr, str); + for (sal_Int32 i = 0;; i += toLength) { + rtl_uString_newReplaceFirstUtf16LUtf16L( + newStr, *newStr, from, fromLength, to, toLength, &i); + if (i == -1 || *newStr == nullptr) { + break; + } + } +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/util/sal.map b/sal/util/sal.map index 
1ccac615b67f..d9995e1e4cae 100644 --- a/sal/util/sal.map +++ b/sal/util/sal.map @@ -695,6 +695,17 @@ LIBO_UDK_5.2 { # symbols available in >= LibO 5.2 osl_getShortUserName; } LIBO_UDK_5.1; +LIBO_UDK_5.3 { # symbols available in >= LibO 5.3 + global: + rtl_uString_newConcatUtf16L; + rtl_uString_newReplaceAllAsciiLUtf16L; + rtl_uString_newReplaceAllUtf16LAsciiL; + rtl_uString_newReplaceAllUtf16LUtf16L; + rtl_uString_newReplaceFirstAsciiLUtf16L; + rtl_uString_newReplaceFirstUtf16LAsciiL; + rtl_uString_newReplaceFirstUtf16LUtf16L; +} LIBO_UDK_5.2; + PRIVATE_1.0 { global: osl_detail_ObjectRegistry_storeAddresses; -- cgit