diff options
Diffstat (limited to 'sal')
-rw-r--r-- | sal/qa/rtl/textenc/rtl_textcvt.cxx | 332 | ||||
-rw-r--r-- | sal/textenc/tcvtutf8.cxx | 73 |
2 files changed, 352 insertions, 53 deletions
diff --git a/sal/qa/rtl/textenc/rtl_textcvt.cxx b/sal/qa/rtl/textenc/rtl_textcvt.cxx index d698bc22cd74..3c36852bebfc 100644 --- a/sal/qa/rtl/textenc/rtl_textcvt.cxx +++ b/sal/qa/rtl/textenc/rtl_textcvt.cxx @@ -453,6 +453,8 @@ public: void testComplexCut(); + void testInvalidUtf8(); + void testSRCBUFFERTOSMALL(); void testMime(); @@ -465,6 +467,7 @@ public: CPPUNIT_TEST(testSingleByte); CPPUNIT_TEST(testComplex); CPPUNIT_TEST(testComplexCut); + CPPUNIT_TEST(testInvalidUtf8); CPPUNIT_TEST(testSRCBUFFERTOSMALL); CPPUNIT_TEST(testMime); CPPUNIT_TEST(testWindows); @@ -2330,35 +2333,6 @@ void Test::testComplex() { true, false, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }, - { RTL_TEXTENCODING_UTF8, - RTL_CONSTASCII_STRINGPARAM( - "\xC0\x80\xE0\x80\x81\xF0\x80\x80\x82\xF8\x80\x80\x80\x83" - "\xFC\x80\x80\x80\x80\x84"), - { 0x0000,0x0001,0x0002,0x0003,0x0004 }, - 5, - false, - true, - false, - false, - RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }, - { RTL_TEXTENCODING_UTF8, - RTL_CONSTASCII_STRINGPARAM("\xED\xA1\x89\xED\xB4\x93"), - { 0xD849,0xDD13 }, - 2, - false, - true, - false, - false, - RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }, - { RTL_TEXTENCODING_UTF8, - RTL_CONSTASCII_STRINGPARAM("\xED\xA1\x89\x41"), - { 0xD849,0x0041 }, - 2, - false, - true, - false, - false, - RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }, // Test Java UTF-8: @@ -2664,6 +2638,306 @@ void Test::testComplexCut() { #endif } +void Test::testInvalidUtf8() { + // UTF-8, invalid bytes: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\x80\xBF\xFE\xFF"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(4), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD\uFFFD\uFFFD\uFFFD"), + OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, non-shortest two-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xC0\x80"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(2), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, cut two-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xC0"), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(0), size); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL, info); + CPPUNIT_ASSERT(converted <= 1); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, non-shortest three-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x9F\xBF"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(3), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, cut three-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x80"), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(0), size); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL, info); + CPPUNIT_ASSERT(converted <= 2); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, cut three-byte sequence followed by more: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x80."), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(2), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD."), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(3), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, surrogates: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, + RTL_CONSTASCII_STRINGPARAM("\xED\xA0\x80\xED\xB0\x80"), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(2), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(6), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, non-shortest four-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xF0\x8F\xBF\xBF"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, too-large four-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xF4\x90\x80\x80"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, five-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, + RTL_CONSTASCII_STRINGPARAM("\xFB\xBF\xBF\xBF\xBF"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(5), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // UTF-8, six-byte sequence: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, + RTL_CONSTASCII_STRINGPARAM("\xFD\xBF\xBF\xBF\xBF\xBF"), + buf, TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(6), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // Java UTF-8, U+0000: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_JAVA_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\0"), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), converted); + rtl_destroyTextToUnicodeConverter(converter); + } + // Java UTF-8, U+10000: + { + auto const converter = rtl_createTextToUnicodeConverter( + RTL_TEXTENCODING_JAVA_UTF8); + CPPUNIT_ASSERT(converter != nullptr); + sal_Unicode buf[TEST_STRING_SIZE]; + sal_uInt32 info; + sal_Size converted; + auto const size = rtl_convertTextToUnicode( + converter, nullptr, RTL_CONSTASCII_STRINGPARAM(u8"\U00010000"), buf, + TEST_STRING_SIZE, + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + | RTL_TEXTTOUNICODE_FLAGS_FLUSH), + &info, &converted); + CPPUNIT_ASSERT_EQUAL(sal_Size(1), size); + CPPUNIT_ASSERT_EQUAL( + OUString(u"\uFFFD"), OUString(buf, sal_Int32(size))); + CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info); + CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted); + rtl_destroyTextToUnicodeConverter(converter); + } +} + void Test::testSRCBUFFERTOSMALL() { rtl_TextToUnicodeConverter cv = rtl_createTextToUnicodeConverter( RTL_TEXTENCODING_EUC_JP); diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx index c1a6949c01c5..4943f6987a29 100644 --- a/sal/textenc/tcvtutf8.cxx +++ b/sal/textenc/tcvtutf8.cxx @@ -30,6 +30,7 @@ struct ImplUtf8ToUnicodeContext { sal_uInt32 nUtf32; + int nBytes; int nShift; bool bCheckBom; }; @@ -65,18 +66,9 @@ sal_Size ImplConvertUtf8ToUnicode( sal_Size nSrcBytes, sal_Unicode * pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags, sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes) { - /* - This function is very liberal with the UTF-8 input. Accepted are: - - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041) - - surrogates (e.g., ED A0 80 to represent U+D800) - - encodings with up to six bytes (everything outside the range - U+0000..10FFFF is considered "undefined") - The first two of these points allow this routine to translate from both - RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8. - */ - bool bJavaUtf8 = pData != nullptr; sal_uInt32 nUtf32 = 0; + int nBytes; int nShift = -1; bool bCheckBom = true; sal_uInt32 nInfo = 0; @@ -88,19 +80,22 @@ sal_Size ImplConvertUtf8ToUnicode( if (pContext != nullptr) { nUtf32 = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nUtf32; + nBytes = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nBytes; nShift = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nShift; bCheckBom = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->bCheckBom; } while (pSrcBufPtr < pSrcBufEnd) { - bool bUndefined = false; bool bConsume = true; sal_uInt32 nChar = *pSrcBufPtr++; if (nShift < 0) + // Allow (illegal) 5 and 6 byte sequences, so they are read as a + // single individual bad character: if (nChar <= 0x7F) { nUtf32 = nChar; + nBytes = 1; goto transform; } else if (nChar <= 0xBF) @@ -108,26 +103,31 @@ sal_Size ImplConvertUtf8ToUnicode( else if (nChar <= 0xDF) { nUtf32 = (nChar & 0x1F) << 6; + nBytes = 2; nShift = 0; } else if (nChar <= 0xEF) { nUtf32 = (nChar & 0x0F) << 12; + nBytes = 3; nShift = 6; } else if (nChar <= 0xF7) { nUtf32 = (nChar & 0x07) << 18; + nBytes = 4; nShift = 12; } else if (nChar <= 0xFB) { nUtf32 = (nChar & 0x03) << 24; + nBytes = 5; nShift = 18; } else if (nChar <= 0xFD) { nUtf32 = (nChar & 0x01) << 30; + nBytes = 6; nShift = 24; } else @@ -154,28 +154,52 @@ sal_Size ImplConvertUtf8ToUnicode( continue; transform: - if (!bCheckBom || nUtf32 != 0xFEFF + if (!bCheckBom || nUtf32 != 0xFEFF || nBytes != 3 || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0 || bJavaUtf8) { + switch (nBytes) { + case 1: + if (bJavaUtf8 && nUtf32 == 0) { + goto bad_input; + } + break; + case 2: + if (nUtf32 < 0x80 && !(bJavaUtf8 && nUtf32 == 0)) { + goto bad_input; + } + break; + case 3: + if (nUtf32 < 0x800 + || (!bJavaUtf8 + && (rtl::isHighSurrogate(nUtf32) + || rtl::isLowSurrogate(nUtf32)))) + { + goto bad_input; + } + break; + case 4: + if (nUtf32 < 0x10000 || !rtl::isUnicodeCodePoint(nUtf32) + || bJavaUtf8) + { + goto bad_input; + } + break; + default: + goto bad_input; + } if (nUtf32 <= 0xFFFF) if (pDestBufPtr != pDestBufEnd) *pDestBufPtr++ = (sal_Unicode) nUtf32; else goto no_output; - else if (rtl::isUnicodeCodePoint(nUtf32)) - if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32); - *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32); - } - else - goto no_output; - else + else if (pDestBufEnd - pDestBufPtr >= 2) { - bUndefined = true; - goto bad_input; + *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32); + *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32); } + else + goto no_output; } nShift = -1; bCheckBom = false; @@ -183,7 +207,7 @@ sal_Size ImplConvertUtf8ToUnicode( bad_input: switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion( - bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + false, nBytes != 1, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo)) { case sal::detail::textenc::BAD_INPUT_STOP: @@ -238,6 +262,7 @@ sal_Size ImplConvertUtf8ToUnicode( if (pContext != nullptr) { static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nUtf32 = nUtf32; + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nBytes = nBytes; static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nShift = nShift; static_cast< ImplUtf8ToUnicodeContext * >(pContext)->bCheckBom = bCheckBom; } |