summary refs log tree commit diff
path: root/sal/qa/rtl/textenc/rtl_textcvt.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sal/qa/rtl/textenc/rtl_textcvt.cxx')
-rw-r--r-- sal/qa/rtl/textenc/rtl_textcvt.cxx 332
1 files changed, 303 insertions, 29 deletions
diff --git a/sal/qa/rtl/textenc/rtl_textcvt.cxx b/sal/qa/rtl/textenc/rtl_textcvt.cxx
index d698bc22cd74..3c36852bebfc 100644
--- a/sal/qa/rtl/textenc/rtl_textcvt.cxx
+++ b/sal/qa/rtl/textenc/rtl_textcvt.cxx
@@ -453,6 +453,8 @@ public:
void testComplexCut();
+ void testInvalidUtf8();
+
void testSRCBUFFERTOSMALL();
void testMime();
@@ -465,6 +467,7 @@ public:
CPPUNIT_TEST(testSingleByte);
CPPUNIT_TEST(testComplex);
CPPUNIT_TEST(testComplexCut);
+ CPPUNIT_TEST(testInvalidUtf8);
CPPUNIT_TEST(testSRCBUFFERTOSMALL);
CPPUNIT_TEST(testMime);
CPPUNIT_TEST(testWindows);
@@ -2330,35 +2333,6 @@ void Test::testComplex() {
true,
false,
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
- { RTL_TEXTENCODING_UTF8,
- RTL_CONSTASCII_STRINGPARAM(
- "\xC0\x80\xE0\x80\x81\xF0\x80\x80\x82\xF8\x80\x80\x80\x83"
- "\xFC\x80\x80\x80\x80\x84"),
- { 0x0000,0x0001,0x0002,0x0003,0x0004 },
- 5,
- false,
- true,
- false,
- false,
- RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
- { RTL_TEXTENCODING_UTF8,
- RTL_CONSTASCII_STRINGPARAM("\xED\xA1\x89\xED\xB4\x93"),
- { 0xD849,0xDD13 },
- 2,
- false,
- true,
- false,
- false,
- RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
- { RTL_TEXTENCODING_UTF8,
- RTL_CONSTASCII_STRINGPARAM("\xED\xA1\x89\x41"),
- { 0xD849,0x0041 },
- 2,
- false,
- true,
- false,
- false,
- RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
// Test Java UTF-8:
@@ -2664,6 +2638,306 @@ void Test::testComplexCut() {
#endif
}
+void Test::testInvalidUtf8() { // Feeds ill-formed input to the UTF-8 and Java UTF-8 text-to-Unicode converters; each case below uses a fresh converter and asserts the returned character count, the output buffer, the info flags and the number of source bytes consumed.
+ // UTF-8, invalid bytes: the four bytes 0x80 0xBF 0xFE 0xFF are expected to give four U+FFFD with INFO_INVALID set and all 4 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\x80\xBF\xFE\xFF"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(4), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD\uFFFD\uFFFD\uFFFD"),
+ OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, non-shortest two-byte sequence: 0xC0 0x80 is expected to give a single U+FFFD with INFO_INVALID set and both bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xC0\x80"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(2), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, cut two-byte sequence: a lone 0xC0 lead byte, converted with INVALID_ERROR and without FLUSH; expected are no output characters, INFO_SRCBUFFERTOSMALL, and at most 1 byte consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xC0"), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(0), size);
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL, info);
+ CPPUNIT_ASSERT(converted <= 1);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, non-shortest three-byte sequence: 0xE0 0x9F 0xBF is expected to give a single U+FFFD with INFO_INVALID set and all 3 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x9F\xBF"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(3), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, cut three-byte sequence: 0xE0 0x80 with the third byte missing, converted with INVALID_ERROR and without FLUSH; expected are no output characters, INFO_SRCBUFFERTOSMALL, and at most 2 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x80"), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(0), size);
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL, info);
+ CPPUNIT_ASSERT(converted <= 2);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, cut three-byte sequence followed by more: 0xE0 0x80 followed by an ASCII '.' is expected to give U+FFFD and then '.', with INFO_INVALID set and all 3 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xE0\x80."), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(2), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD."), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(3), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, surrogates: 0xED 0xA0 0x80 and 0xED 0xB0 0x80 (the three-byte forms of U+D800 and U+DC00) are expected to give two U+FFFD with INFO_INVALID set and all 6 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr,
+ RTL_CONSTASCII_STRINGPARAM("\xED\xA0\x80\xED\xB0\x80"), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(2), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(6), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, non-shortest four-byte sequence: 0xF0 0x8F 0xBF 0xBF (an overlong form of U+FFFF) is expected to give a single U+FFFD with INFO_INVALID set and all 4 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xF0\x8F\xBF\xBF"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, too-large four-byte sequence: 0xF4 0x90 0x80 0x80 (which would decode to 0x110000, one past U+10FFFF) is expected to give a single U+FFFD with INFO_INVALID set and all 4 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\xF4\x90\x80\x80"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, five-byte sequence: 0xFB followed by four 0xBF bytes is expected to give a single U+FFFD with INFO_INVALID set and all 5 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr,
+ RTL_CONSTASCII_STRINGPARAM("\xFB\xBF\xBF\xBF\xBF"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(5), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // UTF-8, six-byte sequence: 0xFD followed by five 0xBF bytes is expected to give a single U+FFFD with INFO_INVALID set and all 6 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr,
+ RTL_CONSTASCII_STRINGPARAM("\xFD\xBF\xBF\xBF\xBF\xBF"),
+ buf, TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(6), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // Java UTF-8, U+0000: with the RTL_TEXTENCODING_JAVA_UTF8 converter, a single zero byte is expected to give U+FFFD with INFO_INVALID set and 1 byte consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_JAVA_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM("\0"), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+ // Java UTF-8, U+10000: with the RTL_TEXTENCODING_JAVA_UTF8 converter, the four-byte UTF-8 form of U+10000 is expected to give a single U+FFFD with INFO_INVALID set and all 4 bytes consumed.
+ {
+ auto const converter = rtl_createTextToUnicodeConverter(
+ RTL_TEXTENCODING_JAVA_UTF8);
+ CPPUNIT_ASSERT(converter != nullptr);
+ sal_Unicode buf[TEST_STRING_SIZE];
+ sal_uInt32 info;
+ sal_Size converted;
+ auto const size = rtl_convertTextToUnicode(
+ converter, nullptr, RTL_CONSTASCII_STRINGPARAM(u8"\U00010000"), buf,
+ TEST_STRING_SIZE,
+ (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
+ | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
+ | RTL_TEXTTOUNICODE_FLAGS_FLUSH),
+ &info, &converted);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(1), size);
+ CPPUNIT_ASSERT_EQUAL(
+ OUString(u"\uFFFD"), OUString(buf, sal_Int32(size)));
+ CPPUNIT_ASSERT_EQUAL(RTL_TEXTTOUNICODE_INFO_INVALID, info);
+ CPPUNIT_ASSERT_EQUAL(sal_Size(4), converted);
+ rtl_destroyTextToUnicodeConverter(converter);
+ }
+}
+
void Test::testSRCBUFFERTOSMALL() {
rtl_TextToUnicodeConverter cv = rtl_createTextToUnicodeConverter(
RTL_TEXTENCODING_EUC_JP);