diff options
author | Caolán McNamara <caolanm@redhat.com> | 2020-10-01 14:43:42 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2020-10-01 18:00:47 +0200 |
commit | 46abe9243091c72b271f0f316796947527eeb562 (patch) | |
tree | cae5de4fbe382e6f9789c2d2cd5bea1c60248b7d /sw/source | |
parent | a0cefd04fc2abaadea9b066596f22372179beeea (diff) |
crashtesting: ucsdet_detect may return nullptr
The ICU documentation describes its return value as "a UCharsetMatch representing the
best matching charset, or NULL if no charset matches the byte data."
This happens e.g. with fdo39418-4-25.mtp,
and has been seen since:
commit ef77a256de527f6d00212839e55f949024f2e7bc
Date: Wed Sep 16 18:11:22 2020 +0900
tdf#60145 sw: fix UTF-8 encoding without BOM is not detected
Writer can now detect Unicode type even if the imported text file does not
have a BOM.
Change-Id: I7502f895b49c26dff632510936953e93900e03a9
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103768
Tested-by: Jenkins
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Diffstat (limited to 'sw/source')
-rw-r--r-- | sw/source/filter/basflt/iodetect.cxx | 30 |
1 files changed, 16 insertions, 14 deletions
diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx index a47bb9e82b8d..04466aa80648 100644 --- a/sw/source/filter/basflt/iodetect.cxx +++ b/sw/source/filter/basflt/iodetect.cxx @@ -275,21 +275,23 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen, UErrorCode uerr = U_ZERO_ERROR; UCharsetDetector* ucd = ucsdet_open(&uerr); ucsdet_setText(ucd, pBuf, rLen, &uerr); - const UCharsetMatch* match = ucsdet_detect(ucd, &uerr); - const char* pEncodingName = ucsdet_getName(match, &uerr); - - if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName)) - { - eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8 - } - else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName)) + if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr)) { - eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE - bLE = false; - } - else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName)) - { - eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE + const char* pEncodingName = ucsdet_getName(match, &uerr); + + if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8 + } + else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE + bLE = false; + } + else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE + } } ucsdet_close(ucd); |