summary refs log tree commit diff
path: root/sw/source
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2020-10-01 14:43:42 +0100
committer Caolán McNamara <caolanm@redhat.com> 2020-10-01 18:00:47 +0200
commit 46abe9243091c72b271f0f316796947527eeb562 (patch)
tree cae5de4fbe382e6f9789c2d2cd5bea1c60248b7d /sw/source
parent a0cefd04fc2abaadea9b066596f22372179beeea (diff)
crashtesting: ucsdet_detect may return nullptr

"a UCharsetMatch representing the best matching charset, or NULL if no
charset matches the byte data."

e.g. with fdo39418-4-25.mtp

seen since...

commit ef77a256de527f6d00212839e55f949024f2e7bc
Date: Wed Sep 16 18:11:22 2020 +0900

    tdf#60145 sw: fix UTF-8 encoding without BOM is not detected

    Writer can now detect Unicode type even if importing text file
    does not have a BOM.

Change-Id: I7502f895b49c26dff632510936953e93900e03a9
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103768
Tested-by: Jenkins
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Diffstat (limited to 'sw/source')
-rw-r--r-- sw/source/filter/basflt/iodetect.cxx 30
1 files changed, 16 insertions, 14 deletions
diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx
index a47bb9e82b8d..04466aa80648 100644
--- a/sw/source/filter/basflt/iodetect.cxx
+++ b/sw/source/filter/basflt/iodetect.cxx
@@ -275,21 +275,23 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
UErrorCode uerr = U_ZERO_ERROR;
UCharsetDetector* ucd = ucsdet_open(&uerr);
ucsdet_setText(ucd, pBuf, rLen, &uerr);
- const UCharsetMatch* match = ucsdet_detect(ucd, &uerr);
- const char* pEncodingName = ucsdet_getName(match, &uerr);
-
- if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName))
- {
- eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8
- }
- else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName))
+ if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
{
- eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE
- bLE = false;
- }
- else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName))
- {
- eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE
+ const char* pEncodingName = ucsdet_getName(match, &uerr);
+
+ if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8
+ }
+ else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE
+ bLE = false;
+ }
+ else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE
+ }
}
ucsdet_close(ucd);