diff options
author | Khaled Hosny <khaled@libreoffice.org> | 2023-07-24 18:45:38 +0300 |
---|---|---|
committer | خالد حسني <khaled@libreoffice.org> | 2023-07-25 01:02:30 +0200 |
commit | 89229524298398ca1b5239c2a7ca798900733f5d (patch) | |
tree | 2251b488d78c2e40030a31a1f4005c26069431db /i18nutil/source/utility/casefolding.cxx | |
parent | 87ca7d2f146be2c309fc6fd36f9154f3ea4e4bd8 (diff) |
i18nutil: Fix ICU case folding of surrogate pairs
It can’t be easily made to work when mixing our case folding data (which
seems to work on individual surrogates) and ICU functions, so port it
over completely to ICU.
Change-Id: I0e12b81b23a053a2a9a299e51908930a3d0b82d6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/154843
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
Diffstat (limited to 'i18nutil/source/utility/casefolding.cxx')
-rw-r--r-- | i18nutil/source/utility/casefolding.cxx | 21 |
1 files changed, 11 insertions, 10 deletions
```diff
diff --git a/i18nutil/source/utility/casefolding.cxx b/i18nutil/source/utility/casefolding.cxx
index d4f79927c131..52dbeb1ca1c5 100644
--- a/i18nutil/source/utility/casefolding.cxx
+++ b/i18nutil/source/utility/casefolding.cxx
@@ -94,18 +94,20 @@ const Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int3
 
 Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale const & aLocale, MappingType nMappingType)
 {
+    if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos]))
+        return { 0, 0, { 0, 0, 0 } };
+
     Mapping dummy = { 0, 1, { str[pos], 0, 0 } };
 
     sal_uInt32 c;
-    if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos])) {
-        c = rtl::combineSurrogates(str[pos-1], str[pos]);
-        if (c >= SAL_N_ELEMENTS(CaseMappingIndex) * 256)
-            return dummy;
-    } else {
+    if (pos + 1 < len && rtl::isHighSurrogate(str[pos]) && rtl::isLowSurrogate(str[pos + 1]))
+        c = rtl::combineSurrogates(str[pos], str[pos + 1]);
+    else
         c = str[pos];
-    }
 
-    sal_Int16 address = CaseMappingIndex[c >> 8];
+    sal_Int16 address = -1;
+    if (c < SAL_N_ELEMENTS(CaseMappingIndex) * 256)
+        address = CaseMappingIndex[c >> 8];
 
     if (address >= 0) {
         address = (address << 8) + (c & 0xFF);
@@ -142,7 +144,7 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
     // using ustring.h APIs, which work on the whole string not character
     // by character.
     // TODO: what is the difference between ToLower and UpperToLower etc.?
-    sal_uInt32 value = 0;
+    sal_uInt32 value = c;
     switch (nMappingType)
     {
         case MappingType::ToLower:
@@ -163,8 +165,7 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
         default:
             break;
     }
-    if (value && value != c)
-        dummy.nmap = rtl::splitSurrogates(value, dummy.map);
+    dummy.nmap = rtl::splitSurrogates(value, dummy.map);
     return dummy;
 }
 
```