summary | refs | log | tree | commit | diff
path: root/i18nutil/source/utility/casefolding.cxx
diff options
context:
space:
mode:
author Khaled Hosny <khaled@libreoffice.org> 2023-07-24 18:45:38 +0300
committer خالد حسني <khaled@libreoffice.org> 2023-07-25 01:02:30 +0200
commit 89229524298398ca1b5239c2a7ca798900733f5d (patch)
tree 2251b488d78c2e40030a31a1f4005c26069431db /i18nutil/source/utility/casefolding.cxx
parent 87ca7d2f146be2c309fc6fd36f9154f3ea4e4bd8 (diff)
i18nutil: Fix ICU case folding of surrogate pairs
It can’t be easily made to work when mixing our case folding data (which seems to work on individual surrogates) and ICU functions, so port it over completely to ICU.

Change-Id: I0e12b81b23a053a2a9a299e51908930a3d0b82d6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/154843
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
Diffstat (limited to 'i18nutil/source/utility/casefolding.cxx')
-rw-r--r-- i18nutil/source/utility/casefolding.cxx 21
1 files changed, 11 insertions, 10 deletions
diff --git a/i18nutil/source/utility/casefolding.cxx b/i18nutil/source/utility/casefolding.cxx
index d4f79927c131..52dbeb1ca1c5 100644
--- a/i18nutil/source/utility/casefolding.cxx
+++ b/i18nutil/source/utility/casefolding.cxx
@@ -94,18 +94,20 @@ const Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int3
Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale const & aLocale, MappingType nMappingType)
{
+ if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos]))
+ return { 0, 0, { 0, 0, 0 } };
+
Mapping dummy = { 0, 1, { str[pos], 0, 0 } };
sal_uInt32 c;
- if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos])) {
- c = rtl::combineSurrogates(str[pos-1], str[pos]);
- if (c >= SAL_N_ELEMENTS(CaseMappingIndex) * 256)
- return dummy;
- } else {
+ if (pos + 1 < len && rtl::isHighSurrogate(str[pos]) && rtl::isLowSurrogate(str[pos + 1]))
+ c = rtl::combineSurrogates(str[pos], str[pos + 1]);
+ else
c = str[pos];
- }
- sal_Int16 address = CaseMappingIndex[c >> 8];
+ sal_Int16 address = -1;
+ if (c < SAL_N_ELEMENTS(CaseMappingIndex) * 256)
+ address = CaseMappingIndex[c >> 8];
if (address >= 0) {
address = (address << 8) + (c & 0xFF);
@@ -142,7 +144,7 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
// using ustring.h APIs, which work on the whole string not character
// by character.
// TODO: what is the difference between ToLower and UpperToLower etc.?
- sal_uInt32 value = 0;
+ sal_uInt32 value = c;
switch (nMappingType)
{
case MappingType::ToLower:
@@ -163,8 +165,7 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
default: break;
}
- if (value && value != c)
- dummy.nmap = rtl::splitSurrogates(value, dummy.map);
+ dummy.nmap = rtl::splitSurrogates(value, dummy.map);
return dummy;
}