diff options
author | Eike Rathke <erack@redhat.com> | 2022-10-04 11:14:38 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2022-10-04 19:15:45 +0200 |
commit | 437abb3abbc506c1e20c6fec8e574abfe3487842 (patch) | |
tree | a107154f0737c2c74bb70aecd752b8b648a410f1 /i18nlangtag | |
parent | 71e11268569201f5dcd4dbc1e7560c4530b077fb (diff) |
Check acor_langtag for language-script fallback instead of only language
A tag with script could be added for which we don't have locale
data but for the same language in another script. Do not fall back
to that.
With that the share/autocorr/acor_zh-{CN,TW}.dat files created
additional Asian language listbox entries
"Chinese (Simplified, China) {zh-Hans-CN}"
"Chinese (Traditional, Taiwan) {zh-Hant-TW}"
because those are the canonicalized language tags.
Prefer the known legacy zh-CN and zh-TW tags instead.
Could also have happened with any document import.
Which again revealed a flaw in the handling of mapping overrides
where converting from a LanguageType LCID to Locale could yield a
different mapping than from BCP47 to LCID, which in the case of
a fallback for locale data led to odd side effects.
Change-Id: I1e2aaa8e9f99b6b3bc2c9a661215cb00bddd33d6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140939
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
Diffstat (limited to 'i18nlangtag')
-rw-r--r-- | i18nlangtag/qa/cppunit/test_languagetag.cxx | 2 | ||||
-rw-r--r-- | i18nlangtag/source/isolang/isolang.cxx | 2 | ||||
-rw-r--r-- | i18nlangtag/source/isolang/mslangid.cxx | 6 | ||||
-rw-r--r-- | i18nlangtag/source/languagetag/languagetag.cxx | 6 |
4 files changed, 10 insertions, 6 deletions
diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx index 4a211012e80d..f7864d6104fd 100644 --- a/i18nlangtag/qa/cppunit/test_languagetag.cxx +++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx @@ -753,6 +753,8 @@ bool checkMapping( std::u16string_view rStr1, std::u16string_view rStr2 ) if (rStr1 == u"kw-UK" ) return rStr2 == u"kw-GB"; if (rStr1 == u"oc-FR" ) return rStr2 == u"oc-FR-lengadoc"; if (rStr1 == u"oc-ES" ) return rStr2 == u"oc-ES-aranes"; + if (rStr1 == u"zh-Hans-CN" ) return rStr2 == u"zh-CN"; + if (rStr1 == u"zh-Hant-TW" ) return rStr2 == u"zh-TW"; return rStr1 == rStr2; } diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx index d20014e67f4b..4287fe247cb4 100644 --- a/i18nlangtag/source/isolang/isolang.cxx +++ b/i18nlangtag/source/isolang/isolang.cxx @@ -754,6 +754,8 @@ IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] = { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs-Latn", "BA", kSAME }, // MS, though Latn is suppress-script { LANGUAGE_BOSNIAN_LATIN_LSO, "bs-Latn", "" , LANGUAGE_BOSNIAN_LSO }, // MS, though Latn is suppress-script { LANGUAGE_CHINESE_TRADITIONAL_LSO, "zh-Hant", "" , k0 }, + { LANGUAGE_CHINESE_SIMPLIFIED, "zh-Hans", "CN", kSAME }, // canonical, but prefer legacy zh-CN + { LANGUAGE_CHINESE_TRADITIONAL, "zh-Hant", "TW", kSAME }, // canonical, but prefer legacy zh-TW { LANGUAGE_USER_MANINKAKAN_EASTERN_LATIN, "emk-Latn", "GN", k0 }, { LANGUAGE_USER_CREE_PLAINS_LATIN, "crk-Latn", "CA", k0 }, { LANGUAGE_USER_CREE_PLAINS_SYLLABICS, "crk-Cans", "CA", k0 }, diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx index 34c55b66e261..419e28445774 100644 --- a/i18nlangtag/source/isolang/mslangid.cxx +++ b/i18nlangtag/source/isolang/mslangid.cxx @@ -191,14 +191,14 @@ bool MsLangId::usesHyphenation(LanguageType nLang) // static css::lang::Locale MsLangId::Conversion::convertLanguageToLocale( - LanguageType 
nLang ) + LanguageType nLang, bool bIgnoreOverride ) { css::lang::Locale aLocale; // Still resolve LANGUAGE_DONTKNOW if resolving is not requested, // but not LANGUAGE_SYSTEM or others. LanguageType nOrigLang = nLang; nLang = MsLangId::getRealLanguage(nLang); - convertLanguageToLocaleImpl( nLang, aLocale, true ); + convertLanguageToLocaleImpl( nLang, aLocale, bIgnoreOverride ); if (aLocale.Language.isEmpty() && simplifySystemLanguages(nOrigLang) == LANGUAGE_SYSTEM) { // None found but resolve requested, last resort is "en-US". @@ -228,7 +228,7 @@ css::lang::Locale MsLangId::getFallbackLocale( { // empty language => LANGUAGE_SYSTEM if (rLocale.Language.isEmpty()) - return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM )); + return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false)); else return Conversion::lookupFallbackLocale( rLocale); } diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 239215a187c1..7071f2dcb116 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -961,7 +961,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // May have involved canonicalize(), so compare with // pImpl->maBcp47 instead of maBcp47! aBcp47 = LanguageTagImpl::convertToBcp47( - MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID )); + MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true)); bInsert = (aBcp47 == pImpl->maBcp47); } } @@ -1352,7 +1352,7 @@ void LanguageTagImpl::convertLocaleToBcp47() // locale via LanguageTag::convertToBcp47(LanguageType) and // LanguageTag::convertToLocale(LanguageType) would instantiate another // LanguageTag. 
- maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false); } if (maLocale.Language.isEmpty()) { @@ -1496,7 +1496,7 @@ void LanguageTagImpl::convertLangToLocale() mbInitializedLangID = true; } // Resolve system here! The original is remembered as mbSystemLocale. - maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false); mbInitializedLocale = true; } |