diff options
-rw-r--r-- | filter/source/msfilter/countryid.cxx | 4 | ||||
-rw-r--r-- | i18nlangtag/qa/cppunit/test_languagetag.cxx | 32 | ||||
-rw-r--r-- | i18nlangtag/source/isolang/isolang.cxx | 193 | ||||
-rw-r--r-- | i18nlangtag/source/isolang/mslangid.cxx | 47 | ||||
-rw-r--r-- | i18nlangtag/source/languagetag/languagetag.cxx | 22 | ||||
-rw-r--r-- | include/i18nlangtag/lang.h | 126 | ||||
-rw-r--r-- | svtools/source/misc/langtab.src | 10 | ||||
-rw-r--r-- | svtools/source/misc/sampletext.cxx | 14 | ||||
-rw-r--r-- | vcl/source/gdi/sallayout.cxx | 17 |
9 files changed, 356 insertions, 109 deletions
diff --git a/filter/source/msfilter/countryid.cxx b/filter/source/msfilter/countryid.cxx index eac02e5960de..b36f51bd7304 100644 --- a/filter/source/msfilter/countryid.cxx +++ b/filter/source/msfilter/countryid.cxx @@ -197,7 +197,7 @@ static const CountryEntry pTable[] = { COUNTRY_BELARUS, LANGUAGE_BELARUSIAN, false }, { COUNTRY_MONACO, LANGUAGE_FRENCH_MONACO, true }, { COUNTRY_UKRAINE, LANGUAGE_UKRAINIAN, false }, - { COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN, false }, + { COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN_SAM, false }, { COUNTRY_CROATIA, LANGUAGE_CROATIAN, true }, // sub type of LANGUAGE_SERBIAN { COUNTRY_SLOVENIA, LANGUAGE_SLOVENIAN, false }, { COUNTRY_MACEDONIA, LANGUAGE_MACEDONIAN, false }, @@ -235,7 +235,7 @@ static const CountryEntry pTable[] = { COUNTRY_ISRAEL, LANGUAGE_HEBREW, false }, { COUNTRY_BAHRAIN, LANGUAGE_ARABIC_BAHRAIN, true }, { COUNTRY_QATAR, LANGUAGE_ARABIC_QATAR, true }, - { COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN, false }, + { COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, false }, { COUNTRY_NEPAL, LANGUAGE_NEPALI, false }, { COUNTRY_IRAN, LANGUAGE_FARSI, false }, { COUNTRY_TAJIKISTAN, LANGUAGE_TAJIK, false }, diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx index 6f7cc2d3dc49..0d083db83dad 100644 --- a/i18nlangtag/qa/cppunit/test_languagetag.cxx +++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx @@ -154,7 +154,7 @@ void TestLanguageTag::testAllTags() CPPUNIT_ASSERT( aLocale.Language == "qlt" ); CPPUNIT_ASSERT( aLocale.Country == "CS" ); CPPUNIT_ASSERT( aLocale.Variant == s_sr_Latn_CS ); - CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN ); + CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN_SAM ); CPPUNIT_ASSERT( sr_Latn_CS.isValidBcp47() == true ); CPPUNIT_ASSERT( sr_Latn_CS.isIsoLocale() == false ); CPPUNIT_ASSERT( sr_Latn_CS.isIsoODF() == true ); @@ -352,14 +352,15 @@ void TestLanguageTag::testAllTags() } // "no", "nb" and "nn" share 
the same primary language ID, which even is - // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly. + // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly + // except if there is a defined MS-LCID for LanguageScriptOnly (LSO). { LanguageTag no( "no", true ); CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( no.getLanguageType()) ); LanguageTag nb( "nb", true ); - CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nb.getLanguageType()) ); + CPPUNIT_ASSERT( nb.getLanguageType() == LANGUAGE_NORWEGIAN_BOKMAL_LSO ); LanguageTag nn( "nn", true ); - CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nn.getLanguageType()) ); + CPPUNIT_ASSERT( nn.getLanguageType() == LANGUAGE_NORWEGIAN_NYNORSK_LSO ); LanguageTag no_NO( "no-NO", true ); CPPUNIT_ASSERT( no_NO.getLanguageType() == LANGUAGE_NORWEGIAN ); } @@ -410,6 +411,29 @@ void TestLanguageTag::testAllTags() // 'en-oed' is not a valid fallback! } +#if USE_LIBLANGTAG + // 'zh-yue-HK' uses extlang and should be preferred 'yue-HK' + { + OUString s_zh_yue_HK( "zh-yue-HK" ); + LanguageTag zh_yue_HK( s_zh_yue_HK ); + lang::Locale aLocale = zh_yue_HK.getLocale(); + CPPUNIT_ASSERT( zh_yue_HK.getBcp47() == "yue-HK" ); + CPPUNIT_ASSERT( aLocale.Language == "yue" ); + CPPUNIT_ASSERT( aLocale.Country == "HK" ); + CPPUNIT_ASSERT( aLocale.Variant == "" ); + CPPUNIT_ASSERT( zh_yue_HK.getLanguageType() == LANGUAGE_YUE_CHINESE_HONGKONG ); + CPPUNIT_ASSERT( zh_yue_HK.isValidBcp47() == true ); + CPPUNIT_ASSERT( zh_yue_HK.isIsoLocale() == true ); + CPPUNIT_ASSERT( zh_yue_HK.isIsoODF() == true ); + CPPUNIT_ASSERT( zh_yue_HK.getLanguageAndScript() == "yue" ); + CPPUNIT_ASSERT( zh_yue_HK.getVariants() == "" ); + ::std::vector< OUString > zh_yue_HK_Fallbacks( zh_yue_HK.getFallbackStrings( true)); + CPPUNIT_ASSERT( zh_yue_HK_Fallbacks.size() == 2); + CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[0] == "yue-HK"); + CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[1] == "yue"); + } +#endif + // 'qtz' is a local use known pseudolocale for key ID resource 
{ OUString s_qtz( "qtz" ); diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx index cf4f2e1f8a42..f904b78bb019 100644 --- a/i18nlangtag/source/isolang/isolang.cxx +++ b/i18nlangtag/source/isolang/isolang.cxx @@ -161,10 +161,11 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN", false }, { LANGUAGE_CHINESE_SIMPLIFIED_LEGACY, "zh", "CN", false }, { LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW", false }, - { LANGUAGE_CHINESE_TRADITIONAL_LEGACY, "zh", "TW", false }, { LANGUAGE_CHINESE_HONGKONG, "zh", "HK", false }, { LANGUAGE_CHINESE_SINGAPORE, "zh", "SG", false }, { LANGUAGE_CHINESE_MACAU, "zh", "MO", false }, + { LANGUAGE_CHINESE_LSO, "zh", "" , false }, + { LANGUAGE_YUE_CHINESE_HONGKONG, "yue", "HK", false }, { LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK", false }, { LANGUAGE_JAPANESE, "ja", "JP", false }, { LANGUAGE_KOREAN, "ko", "KR", false }, @@ -236,7 +237,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_INDONESIAN, "in", "ID", true }, // old: new is "id" { LANGUAGE_NORWEGIAN, "no", "NO", false }, { LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO", false }, + { LANGUAGE_NORWEGIAN_BOKMAL_LSO, "nb", "" , false }, { LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO", false }, + { LANGUAGE_NORWEGIAN_NYNORSK_LSO, "nn", "" , false }, { LANGUAGE_POLISH, "pl", "PL", false }, { LANGUAGE_RHAETO_ROMAN, "rm", "CH", false }, { LANGUAGE_ROMANIAN, "ro", "RO", false }, @@ -276,21 +279,28 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT", false }, { LANGUAGE_CROATIAN, "hr", "HR", false }, // Croatian in Croatia { LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA", false }, - { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA", false }, - { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS", false }, // Serbian Cyrillic in Serbia - { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS", false }, // Serbian Cyrillic in Serbia and Montenegro - { 
LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU", true }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, sr_CS not supported by ICU 2.6 (3.4 does) - { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME", false }, + { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA", false }, + { LANGUAGE_BOSNIAN_LSO, "bs", "" , false }, // so what is 'bs' vs 'bs-Latn'? + { LANGUAGE_SERBIAN_CYRILLIC_SERBIA, "sr", "RS", false }, // Serbian Cyrillic in Serbia + { LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA,"sr", "RS", false }, + { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "CS", false }, // Serbian Cyrillic in Serbia and Montenegro + { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "YU", true }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, sr_CS not supported by ICU 2.6 (3.4 does) + { LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME", false }, + { LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO,"sr", "ME", false }, { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA", false }, - { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now - { LANGUAGE_SERBIAN_LATIN, "sh", "CS", true }, // legacy kludge, is sr-Latn-CS now - { LANGUAGE_SERBIAN_LATIN, "sh", "YU", true }, // legacy kludge, is sr-Latn-YU now - { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME", true }, // legacy kludge, is sr-Latn-ME now + { LANGUAGE_SERBIAN_CYRILLIC_LSO, "sr", "" , false }, + { LANGUAGE_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now + { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now + { LANGUAGE_SERBIAN_LATIN_SAM, "sh", "CS", true }, // legacy kludge, is sr-Latn-CS now + { LANGUAGE_SERBIAN_LATIN_SAM, "sh", "YU", true }, // legacy kludge, is sr-Latn-YU now + { LANGUAGE_SERBIAN_LATIN_MONTENEGRO, "sh", "ME", true }, // legacy kludge, is sr-Latn-ME now + { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO,"sh", "ME", true }, // 
legacy kludge, is sr-Latn-ME now { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA", true }, // legacy kludge, is sr-Latn-BA now - { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" , true }, // legacy kludge, is sr-Latn now + { LANGUAGE_SERBIAN_LATIN_LSO, "sh", "" , true }, // legacy kludge, is sr-Latn now { LANGUAGE_ARMENIAN, "hy", "AM", false }, { LANGUAGE_AZERI_LATIN, "az", "AZ", false }, // macrolanguage code { LANGUAGE_UZBEK_LATIN, "uz", "UZ", false }, // macrolanguage code + { LANGUAGE_UZBEK_LATIN_LSO, "uz", "" , false }, // macrolanguage code { LANGUAGE_BENGALI_BANGLADESH, "bn", "BD", false }, { LANGUAGE_BENGALI, "bn", "IN", false }, { LANGUAGE_BURMESE, "my", "MM", false }, @@ -313,12 +323,15 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_ORIYA, "or", "IN", false }, { LANGUAGE_PUNJABI, "pa", "IN", false }, { LANGUAGE_SANSKRIT, "sa", "IN", false }, - { LANGUAGE_SINDHI, "sd", "IN", false }, + { LANGUAGE_SINDHI, "sd", "IN", false }, // TODO: there's Deva(nagari) and Arab(ic) script, which do we use in 'sd' translation? 
MS maps this to 'sd-Deva-IN' { LANGUAGE_TAMIL, "ta", "IN", false }, + { LANGUAGE_TAMIL_SRI_LANKA, "ta", "LK", false }, { LANGUAGE_TELUGU, "te", "IN", false }, - { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK", false }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB - { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK", false }, - { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK", false }, + { LANGUAGE_PUNJABI_PAKISTAN, "pnb", "PK", false }, + { LANGUAGE_PUNJABI_ARABIC_LSO, "pnb", "" , false }, + { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK", true }, // macrolanguage code, earlier preferred 'lah' over 'pa' for Western Panjabi, now there is 'pnb' + { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK", true }, // MS maps this to 'pa-Arab-PK', but 'pa'='pan' Eastern Panjabi is not used in PK, only in IN + { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK", false }, // TODO: there's Deva(nagari) and Arab(ic) script, which do we use in 'sd' translation? MS maps this to 'sd-Arab-PK' { LANGUAGE_BELARUSIAN, "be", "BY", false }, { LANGUAGE_CATALAN, "ca", "ES", false }, // Spain (default) { LANGUAGE_CATALAN, "ca", "AD", false }, // Andorra @@ -334,23 +347,27 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_FRENCH_REUNION, "fr", "RE", false }, { LANGUAGE_FRENCH, "fr", "" , false }, // needed as a catcher before other "fr" entries! 
{ LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" , false }, - { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" , false }, // unknown ISO country code + { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" , false }, // no ISO country code; MS "Neither defined nor reserved" { LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL", false }, { LANGUAGE_GAELIC_IRELAND, "ga", "IE", false }, { LANGUAGE_GAELIC_SCOTLAND, "gd", "GB", false }, + { LANGUAGE_GAELIC_SCOTLAND_LEGACY, "gd", "GB", false }, { LANGUAGE_GALICIAN, "gl", "ES", false }, { LANGUAGE_GEORGIAN, "ka", "GE", false }, { LANGUAGE_KHMER, "km", "KH", false }, { LANGUAGE_KIRGHIZ, "ky", "KG", false }, { LANGUAGE_LAO, "lo", "LA", false }, { LANGUAGE_MALTESE, "mt", "MT", false }, - { LANGUAGE_MONGOLIAN, "mn", "MN", true }, // Cyrillic script + { LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, "mn", "MN", false }, // macrolanguage code; should be khk-MN; Cyrillic script + { LANGUAGE_MONGOLIAN_CYRILLIC_LSO, "mn", "" , false }, // macrolanguage code; should be khk; Cyrillic script { LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD", false }, { LANGUAGE_SWAHILI, "sw", "KE", false }, { LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ", false }, { LANGUAGE_TAJIK, "tg", "TJ", false }, + { LANGUAGE_TAJIK_LSO, "tg", "" , false }, { LANGUAGE_TIBETAN, "bo", "CN", false }, // CN politically correct? 
{ LANGUAGE_USER_TIBETAN_INDIA, "bo", "IN", false }, + { LANGUAGE_USER_TIBETAN_BHUTAN, "bo", "BT", false }, // MS reserved, but with the ID error instead { LANGUAGE_DZONGKHA, "dz", "BT", false }, { LANGUAGE_USER_DZONGKHA_MAP_LONLY, "dz", "" , false }, // because of the MS error, see lang.h { LANGUAGE_TURKMEN, "tk", "TM", false }, @@ -366,9 +383,12 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_VENDA, "ven", "ZA", true }, // 639-2 may have been used temporarily since 2004-07-23 { LANGUAGE_XHOSA, "xh", "ZA", false }, { LANGUAGE_ZULU, "zu", "ZA", false }, - { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC", false }, - { LANGUAGE_QUECHUA_PERU, "qu", "PE", false }, - { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO", false }, // macro: quh-BO, qul-BO +// { LANGUAGE_QUECHUA_COLOMBIA, "quc", "CO", false }, // MS reserved, and looks wrong, quc would be in Guatemala, not Colombia + { LANGUAGE_QUECHUA_ECUADOR, "quz", "EC", false }, // MS + { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC", true }, // macrolanguage code + { LANGUAGE_QUECHUA_PERU, "quz", "PE", false }, // MS + { LANGUAGE_QUECHUA_PERU, "qu", "PE", true }, // macrolanguage code + { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO", false }, // macrolanguage code, TODO instead: quh-BO or qul-BO; MS says quz-BO which is wrong { LANGUAGE_PASHTO, "ps", "AF", false }, { LANGUAGE_OROMO, "om", "ET", false }, { LANGUAGE_DHIVEHI, "dv", "MV", false }, @@ -379,7 +399,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_GUARANI_PARAGUAY, "gug", "PY", false }, { LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US", false }, { LANGUAGE_EDO, "bin", "NG", false }, - { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG", false }, + { LANGUAGE_FULFULDE_NIGERIA, "fuv", "NG", false }, + { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG", true }, // macrolanguage code + { LANGUAGE_FULFULDE_SENEGAL, "ff", "SN", false }, // macrolanguage code { LANGUAGE_HAUSA_NIGERIA, "ha", "NG", false }, { LANGUAGE_USER_HAUSA_GHANA, "ha", "GH", false }, { LANGUAGE_IGBO_NIGERIA, 
"ig", "NG", false }, @@ -397,17 +419,21 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_SYRIAC, "syr", "TR", false }, // "TR" according to http://www.ethnologue.com/show_language.asp?code=SYC { LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK", false }, { LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US", false }, - { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA", false }, -// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA", false }, // script codes not supported yet + { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA", true }, // macrolanguage code + { LANGUAGE_INUKTITUT_LATIN_LSO, "iu", "" , true }, // macrolanguage code { LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO", false }, { LANGUAGE_SAMI_INARI, "smn", "FI", false }, + { LANGUAGE_SAMI_INARI_LSO, "smn", "" , false }, { LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO", false }, { LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE", false }, + { LANGUAGE_SAMI_LULE_LSO, "smj", "" , false }, { LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI", false }, { LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE", false }, { LANGUAGE_SAMI_SKOLT, "sms", "FI", false }, + { LANGUAGE_SAMI_SKOLT_LSO, "sms", "" , false }, { LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO", false }, { LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE", false }, + { LANGUAGE_SAMI_SOUTHERN_LSO, "sma", "" , false }, { LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU", false }, { LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL", false }, { LANGUAGE_CORSICAN_FRANCE, "co", "FR", false }, @@ -416,18 +442,27 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_MOHAWK_CANADA, "moh", "CA", false }, { LANGUAGE_BASHKIR_RUSSIA, "ba", "RU", false }, { LANGUAGE_KICHE_GUATEMALA, "qut", "GT", false }, - { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF", false }, + { LANGUAGE_DARI_AFGHANISTAN, "prs", "AF", false }, + { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF", true }, // was an error { LANGUAGE_WOLOF_SENEGAL, "wo", "SN", false }, { LANGUAGE_FILIPINO, "fil", "PH", false }, { LANGUAGE_USER_TAGALOG, "tl", "PH", false 
}, { LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH", false }, -// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG", false }, // ISO "nic" is only a collective language code + { LANGUAGE_IBIBIO_NIGERIA, "ibb", "NG", false }, { LANGUAGE_YI, "ii", "CN", false }, - { LANGUAGE_TAMAZIGHT_LATIN, "kab", "DZ", false }, // In practice Kabyle is the language used for this + { LANGUAGE_ENGLISH_ARAB_EMIRATES, "en", "AE", false }, // MS reserved + { LANGUAGE_ENGLISH_BAHRAIN, "en", "BH", false }, // MS reserved + { LANGUAGE_ENGLISH_EGYPT, "en", "EG", false }, // MS reserved + { LANGUAGE_ENGLISH_JORDAN, "en", "JO", false }, // MS reserved + { LANGUAGE_ENGLISH_KUWAIT, "en", "KW", false }, // MS reserved + { LANGUAGE_ENGLISH_TURKEY, "en", "TR", false }, // MS reserved + { LANGUAGE_ENGLISH_YEMEN, "en", "YE", false }, // MS reserved + { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "kab", "DZ", false }, // In practice Kabyle is the language used for this { LANGUAGE_OBSOLETE_USER_KABYLE, "kab", "DZ", false }, - { LANGUAGE_TAMAZIGHT_LATIN, "ber", "DZ", false }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there. - { LANGUAGE_TAMAZIGHT_TIFINAGH, "ber", "MA", false }, // Morocco is officially using Tifinagh for its Berber languages so store it to distinguish explicitly from LANGUAGE_TAMAZIGHT_LATIN, even though as a collective language its not of much use -// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" , false }, // ISO "ber" only collective! + { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "ber", "DZ", true }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there. 
+ { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "tmz", "MA", true }, + { LANGUAGE_TAMAZIGHT_MOROCCO, "tmz", "MA", false }, // MS reserved + { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "ber", "MA", true }, // Morocco is officially using Tifinagh for its Berber languages, old kludge to distinguish from LANGUAGE_TAMAZIGHT_LATIN_ALGERIA { LANGUAGE_LATIN, "la", "VA", false }, { LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA", false }, { LANGUAGE_USER_ESPERANTO, "eo", "" , false }, @@ -439,6 +474,7 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE", false }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN { LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE", false }, { LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE", false }, // MS maps this to 'wee-DE', which is nonsense. 'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE + { LANGUAGE_LOWER_SORBIAN_LSO, "dsb", "" , false }, { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE", false }, { LANGUAGE_OCCITAN_FRANCE, "oc", "FR", false }, { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR", false }, @@ -457,7 +493,8 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL", false }, { LANGUAGE_USER_SWAZI, "ss", "ZA", false }, { LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA", false }, - { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW", false }, + { LANGUAGE_TSWANA_BOTSWANA, "tn", "BW", false }, + { LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA, "tn", "BW", false }, { LANGUAGE_USER_MOORE, "mos", "BF", false }, { LANGUAGE_USER_BAMBARA, "bm", "ML", false }, { LANGUAGE_USER_AKAN, "ak", "GH", false }, @@ -499,8 +536,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = { LANGUAGE_USER_MAORE, "swb", "YT", false }, { LANGUAGE_USER_BUSHI, "buc", "YT", false }, 
{ LANGUAGE_USER_TAHITIAN, "ty", "PF", false }, - { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG", false }, - { LANGUAGE_USER_MALAGASY_PLATEAU, "mg", "MG", false }, + { LANGUAGE_MALAGASY_PLATEAU, "plt", "MG", false }, + { LANGUAGE_MALAGASY_PLATEAU, "mg", "MG", true }, + { LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU, "plt", "MG", false }, { LANGUAGE_USER_BAFIA, "ksf", "CM", false }, { LANGUAGE_USER_GIKUYU, "ki", "KE", false }, { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA", false }, @@ -574,21 +612,75 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] = static IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] = { - // MS-LangID ISO639-ISO15924, ISO3166 - { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" }, - { LANGUAGE_SERBIAN_LATIN, "sr-Latn", "CS" }, // Serbian Latin in Serbia and Montenegro; note that not all applications may know about the 'CS' reusage mess, see https://en.wikipedia.org/wiki/ISO_3166-2:CS - { LANGUAGE_SERBIAN_LATIN, "sr-Latn", "YU" }, // legacy Serbian Latin in Yugoslavia - { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr-Latn", "ME" }, - { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA,"sr-Latn", "BA" }, - { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr-Latn", "" }, + // MS-LangID ISO639-ISO15924, ISO3166 + { LANGUAGE_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" }, + { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" }, + { LANGUAGE_SERBIAN_LATIN_MONTENEGRO, "sr-Latn", "ME" }, + { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO,"sr-Latn", "ME" }, + { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sr-Latn", "BA" }, + { LANGUAGE_SERBIAN_LATIN_SAM, "sr-Latn", "CS" }, // Serbian Latin in Serbia and Montenegro; note that not all applications may know about the 'CS' reusage mess, see https://en.wikipedia.org/wiki/ISO_3166-2:CS + { LANGUAGE_SERBIAN_LATIN_SAM, "sr-Latn", "YU" }, // legacy Serbian Latin in Yugoslavia + { LANGUAGE_SERBIAN_LATIN_LSO, "sr-Latn", "" }, + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr-Latn", "" }, // MS lists this as 'sr' only, 
what a mess + { LANGUAGE_SERBIAN_CYRILLIC_SERBIA, "sr-Cyrl", "RS" }, // MS + { LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO, "sr-Cyrl", "ME" }, // MS + { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr-Cyrl", "BA" }, // MS + { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr-Cyrl", "CS" }, // MS + { LANGUAGE_SERBIAN_CYRILLIC_LSO, "sr-Cyrl", "" }, // MS { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs-Cyrl", "BA" }, - { LANGUAGE_AZERI_CYRILLIC, "az-Cyrl", "AZ" }, // macrolanguage code - { LANGUAGE_UZBEK_CYRILLIC, "uz-Cyrl", "UZ" }, // macrolanguage code - { LANGUAGE_MONGOLIAN, "mn-Cyrl", "MN" }, // macrolanguage code - { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn-Mong", "MN" }, // macrolanguage code - { LANGUAGE_USER_PALI_LATIN, "pi-Latn", "" }, - { LANGUAGE_USER_KARAKALPAK_LATIN, "kaa-Latn", "UZ" }, - { LANGUAGE_DONTKNOW, "", "" } // marks end of table + { LANGUAGE_BOSNIAN_CYRILLIC_LSO, "bs-Cyrl", "" }, + { LANGUAGE_AZERI_CYRILLIC, "az-Cyrl", "AZ" }, // macrolanguage code + { LANGUAGE_AZERI_CYRILLIC_LSO, "az-Cyrl", "" }, // macrolanguage code + { LANGUAGE_UZBEK_CYRILLIC, "uz-Cyrl", "UZ" }, // macrolanguage code + { LANGUAGE_UZBEK_CYRILLIC_LSO, "uz-Cyrl", "" }, // macrolanguage code + { LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, "mn-Cyrl", "MN" }, // macrolanguage code; should be khk-MN or khk-Cyrl-MN + { LANGUAGE_MONGOLIAN_CYRILLIC_LSO, "mn-Cyrl", "" }, // macrolanguage code; MS, should be khk or khk-Cyrl + { LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA, "mn-Mong", "MN" }, // macrolanguage code; MS, should be khk-Mong-MN + { LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA, "mn-Mong", "CN" }, // macrolanguage code; MS, should actually be mvf-CN + { LANGUAGE_MONGOLIAN_MONGOLIAN_LSO, "mn-Mong", "" }, // macrolanguage code + { LANGUAGE_USER_PALI_LATIN, "pi-Latn", "" }, + { LANGUAGE_USER_KARAKALPAK_LATIN, "kaa-Latn", "UZ" }, + { LANGUAGE_TAJIK, "tg-Cyrl", "TJ" }, // MS + { LANGUAGE_TAJIK_LSO, "tg-Cyrl", "" }, // MS + { LANGUAGE_AZERI_LATIN, "az-Latn", "AZ" }, // macrolanguage code; MS + { LANGUAGE_AZERI_LATIN_LSO, 
"az-Latn", "" }, // macrolanguage code; MS + { LANGUAGE_USER_YIDDISH_US, "yi-Hebr", "US" }, // macrolanguage code; MS + { LANGUAGE_YIDDISH, "yi-Hebr", "IL" }, // macrolanguage code; MS + { LANGUAGE_UZBEK_LATIN, "uz-Latn", "UZ" }, // macrolanguage code + { LANGUAGE_UZBEK_LATIN_LSO, "uz-Latn", "" }, +// { LANGUAGE_SINDHI, "sd-Deva", "IN" }, // MS, TODO: see comment above in aImplIsoLangEntries +// { LANGUAGE_SINDHI_PAKISTAN, "sd-Arab", "PK" }, // MS, TODO: see comment above in aImplIsoLangEntries + { LANGUAGE_SINDHI_ARABIC_LSO, "sd-Arab", "" }, + { LANGUAGE_CHEROKEE_UNITED_STATES, "chr-Cher", "US" }, // MS + { LANGUAGE_CHEROKEE_CHEROKEE_LSO, "chr-Cher", "" }, + { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu-Cans", "CA" }, // macrolanguage code, MS + { LANGUAGE_INUKTITUT_SYLLABICS_LSO, "iu-Cans", "" }, // macrolanguage code, MS + { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu-Latn", "CA" }, // macrolanguage code, MS + { LANGUAGE_INUKTITUT_LATIN_LSO, "iu-Latn", "" }, // macrolanguage code, MS + { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "tzm-Tfng", "MA" }, + { LANGUAGE_TAMAZIGHT_TIFINAGH_LSO, "tzm-Tfng", "" }, + { LANGUAGE_KASHMIRI_INDIA, "ks-Deva", "IN" }, // MS + { LANGUAGE_KASHMIRI, "ks-Arab", "" }, // MS, Kashmiri in "Jammu and Kashmir" ... 
no ISO3166 code for that + { LANGUAGE_HAUSA_NIGERIA, "ha-Latn", "NG" }, // MS + { LANGUAGE_USER_HAUSA_GHANA, "ha-Latn", "GH" }, // MS + { LANGUAGE_HAUSA_LATIN_LSO, "ha-Latn", "" }, + { LANGUAGE_LATIN, "la-Latn", "" }, // MS + { LANGUAGE_TAI_NUA_CHINA, "tdd-Tale", "CN" }, // MS reserved + { LANGUAGE_LU_CHINA, "khb-Talu", "CN" }, // MS reserved + { LANGUAGE_KURDISH_ARABIC_IRAQ, "ku-Arab", "IQ" }, // macrolanguage code, MS + { LANGUAGE_KURDISH_ARABIC_LSO, "ku-Arab", "" }, // macrolanguage code + { LANGUAGE_PUNJABI_PAKISTAN, "pnb-Arab", "PK" }, + { LANGUAGE_PUNJABI_ARABIC_LSO, "pnb-Arab", "" }, + { LANGUAGE_PUNJABI_PAKISTAN, "pa-Arab", "PK" }, // MS, incorrect + { LANGUAGE_PUNJABI_ARABIC_LSO, "pa-Arab", "" }, // MS, incorrect + { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "tzm-Latn", "DZ" }, // MS + { LANGUAGE_TAMAZIGHT_LATIN_LSO, "tzm-Latn", "" }, // MS + { LANGUAGE_FULFULDE_SENEGAL, "ff-Latn", "SN" }, // macrolanguage code, MS + { LANGUAGE_FULFULDE_LATIN_LSO, "ff-Latn", "" }, // macrolanguage code + { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs-Latn", "BA" }, // MS, though Latn is suppress-script + { LANGUAGE_BOSNIAN_LATIN_LSO, "bs-Latn", "" }, // MS, though Latn is suppress-script + { LANGUAGE_CHINESE_TRADITIONAL_LSO, "zh-Hant", "" }, + { LANGUAGE_DONTKNOW, "", "" } // marks end of table }; static Bcp47CountryEntry const aImplBcp47CountryEntries[] = @@ -597,6 +689,7 @@ static Bcp47CountryEntry const aImplBcp47CountryEntries[] = { LANGUAGE_CATALAN_VALENCIAN, "ca-ES-valencia", "ES", "ca-valencia" }, { LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN, "ca-ES-valencia", "ES", "" }, // In case MS format files using the old value escaped into the wild, map them back. { LANGUAGE_USER_ENGLISH_UK_OED, "en-GB-oed", "GB", "" }, // grandfathered +// { LANGUAGE_YUE_CHINESE_HONGKONG, "zh-yue-HK", "HK", "" }, // MS reserved, prefer yue-HK; do not add unless LanguageTag::simpleExtract() can handle it to not call liblangtag for rsc! 
{ LANGUAGE_DONTKNOW, "", "", "" } // marks end of table }; @@ -719,8 +812,8 @@ static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] = { { LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766 { LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766 - { LANGUAGE_SERBIAN_LATIN, "sr", "latin" }, - { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" }, + { LANGUAGE_SERBIAN_LATIN_SAM, "sr", "latin" }, + { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "cyrillic" }, { LANGUAGE_AZERI_LATIN, "az", "latin" }, { LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" }, { LANGUAGE_DONTKNOW, "", "" } // marks end of table @@ -1138,9 +1231,9 @@ static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia - { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro + { LANGUAGE_SERBIAN_LATIN_SAM, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro - { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, + { LANGUAGE_SERBIAN_LATIN_LSO, "sr", "", "latin" }, { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx index 36d5d8a4f853..72d5051ee32e 100644 --- a/i18nlangtag/source/isolang/mslangid.cxx +++ b/i18nlangtag/source/isolang/mslangid.cxx @@ -252,7 +252,8 @@ bool MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLoc //static bool MsLangId::isChinese( LanguageType nLang ) { - return MsLangId::getPrimaryLanguage(nLang) == 
MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE); + return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE) || + MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_YUE_CHINESE_HONGKONG); } //static @@ -266,9 +267,10 @@ bool MsLangId::isCJK( LanguageType nLang ) { switch (nLang & LANGUAGE_MASK_PRIMARY) { - case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: return true; default: break; @@ -313,7 +315,9 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang ) switch( nLang ) { // CTL - case LANGUAGE_MONGOLIAN_MONGOLIAN: + case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO: case LANGUAGE_USER_KURDISH_IRAN: case LANGUAGE_USER_KURDISH_IRAQ: case LANGUAGE_USER_KYRGYZ_CHINA: @@ -321,7 +325,8 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang ) break; // "Western" - case LANGUAGE_MONGOLIAN: + case LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA: + case LANGUAGE_MONGOLIAN_CYRILLIC_LSO: case LANGUAGE_USER_KURDISH_SYRIA: case LANGUAGE_USER_KURDISH_TURKEY: nScript = ::com::sun::star::i18n::ScriptType::LATIN; @@ -341,9 +346,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang ) switch ( nLang & LANGUAGE_MASK_PRIMARY ) { // CJK catcher - case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: nScript = ::com::sun::star::i18n::ScriptType::ASIAN; break; 
@@ -433,11 +439,32 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG; break; case LANGUAGE_OBSOLETE_USER_KABYLE: - nLang = LANGUAGE_TAMAZIGHT_LATIN; + nLang = LANGUAGE_TAMAZIGHT_LATIN_ALGERIA; break; case LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN: nLang = LANGUAGE_CATALAN_VALENCIAN; break; + case LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU: + nLang = LANGUAGE_MALAGASY_PLATEAU; + break; + case LANGUAGE_GAELIC_SCOTLAND_LEGACY: + nLang = LANGUAGE_GAELIC_SCOTLAND; + break; + case LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA: + nLang = LANGUAGE_TSWANA_BOTSWANA; + break; + case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA: + nLang = LANGUAGE_SERBIAN_LATIN_SERBIA; + break; + case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO: + nLang = LANGUAGE_SERBIAN_LATIN_MONTENEGRO; + break; + case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA: + nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; + break; + case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO: + nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO; + break; // The following are not strictly obsolete but should be mapped to a // replacement locale when encountered. 
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 4f8af9ade4bb..689a9854f3b4 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -1174,7 +1174,7 @@ bool LanguageTagImpl::canonicalize() return bChanged; // that's it } meIsLiblangtagNeeded = DECISION_YES; - SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for " << maBcp47); + SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'"); if (!mpImplLangtag) { @@ -1187,7 +1187,7 @@ bool LanguageTagImpl::canonicalize() if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p)) { char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p); - SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize " << maBcp47); + SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'"); if (pTag) { OUString aOld( maBcp47); @@ -1201,7 +1201,7 @@ bool LanguageTagImpl::canonicalize() meIsIsoODF = DECISION_DONTKNOW; if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p)) { - SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse " << maBcp47); + SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'"); free( pTag); meIsValid = DECISION_NO; return bChanged; @@ -1214,7 +1214,7 @@ bool LanguageTagImpl::canonicalize() } else { - SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse " << maBcp47); + SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'"); } meIsValid = DECISION_NO; return bChanged; @@ -1536,11 +1536,13 @@ OUString LanguageTagImpl::getLanguageFromLangtag() if (mpImplLangtag) { const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag); - SAL_WARN_IF( !pLangT, "i18nlangtag", 
"LanguageTag::getLanguageFromLangtag: pLangT==NULL"); + SAL_WARN_IF( !pLangT, "i18nlangtag", + "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'"); if (!pLangT) return aLanguage; const char* pLang = lt_lang_get_tag( pLangT); - SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL"); + SAL_WARN_IF( !pLang, "i18nlangtag", + "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'"); if (pLang) aLanguage = OUString::createFromAscii( pLang); } @@ -1595,11 +1597,12 @@ OUString LanguageTagImpl::getRegionFromLangtag() SAL_WARN_IF( !pRegionT && maBcp47.getLength() != 2 && maBcp47.getLength() != 3 && maBcp47.getLength() != 7 && maBcp47.getLength() != 8, - "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL"); + "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'"); if (!pRegionT) return aRegion; const char* pRegion = lt_region_get_tag( pRegionT); - SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL"); + SAL_WARN_IF( !pRegion, "i18nlangtag", + "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'"); if (pRegion) aRegion = OUString::createFromAscii( pRegion); } @@ -2399,7 +2402,10 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp } } if (eRet == EXTRACTED_NONE) + { + SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'"); rLanguage = rScript = rCountry = rVariants = OUString(); + } return eRet; } diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h index 185f7bc580c1..f66cd5972ed3 100644 --- a/include/i18nlangtag/lang.h +++ b/include/i18nlangtag/lang.h @@ -63,6 +63,17 @@ ! ! Use THAT ^^^ as of 2013-09-17 it includes also Windows 8 + ! BUT, you can download a PDF document from + ! http://msdn.microsoft.com/library/cc233965.aspx + ! that has YET MORE definitions, sigh.. didn't cross-check if any are missing + ! 
from that.. however, it also contains a few MS-reserved definitions that use + ! ISO 639-3 codes reserved for local use, such as 'qps-ploc' and 'qps-ploca' + ! (sic!), or strange things like 'ar-Ploc-SA' and 'ja-Ploc-JP' ('Ploc'??). + ! + ! Use THAT ^^^ as the ultimate reference source (haha?) as of 2013-10-17 it + ! includes also Windows 8.1 (Release: Monday, July 22, 2013; well, its table + ! says 08/08/2013 Revision 6.0, but hey, who cares ...) + For completeness, you can never have enough lists: List of supported locale identifiers in Word Applies to Microsoft Word 2000 and 2002 @@ -121,38 +132,45 @@ typedef unsigned short LanguageType; #define LANGUAGE_ARABIC_LIBYA 0x1001 #define LANGUAGE_ARABIC_MOROCCO 0x1801 #define LANGUAGE_ARABIC_OMAN 0x2001 +#define LANGUAGE_ARABIC_PRIMARY_ONLY 0x0001 /* primary only, not a locale! */ #define LANGUAGE_ARABIC_QATAR 0x4001 #define LANGUAGE_ARABIC_SAUDI_ARABIA 0x0401 #define LANGUAGE_ARABIC_SYRIA 0x2801 #define LANGUAGE_ARABIC_TUNISIA 0x1C01 #define LANGUAGE_ARABIC_UAE 0x3801 #define LANGUAGE_ARABIC_YEMEN 0x2401 -#define LANGUAGE_ARABIC_PRIMARY_ONLY 0x0001 /* primary only, not a locale! 
*/ #define LANGUAGE_ARMENIAN 0x042B #define LANGUAGE_ASSAMESE 0x044D #define LANGUAGE_AZERI_CYRILLIC 0x082C +#define LANGUAGE_AZERI_CYRILLIC_LSO 0x742C #define LANGUAGE_AZERI_LATIN 0x042C +#define LANGUAGE_AZERI_LATIN_LSO 0x782C #define LANGUAGE_BASHKIR_RUSSIA 0x046D #define LANGUAGE_BASQUE 0x042D #define LANGUAGE_BELARUSIAN 0x0423 #define LANGUAGE_BENGALI 0x0445 /* in India */ #define LANGUAGE_BENGALI_BANGLADESH 0x0845 -#define LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA 0x141A +#define LANGUAGE_BOSNIAN_CYRILLIC_LSO 0x641A #define LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x201A +#define LANGUAGE_BOSNIAN_LATIN_LSO 0x681A +#define LANGUAGE_BOSNIAN_LSO 0x781A +#define LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA 0x141A #define LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA /* TODO: remove, only for langtab.src & localize.sdf compatibility */ #define LANGUAGE_BRETON_FRANCE 0x047E /* obsoletes LANGUAGE_USER_BRETON 0x0629 */ #define LANGUAGE_BULGARIAN 0x0402 #define LANGUAGE_BURMESE 0x0455 #define LANGUAGE_CATALAN 0x0403 -#define LANGUAGE_CATALAN_VALENCIAN 0x0803 +#define LANGUAGE_CATALAN_VALENCIAN 0x0803 /* obsoletes LANGUAGE_USER_CATALAN_VALENCIAN */ #define LANGUAGE_CHEROKEE_UNITED_STATES 0x045C +#define LANGUAGE_CHEROKEE_CHEROKEE_LSO 0x7C5C #define LANGUAGE_CHINESE_HONGKONG 0x0C04 +#define LANGUAGE_CHINESE_LSO 0x7804 #define LANGUAGE_CHINESE_MACAU 0x1404 #define LANGUAGE_CHINESE_SIMPLIFIED 0x0804 #define LANGUAGE_CHINESE_SINGAPORE 0x1004 #define LANGUAGE_CHINESE_TRADITIONAL 0x0404 #define LANGUAGE_CHINESE_SIMPLIFIED_LEGACY 0x0004 /* MS-.NET 'zh-CHS', primary only! 
but maps to 'zh-CN' */ -#define LANGUAGE_CHINESE_TRADITIONAL_LEGACY 0x7C04 /* MS-.NET 'zh-CHT' */ +#define LANGUAGE_CHINESE_TRADITIONAL_LSO 0x7C04 /* MS-.NET 'zh-CHT' but maps to 'zh-Hant' */ #define LANGUAGE_CHINESE LANGUAGE_CHINESE_SIMPLIFIED /* most code uses LANGUAGE_CHINESE */ #define LANGUAGE_CORSICAN_FRANCE 0x0483 #define LANGUAGE_CROATIAN 0x041A @@ -165,23 +183,30 @@ typedef unsigned short LanguageType; #define LANGUAGE_DUTCH_BELGIAN 0x0813 #define LANGUAGE_EDO 0x0466 #define LANGUAGE_ENGLISH 0x0009 /* primary only, not a locale! */ +#define LANGUAGE_ENGLISH_ARAB_EMIRATES 0x4C09 #define LANGUAGE_ENGLISH_AUS 0x0C09 +#define LANGUAGE_ENGLISH_BAHRAIN 0x5009 #define LANGUAGE_ENGLISH_BELIZE 0x2809 #define LANGUAGE_ENGLISH_CAN 0x1009 #define LANGUAGE_ENGLISH_CARRIBEAN 0x2409 +#define LANGUAGE_ENGLISH_EGYPT 0x5409 #define LANGUAGE_ENGLISH_EIRE 0x1809 #define LANGUAGE_ENGLISH_HONG_KONG_SAR 0x3C09 #define LANGUAGE_ENGLISH_INDIA 0x4009 #define LANGUAGE_ENGLISH_INDONESIA 0x3809 #define LANGUAGE_ENGLISH_JAMAICA 0x2009 +#define LANGUAGE_ENGLISH_JORDAN 0x5809 +#define LANGUAGE_ENGLISH_KUWAIT 0x5C09 #define LANGUAGE_ENGLISH_MALAYSIA 0x4409 #define LANGUAGE_ENGLISH_NZ 0x1409 #define LANGUAGE_ENGLISH_PHILIPPINES 0x3409 #define LANGUAGE_ENGLISH_SAFRICA 0x1C09 #define LANGUAGE_ENGLISH_SINGAPORE 0x4809 #define LANGUAGE_ENGLISH_TRINIDAD 0x2C09 +#define LANGUAGE_ENGLISH_TURKEY 0x6009 #define LANGUAGE_ENGLISH_UK 0x0809 #define LANGUAGE_ENGLISH_US 0x0409 +#define LANGUAGE_ENGLISH_YEMEN 0x6409 #define LANGUAGE_ENGLISH_ZIMBABWE 0x3009 #define LANGUAGE_ESTONIAN 0x0425 #define LANGUAGE_FAEROESE 0x0438 @@ -203,12 +228,15 @@ typedef unsigned short LanguageType; #define LANGUAGE_FRENCH_REUNION 0x200C #define LANGUAGE_FRENCH_SENEGAL 0x280C #define LANGUAGE_FRENCH_SWISS 0x100C -#define LANGUAGE_FRENCH_WEST_INDIES 0x1C0C +#define LANGUAGE_FRENCH_WEST_INDIES 0x1C0C /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */ #define LANGUAGE_FRENCH_ZAIRE 0x240C #define 
LANGUAGE_FRISIAN_NETHERLANDS 0x0462 +#define LANGUAGE_FULFULDE_LATIN_LSO 0x7C67 #define LANGUAGE_FULFULDE_NIGERIA 0x0467 +#define LANGUAGE_FULFULDE_SENEGAL 0x0867 #define LANGUAGE_GAELIC_IRELAND 0x083C -#define LANGUAGE_GAELIC_SCOTLAND 0x043C +#define LANGUAGE_GAELIC_SCOTLAND 0x0491 /* apparently it occurred to MS that those are different languages */ +#define LANGUAGE_GAELIC_SCOTLAND_LEGACY 0x043C #define LANGUAGE_GALICIAN 0x0456 #define LANGUAGE_GEORGIAN 0x0437 #define LANGUAGE_GERMAN 0x0407 @@ -219,6 +247,7 @@ typedef unsigned short LanguageType; #define LANGUAGE_GREEK 0x0408 #define LANGUAGE_GUARANI_PARAGUAY 0x0474 #define LANGUAGE_GUJARATI 0x0447 +#define LANGUAGE_HAUSA_LATIN_LSO 0x7C68 #define LANGUAGE_HAUSA_NIGERIA 0x0468 #define LANGUAGE_HAWAIIAN_UNITED_STATES 0x0475 #define LANGUAGE_HEBREW 0x040D @@ -229,7 +258,9 @@ typedef unsigned short LanguageType; #define LANGUAGE_IGBO_NIGERIA 0x0470 #define LANGUAGE_INDONESIAN 0x0421 #define LANGUAGE_INUKTITUT_SYLLABICS_CANADA 0x045D +#define LANGUAGE_INUKTITUT_SYLLABICS_LSO 0x785D #define LANGUAGE_INUKTITUT_LATIN_CANADA 0x085D +#define LANGUAGE_INUKTITUT_LATIN_LSO 0x7C5D #define LANGUAGE_ITALIAN 0x0410 #define LANGUAGE_ITALIAN_SWISS 0x0810 #define LANGUAGE_JAPANESE 0x0411 @@ -246,13 +277,17 @@ typedef unsigned short LanguageType; #define LANGUAGE_KONKANI 0x0457 #define LANGUAGE_KOREAN 0x0412 #define LANGUAGE_KOREAN_JOHAB 0x0812 +#define LANGUAGE_KURDISH_ARABIC_IRAQ 0x0492 /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ */ +#define LANGUAGE_KURDISH_ARABIC_LSO 0x7C92 #define LANGUAGE_LAO 0x0454 #define LANGUAGE_LATIN 0x0476 /* obsoletes LANGUAGE_USER_LATIN 0x0610 */ #define LANGUAGE_LATVIAN 0x0426 #define LANGUAGE_LITHUANIAN 0x0427 -#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 +#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */ +#define LANGUAGE_LU_CHINA 0x0490 #define LANGUAGE_LUXEMBOURGISH_LUXEMBOURG 0x046E /* obsoletes LANGUAGE_USER_LUXEMBOURGISH 0x0630 */ 
#define LANGUAGE_MACEDONIAN 0x042F +#define LANGUAGE_MALAGASY_PLATEAU 0x048D /* obsoletes LANGUAGE_USER_MALAGASY_PLATEAU */ #define LANGUAGE_MALAYALAM 0x044C /* in India */ #define LANGUAGE_MALAY_BRUNEI_DARUSSALAM 0x083E #define LANGUAGE_MALAY_MALAYSIA 0x043E @@ -262,13 +297,18 @@ typedef unsigned short LanguageType; #define LANGUAGE_MAPUDUNGUN_CHILE 0x047A /* AKA Araucanian */ #define LANGUAGE_MARATHI 0x044E #define LANGUAGE_MOHAWK_CANADA 0x047C -#define LANGUAGE_MONGOLIAN 0x0450 /* Cyrillic script */ -#define LANGUAGE_MONGOLIAN_MONGOLIAN 0x0850 +#define LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA 0x0450 +#define LANGUAGE_MONGOLIAN_CYRILLIC_LSO 0x7850 +#define LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA 0x0C50 +#define LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA 0x0850 +#define LANGUAGE_MONGOLIAN_MONGOLIAN_LSO 0x7C50 #define LANGUAGE_NEPALI 0x0461 #define LANGUAGE_NEPALI_INDIA 0x0861 #define LANGUAGE_NORWEGIAN 0x0014 /* primary only, not a locale! */ #define LANGUAGE_NORWEGIAN_BOKMAL 0x0414 +#define LANGUAGE_NORWEGIAN_BOKMAL_LSO 0x7C14 #define LANGUAGE_NORWEGIAN_NYNORSK 0x0814 +#define LANGUAGE_NORWEGIAN_NYNORSK_LSO 0x7814 #define LANGUAGE_OCCITAN_FRANCE 0x0482 /* obsoletes LANGUAGE_USER_OCCITAN 0x0625 */ #define LANGUAGE_ORIYA 0x0448 #define LANGUAGE_OROMO 0x0472 @@ -278,8 +318,10 @@ typedef unsigned short LanguageType; #define LANGUAGE_PORTUGUESE 0x0816 #define LANGUAGE_PORTUGUESE_BRAZILIAN 0x0416 #define LANGUAGE_PUNJABI 0x0446 +#define LANGUAGE_PUNJABI_ARABIC_LSO 0x7C46 #define LANGUAGE_PUNJABI_PAKISTAN 0x0846 #define LANGUAGE_QUECHUA_BOLIVIA 0x046B +#define LANGUAGE_QUECHUA_COLOMBIA 0x0493 /* different primary ID */ #define LANGUAGE_QUECHUA_ECUADOR 0x086B #define LANGUAGE_QUECHUA_PERU 0x0C6B #define LANGUAGE_RHAETO_ROMAN 0x0417 @@ -290,23 +332,34 @@ typedef unsigned short LanguageType; #define LANGUAGE_SAMI_NORTHERN_NORWAY 0x043B #define LANGUAGE_SAMI_LAPPISH LANGUAGE_SAMI_NORTHERN_NORWAY /* the old MS definition */ #define LANGUAGE_SAMI_INARI 0x243B +#define 
LANGUAGE_SAMI_INARI_LSO 0x703B +#define LANGUAGE_SAMI_LULE_LSO 0x7C3B #define LANGUAGE_SAMI_LULE_NORWAY 0x103B #define LANGUAGE_SAMI_LULE_SWEDEN 0x143B #define LANGUAGE_SAMI_NORTHERN_FINLAND 0x0C3B #define LANGUAGE_SAMI_NORTHERN_SWEDEN 0x083B #define LANGUAGE_SAMI_SKOLT 0x203B +#define LANGUAGE_SAMI_SKOLT_LSO 0x743B +#define LANGUAGE_SAMI_SOUTHERN_LSO 0x783B #define LANGUAGE_SAMI_SOUTHERN_NORWAY 0x183B #define LANGUAGE_SAMI_SOUTHERN_SWEDEN 0x1C3B #define LANGUAGE_SANSKRIT 0x044F #define LANGUAGE_SEPEDI 0x046C #define LANGUAGE_NORTHERNSOTHO LANGUAGE_SEPEDI /* just an alias for the already existing localization */ -#define LANGUAGE_SERBIAN_CYRILLIC 0x0C1A /* MS lists this as Serbian (Cyrillic, Serbia) 'sr-Cyrl-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! */ +#define LANGUAGE_SERBIAN_CYRILLIC_LSO 0x6C1A +#define LANGUAGE_SERBIAN_CYRILLIC_SAM 0x0C1A /* Serbia and Montenegro (former) */ #define LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x1C1A -#define LANGUAGE_SERBIAN_LATIN 0x081A /* MS lists this as Serbian (Latin, Serbia) 'sr-Latn-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! */ +#define LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO 0x301A +#define LANGUAGE_SERBIAN_CYRILLIC_SERBIA 0x281A +#define LANGUAGE_SERBIAN_LATIN_LSO 0x701A +#define LANGUAGE_SERBIAN_LATIN_SAM 0x081A #define LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA 0x181A -#define LANGUAGE_SERBIAN_LATIN_NEUTRAL 0x7C1A /* MS lists this as 'sr' only. What a mess. 
*/ +#define LANGUAGE_SERBIAN_LATIN_MONTENEGRO 0x2C1A +#define LANGUAGE_SERBIAN_LATIN_NEUTRAL 0x7C1A +#define LANGUAGE_SERBIAN_LATIN_SERBIA 0x241A #define LANGUAGE_SESOTHO 0x0430 /* also called Sutu now by MS */ #define LANGUAGE_SINDHI 0x0459 +#define LANGUAGE_SINDHI_ARABIC_LSO 0x7C59 #define LANGUAGE_SINDHI_PAKISTAN 0x0859 #define LANGUAGE_SINHALESE_SRI_LANKA 0x045B #define LANGUAGE_SLOVAK 0x041B @@ -314,6 +367,7 @@ typedef unsigned short LanguageType; #define LANGUAGE_SOMALI 0x0477 #define LANGUAGE_UPPER_SORBIAN_GERMANY 0x042E /* obsoletes LANGUAGE_USER_UPPER_SORBIAN 0x0623 */ #define LANGUAGE_LOWER_SORBIAN_GERMANY 0x082E /* obsoletes LANGUAGE_USER_LOWER_SORBIAN 0x0624. NOTE: the primary ID is identical to Upper Sorbian, which is not quite correct because they're distinct languages */ +#define LANGUAGE_LOWER_SORBIAN_LSO 0x7C2E #define LANGUAGE_SORBIAN LANGUAGE_USER_UPPER_SORBIAN /* a strange MS definition */ #define LANGUAGE_SPANISH_DATED 0x040A /* old collation, not supported, see #i94435# */ #define LANGUAGE_SPANISH_ARGENTINA 0x2C0A @@ -342,11 +396,17 @@ typedef unsigned short LanguageType; #define LANGUAGE_SWEDISH 0x041D #define LANGUAGE_SWEDISH_FINLAND 0x081D #define LANGUAGE_SYRIAC 0x045A +#define LANGUAGE_TAI_NUA_CHINA 0x048F #define LANGUAGE_TAJIK 0x0428 -#define LANGUAGE_TAMAZIGHT_ARABIC 0x045F -#define LANGUAGE_TAMAZIGHT_LATIN 0x085F -#define LANGUAGE_TAMAZIGHT_TIFINAGH 0x0C5F +#define LANGUAGE_TAJIK_LSO 0x7C28 +#define LANGUAGE_TAMAZIGHT_ARABIC_MOROCCO 0x045F +#define LANGUAGE_TAMAZIGHT_LATIN_ALGERIA 0x085F +#define LANGUAGE_TAMAZIGHT_LATIN_LSO 0x7C5F +#define LANGUAGE_TAMAZIGHT_MOROCCO 0x0C5F +#define LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO 0x105F +#define LANGUAGE_TAMAZIGHT_TIFINAGH_LSO 0x785F #define LANGUAGE_TAMIL 0x0449 +#define LANGUAGE_TAMIL_SRI_LANKA 0x0849 #define LANGUAGE_TATAR 0x0444 #define LANGUAGE_TELUGU 0x044A #define LANGUAGE_THAI 0x041E @@ -357,6 +417,7 @@ typedef unsigned short LanguageType; #define LANGUAGE_TIGRIGNA_ETHIOPIA 0x0473 #define 
LANGUAGE_TSONGA 0x0431 #define LANGUAGE_TSWANA 0x0432 /* AKA Setsuana, for South Africa */ +#define LANGUAGE_TSWANA_BOTSWANA 0x0832 /* obsoletes LANGUAGE_USER_TSWANA_BOTSWANA */ #define LANGUAGE_TURKISH 0x041F #define LANGUAGE_TURKMEN 0x0442 #define LANGUAGE_UIGHUR_CHINA 0x0480 @@ -364,7 +425,9 @@ typedef unsigned short LanguageType; #define LANGUAGE_URDU_INDIA 0x0820 #define LANGUAGE_URDU_PAKISTAN 0x0420 #define LANGUAGE_UZBEK_CYRILLIC 0x0843 +#define LANGUAGE_UZBEK_CYRILLIC_LSO 0x7843 #define LANGUAGE_UZBEK_LATIN 0x0443 +#define LANGUAGE_UZBEK_LATIN_LSO 0x7C43 #define LANGUAGE_VENDA 0x0433 #define LANGUAGE_VIETNAMESE 0x042A #define LANGUAGE_WELSH 0x0452 @@ -374,8 +437,20 @@ typedef unsigned short LanguageType; #define LANGUAGE_YI 0x0478 /* Sichuan Yi */ #define LANGUAGE_YIDDISH 0x043D #define LANGUAGE_YORUBA 0x046A +#define LANGUAGE_YUE_CHINESE_HONGKONG 0x048E #define LANGUAGE_ZULU 0x0435 +#define LANGUAGE_qps_ploc 0x0501 /* 'qps-ploc', qps is a reserved for local use code */ +#define LANGUAGE_qps_ploca 0x05FE /* 'qps-ploca', qps is a reserved for local use code */ +#define LANGUAGE_qps_plocm 0x09FF /* 'qps-plocm', qps is a reserved for local use code */ + +#define LANGUAGE_ar_Ploc_SA__reserved 0x4401 /* 'ar-Ploc-SA', 'Ploc'?? */ +#define LANGUAGE_ja_Ploc_JP__reserved 0x0811 /* 'ja-Ploc-JP', 'Ploc'?? */ +#define LANGUAGE_pap_029__reserved 0x0479 /* 'pap-029' */ +#define LANGUAGE_ar_145__reserved 0x4801 /* 'ar-145' */ +#define LANGUAGE_es_419 0x580A /* 'es-419', not reserved, used? */ +#define LANGUAGE_Neither_defined_nor_reserved_0x2008 0x2008 + /*! use only for import/export of MS documents, number formatter maps it to *! 
LANGUAGE_SYSTEM and then to effective system language */ #define LANGUAGE_SYSTEM_DEFAULT 0x0800 @@ -454,7 +529,8 @@ typedef unsigned short LanguageType; #define LANGUAGE_USER_KALAALLISUT LANGUAGE_KALAALLISUT_GREENLAND #define LANGUAGE_USER_SWAZI 0x062B #define LANGUAGE_USER_NDEBELE_SOUTH 0x062C -#define LANGUAGE_USER_TSWANA_BOTSWANA 0x8032 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_TSWANA)) */ +#define LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA 0x8032 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_TSWANA)) */ +#define LANGUAGE_USER_TSWANA_BOTSWANA LANGUAGE_TSWANA_BOTSWANA #define LANGUAGE_USER_MOORE 0x062D #define LANGUAGE_USER_BAMBARA 0x062E #define LANGUAGE_USER_AKAN 0x062F @@ -486,10 +562,14 @@ typedef unsigned short LanguageType; #define LANGUAGE_USER_TETUN 0x0640 #define LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA 0x0641 #define LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA 0x0642 -#define LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA 0x8C1A /* makeLangID( 0x20+0x03, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */ -#define LANGUAGE_USER_SERBIAN_LATIN_SERBIA 0x881A /* makeLangID( 0x20+0x02, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */ -#define LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO 0xCC1A /* makeLangID( 0x20+0x13, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */ -#define LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO 0xC81A /* makeLangID( 0x20+0x12, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */ +#define LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA 0x8C1A /* makeLangID( 0x20+0x03, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC_LSO)) */ +#define LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA LANGUAGE_SERBIAN_CYRILLIC_SERBIA +#define LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA 0x881A /* makeLangID( 0x20+0x02, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN_LSO)) */ +#define LANGUAGE_USER_SERBIAN_LATIN_SERBIA LANGUAGE_SERBIAN_LATIN_SERBIA +#define LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO 0xCC1A /* makeLangID( 0x20+0x13, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC_LSO)) */ +#define 
LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO +#define LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO 0xC81A /* makeLangID( 0x20+0x12, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN_LSO)) */ +#define LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO LANGUAGE_SERBIAN_LATIN_MONTENEGRO #define LANGUAGE_USER_SAMI_KILDIN_RUSSIA 0x803B /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SAMI_NORTHERN_NORWAY)) */ #define LANGUAGE_USER_BODO_INDIA 0x0643 #define LANGUAGE_USER_DOGRI_INDIA 0x0644 @@ -513,7 +593,8 @@ typedef unsigned short LanguageType; #define LANGUAGE_USER_MAORE 0x064C #define LANGUAGE_USER_BUSHI 0x064D #define LANGUAGE_USER_TAHITIAN 0x064E -#define LANGUAGE_USER_MALAGASY_PLATEAU 0x064F +#define LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU 0x064F +#define LANGUAGE_USER_MALAGASY_PLATEAU LANGUAGE_MALAGASY_PLATEAU #define LANGUAGE_USER_PAPIAMENTU_ARUBA 0x8079 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_PAPIAMENTU)) */ #define LANGUAGE_USER_SARDINIAN_CAMPIDANESE 0x0650 #define LANGUAGE_USER_SARDINIAN_GALLURESE 0x0651 @@ -527,7 +608,7 @@ typedef unsigned short LanguageType; #define LANGUAGE_USER_LIMBU 0x0657 #define LANGUAGE_USER_LOJBAN 0x0658 /* no locale */ #define LANGUAGE_OBSOLETE_USER_KABYLE 0x0659 -#define LANGUAGE_USER_KABYLE LANGUAGE_TAMAZIGHT_LATIN +#define LANGUAGE_USER_KABYLE LANGUAGE_TAMAZIGHT_LATIN_ALGERIA #define LANGUAGE_USER_HAITIAN 0x065A #define LANGUAGE_USER_BEEMBE 0x065B #define LANGUAGE_USER_BEKWEL 0x065C @@ -592,6 +673,7 @@ typedef unsigned short LanguageType; #define LANGUAGE_USER_NOGAI 0x068B #define LANGUAGE_USER_KARAKALPAK_LATIN 0x068C #define LANGUAGE_USER_LADIN 0x068D +#define LANGUAGE_USER_TIBETAN_BHUTAN 0x8451 /* makeLangID( 0x21, getPrimaryLanguage( LANGUAGE_TIBETAN)) */ /* Primary language ID range for on-the-fly assignment. 
*/ diff --git a/svtools/source/misc/langtab.src b/svtools/source/misc/langtab.src index d782bce46006..42f93e8c624d 100644 --- a/svtools/source/misc/langtab.src +++ b/svtools/source/misc/langtab.src @@ -152,8 +152,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE < "Romanian (Moldova)" ; LANGUAGE_ROMANIAN_MOLDOVA ; > ; < "Russian" ; LANGUAGE_RUSSIAN ; > ; < "Sanskrit" ; LANGUAGE_SANSKRIT ; > ; - < "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC ; > ; - < "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN ; > ; + < "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC_SAM ; > ; + < "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN_SAM ; > ; < "Serbian Cyrillic (Serbia)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA ; > ; < "Serbian Latin (Serbia)" ; LANGUAGE_USER_SERBIAN_LATIN_SERBIA ; > ; < "Serbian Cyrillic (Montenegro)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO ; > ; @@ -205,8 +205,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE < "Dhivehi" ; LANGUAGE_DHIVEHI ; > ; < "Northern Sotho" ; LANGUAGE_SEPEDI ; > ; < "Gaelic (Scotland)" ; LANGUAGE_GAELIC_SCOTLAND ; > ; - < "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN ; > ; - < "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN ; > ; + < "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA ; > ; + < "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA ; > ; < "Interlingua" ; LANGUAGE_USER_INTERLINGUA ; > ; < "Bosnian" ; LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA ; > ; < "Bengali (Bangladesh)" ; LANGUAGE_BENGALI_BANGLADESH ; > ; @@ -315,7 +315,7 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE < "Yoruba" ; LANGUAGE_YORUBA ; > ; < "Rusyn (Ukraine)" ; LANGUAGE_USER_RUSYN_UKRAINE ; > ; < "Rusyn (Slovakia)" ; LANGUAGE_USER_RUSYN_SLOVAKIA ; > ; - < "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN ; > ; + < "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN_ALGERIA ; > ; < "Yiddish (USA)" ; LANGUAGE_USER_YIDDISH_US ; > ; < "Hawaiian" ; LANGUAGE_HAWAIIAN_UNITED_STATES ; > ; < "Limbu" ; 
LANGUAGE_USER_LIMBU ; > ; diff --git a/svtools/source/misc/sampletext.cxx b/svtools/source/misc/sampletext.cxx index c8ba114c6ebf..3523383ccfa5 100644 --- a/svtools/source/misc/sampletext.cxx +++ b/svtools/source/misc/sampletext.cxx @@ -586,9 +586,17 @@ OUString makeRepresentativeTextForLanguage(LanguageType eLang) case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY: sRet = makeRepresentativeTextForScript(USCRIPT_KHMER); break; - case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY: - if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN) - sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN); + case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY: + switch (eLang) + { + case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO: + sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN); + break; + default: + break; + } break; case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: sRet = makeRepresentativeTextForScript(USCRIPT_JAPANESE); diff --git a/vcl/source/gdi/sallayout.cxx b/vcl/source/gdi/sallayout.cxx index fe01037931b8..f395936b34d7 100644 --- a/vcl/source/gdi/sallayout.cxx +++ b/vcl/source/gdi/sallayout.cxx @@ -287,11 +287,18 @@ VCL_DLLPUBLIC sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang ) case LANGUAGE_MALAYALAM & LANGUAGE_MASK_PRIMARY: nOffset = 0x0D66 - '0'; // malayalam break; - case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY: - if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN) - nOffset = 0x1810 - '0'; // mongolian - else - nOffset = 0; // mongolian cyrillic + case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY: + switch (eLang) + { + case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA: + case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO: + nOffset = 0x1810 - '0'; // mongolian + break; + default: + nOffset = 0; // mongolian cyrillic + break; + } break; case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY: nOffset = 0x1040 - '0'; // myanmar |