summaryrefslogtreecommitdiff
path: root/i18nlangtag/source
diff options
context:
space:
mode:
author Eike Rathke <erack@redhat.com> 2013-04-05 18:40:39 +0200
committer Eike Rathke <erack@redhat.com> 2013-04-05 19:10:48 +0200
commit 876c619b944dfbc88464045f1400c549a01a1164 (patch)
tree f15f930fe100bda4c0a0503728654801ac977fcd /i18nlangtag/source
parent 8ef9e38aa84675c57b331a796d900b3c10e04f44 (diff)
new module i18nlangtag
Moved portions from module i18npool, all of former i18nisolang1 library that now is i18nlangtag. Included are languagetag, isolang and mslangid. This i18nlangtag code is now even used by module comphelper, so disentangling i18npool and making this an own module was needed to not create circular module dependencies. Change-Id: Ib887c3d6dde667403fd22d382310ba5f1a9b0015
Diffstat (limited to 'i18nlangtag/source')
-rw-r--r-- i18nlangtag/source/isolang/insys.cxx 35
-rw-r--r-- i18nlangtag/source/isolang/inunx.cxx 139
-rw-r--r-- i18nlangtag/source/isolang/inwnt.cxx 104
-rw-r--r-- i18nlangtag/source/isolang/isolang.cxx 1085
-rwxr-xr-x i18nlangtag/source/isolang/langid.pl 409
-rw-r--r-- i18nlangtag/source/isolang/lcid.awk 187
-rw-r--r-- i18nlangtag/source/isolang/mslangid.cxx 492
-rw-r--r-- i18nlangtag/source/languagetag/languagetag.cxx 1254
-rw-r--r-- i18nlangtag/source/languagetag/simple-langtag.cxx 400
9 files changed, 4105 insertions, 0 deletions
diff --git a/i18nlangtag/source/isolang/insys.cxx b/i18nlangtag/source/isolang/insys.cxx
new file mode 100644
index 000000000000..a0f1ab496c4d
--- /dev/null
+++ b/i18nlangtag/source/isolang/insys.cxx
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#if defined( WNT )
+
+#include "inwnt.cxx"
+
+#elif defined( UNX )
+
+#include "inunx.cxx"
+
+#else
+
+#error unknown platform
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/inunx.cxx b/i18nlangtag/source/isolang/inunx.cxx
new file mode 100644
index 000000000000..f47bfa4b67c2
--- /dev/null
+++ b/i18nlangtag/source/isolang/inunx.cxx
@@ -0,0 +1,139 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <stdlib.h> // for getenv()
+#include <stdio.h>
+
+#ifdef MACOSX
+#include <osl/process.h>
+#include <rtl/locale.h>
+#include <rtl/ustring.hxx>
+
+#else // MACOSX
+#include <rtl/string.hxx>
+
+#endif // MACOSX
+#include <rtl/instance.hxx>
+#include "i18nlangtag/languagetag.hxx"
+#include "i18nlangtag/mslangid.hxx"
+
+// =======================================================================
+
+static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; // cache returned by MsLangId::getPlatformSystemLanguage(); LANGUAGE_DONTKNOW = not determined yet
+static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; // cache returned by MsLangId::getPlatformSystemUILanguage(); LANGUAGE_DONTKNOW = not determined yet
+
+// -----------------------------------------------------------------------
+
+// Get locale of category LC_CTYPE of environment variables
+static const sal_Char* getLangFromEnvironment()
+{
+    // Environment variables consulted in this order; the first one that is
+    // set to a non-empty value supplies the result.
+    static const sal_Char* const aVarNames[] = { "LC_ALL", "LC_CTYPE", "LANG" };
+    const size_t nVarCount = sizeof(aVarNames) / sizeof(aVarNames[0]);
+
+    for ( size_t i = 0; i < nVarCount; ++i )
+    {
+        const sal_Char* pValue = getenv( aVarNames[i] );
+        if ( pValue && pValue[0] != 0 )
+            return pValue;
+    }
+
+    // Nothing usable found in the environment: fall back to "C".
+    return "C";
+}
+
+// -----------------------------------------------------------------------
+
+// Get locale of category LC_MESSAGES of environment variables
+static const sal_Char* getUILangFromEnvironment()
+{
+    // Environment variables consulted in this order; the first one that is
+    // set to a non-empty value supplies the result. LANGUAGE comes first to
+    // respect the GNU extension.
+    static const sal_Char* const aVarNames[] =
+        { "LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG" };
+    const size_t nVarCount = sizeof(aVarNames) / sizeof(aVarNames[0]);
+
+    for ( size_t i = 0; i < nVarCount; ++i )
+    {
+        const sal_Char* pValue = getenv( aVarNames[i] );
+        if ( pValue && pValue[0] != 0 )
+            return pValue;
+    }
+
+    // Nothing usable found in the environment: fall back to "C".
+    return "C";
+}
+
+// -----------------------------------------------------------------------
+
+// Signature of the callbacks that supply a locale string taken from the
+// environment (getLangFromEnvironment() and getUILangFromEnvironment()).
+typedef const sal_Char * (*getLangFromEnv)();
+
+/** Determine a system language once and cache it in rSystemLanguage.
+
+    Nothing is done if rSystemLanguage already differs from
+    LANGUAGE_DONTKNOW. Otherwise the value is re-checked while holding the
+    global mutex and, if still unknown, computed and stored:
+
+    - MACOSX: the locale obtained from osl_getProcessLocale() is converted
+      through LanguageTag; pGetLangFromEnv is not used. If
+      osl_getProcessLocale() fails, rSystemLanguage is left untouched.
+    - otherwise: the string returned by pGetLangFromEnv is converted with
+      MsLangId::convertUnxByteStringToLanguage().
+
+    A result of LANGUAGE_DONTKNOW is stored like any other value, which
+    means the next call will attempt the detection again.
+
+    @param rSystemLanguage
+        Cache variable, read and possibly written.
+    @param pGetLangFromEnv
+        Callback returning the locale string to convert (non-MACOSX only).
+ */
+static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage,
+        getLangFromEnv pGetLangFromEnv )
+{
+    /* get the language from the user environment */
+    LanguageType nLang = rSystemLanguage;
+    if ( nLang == LANGUAGE_DONTKNOW )
+    {
+        ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex());
+        // Second check, now under the lock: another thread may have stored
+        // a value between the unlocked read above and acquiring the mutex.
+        nLang = rSystemLanguage;
+        if ( nLang == LANGUAGE_DONTKNOW )
+        {
+#ifdef MACOSX
+            rtl_Locale *procLocale;
+            (void) pGetLangFromEnv; /* unused */
+
+            if ( osl_getProcessLocale(&procLocale) == osl_Process_E_None )
+            {
+                nLang = LanguageTag( *procLocale ).getLanguageType();
+                // Barrier before the store that publishes the result.
+                OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
+                rSystemLanguage = nLang;
+#ifdef DEBUG
+                if ( rSystemLanguage == LANGUAGE_DONTKNOW )
+                    fprintf( stderr, "inunx.cxx: failed to convert osl_getProcessLocale() language to system language.\n" );
+#endif
+            }
+#else /* MACOSX */
+            rtl::OString aUnxLang( (pGetLangFromEnv)() );
+            nLang = MsLangId::convertUnxByteStringToLanguage( aUnxLang );
+            // Barrier before the store that publishes the result.
+            OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
+            rSystemLanguage = nLang;
+#endif /* MACOSX */
+        }
+        else {
+            // Value was stored by someone else in the meantime.
+            OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
+        }
+    }
+}
+
+// -----------------------------------------------------------------------
+
+LanguageType MsLangId::getPlatformSystemLanguage()
+{
+    // Let the helper fill the file-local cache if it is still
+    // LANGUAGE_DONTKNOW, then hand out whatever the cache holds.
+    getPlatformSystemLanguageImpl(
+        nImplSystemLanguage, getLangFromEnvironment );
+
+    return nImplSystemLanguage;
+}
+
+// -----------------------------------------------------------------------
+
+LanguageType MsLangId::getPlatformSystemUILanguage()
+{
+    // Let the helper fill the file-local UI cache if it is still
+    // LANGUAGE_DONTKNOW, then hand out whatever the cache holds.
+    getPlatformSystemLanguageImpl(
+        nImplSystemUILanguage, getUILangFromEnvironment );
+
+    return nImplSystemUILanguage;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/inwnt.cxx b/i18nlangtag/source/isolang/inwnt.cxx
new file mode 100644
index 000000000000..c0971673d1f3
--- /dev/null
+++ b/i18nlangtag/source/isolang/inwnt.cxx
@@ -0,0 +1,104 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <sal/config.h>
+
+#define WINVER 0x0500
+
+#ifdef _MSC_VER
+#pragma warning(push,1) // disable warnings within system headers
+#endif
+#include <windef.h> // needed by winnls.h
+#include <winbase.h> // needed by winnls.h
+#include <winnls.h>
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+#include <rtl/instance.hxx>
+#include "i18nlangtag/mslangid.hxx"
+
+static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; // cache returned by MsLangId::getPlatformSystemLanguage(); LANGUAGE_DONTKNOW = not determined yet
+static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; // cache returned by MsLangId::getPlatformSystemUILanguage(); LANGUAGE_DONTKNOW = not determined yet
+
+// =======================================================================
+
+static LanguageType GetSVLang( LANGID nWinLangId )
+{
+    // No translation takes place: the original MS code is used without the
+    // SORT_ID, keeping only the low 16 bits of the value. As a consequence
+    // a LANGID that has no counterpart among our own definitions is passed
+    // through unchanged rather than being mapped.
+    const sal_uInt16 nLow16 = static_cast<sal_uInt16>(nWinLangId & 0xffff);
+    return LanguageType( nLow16 );
+}
+
+// -----------------------------------------------------------------------
+
+// Signature shared by the LANGID getters handed to the helper below.
+typedef LANGID (WINAPI *getLangFromEnv)();
+
+/** Determine a system language once and cache it in rSystemLanguage.
+
+    Nothing is done if rSystemLanguage already differs from
+    LANGUAGE_DONTKNOW. Otherwise the value is re-checked while holding the
+    global mutex and, if still unknown, taken from pGetUserDefault, falling
+    back to pGetSystemDefault when the first result converts to
+    LANGUAGE_DONTKNOW.
+
+    @param rSystemLanguage    Cache variable, read and possibly written.
+    @param pGetUserDefault    Getter tried first.
+    @param pGetSystemDefault  Getter tried if the first yields
+                              LANGUAGE_DONTKNOW.
+ */
+static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage,
+        getLangFromEnv pGetUserDefault, getLangFromEnv pGetSystemDefault )
+{
+    // Already determined: nothing to do, no lock taken.
+    if ( rSystemLanguage != LANGUAGE_DONTKNOW )
+        return;
+
+    ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex());
+
+    // Check again under the lock; the value may have been stored meanwhile.
+    if ( rSystemLanguage != LANGUAGE_DONTKNOW )
+    {
+        OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
+        return;
+    }
+
+    // User default first, system default as fallback.
+    LanguageType nResolved = GetSVLang( (pGetUserDefault)() );
+    if ( nResolved == LANGUAGE_DONTKNOW )
+        nResolved = GetSVLang( (pGetSystemDefault)() );
+
+    // Barrier before the store that publishes the result.
+    OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
+    rSystemLanguage = nResolved;
+}
+
+// -----------------------------------------------------------------------
+
+LanguageType MsLangId::getPlatformSystemLanguage()
+{
+    // Let the helper fill the file-local cache if it is still
+    // LANGUAGE_DONTKNOW (user default LANGID first, system default as
+    // fallback), then hand out whatever the cache holds.
+    getPlatformSystemLanguageImpl(
+        nImplSystemLanguage,
+        GetUserDefaultLangID,
+        GetSystemDefaultLangID );
+
+    return nImplSystemLanguage;
+}
+
+// -----------------------------------------------------------------------
+
+LanguageType MsLangId::getPlatformSystemUILanguage()
+{
+    // TODO: this could be distinguished, #if(WINVER >= 0x0500)
+    // needs _run_ time differentiation though, not at compile time.
+
+    // Let the helper fill the file-local UI cache if it is still
+    // LANGUAGE_DONTKNOW (user default UI language first, system default as
+    // fallback), then hand out whatever the cache holds.
+    getPlatformSystemLanguageImpl(
+        nImplSystemUILanguage,
+        GetUserDefaultUILanguage,
+        GetSystemDefaultUILanguage );
+
+    return nImplSystemUILanguage;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx
new file mode 100644
index 000000000000..a6a467ce4671
--- /dev/null
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -0,0 +1,1085 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/strbuf.hxx>
+
+#include "i18nlangtag/mslangid.hxx"
+
+// =======================================================================
+
+struct IsoLangEngEntry // table row pairing a LanguageType with a short country string; NOTE(review): the table using it is not visible here
+{
+ LanguageType mnLang; // language identifier of this row
+ sal_Char maCountry[3]; // 3 bytes; presumably a 2-letter country code plus terminating NUL - TODO confirm
+};
+
+struct IsoLangNoneStdEntry // table row pairing a LanguageType with language and country strings; NOTE(review): the table using it is not visible here
+{
+ LanguageType mnLang; // language identifier of this row
+ sal_Char maLangStr[4]; // 4 bytes; presumably up to 3 characters plus terminating NUL - TODO confirm
+ sal_Char maCountry[9]; // 9 bytes; presumably up to 8 characters plus terminating NUL - TODO confirm
+};
+
+struct IsoLangOtherEntry // table row pairing a LanguageType with a string of arbitrary length; NOTE(review): the table using it is not visible here
+{
+ LanguageType mnLang; // language identifier of this row
+ const sal_Char* mpLangStr; // pointer to the string; not a fixed-size buffer like the sibling structs
+};
+
+// -----------------------------------------------------------------------
+
+// Entries for languages are lower case, for countries upper case, as
+// recommended by rfc4646 (obsoletes rfc3066 (obsoletes rfc1766)).
+// convertIsoNamesToLanguage() is case insensitive
+//
+// Sort order: Most used first.
+//
+// The default entry for a LangID <-> ISO mapping has to be first. For
+// conversion of legacy mappings one LangID can map to multiple ISO codes, and
+// one ISO code combination can map to multiple LangIDs. For compatibility with
+// already existing calls it can also be a sequence as follows:
+
+// LANGUAGE_ENGLISH, "en", ""
+// LANGUAGE_ENGLISH_US, "en", "US"
+
+// Here, in a convertIsoNamesToLanguage() call "en-US" is converted to
+// LANGUAGE_ENGLISH_US and "en" is converted to LANGUAGE_ENGLISH. A call with
+// "en-ZZ" (not in table) would result in LANGUAGE_ENGLISH because the first
+// entry matching the language and not having a country is returned, regardless
+// of whether being sorted before or after other entries of the same language
+// with some country. To obtain a _locale_ (not language only) in the order
+// given, convertLocaleToLanguageWithFallback() must be called.
+
+// If the sequence instead was
+
+// LANGUAGE_ENGLISH_US, "en", "US"
+// LANGUAGE_ENGLISH, "en", ""
+
+// in a convertIsoNamesToLanguage() call "en-US" is still converted to
+// LANGUAGE_ENGLISH_US, but "en" is _also_ converted to LANGUAGE_ENGLISH_US
+// because no country was passed and it is the first entry to match the
+// language, see code. A call with "en-ZZ" (not in table) would still result in
+// LANGUAGE_ENGLISH.
+
+/* erAck: 2007-07-05T20:01+0200 TODO: The entire suite's "primary language
+ * only" usage and locale fall back should be cleaned up and made consistent. I
+ * strongly doubt that most callers exactly expect the behavior described.
+ * Currently these primary LangIDs are used literally in OOo code:
+ * LANGUAGE_ENGLISH LANGUAGE_CHINESE LANGUAGE_MALAY
+ * LANGUAGE_AZERI LANGUAGE_URDU LANGUAGE_KASHMIRI
+ */
+
+static MsLangId::IsoLangEntry const aImplIsoLangEntries[] =
+{
+ // MS-LANGID codes ISO639-1/2/3 ISO3166
+ { LANGUAGE_ENGLISH, "en", "" },
+ { LANGUAGE_ENGLISH_US, "en", "US" },
+ { LANGUAGE_ENGLISH_UK, "en", "GB" },
+ { LANGUAGE_ENGLISH_AUS, "en", "AU" },
+ { LANGUAGE_ENGLISH_CAN, "en", "CA" },
+ { LANGUAGE_FRENCH, "fr", "FR" },
+ { LANGUAGE_FRENCH, "fr", "" },
+ { LANGUAGE_GERMAN, "de", "DE" },
+ { LANGUAGE_ITALIAN, "it", "IT" },
+ { LANGUAGE_DUTCH, "nl", "NL" },
+ { LANGUAGE_SPANISH_MODERN, "es", "ES" },
+ { LANGUAGE_SPANISH_DATED, "es", "ES" },
+ { LANGUAGE_PORTUGUESE, "pt", "PT" },
+ { LANGUAGE_PORTUGUESE_BRAZILIAN, "pt", "BR" },
+ { LANGUAGE_DANISH, "da", "DK" },
+ { LANGUAGE_GREEK, "el", "GR" },
+ { LANGUAGE_CHINESE, "zh", "" },
+ { LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN" },
+ { LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW" },
+ { LANGUAGE_CHINESE_HONGKONG, "zh", "HK" },
+ { LANGUAGE_CHINESE_SINGAPORE, "zh", "SG" },
+ { LANGUAGE_CHINESE_MACAU, "zh", "MO" },
+ { LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK" },
+ { LANGUAGE_JAPANESE, "ja", "JP" },
+ { LANGUAGE_KOREAN, "ko", "KR" },
+ { LANGUAGE_KOREAN_JOHAB, "ko", "KR" },
+ { LANGUAGE_USER_KOREAN_NORTH, "ko", "KP" },
+ { LANGUAGE_SWEDISH, "sv", "SE" },
+ { LANGUAGE_SWEDISH_FINLAND, "sv", "FI" },
+ { LANGUAGE_FINNISH, "fi", "FI" },
+ { LANGUAGE_RUSSIAN, "ru", "RU" },
+ { LANGUAGE_TATAR, "tt", "RU" },
+ { LANGUAGE_ENGLISH_NZ, "en", "NZ" },
+ { LANGUAGE_ENGLISH_EIRE, "en", "IE" },
+ { LANGUAGE_DUTCH_BELGIAN, "nl", "BE" },
+ { LANGUAGE_FRENCH_BELGIAN, "fr", "BE" },
+ { LANGUAGE_FRENCH_CANADIAN, "fr", "CA" },
+ { LANGUAGE_FRENCH_SWISS, "fr", "CH" },
+ { LANGUAGE_GERMAN_SWISS, "de", "CH" },
+ { LANGUAGE_GERMAN_AUSTRIAN, "de", "AT" },
+ { LANGUAGE_ITALIAN_SWISS, "it", "CH" },
+ { LANGUAGE_ALBANIAN, "sq", "AL" },
+ { LANGUAGE_ARABIC_SAUDI_ARABIA, "ar", "SA" },
+ { LANGUAGE_ARABIC_EGYPT, "ar", "EG" },
+ { LANGUAGE_ARABIC_UAE, "ar", "AE" },
+ { LANGUAGE_ARABIC_IRAQ, "ar", "IQ" },
+ { LANGUAGE_ARABIC_LIBYA, "ar", "LY" },
+ { LANGUAGE_ARABIC_ALGERIA, "ar", "DZ" },
+ { LANGUAGE_ARABIC_MOROCCO, "ar", "MA" },
+ { LANGUAGE_ARABIC_TUNISIA, "ar", "TN" },
+ { LANGUAGE_ARABIC_OMAN, "ar", "OM" },
+ { LANGUAGE_ARABIC_YEMEN, "ar", "YE" },
+ { LANGUAGE_ARABIC_SYRIA, "ar", "SY" },
+ { LANGUAGE_ARABIC_JORDAN, "ar", "JO" },
+ { LANGUAGE_ARABIC_LEBANON, "ar", "LB" },
+ { LANGUAGE_ARABIC_KUWAIT, "ar", "KW" },
+ { LANGUAGE_ARABIC_BAHRAIN, "ar", "BH" },
+ { LANGUAGE_ARABIC_QATAR, "ar", "QA" },
+ { LANGUAGE_USER_ARABIC_CHAD, "ar", "TD" },
+ { LANGUAGE_USER_ARABIC_COMOROS, "ar", "KM" },
+ { LANGUAGE_USER_ARABIC_DJIBOUTI, "ar", "DJ" },
+ { LANGUAGE_USER_ARABIC_ERITREA, "ar", "ER" },
+ { LANGUAGE_USER_ARABIC_ISRAEL, "ar", "IL" },
+ { LANGUAGE_USER_ARABIC_MAURITANIA, "ar", "MR" },
+ { LANGUAGE_USER_ARABIC_PALESTINE, "ar", "PS" },
+ { LANGUAGE_USER_ARABIC_SOMALIA, "ar", "SO" },
+ { LANGUAGE_USER_ARABIC_SUDAN, "ar", "SD" },
+ { LANGUAGE_ARABIC_PRIMARY_ONLY, "ar", "" },
+ { LANGUAGE_BASQUE, "eu", "" },
+ { LANGUAGE_BULGARIAN, "bg", "BG" },
+ { LANGUAGE_CZECH, "cs", "CZ" },
+ { LANGUAGE_CZECH, "cz", "" },
+ { LANGUAGE_ENGLISH_JAMAICA, "en", "JM" },
+ { LANGUAGE_ENGLISH_CARRIBEAN, "en", "BS" }, // not 100%, because AG is Bahamas
+ { LANGUAGE_ENGLISH_BELIZE, "en", "BZ" },
+ { LANGUAGE_ENGLISH_TRINIDAD, "en", "TT" },
+ { LANGUAGE_ENGLISH_ZIMBABWE, "en", "ZW" },
+ { LANGUAGE_ENGLISH_INDONESIA, "en", "ID" },
+ { LANGUAGE_ESTONIAN, "et", "EE" },
+ { LANGUAGE_FAEROESE, "fo", "FO" },
+ { LANGUAGE_FARSI, "fa", "IR" },
+ { LANGUAGE_FRENCH_LUXEMBOURG, "fr", "LU" },
+ { LANGUAGE_FRENCH_MONACO, "fr", "MC" },
+ { LANGUAGE_GERMAN_LUXEMBOURG, "de", "LU" },
+ { LANGUAGE_GERMAN_LIECHTENSTEIN, "de", "LI" },
+ { LANGUAGE_HEBREW, "he", "IL" }, // new: old was "iw"
+ { LANGUAGE_HEBREW, "iw", "IL" }, // old: new is "he"
+ { LANGUAGE_HUNGARIAN, "hu", "HU" },
+ { LANGUAGE_ICELANDIC, "is", "IS" },
+ { LANGUAGE_INDONESIAN, "id", "ID" }, // new: old was "in"
+ { LANGUAGE_INDONESIAN, "in", "ID" }, // old: new is "id"
+ { LANGUAGE_NORWEGIAN, "no", "NO" },
+ { LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO" },
+ { LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO" },
+ { LANGUAGE_POLISH, "pl", "PL" },
+ { LANGUAGE_RHAETO_ROMAN, "rm", "CH" },
+ { LANGUAGE_ROMANIAN, "ro", "RO" },
+ { LANGUAGE_ROMANIAN_MOLDOVA, "ro", "MD" },
+ { LANGUAGE_SLOVAK, "sk", "SK" },
+ { LANGUAGE_SLOVENIAN, "sl", "SI" },
+ { LANGUAGE_SPANISH_MEXICAN, "es", "MX" },
+ { LANGUAGE_SPANISH_GUATEMALA, "es", "GT" },
+ { LANGUAGE_SPANISH_COSTARICA, "es", "CR" },
+ { LANGUAGE_SPANISH_PANAMA, "es", "PA" },
+ { LANGUAGE_SPANISH_DOMINICAN_REPUBLIC, "es", "DO" },
+ { LANGUAGE_SPANISH_VENEZUELA, "es", "VE" },
+ { LANGUAGE_SPANISH_COLOMBIA, "es", "CO" },
+ { LANGUAGE_SPANISH_PERU, "es", "PE" },
+ { LANGUAGE_SPANISH_ARGENTINA, "es", "AR" },
+ { LANGUAGE_SPANISH_ECUADOR, "es", "EC" },
+ { LANGUAGE_SPANISH_CHILE, "es", "CL" },
+ { LANGUAGE_SPANISH_URUGUAY, "es", "UY" },
+ { LANGUAGE_SPANISH_PARAGUAY, "es", "PY" },
+ { LANGUAGE_SPANISH_BOLIVIA, "es", "BO" },
+ { LANGUAGE_SPANISH_EL_SALVADOR, "es", "SV" },
+ { LANGUAGE_SPANISH_HONDURAS, "es", "HN" },
+ { LANGUAGE_SPANISH_NICARAGUA, "es", "NI" },
+ { LANGUAGE_SPANISH_PUERTO_RICO, "es", "PR" },
+ { LANGUAGE_SPANISH_UNITED_STATES, "es", "US" },
+ { LANGUAGE_SPANISH_LATIN_AMERICA, "es", "" },
+ { LANGUAGE_TURKISH, "tr", "TR" },
+ { LANGUAGE_UKRAINIAN, "uk", "UA" },
+ { LANGUAGE_VIETNAMESE, "vi", "VN" },
+ { LANGUAGE_LATVIAN, "lv", "LV" },
+ { LANGUAGE_MACEDONIAN, "mk", "MK" },
+ { LANGUAGE_MALAY, "ms", "" },
+ { LANGUAGE_MALAY_MALAYSIA, "ms", "MY" },
+ { LANGUAGE_MALAY_BRUNEI_DARUSSALAM, "ms", "BN" },
+ { LANGUAGE_ENGLISH_MALAYSIA, "en", "MY" },
+ { LANGUAGE_THAI, "th", "TH" },
+ { LANGUAGE_LITHUANIAN, "lt", "LT" },
+ { LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT" },
+ { LANGUAGE_CROATIAN, "hr", "HR" }, // Croatian in Croatia
+ { LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA" },
+ { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA" },
+// { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_AND_HERZEGOVINA, "bs", "BA" }, // script codes not supported yet
+ { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS" }, // Serbian Cyrillic in Serbia
+ { LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU" }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_CS instead, sr_CS not supported by ICU 2.6 (3.4 does)
+ { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS" }, // alias to be able to integrate localizations, rsc needs it
+ { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME" },
+ { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA" },
+ { LANGUAGE_SERBIAN, "sr", "" }, // SERBIAN is only LID, MS-LCID not defined (was dupe of CROATIAN)
+ { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS" }, // Serbian Latin in Serbia; kludge, needed to be sr_Latn_RS instead, script codes not supported yet
+ { LANGUAGE_SERBIAN_LATIN, "sh", "YU" }, // legacy Serbian Latin in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_Latn_CS instead, script codes not supported yet
+ { LANGUAGE_SERBIAN_LATIN, "sh", "CS" }, // Serbian Latin in Serbia and Montenegro; kludge, needed to be sr_Latn_CS instead, script codes not supported yet
+ { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME" }, // Serbian Latin in Montenegro; kludge, needed to be sr_Latn_ME instead, script codes not supported yet
+ { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA" },
+ { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" }, // kludge, needed to be sr_Latn instead, script codes not supported yet
+ { LANGUAGE_ARMENIAN, "hy", "AM" },
+ { LANGUAGE_AZERI, "az", "" },
+ { LANGUAGE_AZERI_LATIN, "az", "AZ" },
+// { LANGUAGE_AZERI_CYRILLIC, "az", "AZ" }, // script codes not supported yet
+ { LANGUAGE_UZBEK_LATIN, "uz", "UZ" },
+// { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ" }, // script codes not supported yet
+ { LANGUAGE_BENGALI_BANGLADESH, "bn", "BD" },
+ { LANGUAGE_BENGALI, "bn", "IN" },
+ { LANGUAGE_BURMESE, "my", "MM" },
+ { LANGUAGE_KAZAKH, "kk", "KZ" },
+ { LANGUAGE_ENGLISH_INDIA, "en", "IN" },
+ { LANGUAGE_URDU, "ur", "" },
+ { LANGUAGE_URDU_INDIA, "ur", "IN" },
+ { LANGUAGE_URDU_PAKISTAN, "ur", "PK" },
+ { LANGUAGE_HINDI, "hi", "IN" },
+ { LANGUAGE_GUJARATI, "gu", "IN" },
+ { LANGUAGE_KANNADA, "kn", "IN" },
+ { LANGUAGE_ASSAMESE, "as", "IN" },
+ { LANGUAGE_KASHMIRI, "ks", "" },
+ { LANGUAGE_KASHMIRI_INDIA, "ks", "IN" },
+ { LANGUAGE_MALAYALAM, "ml", "IN" },
+ { LANGUAGE_MANIPURI, "mni", "IN" },
+ { LANGUAGE_MARATHI, "mr", "IN" },
+ { LANGUAGE_KONKANI, "kok", "IN" },
+ { LANGUAGE_NEPALI, "ne", "NP" },
+ { LANGUAGE_NEPALI_INDIA, "ne", "IN" },
+ { LANGUAGE_ORIYA, "or", "IN" },
+ { LANGUAGE_PUNJABI, "pa", "IN" },
+ { LANGUAGE_SANSKRIT, "sa", "IN" },
+ { LANGUAGE_SINDHI, "sd", "IN" },
+ { LANGUAGE_TAMIL, "ta", "IN" },
+ { LANGUAGE_TELUGU, "te", "IN" },
+ { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK" }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB
+ { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK" },
+ { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK" },
+ { LANGUAGE_BELARUSIAN, "be", "BY" },
+ { LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default)
+ { LANGUAGE_CATALAN, "ca", "AD" }, // Andorra
+ { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content!
+ { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content!
+// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back.
+ { LANGUAGE_FRENCH_CAMEROON, "fr", "CM" },
+ { LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" },
+ { LANGUAGE_FRENCH_MALI, "fr", "ML" },
+ { LANGUAGE_FRENCH_SENEGAL, "fr", "SN" },
+ { LANGUAGE_FRENCH_ZAIRE, "fr", "CD" }, // Democratic Republic Of Congo
+ { LANGUAGE_FRENCH_MOROCCO, "fr", "MA" },
+ { LANGUAGE_FRENCH_REUNION, "fr", "RE" },
+ { LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" },
+ { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" }, // unknown ISO country code
+ { LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL" },
+ { LANGUAGE_GAELIC_IRELAND, "ga", "IE" },
+ { LANGUAGE_GAELIC_SCOTLAND, "gd", "GB" },
+ { LANGUAGE_GALICIAN, "gl", "ES" },
+ { LANGUAGE_GEORGIAN, "ka", "GE" },
+ { LANGUAGE_KHMER, "km", "KH" },
+ { LANGUAGE_KIRGHIZ, "ky", "KG" },
+ { LANGUAGE_LAO, "lo", "LA" },
+ { LANGUAGE_MALTESE, "mt", "MT" },
+ { LANGUAGE_MONGOLIAN, "mn", "MN" }, // Cyrillic script
+ { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn", "MN" },
+ { LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD" },
+ { LANGUAGE_SWAHILI, "sw", "KE" },
+ { LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ" },
+ { LANGUAGE_TAJIK, "tg", "TJ" },
+ { LANGUAGE_TIBETAN, "bo", "CN" }, // CN politically correct?
+ { LANGUAGE_DZONGKHA, "dz", "BT" },
+ { LANGUAGE_TURKMEN, "tk", "TM" },
+ { LANGUAGE_WELSH, "cy", "GB" },
+ { LANGUAGE_SESOTHO, "st", "ZA" },
+ { LANGUAGE_SEPEDI, "nso", "ZA" },
+ { LANGUAGE_SEPEDI, "ns", "ZA" }, // fake "ns" for compatibility with existing OOo1.1.x localization to be able to read those documents
+ { LANGUAGE_TSONGA, "ts", "ZA" },
+ { LANGUAGE_TSWANA, "tn", "ZA" },
+ { LANGUAGE_ENGLISH_SAFRICA, "en", "ZA" },
+ { LANGUAGE_AFRIKAANS, "af", "ZA" },
+ { LANGUAGE_VENDA, "ve", "ZA" }, // default 639-1
+ { LANGUAGE_VENDA, "ven", "ZA" }, // 639-2 may have been used temporarily since 2004-07-23
+ { LANGUAGE_XHOSA, "xh", "ZA" },
+ { LANGUAGE_ZULU, "zu", "ZA" },
+ { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC" },
+ { LANGUAGE_QUECHUA_PERU, "qu", "PE" },
+ { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO" }, // macro: quh-BO, qul-BO
+ { LANGUAGE_PASHTO, "ps", "AF" },
+ { LANGUAGE_OROMO, "om", "ET" },
+ { LANGUAGE_DHIVEHI, "dv", "MV" },
+ { LANGUAGE_UIGHUR_CHINA, "ug", "CN" },
+ { LANGUAGE_TIGRIGNA_ETHIOPIA, "ti", "ET" },
+ { LANGUAGE_TIGRIGNA_ERITREA, "ti", "ER" },
+ { LANGUAGE_AMHARIC_ETHIOPIA, "am", "ET" },
+ { LANGUAGE_GUARANI_PARAGUAY, "gug", "PY" },
+ { LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US" },
+ { LANGUAGE_EDO, "bin", "NG" },
+ { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG" },
+ { LANGUAGE_HAUSA_NIGERIA, "ha", "NG" },
+ { LANGUAGE_USER_HAUSA_GHANA, "ha", "GH" },
+ { LANGUAGE_IGBO_NIGERIA, "ig", "NG" },
+ { LANGUAGE_KANURI_NIGERIA, "kr", "NG" },
+ { LANGUAGE_YORUBA, "yo", "NG" },
+ { LANGUAGE_SOMALI, "so", "SO" },
+ { LANGUAGE_PAPIAMENTU, "pap", "AN" },
+ { LANGUAGE_USER_PAPIAMENTU_ARUBA, "pap", "AW" },
+ { LANGUAGE_USER_PAPIAMENTU_CURACAO, "pap", "CW" },
+ { LANGUAGE_USER_PAPIAMENTU_BONAIRE, "pap", "BQ" },
+ { LANGUAGE_ENGLISH_SINGAPORE, "en", "SG" },
+ { LANGUAGE_USER_YIDDISH_US, "yi", "US" },
+ { LANGUAGE_YIDDISH, "yi", "IL" }, // new: old was "ji"
+ { LANGUAGE_YIDDISH, "ji", "IL" }, // old: new is "yi"
+ { LANGUAGE_SYRIAC, "syr", "TR" }, // "TR" according to http://www.ethnologue.com/show_language.asp?code=SYC
+ { LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK" },
+ { LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US" },
+ { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA" },
+// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA" }, // script codes not supported yet
+ { LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO" },
+ { LANGUAGE_SAMI_INARI, "smn", "FI" },
+ { LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO" },
+ { LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE" },
+ { LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI" },
+ { LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE" },
+ { LANGUAGE_SAMI_SKOLT, "sms", "FI" },
+ { LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO" },
+ { LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE" },
+ { LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU" },
+ { LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL" },
+ { LANGUAGE_CORSICAN_FRANCE, "co", "FR" },
+ { LANGUAGE_ALSATIAN_FRANCE, "gsw", "FR" }, // in fact 'gsw' is Schwyzerduetsch (Swiss German), which is a dialect of Alemannic German, as is Alsatian. They aren't distinct languages and share this code.
+ { LANGUAGE_YAKUT_RUSSIA, "sah", "RU" },
+ { LANGUAGE_MOHAWK_CANADA, "moh", "CA" },
+ { LANGUAGE_BASHKIR_RUSSIA, "ba", "RU" },
+ { LANGUAGE_KICHE_GUATEMALA, "qut", "GT" },
+ { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF" },
+ { LANGUAGE_WOLOF_SENEGAL, "wo", "SN" },
+ { LANGUAGE_FILIPINO, "fil", "PH" },
+ { LANGUAGE_USER_TAGALOG, "tl", "PH" },
+ { LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH" },
+// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG" }, // ISO "nic" is only a collective language code
+ { LANGUAGE_YI, "ii", "CN" },
+ { LANGUAGE_TAMAZIGHT_LATIN, "kab", "DZ" }, // In practice Kabyle is the language used for this
+ { LANGUAGE_OBSOLETE_USER_KABYLE, "kab", "DZ" },
+ { LANGUAGE_TAMAZIGHT_LATIN, "ber", "DZ" }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there.
+ { LANGUAGE_TAMAZIGHT_TIFINAGH, "ber", "MA" }, // Morocco is officially using Tifinagh for its Berber languages so store it to distinguish explicitly from LANGUAGE_TAMAZIGHT_LATIN, even though as a collective language its not of much use
+// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" }, // ISO "ber" only collective!
+ { LANGUAGE_LATIN, "la", "VA" },
+ { LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA" },
+ { LANGUAGE_USER_ESPERANTO, "eo", "" },
+ { LANGUAGE_USER_INTERLINGUA, "ia", "" },
+ { LANGUAGE_MAORI_NEW_ZEALAND, "mi", "NZ" },
+ { LANGUAGE_OBSOLETE_USER_MAORI, "mi", "NZ" },
+ { LANGUAGE_KINYARWANDA_RWANDA, "rw", "RW" },
+ { LANGUAGE_OBSOLETE_USER_KINYARWANDA, "rw", "RW" },
+ { LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE" }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN
+ { LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE" },
+ { LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE" }, // MS maps this to 'wee-DE', which is nonsense. 'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE
+ { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE" },
+ { LANGUAGE_OCCITAN_FRANCE, "oc", "FR" },
+ { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR" },
+ { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR" },
+ { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY" },
+ { LANGUAGE_USER_KURDISH_IRAQ, "ku", "IQ" },
+ { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR" },
+ { LANGUAGE_USER_SARDINIAN, "sc", "IT" }, // macrolanguage code
+ { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT" },
+ { LANGUAGE_USER_SARDINIAN_GALLURESE, "sdn", "IT" },
+ { LANGUAGE_USER_SARDINIAN_LOGUDORESE, "src", "IT" },
+ { LANGUAGE_USER_SARDINIAN_SASSARESE, "sdc", "IT" },
+ { LANGUAGE_BRETON_FRANCE, "br", "FR" },
+ { LANGUAGE_OBSOLETE_USER_BRETON, "br", "FR" },
+ { LANGUAGE_KALAALLISUT_GREENLAND, "kl", "GL" },
+ { LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL" },
+ { LANGUAGE_USER_SWAZI, "ss", "ZA" },
+ { LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA" },
+ { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW" },
+ { LANGUAGE_USER_MOORE, "mos", "BF" },
+ { LANGUAGE_USER_BAMBARA, "bm", "ML" },
+ { LANGUAGE_USER_AKAN, "ak", "GH" },
+ { LANGUAGE_LUXEMBOURGISH_LUXEMBOURG, "lb", "LU" },
+ { LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH, "lb", "LU" },
+ { LANGUAGE_USER_FRIULIAN, "fur", "IT" },
+ { LANGUAGE_USER_FIJIAN, "fj", "FJ" },
+ { LANGUAGE_USER_AFRIKAANS_NAMIBIA, "af", "NA" },
+ { LANGUAGE_USER_ENGLISH_NAMIBIA, "en", "NA" },
+ { LANGUAGE_USER_WALLOON, "wa", "BE" },
+ { LANGUAGE_USER_COPTIC, "cop", "EG" },
+ { LANGUAGE_USER_GASCON, "gsc", "FR" },
+ { LANGUAGE_USER_GERMAN_BELGIUM, "de", "BE" },
+ { LANGUAGE_USER_CHUVASH, "cv", "RU" },
+ { LANGUAGE_USER_EWE_GHANA, "ee", "GH" },
+ { LANGUAGE_USER_ENGLISH_GHANA, "en", "GH" },
+ { LANGUAGE_USER_SANGO, "sg", "CF" },
+ { LANGUAGE_USER_GANDA, "lg", "UG" },
+ { LANGUAGE_USER_LINGALA_DRCONGO, "ln", "CD" },
+ { LANGUAGE_USER_LOW_GERMAN, "nds", "DE" },
+ { LANGUAGE_USER_HILIGAYNON, "hil", "PH" },
+ { LANGUAGE_USER_ENGLISH_MALAWI, "en", "MW" }, /* en default for MW */
+ { LANGUAGE_USER_NYANJA, "ny", "MW" },
+ { LANGUAGE_USER_KASHUBIAN, "csb", "PL" },
+ { LANGUAGE_USER_SPANISH_CUBA, "es", "CU" },
+ { LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA, "qul", "BO" },
+ { LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA, "quh", "BO" },
+ { LANGUAGE_USER_BODO_INDIA, "brx", "IN" },
+ { LANGUAGE_USER_DOGRI_INDIA, "dgo", "IN" },
+ { LANGUAGE_USER_MAITHILI_INDIA, "mai", "IN" },
+ { LANGUAGE_USER_SANTALI_INDIA, "sat", "IN" },
+ { LANGUAGE_USER_TETUN, "tet", "ID" },
+ { LANGUAGE_USER_TETUN_TIMOR_LESTE, "tet", "TL" },
+ { LANGUAGE_USER_TOK_PISIN, "tpi", "PG" },
+ { LANGUAGE_USER_SHUSWAP, "shs", "CA" },
+ { LANGUAGE_USER_ANCIENT_GREEK, "grc", "GR" },
+ { LANGUAGE_USER_ASTURIAN, "ast", "ES" },
+ { LANGUAGE_USER_LATGALIAN, "ltg", "LV" },
+ { LANGUAGE_USER_MAORE, "swb", "YT" },
+ { LANGUAGE_USER_BUSHI, "buc", "YT" },
+ { LANGUAGE_USER_TAHITIAN, "ty", "PF" },
+ { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" },
+ { LANGUAGE_USER_MALAGASY_PLATEAU, "mg", "MG" },
+ { LANGUAGE_USER_BAFIA, "ksf", "CM" },
+ { LANGUAGE_USER_GIKUYU, "ki", "KE" },
+ { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" },
+ { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" },
+ { LANGUAGE_USER_LIMBU, "lif", "NP" },
+ { LANGUAGE_USER_LOJBAN, "jbo", "" },
+ { LANGUAGE_USER_HAITIAN, "ht", "HT" },
+ { LANGUAGE_FRENCH_HAITI, "fr", "HT" },
+ { LANGUAGE_USER_BEEMBE, "beq", "CG" },
+ { LANGUAGE_USER_BEKWEL, "bkw", "CG" },
+ { LANGUAGE_USER_KITUBA, "mkw", "CG" },
+ { LANGUAGE_USER_LARI, "ldi", "CG" },
+ { LANGUAGE_USER_MBOCHI, "mdw", "CG" },
+ { LANGUAGE_USER_TEKE_EBOO, "ebo", "CG" },
+ { LANGUAGE_USER_TEKE_IBALI, "tek", "CG" },
+ { LANGUAGE_USER_TEKE_TYEE, "tyx", "CG" },
+ { LANGUAGE_USER_VILI, "vif", "CG" },
+ { LANGUAGE_USER_PORTUGUESE_ANGOLA, "pt", "AO" },
+ { LANGUAGE_USER_MANX, "gv", "GB" },
+ { LANGUAGE_USER_ARAGONESE, "an", "ES" },
+ { LANGUAGE_USER_KEYID, "qtz", "" }, // key id pseudolanguage used for UI testing
+ { LANGUAGE_USER_PALI_LATIN, "pli", "" }, // Pali with Latin script
+ { LANGUAGE_USER_KYRGYZ_CHINA, "ky", "CN" },
+ { LANGUAGE_USER_KOMI_ZYRIAN, "kpv", "RU" },
+ { LANGUAGE_USER_KOMI_PERMYAK, "koi", "RU" },
+ { LANGUAGE_USER_PITJANTJATJARA, "pjt", "AU" },
+ { LANGUAGE_USER_ERZYA, "myv", "RU" },
+ { LANGUAGE_USER_MARI_MEADOW, "mhr", "RU" },
+ { LANGUAGE_USER_KHANTY, "kca", "RU" },
+ { LANGUAGE_USER_LIVONIAN, "liv", "RU" },
+ { LANGUAGE_USER_MOKSHA, "mdf", "RU" },
+ { LANGUAGE_USER_MARI_HILL, "mrj", "RU" },
+ { LANGUAGE_USER_NGANASAN, "nio", "RU" },
+ { LANGUAGE_USER_OLONETS, "olo", "RU" },
+ { LANGUAGE_USER_VEPS, "vep", "RU" },
+ { LANGUAGE_USER_VORO, "vro", "EE" },
+ { LANGUAGE_USER_NENETS, "yrk", "RU" },
+ { LANGUAGE_USER_AKA, "axk", "CF" },
+ { LANGUAGE_USER_AKA_CONGO, "axk", "CG" },
+ { LANGUAGE_USER_DIBOLE, "bvx", "CG" },
+ { LANGUAGE_USER_DOONDO, "dde", "CG" },
+ { LANGUAGE_USER_KAAMBA, "xku", "CG" },
+ { LANGUAGE_USER_KOONGO, "kng", "CD" },
+ { LANGUAGE_USER_KOONGO_CONGO, "kng", "CG" },
+ { LANGUAGE_USER_KUNYI, "njx", "CG" },
+ { LANGUAGE_USER_NGUNGWEL, "ngz", "CG" },
+ { LANGUAGE_USER_NJYEM, "njy", "CM" },
+ { LANGUAGE_USER_NJYEM_CONGO, "njy", "CG" },
+ { LANGUAGE_USER_PUNU, "puu", "GA" },
+ { LANGUAGE_USER_PUNU_CONGO, "puu", "CG" },
+ { LANGUAGE_USER_SUUNDI, "sdj", "CG" },
+ { LANGUAGE_USER_TEKE_KUKUYA, "kkw", "CG" },
+ { LANGUAGE_USER_TSAANGI, "tsa", "CG" },
+ { LANGUAGE_USER_YAKA, "iyx", "CG" },
+ { LANGUAGE_USER_YOMBE, "yom", "CD" },
+ { LANGUAGE_USER_YOMBE_CONGO, "yom", "CG" },
+ { LANGUAGE_USER_SIDAMA, "sid", "ET" },
+ { LANGUAGE_MULTIPLE, "mul", "" }, // multiple languages, many languages are used
+ { LANGUAGE_UNDETERMINED, "und", "" }, // undetermined language, language cannot be identified
+ { LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information
+ { LANGUAGE_DONTKNOW, "", "" } // marks end of table
+};
+
+// Entry returned by both lcl_lookupFallbackEntry() overloads when no entry of
+// the ISO table can serve as a fallback: English (US).
+static MsLangId::IsoLangEntry aLastResortFallbackEntry =
+{ LANGUAGE_ENGLISH_US, "en", "US" };
+
+// Builds the tag string of this entry: "language-COUNTRY" if a country is
+// set, otherwise just "language".
+OUString MsLangId::IsoLangEntry::getTagString() const
+{
+    const OUString aLanguage( OUString::createFromAscii( maLangStr));
+    // An empty country string means a language-only entry.
+    if (!maCountry[0])
+        return aLanguage;
+    return OUString( aLanguage + "-" + OUString::createFromAscii( maCountry));
+}
+
+// -----------------------------------------------------------------------
+
+// This table lists the countries that should be mapped to a specific
+// English language variant. It is searched front to back by
+// convertIsoNamesToLanguage() when the language is "en" and the first entry
+// with a matching country wins, so each country code should occur only once.
+static IsoLangEngEntry const aImplIsoLangEngEntries[] =
+{
+    { LANGUAGE_ENGLISH_UK,          "AO" }, // Angola
+    { LANGUAGE_ENGLISH_UK,          "BJ" }, // Benin
+    { LANGUAGE_ENGLISH_UK,          "BW" }, // Botswana
+    { LANGUAGE_ENGLISH_UK,          "BI" }, // Burundi
+    { LANGUAGE_ENGLISH_UK,          "CM" }, // Cameroon
+    { LANGUAGE_ENGLISH_UK,          "GA" }, // Gabon
+    { LANGUAGE_ENGLISH_UK,          "GM" }, // Gambia
+    { LANGUAGE_ENGLISH_UK,          "GH" }, // Ghana
+    { LANGUAGE_ENGLISH_UK,          "GN" }, // Guinea
+    { LANGUAGE_ENGLISH_UK,          "LS" }, // Lesotho
+    { LANGUAGE_ENGLISH_UK,          "MW" }, // Malawi
+    { LANGUAGE_ENGLISH_UK,          "MT" }, // Malta
+    { LANGUAGE_ENGLISH_UK,          "NA" }, // Namibia
+    { LANGUAGE_ENGLISH_UK,          "NG" }, // Nigeria
+    { LANGUAGE_ENGLISH_UK,          "UG" }, // Uganda
+    { LANGUAGE_ENGLISH_UK,          "ZM" }, // Zambia
+    { LANGUAGE_ENGLISH_UK,          "ZW" }, // Zimbabwe
+    { LANGUAGE_ENGLISH_UK,          "SZ" }, // Swaziland
+    { LANGUAGE_ENGLISH_UK,          "SL" }, // Sierra Leone (was a duplicate "NG")
+    { LANGUAGE_ENGLISH_UK,          "KN" }, // Saint Kitts and Nevis
+    { LANGUAGE_ENGLISH_UK,          "SH" }, // St. Helena
+    { LANGUAGE_ENGLISH_UK,          "IO" }, // British Indian Ocean Territory
+    { LANGUAGE_ENGLISH_UK,          "FK" }, // Falkland Islands
+    { LANGUAGE_ENGLISH_UK,          "GI" }, // Gibraltar
+    { LANGUAGE_ENGLISH_UK,          "KI" }, // Kiribati
+    { LANGUAGE_ENGLISH_UK,          "VG" }, // Virgin Islands
+    { LANGUAGE_ENGLISH_UK,          "MU" }, // Mauritius
+    { LANGUAGE_ENGLISH_UK,          "FJ" }, // Fiji
+    { LANGUAGE_ENGLISH_US,          "KI" }, // Kiribati; NOTE: never reached, shadowed by the LANGUAGE_ENGLISH_UK entry above
+    { LANGUAGE_ENGLISH_US,          "LR" }, // Liberia
+    { LANGUAGE_ENGLISH_US,          "GU" }, // Guam
+    { LANGUAGE_ENGLISH_US,          "MH" }, // Marshall Islands
+    { LANGUAGE_ENGLISH_US,          "PW" }, // Palau
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "AI" }, // Anguilla
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "AG" }, // Antigua and Barbuda
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "BS" }, // Bahamas
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "BB" }, // Barbados
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "BM" }, // Bermuda
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "KY" }, // Cayman Islands
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "GD" }, // Grenada
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "DM" }, // Dominica
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "HT" }, // Haiti
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "MS" }, // Montserrat
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "FM" }, // Micronesia
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "VC" }, // St. Vincent / Grenadines
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "LC" }, // Saint Lucia
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "TC" }, // Turks & Caicos Islands
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "GY" }, // Guyana
+    { LANGUAGE_ENGLISH_CARRIBEAN,   "TT" }, // Trinidad and Tobago
+    { LANGUAGE_ENGLISH_AUS,         "CX" }, // Christmas Island
+    { LANGUAGE_ENGLISH_AUS,         "CC" }, // Cocos (Keeling) Islands
+    { LANGUAGE_ENGLISH_AUS,         "NF" }, // Norfolk Island
+    { LANGUAGE_ENGLISH_AUS,         "PG" }, // Papua New Guinea
+    { LANGUAGE_ENGLISH_AUS,         "SB" }, // Solomon Islands
+    { LANGUAGE_ENGLISH_AUS,         "TV" }, // Tuvalu
+    { LANGUAGE_ENGLISH_AUS,         "NR" }, // Nauru
+    { LANGUAGE_ENGLISH_NZ,          "CK" }, // Cook Islands
+    { LANGUAGE_ENGLISH_NZ,          "NU" }, // Niue
+    { LANGUAGE_ENGLISH_NZ,          "TK" }, // Tokelau
+    { LANGUAGE_ENGLISH_NZ,          "TO" }, // Tonga
+    { LANGUAGE_DONTKNOW,            ""   }  // marks end of table
+};
+
+// -----------------------------------------------------------------------
+
+// Language / "country" pairs whose second part is not an ISO 3166 country
+// code. convertLanguageToIsoNames() consults this table after the ISO table;
+// convertIsoNamesToLanguage() compares the second part case-insensitively.
+static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] =
+{
+ { LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766
+ { LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766
+ { LANGUAGE_SERBIAN_LATIN, "sr", "latin" },
+ { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" },
+ { LANGUAGE_AZERI_LATIN, "az", "latin" },
+ { LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" },
+ { LANGUAGE_DONTKNOW, "", "" } // marks end of table
+};
+
+// -----------------------------------------------------------------------
+
+// in this table are only names to find the best language
+// Additional spellings accepted as input only: convertIsoNamesToLanguage()
+// searches this table, convertLanguageToIsoNames() does not.
+static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries2[] =
+{
+ { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmaal" },
+ { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmal" },
+ { LANGUAGE_NORWEGIAN_NYNORSK, "no", "nynorsk" },
+ { LANGUAGE_DONTKNOW, "", "" } // marks end of table
+};
+
+// -----------------------------------------------------------------------
+
+// in this table are only names to find the best language
+// Non-ISO names (such as "c", "posix" or "tchinese") that
+// convertIsoNamesToLanguage() tries as its very last step.
+// The end marker carries a NULL string; the lookup loop stops on its mnLang
+// before that string would be compared.
+static IsoLangOtherEntry const aImplOtherEntries[] =
+{
+ { LANGUAGE_ENGLISH_US, "c" },
+ { LANGUAGE_CHINESE, "chinese" },
+ { LANGUAGE_GERMAN, "german" },
+ { LANGUAGE_JAPANESE, "japanese" },
+ { LANGUAGE_KOREAN, "korean" },
+ { LANGUAGE_ENGLISH_US, "posix" },
+ { LANGUAGE_CHINESE_TRADITIONAL, "tchinese" },
+ { LANGUAGE_DONTKNOW, NULL } // marks end of table
+};
+
+
+// in this table are only privateuse names
+// Privateuse names and their LanguageType values. Used in both directions:
+// by convertLanguageToIsoNames() (LanguageType to name, country left empty)
+// and by convertPrivateUseToLanguage() (name to LanguageType, ASCII
+// case-insensitive). The end marker carries a NULL string; the lookup loops
+// stop on its mnLang before that string would be read.
+static IsoLangOtherEntry const aImplPrivateUseEntries[] =
+{
+ { LANGUAGE_USER_PRIV_NOTRANSLATE, "x-no-translate" }, //! not BCP47 but legacy in .xcu configmgr
+ { LANGUAGE_USER_PRIV_DEFAULT, "x-default" },
+ { LANGUAGE_USER_PRIV_COMMENT, "x-comment" },
+ { LANGUAGE_USER_PRIV_JOKER, "*" }, //! not BCP47 but transferable in configmgr
+ { LANGUAGE_DONTKNOW, NULL } // marks end of table
+};
+
+// =======================================================================
+
+// static
+// Converts a LanguageType to its language and country strings.
+// LANGUAGE_SYSTEM is resolved to the system language first. Three tables are
+// searched in order (ISO combinations, non-standard combinations, privateuse
+// names); the first hit fills rLangStr / rCountry. If nothing matches, both
+// strings are set to empty.
+void MsLangId::Conversion::convertLanguageToIsoNames( LanguageType nLang,
+        OUString& rLangStr, OUString& rCountry )
+{
+    const LanguageType nWanted =
+        (nLang == LANGUAGE_SYSTEM) ? MsLangId::getSystemLanguage() : nLang;
+
+    // 1. Defined ISO combinations.
+    const IsoLangEntry* pIso = aImplIsoLangEntries;
+    do
+    {
+        if (nWanted == pIso->mnLang)
+        {
+            rLangStr = OUString::createFromAscii( pIso->maLangStr);
+            rCountry = OUString::createFromAscii( pIso->maCountry);
+            return;
+        }
+    }
+    while ((++pIso)->mnLang != LANGUAGE_DONTKNOW);
+
+    // 2. Combinations that are allowed for mime specifications but are not
+    //    defined ISO combinations.
+    const IsoLangNoneStdEntry* pNonStd = aImplIsoNoneStdLangEntries;
+    do
+    {
+        if (nWanted == pNonStd->mnLang)
+        {
+            rLangStr = OUString::createFromAscii( pNonStd->maLangStr);
+            rCountry = OUString::createFromAscii( pNonStd->maCountry);
+            return;
+        }
+    }
+    while ((++pNonStd)->mnLang != LANGUAGE_DONTKNOW);
+
+    // 3. Privateuse definitions; these never have a country.
+    const IsoLangOtherEntry* pPrivate = aImplPrivateUseEntries;
+    do
+    {
+        if (nWanted == pPrivate->mnLang)
+        {
+            rLangStr = OUString::createFromAscii( pPrivate->mpLangStr);
+            rCountry = OUString();
+            return;
+        }
+    }
+    while ((++pPrivate)->mnLang != LANGUAGE_DONTKNOW);
+
+    // Unknown LanguageType: hand back empty strings.
+    rCountry = OUString();
+    rLangStr = OUString();
+}
+
+// -----------------------------------------------------------------------
+
+// -----------------------------------------------------------------------
+
+/** Finds the ISO table entry to fall back to for a LanguageType.
+
+    Search order:
+    1. An entry with exactly nLang that has a country, or that is one of the
+       languages known to have no country assigned.
+    2. Otherwise, starting at the first entry sharing nLang's primary
+       language, the first entry of that primary language that has a country.
+    3. Otherwise aLastResortFallbackEntry.
+
+    @param nLang  the LanguageType to find a fallback for
+    @return reference to a table entry or to aLastResortFallbackEntry
+ */
+static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( LanguageType nLang )
+{
+    const LanguageType nPrimary = MsLangId::getPrimaryLanguage( nLang);
+
+    // Search for LangID and remember first lang-only.
+    const MsLangId::IsoLangEntry* pFirstPrimary = NULL;
+    const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries;
+    do
+    {
+        if (pEntry->mnLang == nLang)
+        {
+            if (*pEntry->maCountry)
+                return *pEntry;
+            switch (nLang)
+            {
+                // These are known to have no country assigned.
+                case LANGUAGE_BASQUE:
+                case LANGUAGE_USER_ESPERANTO:
+                case LANGUAGE_USER_INTERLINGUA:
+                case LANGUAGE_USER_LOJBAN:
+                    return *pEntry;
+                default:
+                    ;   // nothing
+            }
+        }
+        if (!pFirstPrimary &&
+                MsLangId::getPrimaryLanguage( pEntry->mnLang) == nPrimary)
+            pFirstPrimary = pEntry;
+        ++pEntry;
+    }
+    while ( pEntry->mnLang != LANGUAGE_DONTKNOW );
+
+    // Language not found at all => use default.
+    if (!pFirstPrimary)
+        return aLastResortFallbackEntry;
+
+    // Search for first entry of primary language with any country.
+    // The comparison must use nPrimary: a primary language ID compared
+    // against the full nLang can never match once nLang carries a
+    // sublanguage, which would send every such request to the last resort.
+    pEntry = pFirstPrimary;
+    do
+    {
+        if (MsLangId::getPrimaryLanguage( pEntry->mnLang) == nPrimary)
+        {
+            if (*pEntry->maCountry)
+                return *pEntry;
+        }
+        ++pEntry;
+    }
+    while ( pEntry->mnLang != LANGUAGE_DONTKNOW );
+
+    return aLastResortFallbackEntry;
+}
+
+// static
+// Returns the LanguageType of the fallback entry found for nLang.
+LanguageType MsLangId::Conversion::lookupFallbackLanguage( LanguageType nLang )
+{
+    const MsLangId::IsoLangEntry& rFallback = lcl_lookupFallbackEntry( nLang);
+    return rFallback.mnLang;
+}
+
+
+// static
+// Returns the fallback entry found for nLang as a Locale made of its
+// language and country strings, with an empty variant.
+::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale( LanguageType nLang )
+{
+    const MsLangId::IsoLangEntry& rFallback = lcl_lookupFallbackEntry( nLang);
+    const OUString aLanguage( OUString::createFromAscii( rFallback.maLangStr));
+    const OUString aCountry( OUString::createFromAscii( rFallback.maCountry));
+    return ::com::sun::star::lang::Locale( aLanguage, aCountry, OUString());
+}
+
+// -----------------------------------------------------------------------
+
+// Finds the ISO table entry to fall back to for a Locale.
+// First pass: an entry whose language matches and whose country equals the
+// requested one, or a matching language-only entry of a language known to
+// have no country. Second pass: the first entry of that language that has
+// any country. Otherwise aLastResortFallbackEntry.
+static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry(
+        const ::com::sun::star::lang::Locale & rLocale )
+{
+    // The table stores languages in lower case and countries in upper case.
+    const OUString aWantedLang( rLocale.Language.toAsciiLowerCase());
+    const OUString aWantedCountry( rLocale.Country.toAsciiUpperCase());
+    const bool bCountryGiven = (aWantedCountry.getLength() != 0);
+
+    const MsLangId::IsoLangEntry* pFirstOfLang = NULL;
+    const MsLangId::IsoLangEntry* pScan = aImplIsoLangEntries;
+    do
+    {
+        if (aWantedLang.equalsAscii( pScan->maLangStr))
+        {
+            if (*pScan->maCountry != '\0')
+            {
+                // Entry with a country: only an exact country match counts.
+                if (bCountryGiven && aWantedCountry.equalsAscii( pScan->maCountry))
+                    return *pScan;
+            }
+            else if (pScan->mnLang == LANGUAGE_BASQUE
+                    || pScan->mnLang == LANGUAGE_USER_ESPERANTO
+                    || pScan->mnLang == LANGUAGE_USER_INTERLINGUA
+                    || pScan->mnLang == LANGUAGE_USER_LOJBAN)
+            {
+                // These are known to have no country assigned.
+                return *pScan;
+            }
+            // Remember where this language starts for the second pass.
+            if (pFirstOfLang == NULL)
+                pFirstOfLang = pScan;
+        }
+    }
+    while ((++pScan)->mnLang != LANGUAGE_DONTKNOW);
+
+    // Language not found at all => use default.
+    if (pFirstOfLang == NULL)
+        return aLastResortFallbackEntry;
+
+    // Take the first entry of the language that has any country.
+    pScan = pFirstOfLang;
+    do
+    {
+        if (aWantedLang.equalsAscii( pScan->maLangStr) && *pScan->maCountry != '\0')
+            return *pScan;
+    }
+    while ((++pScan)->mnLang != LANGUAGE_DONTKNOW);
+
+    return aLastResortFallbackEntry;
+}
+
+
+// static
+// Returns the fallback entry found for rLocale as a Locale made of its
+// language and country strings, with an empty variant.
+::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale(
+        const ::com::sun::star::lang::Locale & rLocale )
+{
+    const MsLangId::IsoLangEntry& rFallback = lcl_lookupFallbackEntry( rLocale);
+    const OUString aLanguage( OUString::createFromAscii( rFallback.maLangStr));
+    const OUString aCountry( OUString::createFromAscii( rFallback.maCountry));
+    return ::com::sun::star::lang::Locale( aLanguage, aCountry, OUString());
+}
+
+// =======================================================================
+
+// static
+// Maps a privateuse name to its LanguageType, comparing ASCII
+// case-insensitively; returns LANGUAGE_DONTKNOW if the name is not listed.
+LanguageType MsLangId::Conversion::convertPrivateUseToLanguage( const OUString& rPriv )
+{
+    const IsoLangOtherEntry* pCandidate = aImplPrivateUseEntries;
+    do
+    {
+        if (rPriv.equalsIgnoreAsciiCaseAscii( pCandidate->mpLangStr))
+            return pCandidate->mnLang;
+    }
+    while ((++pCandidate)->mnLang != LANGUAGE_DONTKNOW);
+
+    return LANGUAGE_DONTKNOW;
+}
+
+
+// static
+// Maps a language / country string pair to a LanguageType.
+// The lookup is a cascade that returns at the first hit:
+//  1. exact language (+ country) match in the ISO table,
+//  2. for "en": the country-to-English-variant table,
+//  3. the two tables of non-standard language/"country" pairs,
+//  4. the remembered language-only fallback from step 1,
+//  5. if only a country was given: first ISO entry with that country,
+//  6. privateuse names,
+//  7. other non-standard names.
+// Returns LANGUAGE_DONTKNOW if nothing matches.
+LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OUString& rLang,
+ const OUString& rCountry )
+{
+ // language is lower case in table
+ OUString aLowerLang = rLang.toAsciiLowerCase();
+ // country is upper case in table
+ OUString aUpperCountry = rCountry.toAsciiUpperCase();
+
+ // first look for exact match
+ const IsoLangEntry* pFirstLang = NULL;
+ const IsoLangEntry* pEntry = aImplIsoLangEntries;
+ do
+ {
+ if ( aLowerLang.equalsAscii( pEntry->maLangStr ) )
+ {
+ // No country requested: the first entry of the language wins.
+ if ( aUpperCountry.isEmpty() ||
+ aUpperCountry.equalsAscii( pEntry->maCountry ) )
+ return pEntry->mnLang;
+ // Language matched but country did not: remember the first such
+ // entry, but let a later entry without country replace it.
+ if ( !pFirstLang )
+ pFirstLang = pEntry;
+ else if ( !*pEntry->maCountry )
+ pFirstLang = pEntry;
+ }
+ ++pEntry;
+ }
+ while ( pEntry->mnLang != LANGUAGE_DONTKNOW );
+
+ // some eng countries should be mapped to a specific english language
+ if ( aLowerLang == "en" )
+ {
+ const IsoLangEngEntry* pEngEntry = aImplIsoLangEngEntries;
+ do
+ {
+ if ( aUpperCountry.equalsAscii( pEngEntry->maCountry ) )
+ return pEngEntry->mnLang;
+ ++pEngEntry;
+ }
+ while ( pEngEntry->mnLang != LANGUAGE_DONTKNOW );
+ }
+
+ // test for specific languages that do not use standard ISO 3166 codes
+ const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries;
+ do
+ {
+ if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) )
+ {
+ // The countries in this table are not all in upper case
+ if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) )
+ return pNoneStdEntry->mnLang;
+ }
+ ++pNoneStdEntry;
+ }
+ while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW );
+ // Same search again, now in the table of additional input-only spellings.
+ pNoneStdEntry = aImplIsoNoneStdLangEntries2;
+ do
+ {
+ if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) )
+ {
+ // The countries in this table are not all in upper case
+ if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) )
+ return pNoneStdEntry->mnLang;
+ }
+ ++pNoneStdEntry;
+ }
+ while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW );
+
+ // If the language is known but the country is not, then we return the
+ // entry remembered for the language during the exact-match search.
+ if ( pFirstLang )
+ return pFirstLang->mnLang;
+
+ // if only the country is set, look for any entry matching the country
+ // (to allow reading country and language in separate steps, in any order)
+ if ( !rCountry.isEmpty() && rLang.isEmpty() )
+ {
+ const IsoLangEntry* pEntry2 = aImplIsoLangEntries;
+ do
+ {
+ if ( aUpperCountry.equalsAscii( pEntry2->maCountry ) )
+ return pEntry2->mnLang;
+ ++pEntry2;
+ }
+ while ( pEntry2->mnLang != LANGUAGE_DONTKNOW );
+
+ // No entry has this country: from here on the lower-cased country
+ // string is what gets looked up in the remaining name tables.
+ aLowerLang = aUpperCountry.toAsciiLowerCase();
+ }
+
+ // Look for privateuse definitions.
+ LanguageType nLang = convertPrivateUseToLanguage( aLowerLang);
+ if (nLang != LANGUAGE_DONTKNOW)
+ return nLang;
+
+ // Now look for all other definitions, which are not standard
+ const IsoLangOtherEntry* pOtherEntry = aImplOtherEntries;
+ do
+ {
+ if ( aLowerLang.equalsAscii( pOtherEntry->mpLangStr ) )
+ return pOtherEntry->mnLang;
+ ++pOtherEntry;
+ }
+ while ( pOtherEntry->mnLang != LANGUAGE_DONTKNOW );
+
+ return LANGUAGE_DONTKNOW;
+}
+
+// -----------------------------------------------------------------------
+
+// static
+// Byte string convenience overload: converts both arguments to OUString
+// using RTL_TEXTENCODING_ASCII_US and forwards to the OUString overload.
+LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OString& rLang,
+        const OString& rCountry )
+{
+    return convertIsoNamesToLanguage(
+            OStringToOUString( rLang, RTL_TEXTENCODING_ASCII_US),
+            OStringToOUString( rCountry, RTL_TEXTENCODING_ASCII_US));
+}
+
+// -----------------------------------------------------------------------
+
+// One row of the table that maps a language / country / "@modifier" triple
+// of a unix locale string to a LanguageType.
+struct IsoLangGLIBCModifiersEntry
+{
+ LanguageType mnLang; // resulting LanguageType
+ sal_Char maLangStr[4]; // language code, room for 3 characters plus terminator
+ sal_Char maCountry[3]; // country code, room for 2 characters plus terminator
+ sal_Char maAtString[9]; // modifier after '@', room for 8 characters plus terminator
+};
+
+// Locales that are only distinguishable by their "@modifier" part.
+// Searched by convertUnxByteStringToLanguage() before the generic ISO lookup,
+// and only when the locale string carries a modifier.
+static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] =
+{
+ // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier
+ { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" },
+ { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia
+ { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro
+ { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro
+ { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" },
+ { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" },
+ { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" },
+ { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table
+};
+
+// convert a unix locale string into LanguageType
+
+// static
+// Converts a unix locale string of the form lang[_COUNTRY][.codeset][@modifier]
+// to a LanguageType. If a modifier is present, the glibc modifier table is
+// searched first; otherwise, or if that finds nothing, the language and
+// country parts go through Conversion::convertIsoNamesToLanguage().
+LanguageType MsLangId::convertUnxByteStringToLanguage(
+        const OString& rString )
+{
+    const sal_Int32 nUnderscore = rString.indexOf( (sal_Char)'_' );
+    const sal_Int32 nAt = rString.indexOf( (sal_Char)'@' );
+
+    // End of the language/country part: the '.', else the '@', else the
+    // end of the string.
+    sal_Int32 nEnd = rString.indexOf( (sal_Char)'.' );
+    if (nEnd < 0)
+        nEnd = (nAt >= 0) ? nAt : rString.getLength();
+
+    OString aModifier;
+    if (nAt >= 0)
+        aModifier = rString.copy( nAt + 1 );
+
+    OString aLang;
+    OString aCountry;
+    if (nUnderscore < 0 || nUnderscore > nEnd)
+    {
+        // eg. "el.sun_eu_greek", "tchinese", "es.ISO8859-15"
+        aLang = rString.copy( 0, nEnd );
+    }
+    else
+    {
+        // well formed iso names like "en_US.UTF-8", "sh_BA.ISO8859-2@bosnia"
+        aLang = rString.copy( 0, nUnderscore );
+        aCountry = rString.copy( nUnderscore + 1, nEnd - nUnderscore - 1 );
+    }
+
+    // With a glibc modifier, an exact match in the modifier table wins.
+    if (!aModifier.isEmpty())
+    {
+        // The table has lower case languages and upper case countries.
+        const OString aLangLower( aLang.toAsciiLowerCase());
+        const OString aCountryUpper( aCountry.toAsciiUpperCase());
+
+        const IsoLangGLIBCModifiersEntry* pRow = aImplIsoLangGLIBCModifiersEntries;
+        do
+        {
+            // The casts avoid an embedded \0 warning.
+            if (aLangLower.equals( static_cast< const char* >( pRow->maLangStr ))
+                    && aModifier.equals( static_cast< const char* >( pRow->maAtString ))
+                    && (aCountryUpper.isEmpty()
+                        || aCountryUpper.equals( static_cast< const char* >( pRow->maCountry ))))
+            {
+                return pRow->mnLang;
+            }
+        }
+        while ((++pRow)->mnLang != LANGUAGE_DONTKNOW);
+    }
+
+    return Conversion::convertIsoNamesToLanguage( aLang, aCountry );
+}
+
+// -----------------------------------------------------------------------
+// pass one IsoLangEntry to the outer world of the resource compiler
+
+// static
+// Returns a pointer to the nIndex-th entry of the ISO table, or 0 if nIndex
+// is not below SAL_N_ELEMENTS of the table.
+// NOTE(review): that bound presumably includes the end-of-table marker entry.
+const MsLangId::IsoLangEntry* MsLangId::getIsoLangEntry( size_t nIndex )
+{
+    return (nIndex < SAL_N_ELEMENTS(aImplIsoLangEntries))
+        ? &aImplIsoLangEntries[ nIndex]
+        : 0;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/langid.pl b/i18nlangtag/source/isolang/langid.pl
new file mode 100755
index 000000000000..4504f08eabed
--- /dev/null
+++ b/i18nlangtag/source/isolang/langid.pl
@@ -0,0 +1,409 @@
+: # -*- perl -*- vim: ft=perl
+eval 'exec perl -w -S $0 ${1+"$@"}'
+if 0;
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+
+# See Usage() below or invoke without arguments for short instructions.
+# For long instructions use the source, Luke ;-)
+
+use strict;
+
+# Prints the usage instructions to STDERR.
+sub Usage()
+{
+    my @text = (
+        "\n",
+        "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
+        "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
+        "listbox entries, langlist.mk, file_ooo.scp registry name, languages.pm and\n",
+        "msi-encodinglist.txt\n\n",
+
+        "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
+
+        "A language string will be used as a generic string match in all searched files.\n",
+        "You may enclose the language string in word delimiters,\n",
+        "e.g. \\blanguage_german\\b for a specific match.\n",
+        "If the language string expression matches more than one define,\n",
+        "e.g. as in 'german', all matching defines will be processed.\n",
+        "If the language string does not match a define or an identifier in\n",
+        "langtab.src, a generic string match of the listbox entries will be tried.\n\n",
+
+        "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
+        "decimal, hexadecimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
+        "The exact language_define of an exact match will be used in remaining lookups.\n\n",
+
+        "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
+        "for example: 'en-US' or 'de-' or '-CH',\n",
+        "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
+        "and xx and YY themselves may be regular expressions.\n",
+        "Also here a list of matches will be processed.\n\n",
+
+        "If option --single is given, only the first match will be processed.\n\n",
+    );
+    print STDERR @text;
+}
+
+# Locations taken from the build environment. SOLARVERSION, INPATH and
+# SRC_ROOT are mandatory: if one of them is missing, a hint and the usage
+# text are printed and the script exits with status 1.
+my $SOLARVERSION = $ENV{"SOLARVERSION"};
+my $INPATH = $ENV{"INPATH"};
+my $SRC_ROOT = $ENV{"SRC_ROOT"};
+my $UPDMINOREXT = $ENV{"UPDMINOREXT"};
+if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT))
+{
+ print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
+ Usage();
+ exit 1;
+}
+# UPDMINOREXT is optional and defaults to the empty string.
+if (!defined($UPDMINOREXT)) {
+ $UPDMINOREXT = '';
+}
+# Directory under which grepFile() is asked to look for i18nlangtag/lang.h.
+my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT";
+
+# Bit mask used by getPrimaryLanguage(): the low 10 bits of a LangID.
+my $LANGUAGE_MASK_PRIMARY = 0x03ff;
+
+# Returns the primary language part of a LangID, i.e. the bits selected by
+# $LANGUAGE_MASK_PRIMARY.
+sub getPrimaryLanguage($)
+{
+    my $langid = shift;
+    my $primary = $langid & $LANGUAGE_MASK_PRIMARY;
+    return $primary;
+}
+
+# Returns the sublanguage part of a LangID: everything above the low 10 bits.
+sub getSubLanguage($)
+{
+    my $langid = shift;
+    my $sublang = $langid >> 10;
+    return $sublang;
+}
+
+# Combines a sublanguage and a primary language value into a LangID:
+# the sublanguage is placed above the low 10 bits, the primary is or'ed in.
+sub makeLangID($$)
+{
+    my $sublang = shift;
+    my $primary = shift;
+    my $shifted = $sublang << 10;
+    return $shifted | $primary;
+}
+
+
+# Greps one file for a regex, prints the matching lines and returns them.
+#
+# Arguments:
+#   $regex     - pattern tested against every line
+#   $path      - base directory
+#   $module    - module directory below $path
+#   $name      - file name relative to the module directory
+#   @addregex  - optional: element 0 is an opener pattern, element 1 a closer
+#                pattern, elements 2.. are patterns that are tested only
+#                while between opener and closer, and only on lines that did
+#                not match $regex
+#
+# The file is looked for in ./$module first, then in $path/$module,
+# $path/$module.lnk and $path/$module.link. Every matching line is printed
+# (preceded once by the file name) and collected without its trailing
+# newline. Returns the list of collected lines; prints "Not found in ..." if
+# there was no match or no file could be opened.
+sub grepFile($$$$@)
+{
+ my( $regex, $path, $module, $name, @addregex) = @_;
+ my @result;
+ my $found = 0;
+ my $areopen = 0;
+ my $arecloser = '';
+ my $file;
+ # Try module under current working directory first to catch local
+ # modifications. A Not yet delivered lang.h is a special case.
+ if ("$path/$module/$name" eq "$SOLENVINC/i18nlangtag/lang.h") {
+ $file = "./$module/inc/i18nlangtag/lang.h"; }
+ else {
+ $file = "./$module/$name"; }
+ # A failure to open the local file is silent; failures of the following
+ # candidates are reported.
+ if (!($found = open( IN, $file)))
+ {
+ # Then with the given path.
+ $file = "$path/$module/$name";
+ if (!($found = open( IN, $file)))
+ {
+ print "No $file\n";
+ $file = "$path/$module.lnk/$name";
+ if (!($found = open( IN, $file))) {
+ print "No $file.\n";
+ $file = "$path/$module.link/$name";
+ if (!($found = open( IN, $file))) {
+ print "No $file either.\n"; }
+ }
+ }
+ }
+ if ($found)
+ {
+ # From here on $found means "at least one line matched".
+ $found = 0;
+ while (my $line = <IN>)
+ {
+ if ($line =~ /$regex/)
+ {
+ if (!$found)
+ {
+ $found = 1;
+ print "$file:\n";
+ }
+ chomp( $line);
+ print "$line\n";
+ push( @result, $line);
+ }
+ elsif (@addregex)
+ {
+ # By convention first element is opener, second element is closer.
+ if (!$areopen)
+ {
+ if ($line =~ /$addregex[0]/)
+ {
+ $areopen = 1;
+ $arecloser = $addregex[1];
+ }
+ }
+ # Not an else: the line that opens the region is itself tested
+ # against the additional patterns and the closer.
+ if ($areopen)
+ {
+ # A line is collected once per additional pattern it matches.
+ for (my $i = 2; $i < @addregex; ++$i)
+ {
+ if ($line =~ /$addregex[$i]/)
+ {
+ if (!$found)
+ {
+ $found = 1;
+ print "$file:\n";
+ }
+ chomp( $line);
+ print "$line\n";
+ push( @result, $line);
+ }
+ }
+ if ($line =~ /$arecloser/)
+ {
+ $areopen = 0;
+ }
+ }
+ }
+ }
+ close( IN);
+ }
+ if (!$found) {
+ print "Not found in $file\n";
+ #print "Not found in $file for $regex @addregex\n";
+ }
+ return @result;
+}
+
+
+# Entry point: parses the command line, determines the language define(s) to
+# work with and then greps the various source files for each of them.
+#
+# Accepted arguments after the options (see Usage()):
+#   one value   - a LangID (numeric) or a language string / xx-YY pair
+#   two values  - primary language and sublanguage (numeric)
+# Returns 0 on success, 1 if the number of arguments is wrong.
+sub main()
+{
+    my( $lcid, @parts, $grepdef, $options, $single);
+    $grepdef = 0;
+    $single = 0;
+    # Leading arguments starting with "--" are options; $options ends up as
+    # the index of the first non-option argument.
+    for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
+    {
+        if ($ARGV[$options] eq '--single') { $single = 1; }
+        else { print "Unknown option: $ARGV[$options]\n"; }
+    }
+    if (@ARGV == 1 + $options)
+    {
+        # 0x hex, 0b bin, 0 oct
+        # The value is the first non-option argument, not necessarily
+        # $ARGV[0], which is an option if any option was given.
+        if ($ARGV[$options] =~ /^0/) {
+            $lcid = oct( $ARGV[$options]); }
+        elsif ($ARGV[$options] =~ /^[0-9]/) {
+            $lcid = $ARGV[$options]; }
+        else
+        {
+            $grepdef = $ARGV[$options];
+            $lcid = 0;
+        }
+        $parts[0] = getPrimaryLanguage( $lcid);
+        $parts[1] = getSubLanguage( $lcid);
+    }
+    elsif (@ARGV == 2 + $options)
+    {
+        # Store primary and sublanguage at indices 0 and 1 regardless of how
+        # many options precede them on the command line.
+        for (my $i = $options; $i < 2 + $options; ++$i)
+        {
+            if ($ARGV[$i] =~ /^0/) {
+                $parts[$i - $options] = oct( $ARGV[$i]); }
+            else {
+                $parts[$i - $options] = $ARGV[$i]; }
+        }
+        $lcid = makeLangID( $parts[1], $parts[0]);
+    }
+    else
+    {
+        Usage();
+        return 1;
+    }
+    my $modifier = "(?i)";
+    my (@resultlist, @greplist, $result);
+    # If no string was given on the command line, but value(s) were, lookup the
+    # LangID value to obtain the define identifier.
+    if ($grepdef)
+    {
+        # #define LANGUAGE_AFRIKAANS 0x0436
+        @resultlist = grepFile(
+            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
+            $SOLENVINC, "i18nlangtag", "lang.h", ());
+    }
+    else
+    {
+        printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid,
+            $lcid, $parts[0], $parts[1]);
+        my $buf = sprintf( "0x%04X", $lcid);
+        @resultlist = grepFile(
+            '^\s*#\s*define\s+\w+\s+' . $buf,
+            $SOLENVINC, "i18nlangtag", "lang.h", ());
+    }
+    for $result (@resultlist)
+    {
+        # #define LANGUAGE_AFRIKAANS 0x0436
+        if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
+        {
+            push( @greplist, '\b' . $1 . '\b');
+            $modifier = ""; # complete identifier now case sensitive
+            if ($single) {
+                last; }
+        }
+    }
+    # If the string given is of the form xx-yy lookup a language,country pair
+    # to obtain the define identifier. xx and yy themselves may be regexps.
+    # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy'
+    if ($grepdef =~ /^(.*)-$/) {
+        $grepdef = $1 . "-.*"; }
+    if ($grepdef =~ /^-(.*)$/) {
+        $grepdef = ".*-" . $1; }
+    if ($grepdef =~ /^(.*)-(.*)$/)
+    {
+        my $lang = $1;
+        my $coun = $2;
+        $lang = lc($lang);
+        $coun = uc($coun);
+        # { LANGUAGE_AFRIKAANS, "af", "ZA" },
+        @resultlist = grepFile(
+            '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,',
+            "$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ());
+        for $result (@resultlist)
+        {
+            if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
+            {
+                push( @greplist, '\b' . $1 . '\b');
+                $modifier = ""; # complete identifier now case sensitive
+                if ($single) {
+                    last; }
+            }
+        }
+        $grepdef = 0;
+    }
+    # Nothing resolved to a define: fall back to the string as given.
+    if (!@greplist && $grepdef) {
+        push( @greplist, $grepdef); }
+    for $grepdef (@greplist)
+    {
+        print "\nUsing: " . $grepdef . "\n";
+
+        # Decimal LCID, was needed for Langpack.ulf but isn't used anymore,
+        # keep just in case we'd need it again.
+        # #define LANGUAGE_AFRIKAANS 0x0436
+        @resultlist = grepFile(
+            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
+            $SOLENVINC, "i18nlangtag", "lang.h", ());
+        my @lcidlist;
+        for $result (@resultlist)
+        {
+            # #define LANGUAGE_AFRIKAANS 0x0436
+            if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
+            {
+                push( @lcidlist, oct( $2));
+            }
+        }
+
+        # { LANGUAGE_AFRIKAANS, "af", "ZA" },
+        @resultlist = grepFile(
+            $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
+            "$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ());
+
+        # For every language[,country] found, show the <Language>/<Country>
+        # elements of the matching locale data file and remember a pattern
+        # for the per-locale greps further below.
+        my @langcoungreplist;
+        for $result (@resultlist)
+        {
+            if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
+            {
+                my $lang = $1;
+                my $coun = $2;
+                my $loca;
+                if ($coun)
+                {
+                    $loca = $lang . "_" . $coun;
+                    push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?');
+                }
+                else
+                {
+                    $loca = $lang;
+                    $coun = "";
+                    push( @langcoungreplist, '\b' . $lang . '\b');
+                }
+                my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml";
+                my $found;
+                if (!($found = open( LD, $file)))
+                {
+                    $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml";
+                    if (!($found = open( LD, $file)))
+                    {
+                        $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml";
+                        $found = open( LD, $file);
+                    }
+                }
+                if ($found)
+                {
+                    print "Found $file:\n";
+                    my $on = 0;
+                    while (my $line = <LD>)
+                    {
+                        if ($line =~ /<(Language|Country)>/) {
+                            $on = 1; }
+                        if ($on) {
+                            print $line; }
+                        if ($line =~ /<\/(Language|Country)>/) {
+                            $on = 0; }
+                    }
+                    close( LD);
+                }
+                else {
+                    print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; }
+            }
+        }
+
+        # case LANGUAGE_ARABIC:
+        grepFile(
+            $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:',
+            "$SRC_ROOT", "i18nlangtag", "source/isolang/mslangid.cxx", ());
+
+        # With CWS 'langstatusbar' the language listbox resource file gets a new location.
+        my $module = "svx";
+        my $name = "source/dialog/langtab.src";
+        if (!(-e "$SRC_ROOT/$module/$name")) {
+            $module = "svtools";
+            $name = "source/misc/langtab.src";
+        }
+        # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ;
+        # lookup define
+        @resultlist = grepFile(
+            $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;',
+            "$SRC_ROOT", $module, $name, ());
+        # lookup string
+        if (!@resultlist) {
+            grepFile(
+                $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;',
+                "$SRC_ROOT", $module, $name, ()); }
+
+        for my $langcoun (@langcoungreplist)
+        {
+            # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu";
+            grepFile(
+                '^\s*Name\s*\(' . $langcoun . '\)\s*=',
+                "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());
+
+            # completelangiso=af ar as-IN ... zu
+            grepFile(
+                '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '',
+                "$SRC_ROOT", "solenv", "inc/langlist.mk",
+                # needs a duplicated pair of backslashes to produce a literal \\
+                ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$'));
+
+            # af 1252 1078 # Afrikaans
+            grepFile(
+                '^\s*' . $langcoun . '',
+                "$SRC_ROOT", "l10ntools", "source/ulfconv/msi-encodinglist.txt", ());
+        }
+    }
+    return 0;
+}
+
+# Hand main()'s status (1 on a usage error, 0 otherwise) to the caller
+# instead of discarding it and always exiting with 0.
+exit( main());
diff --git a/i18nlangtag/source/isolang/lcid.awk b/i18nlangtag/source/isolang/lcid.awk
new file mode 100644
index 000000000000..db1a48d57ed8
--- /dev/null
+++ b/i18nlangtag/source/isolang/lcid.awk
@@ -0,0 +1,187 @@
+#!/usr/bin/awk -f
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+# Utility to compare MS-LANGID definitions with those defined in ../../inc/i18nlangtag/lang.h
+# Run in i18nlangtag/source/isolang
+#
+# outputs new #define LANGUAGE_... 0x... and also some commented out substrings
+# that were matched in already existing defines.
+#
+# ATTENTION! The sed filter in the command line examples below ensures that a
+# '|' border is drawn by html2text in data tables, and nowhere else, on which
+# this awk script relies. This script also heavily relies on the column layout
+# encountered. Should MS decide to change their layout or their CSS names
+# ("data..."), this would probably break. Should html2text decide that the last
+# border="..." attribute encountered wins instead of the first, this may break
+# also.
+#
+# sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g'
+#
+# After html2text it is best to clean up the file so that it contains _only_
+# the table entries, but this is not necessary as entries are filtered. Check
+# the output.
+#
+# Expects input from the saved page of one of
+#
+# (1)
+# http://www.microsoft.com/globaldev/reference/lcid-all.mspx
+# filtered through ``html2text -nobs ...'', generated table:
+# blank,name,hex,dec,blank fields:
+# |Afrikaans_-_South_Africa___|0436___|1078___|
+#
+# complete command line:
+# lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
+#
+#
+# (2)
+# http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx
+# filtered through ``html2text -nobs ...'', generated table:
+# blank,name,hex,dec,inputlocales,collection,blank fields:
+# |Afrikaans |0436 |1078 |0436:00000409, |Basic |
+#
+# complete command line:
+# lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
+#
+#
+# (3)
+# http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp
+# filtered through ``html2text -nobs ...'', generated table:
+# blank,hex,locale,name,blank fields:
+# |0x0436___|af-ZA___|Afrikaans_(South_Africa)___|
+#
+# complete command line:
+# lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile
+#
+# Author: Eike Rathke <erack@sun.com>, <er@openoffice.org>
+#
+
+BEGIN {
+    # Read the already existing definitions from lang.h into lang[HEX]=NAME.
+    # This has to happen before FS is changed further down, the #define lines
+    # are split into $2 (name) and $3 (value) on whitespace.
+    langfile = "../../inc/i18nlangtag/lang.h"
+    while ((status = (getline < langfile)) > 0)
+    {
+        if ($0 ~ /^#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/)
+        {
+            # lang[HEX]=NAME, HEX is the value without the leading "0x"
+            lang[toupper(substr($3,3))] = toupper($2)
+            #print substr($3,3) "=" $2
+        }
+    }
+    # getline yields -1 if the file cannot be read. Without lang.h the lang[]
+    # array stays empty and every input entry is reported as new, so tell.
+    if (status < 0)
+        print "Warning: cannot read " langfile >>"/dev/stderr"
+    close(langfile)
+    # html2text table follows, fields are separated by a literal '|'.
+    # A single character other than space is used literally as FS; do not
+    # write "\|", that is an undefined escape sequence in an awk string.
+    FS = "|"
+    # Known input layouts, 0 meaning not detected yet.
+    filetype = 0
+    lcid_all = 1
+    xp_lcid  = 2
+    nls_238z = 3
+    filetypename[filetype] = "unknown"
+    filetypename[lcid_all] = "lcid_all"
+    filetypename[xp_lcid]  = "xp_lcid"
+    filetypename[nls_238z] = "nls_238z"
+    # Field number of the language name per layout.
+    namefield[lcid_all] = 2
+    namefield[xp_lcid]  = 2
+    namefield[nls_238z] = 4
+    # Field number of the hexadecimal LCID per layout.
+    hexfield[lcid_all] = 3
+    hexfield[xp_lcid]  = 3
+    hexfield[nls_238z] = 2
+    # Field number of the locale string per layout, 0 if there is none.
+    locfield[lcid_all] = 0
+    locfield[xp_lcid]  = 0
+    locfield[nls_238z] = 3
+}
+
+# Records with less than 5 fields cannot be a row of any known table layout.
+(NF < 5) { next }
+
+# Runs only as long as no file type was detected: derive the table layout
+# from the number of fields and the content of the second field, then
+# remember which field numbers hold name, hex value and locale. Records seen
+# before a layout is recognized are skipped.
+!filetype {
+ if (NF == 5)
+ {
+ if ($2 ~ /^0x/)
+ filetype = nls_238z
+ else if ($2 ~ /^Afrikaans/)
+ filetype = lcid_all
+ }
+ else if (NF == 7)
+ filetype = xp_lcid
+ if (!filetype)
+ next
+ name = namefield[filetype]
+ hex = hexfield[filetype]
+ loc = locfield[filetype]
+}
+
+# Clean up the fields of a table row: from the name remove everything up to
+# and including the first ':' and everything from the first '.' on, then trim
+# leading and trailing blanks and underscores from hex, name and, if the
+# layout has a locale field (loc != 0), from that as well.
+{
+ gsub( /^[^:]*:/, "", $name)
+ gsub( /\..*/, "", $name)
+ gsub( /(^[ _]+)|([ _]+$)/, "", $hex)
+ gsub( /(^[ _]+)|([ _]+$)/, "", $name)
+ if (loc)
+ gsub( /(^[ _]+)|([ _]+$)/, "", $loc)
+}
+
+# strip a leading 0x so only the hexadecimal digits remain
+($hex ~ /^0x/) { $hex = substr( $hex, 3) }
+
+# Not a hexadecimal number => remember the record as filtered and skip it.
+# This is checked before padding, otherwise an empty cell would be turned
+# into "0" and wrongly be accepted as an LCID.
+($hex !~ /^[0-9a-fA-F][0-9a-fA-F]*$/) { filtered[$hex] = $0; next }
+
+# if only 464 (or even 64) instead of 0464, make it match lang.h
+{ while (length($hex) < 4) $hex = "0" $hex }
+
+# Register the row under its upper case hex value:
+#   all[HEX]     = name of every entry seen in the input
+#   comment[HEX] = locale string as C comment, if the layout has a locale
+#   newlang[HEX] = name of entries lang.h does not define yet
+{
+    key = toupper($hex)
+    all[key] = $name
+    if (loc)
+        comment[key] = " /* " $loc " */"
+    if (!(key in lang))
+        newlang[key] = $name
+}
+
+# Report: for every new LCID a proposed #define plus the already existing
+# defines that contain one of the words of its name, then the defines of
+# lang.h that did not occur in the input, then the filtered table rows.
+END {
+    if (!filetype)
+    {
+        print "No file type recognized." >>"/dev/stderr"
+        exit(1)
+    }
+    print "// assuming " filetypename[filetype] " file"
+    # every new language
+    for (code in newlang)
+    {
+        title = newlang[code]
+        printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", title, code, comment[code])
+        count = split(title, words, /[^A-Za-z0-9]/)
+        ident = ""
+        for (w = 1; w <= count; ++w)
+        {
+            # empty pieces result from adjacent non-identifier characters
+            if (!length(words[w]))
+                continue
+            # each identifier word of the language name, joined by '_'
+            if (ident)
+                ident = ident "_"
+            upper = toupper(words[w])
+            ident = ident upper
+            # contained in already existing definitions?
+            for (known in lang)
+            {
+                if (lang[known] ~ upper)
+                    printf( "// %-50s %s\n", words[w] ": " lang[known], known)
+            }
+        }
+        printf( "#define LANGUAGE_%-26s 0x%s\n", ident, code)
+    }
+    print "\n// --- reverse check follows ----------------------------------\n"
+    for (code in lang)
+    {
+        if (code in all)
+            continue
+        print "// not in input file: " code " " lang[code]
+    }
+    print "\n// --- filtered table entries follow (if any) -----------------\n"
+    for (code in filtered)
+        print "// filtered: " code " " filtered[code]
+}
diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx
new file mode 100644
index 000000000000..8955f554007f
--- /dev/null
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -0,0 +1,492 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+#include <com/sun/star/i18n/ScriptType.hpp>
+
+#include "i18nlangtag/mslangid.hxx"
+
+
+// Configured overrides for the system and UI language. LANGUAGE_SYSTEM means
+// "not configured", in which case getRealLanguage() asks the system instead.
+LanguageType MsLangId::nConfiguredSystemLanguage = LANGUAGE_SYSTEM;
+LanguageType MsLangId::nConfiguredSystemUILanguage = LANGUAGE_SYSTEM;
+
+// Configured fallback languages per script type. LANGUAGE_SYSTEM means "not
+// configured", in which case resolveSystemLanguageByScriptType() uses a
+// built-in default.
+LanguageType MsLangId::nConfiguredWesternFallback = LANGUAGE_SYSTEM;
+LanguageType MsLangId::nConfiguredAsianFallback = LANGUAGE_SYSTEM;
+LanguageType MsLangId::nConfiguredComplexFallback = LANGUAGE_SYSTEM;
+
+// static
+/** Stores the language that getRealLanguage() returns for LANGUAGE_SYSTEM;
+    passing LANGUAGE_SYSTEM restores querying the system. */
+void MsLangId::setConfiguredSystemLanguage( LanguageType nLang )
+{
+    MsLangId::nConfiguredSystemLanguage = nLang;
+}
+
+
+// static
+/** Stores the language that getRealLanguage() returns for
+    LANGUAGE_HID_HUMAN_INTERFACE_DEVICE; passing LANGUAGE_SYSTEM restores
+    querying the system UI language. */
+void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang )
+{
+    MsLangId::nConfiguredSystemUILanguage = nLang;
+}
+
+// static
+/** Stores the fallback language resolveSystemLanguageByScriptType() uses
+    for script types other than ASIAN and COMPLEX. */
+void MsLangId::setConfiguredWesternFallback( LanguageType nLang )
+{
+    MsLangId::nConfiguredWesternFallback = nLang;
+}
+
+// static
+/** Stores the fallback language resolveSystemLanguageByScriptType() uses
+    for script type ASIAN. */
+void MsLangId::setConfiguredAsianFallback( LanguageType nLang )
+{
+    MsLangId::nConfiguredAsianFallback = nLang;
+}
+
+// static
+/** Stores the fallback language resolveSystemLanguageByScriptType() uses
+    for script type COMPLEX. */
+void MsLangId::setConfiguredComplexFallback( LanguageType nLang )
+{
+    MsLangId::nConfiguredComplexFallback = nLang;
+}
+
+// static
+/** Maps all of the "system" placeholder values to LANGUAGE_SYSTEM, any
+    other value is returned unchanged. */
+inline LanguageType MsLangId::simplifySystemLanguages( LanguageType nLang )
+{
+    const bool bSystemAlias =
+        nLang == LANGUAGE_PROCESS_OR_USER_DEFAULT ||
+        nLang == LANGUAGE_SYSTEM_DEFAULT ||
+        nLang == LANGUAGE_SYSTEM;
+    if (bSystemAlias)
+        nLang = LANGUAGE_SYSTEM;
+    return nLang;
+}
+
+// static
+/** Resolves the system and UI placeholder values to a real language, using
+    the configured language if one was set and the system's otherwise.
+    LANGUAGE_DONTKNOW, passed in or resulting, becomes LANGUAGE_ENGLISH_US. */
+LanguageType MsLangId::getRealLanguage( LanguageType nLang )
+{
+    const LanguageType nSimplified = simplifySystemLanguages( nLang);
+    if (nSimplified == LANGUAGE_SYSTEM)
+    {
+        const bool bConfigured = (nConfiguredSystemLanguage != LANGUAGE_SYSTEM);
+        if (bConfigured)
+            nLang = nConfiguredSystemLanguage;
+        else
+            nLang = getSystemLanguage();
+    }
+    else if (nSimplified == LANGUAGE_HID_HUMAN_INTERFACE_DEVICE)
+    {
+        const bool bConfigured = (nConfiguredSystemUILanguage != LANGUAGE_SYSTEM);
+        if (bConfigured)
+            nLang = nConfiguredSystemUILanguage;
+        else
+            nLang = getSystemUILanguage();
+    }
+    else
+    {
+        /* TODO: would this be useful here? */
+        //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang);
+    }
+
+    return (nLang == LANGUAGE_DONTKNOW) ? LanguageType( LANGUAGE_ENGLISH_US) : nLang;
+}
+
+
+// static
+/** Resolves nLang via getRealLanguage() and, if its script type does not
+    match the requested nType (other than WEAK), replaces it with the
+    configured fallback for that script type or a built-in default.
+    LANGUAGE_NONE is passed through untouched. */
+LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType )
+{
+    namespace ScriptType = ::com::sun::star::i18n::ScriptType;
+
+    if (nLang == LANGUAGE_NONE)
+        return nLang;
+
+    nLang = getRealLanguage(nLang);
+
+    // Nothing to replace if any script is fine or the script already matches.
+    if (nType == ScriptType::WEAK || getScriptType(nLang) == nType)
+        return nLang;
+
+    // Pick the configured fallback and the default used if none is configured.
+    LanguageType nConfigured;
+    LanguageType nDefault;
+    switch (nType)
+    {
+        case ScriptType::ASIAN:
+            nConfigured = nConfiguredAsianFallback;
+            nDefault = LANGUAGE_CHINESE_SIMPLIFIED;
+            break;
+        case ScriptType::COMPLEX:
+            nConfigured = nConfiguredComplexFallback;
+            nDefault = LANGUAGE_HINDI;
+            break;
+        default:
+            nConfigured = nConfiguredWesternFallback;
+            nDefault = LANGUAGE_ENGLISH_US;
+            break;
+    }
+    if (nConfigured == LANGUAGE_SYSTEM)
+        return nDefault;
+    return nConfigured;
+}
+
+// static
+/** Fills rLocale with the ISO names of nLang. A previous Variant is cleared.
+    If the resulting language is a privateuse "x-..." or the "*" joker, it is
+    moved to the Variant and the Language is set to "qlt". */
+void MsLangId::Conversion::convertLanguageToLocale( LanguageType nLang,
+        ::com::sun::star::lang::Locale & rLocale )
+{
+    if (!rLocale.Variant.isEmpty())
+        rLocale.Variant = OUString();
+    convertLanguageToIsoNames( nLang, rLocale.Language, rLocale.Country);
+
+    /* FIXME: this x-... is temporary until conversion will be moved up to
+     * LanguageTag. Also handle the nasty "*" joker as privateuse. */
+    const bool bPrivateUse = rLocale.Language.startsWith( "x-") || (rLocale.Language == "*");
+    if (!bPrivateUse)
+        return;
+
+    rLocale.Variant = rLocale.Language;
+    rLocale.Language = "qlt";
+}
+
+
+// static
+/** Returns the locale for nLang. If bResolveSystem is false, the system
+    placeholder values yield an empty locale and only LANGUAGE_DONTKNOW is
+    resolved; if true, nLang is resolved via getRealLanguage() first. */
+::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocale(
+        LanguageType nLang, bool bResolveSystem )
+{
+    ::com::sun::star::lang::Locale aLocale;
+
+    // System language and no resolving requested => empty locale.
+    if (!bResolveSystem && simplifySystemLanguages( nLang) == LANGUAGE_SYSTEM)
+        return aLocale;
+
+    // Still resolve LANGUAGE_DONTKNOW if resolving is not requested,
+    // but not LANGUAGE_SYSTEM or others.
+    const bool bResolve = bResolveSystem || nLang == LANGUAGE_DONTKNOW;
+    if (bResolve)
+        nLang = MsLangId::getRealLanguage( nLang);
+
+    convertLanguageToLocale( nLang, aLocale);
+    return aLocale;
+}
+
+
+// static
+/** Returns the language ID for rLocale. An empty Language and an unknown
+    locale both yield LANGUAGE_SYSTEM. A Variant starting with "x-" (any
+    case) or being "*" is converted as privateuse, otherwise the Language and
+    Country ISO names are converted. */
+LanguageType MsLangId::Conversion::convertLocaleToLanguage(
+        const ::com::sun::star::lang::Locale& rLocale )
+{
+    // empty language => LANGUAGE_SYSTEM
+    if (rLocale.Language.isEmpty())
+        return LANGUAGE_SYSTEM;
+
+    /* FIXME: this x-... is temporary until conversion will be moved up to
+     * LanguageTag. Also handle the nasty "*" joker as privateuse. */
+    const bool bPrivateUse = !rLocale.Variant.isEmpty() &&
+        (rLocale.Variant.startsWithIgnoreAsciiCase( "x-") || (rLocale.Variant == "*"));
+
+    LanguageType nRet;
+    if (bPrivateUse)
+        nRet = convertPrivateUseToLanguage( rLocale.Variant);
+    else
+        nRet = convertIsoNamesToLanguage( rLocale.Language, rLocale.Country);
+
+    if (nRet == LANGUAGE_DONTKNOW)
+        return LANGUAGE_SYSTEM;
+    return nRet;
+}
+
+
+// static
+/** Resolves nLang via getRealLanguage() and returns the result of
+    lookupFallbackLocale() for it. */
+::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocaleWithFallback(
+        LanguageType nLang )
+{
+    const LanguageType nReal = MsLangId::getRealLanguage( nLang);
+    return lookupFallbackLocale( nReal);
+}
+
+
+// static
+/** Returns the fallback locale for rLocale; a locale with an empty Language
+    is treated as LANGUAGE_SYSTEM. */
+::com::sun::star::lang::Locale MsLangId::getFallbackLocale(
+        const ::com::sun::star::lang::Locale & rLocale )
+{
+    const bool bSystem = rLocale.Language.isEmpty();
+    if (!bSystem)
+        return Conversion::lookupFallbackLocale( rLocale);
+
+    // empty language => LANGUAGE_SYSTEM
+    return Conversion::convertLanguageToLocaleWithFallback( LANGUAGE_SYSTEM);
+}
+
+// static
+/** Whether the primary language of nLang is one of the listed right-to-left
+    languages. Only the primary language part is compared, so every
+    sublanguage of a listed language matches as well. */
+bool MsLangId::isRightToLeft( LanguageType nLang )
+{
+    bool bRightToLeft = false;
+    switch( nLang & LANGUAGE_MASK_PRIMARY )
+    {
+        case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_HEBREW              & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_YIDDISH             & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_URDU                & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_FARSI               & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_KASHMIRI            & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_SINDHI              & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_UIGHUR_CHINA        & LANGUAGE_MASK_PRIMARY :
+        case LANGUAGE_USER_KYRGYZ_CHINA   & LANGUAGE_MASK_PRIMARY :
+            bRightToLeft = true;
+            break;
+
+        default:
+            break;
+    }
+    return bRightToLeft;
+}
+
+// static
+/** Any Chinese that is not one of the Traditional Chinese language IDs. */
+bool MsLangId::isSimplifiedChinese( LanguageType nLang )
+{
+    if (!isChinese(nLang))
+        return false;
+    return !isTraditionalChinese(nLang);
+}
+
+// static
+/** Any "zh" locale that is not one of the Traditional Chinese locales. */
+bool MsLangId::isSimplifiedChinese( const ::com::sun::star::lang::Locale & rLocale )
+{
+    if (rLocale.Language != "zh")
+        return false;
+    return !isTraditionalChinese(rLocale);
+}
+
+// static
+/** True exactly for the Traditional, Hong Kong and Macau Chinese IDs. */
+bool MsLangId::isTraditionalChinese( LanguageType nLang )
+{
+    return nLang == LANGUAGE_CHINESE_TRADITIONAL
+        || nLang == LANGUAGE_CHINESE_HONGKONG
+        || nLang == LANGUAGE_CHINESE_MACAU;
+}
+
+// static
+/** True for language "zh" with country "TW", "HK" or "MO". */
+bool MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLocale )
+{
+    if (!(rLocale.Language == "zh"))
+        return false;
+    const bool bTraditionalCountry =
+        rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO";
+    return bTraditionalCountry;
+}
+
+//static
+/** Whether the primary language of nLang is Chinese. */
+bool MsLangId::isChinese( LanguageType nLang )
+{
+    const bool bChinese = (MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_CHINESE);
+    return bChinese;
+}
+
+//static
+/** Whether the primary language of nLang is Korean. */
+bool MsLangId::isKorean( LanguageType nLang )
+{
+    const bool bKorean = (MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_KOREAN);
+    return bKorean;
+}
+
+// static
+/** Whether the primary language of nLang is Chinese, Japanese or Korean. */
+bool MsLangId::isCJK( LanguageType nLang )
+{
+    bool bCJK = false;
+    switch (nLang & LANGUAGE_MASK_PRIMARY)
+    {
+        case LANGUAGE_CHINESE  & LANGUAGE_MASK_PRIMARY:
+        case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
+        case LANGUAGE_KOREAN   & LANGUAGE_MASK_PRIMARY:
+            bCJK = true;
+            break;
+        default:
+            break;
+    }
+    return bCJK;
+}
+
+// static
+/** True for the CJK languages and for exactly LANGUAGE_HUNGARIAN. */
+bool MsLangId::isFamilyNameFirst( LanguageType nLang )
+{
+    if (isCJK(nLang))
+        return true;
+    return nLang == LANGUAGE_HUNGARIAN;
+}
+
+// static
+/** Currently the same set of languages as isCJK(). */
+bool MsLangId::hasForbiddenCharacters( LanguageType nLang )
+{
+    const bool bForbidden = isCJK(nLang);
+    return bForbidden;
+}
+
+
+// static
+/** Whether the primary language of nLang is Burmese, Khmer, Lao or Thai. */
+bool MsLangId::needsSequenceChecking( LanguageType nLang )
+{
+    bool bNeeded = false;
+    switch (nLang & LANGUAGE_MASK_PRIMARY)
+    {
+        case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY:
+        case LANGUAGE_KHMER   & LANGUAGE_MASK_PRIMARY:
+        case LANGUAGE_LAO     & LANGUAGE_MASK_PRIMARY:
+        case LANGUAGE_THAI    & LANGUAGE_MASK_PRIMARY:
+            bNeeded = true;
+            break;
+        default:
+            break;
+    }
+    return bNeeded;
+}
+
+
+// static
+/** Returns the i18n ScriptType (ASIAN, COMPLEX or LATIN) of nLang. The
+    explicitly listed language IDs are checked first, then the primary
+    language catches all Chinese (ASIAN) and all Arabic (COMPLEX); anything
+    else is LATIN. */
+sal_Int16 MsLangId::getScriptType( LanguageType nLang )
+{
+    namespace ScriptType = ::com::sun::star::i18n::ScriptType;
+
+    switch( nLang )
+    {
+        // CJK
+        // all LANGUAGE_CHINESE_... are caught below
+        case LANGUAGE_JAPANESE:
+        case LANGUAGE_KOREAN:
+        case LANGUAGE_KOREAN_JOHAB:
+        case LANGUAGE_USER_KOREAN_NORTH:
+            return ScriptType::ASIAN;
+
+        // CTL
+        // all LANGUAGE_ARABIC_... are caught below
+        case LANGUAGE_AMHARIC_ETHIOPIA:
+        case LANGUAGE_ASSAMESE:
+        case LANGUAGE_BENGALI:
+        case LANGUAGE_BENGALI_BANGLADESH:
+        case LANGUAGE_BURMESE:
+        case LANGUAGE_FARSI:
+        case LANGUAGE_HEBREW:
+        case LANGUAGE_YIDDISH:
+        case LANGUAGE_USER_YIDDISH_US:
+        case LANGUAGE_MARATHI:
+        case LANGUAGE_PUNJABI:
+        case LANGUAGE_GUJARATI:
+        case LANGUAGE_HINDI:
+        case LANGUAGE_KANNADA:
+        case LANGUAGE_KASHMIRI:
+        case LANGUAGE_KASHMIRI_INDIA:
+        case LANGUAGE_KHMER:
+        case LANGUAGE_LAO:
+        case LANGUAGE_MALAYALAM:
+        case LANGUAGE_MANIPURI:
+        case LANGUAGE_MONGOLIAN_MONGOLIAN:
+        case LANGUAGE_NEPALI:
+        case LANGUAGE_NEPALI_INDIA:
+        case LANGUAGE_ORIYA:
+        case LANGUAGE_SANSKRIT:
+        case LANGUAGE_SINDHI:
+        case LANGUAGE_SINDHI_PAKISTAN:
+        case LANGUAGE_SINHALESE_SRI_LANKA:
+        case LANGUAGE_SYRIAC:
+        case LANGUAGE_TAMIL:
+        case LANGUAGE_TELUGU:
+        case LANGUAGE_THAI:
+        case LANGUAGE_TIBETAN:
+        case LANGUAGE_DZONGKHA:
+        case LANGUAGE_URDU:
+        case LANGUAGE_URDU_PAKISTAN:
+        case LANGUAGE_URDU_INDIA:
+        case LANGUAGE_USER_KURDISH_IRAQ:
+        case LANGUAGE_USER_KURDISH_IRAN:
+        case LANGUAGE_DHIVEHI:
+        case LANGUAGE_USER_BODO_INDIA:
+        case LANGUAGE_USER_DOGRI_INDIA:
+        case LANGUAGE_USER_MAITHILI_INDIA:
+        case LANGUAGE_UIGHUR_CHINA:
+        case LANGUAGE_USER_LIMBU:
+        case LANGUAGE_USER_KYRGYZ_CHINA:
+            return ScriptType::COMPLEX;
+
+// Script type currently not known, hence defaulted to LATIN:
+/*
+#define LANGUAGE_ARMENIAN 0x042B
+#define LANGUAGE_INDONESIAN 0x0421
+#define LANGUAGE_KAZAKH 0x043F
+#define LANGUAGE_KONKANI 0x0457
+#define LANGUAGE_MACEDONIAN 0x042F
+#define LANGUAGE_TATAR 0x0444
+*/
+
+        default:
+            break;
+    }
+
+    // Not listed explicitly, decide by primary language.
+    switch ( nLang & LANGUAGE_MASK_PRIMARY )
+    {
+        // CJK catcher
+        case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
+            return ScriptType::ASIAN;
+        // CTL catcher
+        case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY:
+            return ScriptType::COMPLEX;
+        default:
+            break;
+    }
+
+    // Western (actually not necessarily Latin but also Cyrillic, for example)
+    return ScriptType::LATIN;
+}
+
+
+// static
+/** Returns the language to use instead of an obsolete or aliased nLang, or
+    nLang itself if there is no replacement. ca-XV is replaced by Catalan
+    only if bUserInterfaceSelection is false. */
+LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bool bUserInterfaceSelection )
+{
+    switch (nLang)
+    {
+        case LANGUAGE_OBSOLETE_USER_LATIN:
+            return LANGUAGE_LATIN;
+        case LANGUAGE_OBSOLETE_USER_MAORI:
+            return LANGUAGE_MAORI_NEW_ZEALAND;
+        case LANGUAGE_OBSOLETE_USER_KINYARWANDA:
+            return LANGUAGE_KINYARWANDA_RWANDA;
+        case LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN:
+            return LANGUAGE_UPPER_SORBIAN_GERMANY;
+        case LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN:
+            return LANGUAGE_LOWER_SORBIAN_GERMANY;
+        case LANGUAGE_OBSOLETE_USER_OCCITAN:
+            return LANGUAGE_OCCITAN_FRANCE;
+        case LANGUAGE_OBSOLETE_USER_BRETON:
+            return LANGUAGE_BRETON_FRANCE;
+        case LANGUAGE_OBSOLETE_USER_KALAALLISUT:
+            return LANGUAGE_KALAALLISUT_GREENLAND;
+        case LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH:
+            return LANGUAGE_LUXEMBOURGISH_LUXEMBOURG;
+        case LANGUAGE_OBSOLETE_USER_KABYLE:
+            return LANGUAGE_TAMAZIGHT_LATIN;
+
+        // The following are not strictly obsolete but should be mapped to a
+        // replacement locale when encountered.
+
+        // no_NO is an alias for nb_NO
+        case LANGUAGE_NORWEGIAN:
+            return LANGUAGE_NORWEGIAN_BOKMAL;
+
+        // #i94435# A Spanish variant that differs only in collation details we
+        // do not support.
+        case LANGUAGE_SPANISH_DATED:
+            return LANGUAGE_SPANISH_MODERN;
+
+        // Do not use ca-XV for document content.
+        /* TODO: remove in case we implement BCP47 language tags. */
+        case LANGUAGE_USER_CATALAN_VALENCIAN:
+            if (!bUserInterfaceSelection)
+                return LANGUAGE_CATALAN;
+            break;
+
+        default:
+            break;  // nothing
+    }
+    return nLang;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
new file mode 100644
index 000000000000..e1eea3b75f04
--- /dev/null
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -0,0 +1,1254 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "i18nlangtag/languagetag.hxx"
+#include "i18nlangtag/mslangid.hxx"
+#include <rtl/ustrbuf.hxx>
+#include <rtl/bootstrap.hxx>
+#include <osl/file.hxx>
+#include <rtl/instance.hxx>
+#include <rtl/locale.h>
+
+//#define erDEBUG
+
+#if defined(ENABLE_LIBLANGTAG)
+#include <liblangtag/langtag.h>
+#else
+/* Replacement code for LGPL phobic and Android systems.
+ * For iOS we could probably use NSLocale instead, that should have more or
+ * less required functionality. If it is good enough, it could be used for Mac
+ * OS X, too.
+ */
+#include "simple-langtag.cxx"
+#endif
+
+using rtl::OUString;
+using rtl::OString;
+using rtl::OUStringBuffer;
+using namespace com::sun::star;
+
+// The actual pointer type of mpImplLangtag that is declared void* to not
+// pollute the entire code base with liblangtag.
+#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
+#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
+
+/** Convention to signal presence of BCP 47 language tag in a Locale's Variant
+ field. The Locale's Language field then will contain this ISO 639-2
+ reserved for local use code. */
+#define ISO639_LANGUAGE_TAG "qlt"
+
+
+// Scope guard for an lt_error_t*: the pointer starts out NULL, is meant to be
+// handed to liblangtag calls as &p, and is unref'd on destruction if set.
+struct myLtError
+{
+    lt_error_t* p;
+
+    myLtError()
+        : p(NULL)
+    {
+    }
+
+    ~myLtError()
+    {
+        if (p != NULL)
+            lt_error_unref( p);
+    }
+};
+
+
+// "statics" to be returned as const reference to an empty locale and string.
+// Kept in an unnamed namespace, so they are local to this translation unit.
+namespace {
+struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
+struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
+}
+
+
+/** A reference holder for liblangtag data de/initialization, one static
+ instance. Currently implemented such that the first "ref" inits and dtor
+ (our library deinitialized) tears down.
+
+ mnRef == SAL_MAX_UINT32 is a sentinel meaning "held eternally": setup()
+ sets it, and from then on incRef() and decRef() do nothing.
+*/
+class LiblantagDataRef
+{
+public:
+ LiblantagDataRef();
+ ~LiblantagDataRef();
+ // Calls setup() if the count was 0 before incrementing; does nothing once
+ // the count is pinned to SAL_MAX_UINT32.
+ inline void incRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && !mnRef++)
+ setup();
+ }
+ // Calls teardown() if the count drops to 0; does nothing if the count is
+ // already 0 or pinned to SAL_MAX_UINT32.
+ inline void decRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
+ teardown();
+ }
+private:
+ rtl::OString maDataPath; // path to liblangtag data, "|" if system
+ sal_uInt32 mnRef;
+
+ void setupDataPath();
+ void setup();
+ void teardown();
+};
+
+// The one static instance, obtained via theDataRef::get().
+namespace {
+struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
+}
+
+// Starts with no references; the database is initialized by the first
+// incRef(), not here.
+LiblantagDataRef::LiblantagDataRef()
+ :
+ mnRef(0)
+{
+}
+
+LiblantagDataRef::~LiblantagDataRef()
+{
+    // When destructed we're tearing down unconditionally, whatever the
+    // count is, provided the database was set up at all (count not 0).
+    if (mnRef == 0)
+        return;
+    mnRef = 0;
+    teardown();
+}
+
+// Initializes the liblangtag database, determining the data path first if
+// that was not done yet, and pins the reference count so that later
+// incRef()/decRef() calls neither set up again nor tear down.
+void LiblantagDataRef::setup()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
+ if (maDataPath.isEmpty())
+ setupDataPath();
+ lt_db_initialize();
+ // Hold ref eternally.
+ mnRef = SAL_MAX_UINT32;
+}
+
+// Finalizes the liblangtag database; reached from decRef() when the count
+// drops to 0, which the destructor forces.
+void LiblantagDataRef::teardown()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
+ lt_db_finalize();
+}
+
+// Determines where the liblangtag data lives. If our own installation
+// contains language-subtag-registry.xml, that directory is handed to
+// liblangtag; otherwise maDataPath is set to "|" to denote the system
+// installation and liblangtag's default is left alone.
+void LiblantagDataRef::setupDataPath()
+{
+    // maDataPath is assumed to be empty here.
+    OUString aBaseURL("$BRAND_BASE_DIR/share/liblangtag");
+    rtl::Bootstrap::expandMacros(aBaseURL); //TODO: detect failure
+
+    // Check if data is in our own installation, else assume system
+    // installation.
+    OUString aRegistryURL( aBaseURL);
+    aRegistryURL += "/language-subtag-registry.xml";
+    osl::DirectoryItem aRegistryItem;
+    const bool bOwnData =
+        osl::DirectoryItem::get( aRegistryURL, aRegistryItem) == osl::DirectoryItem::E_None;
+    if (bOwnData)
+    {
+        OUString aSystemPath;
+        if (osl::FileBase::getSystemPathFromFileURL( aBaseURL, aSystemPath) == osl::FileBase::E_None)
+            maDataPath = OUStringToOString( aSystemPath, RTL_TEXTENCODING_UTF8);
+    }
+
+    if (maDataPath.isEmpty())
+    {
+        maDataPath = "|";   // assume system
+        return;
+    }
+    lt_db_set_datadir( maDataPath.getStr());
+}
+
+// Constructs from a BCP 47 language tag string. An empty string denotes the
+// system locale, in which case maBcp47 is not flagged as initialized.
+// Canonicalization is only done if bCanonicalize is true.
+// NOTE(review): mbInitializedBcp47 is initialized from mbSystemLocale, which
+// assumes mbSystemLocale is declared before it in the class - confirm in the
+// header.
+LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+ :
+ maBcp47( rBcp47LanguageTag),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rBcp47LanguageTag.isEmpty()),
+ mbInitializedBcp47( !mbSystemLocale),
+ mbInitializedLocale( false),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+// Constructs from a Locale. A locale with an empty Language denotes the
+// system locale, in which case maLocale is not flagged as initialized.
+// Nothing is converted or canonicalized here.
+LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
+ :
+ maLocale( rLocale),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+// Constructs from an MS-LangID. LANGUAGE_SYSTEM denotes the system locale,
+// in which case mnLangID is not flagged as initialized. Nothing is converted
+// here.
+LanguageTag::LanguageTag( LanguageType nLanguage )
+ :
+ mpImplLangtag( NULL),
+ mnLangID( nLanguage),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( false),
+ mbInitializedLangID( !mbSystemLocale),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+// Constructs from separate language and country strings, stored as a Locale
+// with an empty Variant. An empty rLanguage denotes the system locale, in
+// which case maLocale is not flagged as initialized.
+LanguageTag::LanguageTag( const OUString& rLanguage, const OUString& rCountry )
+ :
+ maLocale( rLanguage, rCountry, ""),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLanguage.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+// Constructs from an rtl_Locale: its three strings are copied into maLocale
+// and convertFromRtlLocale() is then applied. An empty Language denotes the
+// system locale.
+// NOTE(review): mbSystemLocale reads maLocale here, which assumes maLocale
+// is declared before it in the class - confirm in the header.
+LanguageTag::LanguageTag( const rtl_Locale & rLocale )
+ :
+ maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( maLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ convertFromRtlLocale();
+}
+
+
+// Copy constructor. All members are copied as is, except that a liblangtag
+// tag held by the source is duplicated with lt_tag_copy() so each instance
+// holds its own. If a tag was copied, the data reference is incremented to
+// balance the decRef() done when this instance releases the tag.
+LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
+ :
+ maLocale( rLanguageTag.maLocale),
+ maBcp47( rLanguageTag.maBcp47),
+ maCachedLanguage( rLanguageTag.maCachedLanguage),
+ maCachedScript( rLanguageTag.maCachedScript),
+ maCachedCountry( rLanguageTag.maCachedCountry),
+ mpImplLangtag( rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
+ mnLangID( rLanguageTag.mnLangID),
+ meIsValid( rLanguageTag.meIsValid),
+ meIsIsoLocale( rLanguageTag.meIsIsoLocale),
+ meIsIsoODF( rLanguageTag.meIsIsoODF),
+ meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
+ mbSystemLocale( rLanguageTag.mbSystemLocale),
+ mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
+ mbInitializedLocale( rLanguageTag.mbInitializedLocale),
+ mbInitializedLangID( rLanguageTag.mbInitializedLangID),
+ mbCachedLanguage( rLanguageTag.mbCachedLanguage),
+ mbCachedScript( rLanguageTag.mbCachedScript),
+ mbCachedCountry( rLanguageTag.mbCachedCountry),
+ mbIsFallback( rLanguageTag.mbIsFallback)
+{
+ if (mpImplLangtag)
+ theDataRef::get().incRef();
+}
+
+
+/** Copy assignment. All members are copied as is, except that a liblangtag
+    tag held by the source is duplicated with lt_tag_copy().
+
+    A tag this instance held before the assignment is released, together
+    with its data reference, so it does not leak. Self-assignment is a no-op.
+
+    @return reference to this instance
+ */
+LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
+{
+    if (this == &rLanguageTag)
+        return *this;
+
+    // Acquire the copy of the source's tag before releasing our own, so the
+    // data reference count never drops to zero in between.
+    void* pNewImplLangtag = rLanguageTag.mpImplLangtag ?
+        lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
+    if (pNewImplLangtag)
+        theDataRef::get().incRef();
+
+    // Release what we held so far, same as the destructor does.
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        theDataRef::get().decRef();
+    }
+    mpImplLangtag       = pNewImplLangtag;
+
+    maLocale            = rLanguageTag.maLocale;
+    maBcp47             = rLanguageTag.maBcp47;
+    maCachedLanguage    = rLanguageTag.maCachedLanguage;
+    maCachedScript      = rLanguageTag.maCachedScript;
+    maCachedCountry     = rLanguageTag.maCachedCountry;
+    mnLangID            = rLanguageTag.mnLangID;
+    meIsValid           = rLanguageTag.meIsValid;
+    meIsIsoLocale       = rLanguageTag.meIsIsoLocale;
+    meIsIsoODF          = rLanguageTag.meIsIsoODF;
+    meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
+    mbSystemLocale      = rLanguageTag.mbSystemLocale;
+    mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
+    mbInitializedLocale = rLanguageTag.mbInitializedLocale;
+    mbInitializedLangID = rLanguageTag.mbInitializedLangID;
+    mbCachedLanguage    = rLanguageTag.mbCachedLanguage;
+    mbCachedScript      = rLanguageTag.mbCachedScript;
+    mbCachedCountry     = rLanguageTag.mbCachedCountry;
+    mbIsFallback        = rLanguageTag.mbIsFallback;
+    return *this;
+}
+
+
+// Releases the liblangtag tag and its data reference, if a tag is held.
+LanguageTag::~LanguageTag()
+{
+    if (!mpImplLangtag)
+        return;
+
+    lt_tag_unref( MPLANGTAG);
+    theDataRef::get().decRef();
+}
+
+
+// Brings the instance back to the "system locale, nothing initialized or
+// cached" state and releases a liblangtag tag if one is held.
+void LanguageTag::resetVars()
+{
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        mpImplLangtag = NULL;
+        theDataRef::get().decRef();
+    }
+
+    const OUString aEmpty;
+
+    // values
+    maLocale            = lang::Locale();
+    maBcp47             = aEmpty;
+    maCachedLanguage    = aEmpty;
+    maCachedScript      = aEmpty;
+    maCachedCountry     = aEmpty;
+    mnLangID            = LANGUAGE_DONTKNOW;
+
+    // decisions, all undecided again
+    meIsValid           = DECISION_DONTKNOW;
+    meIsIsoLocale       = DECISION_DONTKNOW;
+    meIsIsoODF          = DECISION_DONTKNOW;
+    meIsLiblangtagNeeded= DECISION_DONTKNOW;
+
+    // flags
+    mbSystemLocale      = true;
+    mbInitializedBcp47  = false;
+    mbInitializedLocale = false;
+    mbInitializedLangID = false;
+    mbCachedLanguage    = false;
+    mbCachedScript      = false;
+    mbCachedCountry     = false;
+    mbIsFallback        = false;
+}
+
+
+// Re-initializes from a BCP 47 language tag string; an empty string denotes
+// the system locale. Canonicalizes only if bCanonicalize is true.
+void LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+{
+    const bool bSystem = rBcp47LanguageTag.isEmpty();
+
+    resetVars();
+    maBcp47             = rBcp47LanguageTag;
+    mbSystemLocale      = bSystem;
+    mbInitializedBcp47  = !bSystem;
+
+    if (!bCanonicalize)
+        return;
+    canonicalize();
+}
+
+
+// Re-initializes from a Locale; an empty Language denotes the system locale.
+void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
+{
+    const bool bSystem = rLocale.Language.isEmpty();
+
+    resetVars();
+    maLocale            = rLocale;
+    mbSystemLocale      = bSystem;
+    mbInitializedLocale = !bSystem;
+}
+
+
+// Re-initializes from an MS-LangID; LANGUAGE_SYSTEM denotes the system
+// locale.
+void LanguageTag::reset( LanguageType nLanguage )
+{
+    const bool bSystem = (nLanguage == LANGUAGE_SYSTEM);
+
+    resetVars();
+    mnLangID            = nLanguage;
+    mbSystemLocale      = bSystem;
+    mbInitializedLangID = !bSystem;
+}
+
+
+// Re-initializes from an rtl_Locale: resets from a Locale built of its three
+// strings, then applies convertFromRtlLocale().
+void LanguageTag::reset( const rtl_Locale & rLocale )
+{
+    const lang::Locale aLocale( rLocale.Language, rLocale.Country, rLocale.Variant);
+    reset( aLocale);
+    convertFromRtlLocale();
+}
+
+/** Decides whether liblangtag is needed for this tag and, if it is, parses
+    and canonicalizes maBcp47 with it.
+
+    Always makes sure maBcp47 is populated (system locale resolved) first.
+    While meIsLiblangtagNeeded is still DECISION_DONTKNOW, the tag is probed
+    without liblangtag: a locale with empty Variant, or one that maps to a
+    known MS-LangID, is settled as DECISION_NO. Locale / LangID values that
+    were only created for this probing are reset again afterwards.
+
+    @return true if the tag needs no liblangtag or was parsed and
+            canonicalized successfully (meIsValid == DECISION_YES), false if
+            parsing, canonicalizing or reparsing failed
+            (meIsValid == DECISION_NO).
+ */
+bool LanguageTag::canonicalize()
+{
+#ifdef erDEBUG
+ // dump once
+ struct dumper
+ {
+ void** mpp;
+ dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
+ ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
+ };
+ dumper aDumper( &mpImplLangtag);
+#endif
+
+ // Side effect: have maBcp47 in any case, resolved system.
+ // Some methods calling canonicalize() (or not calling it due to
+ // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
+ // meIsLiblangtagNeeded anywhere else than hereafter.
+ getBcp47( true );
+
+ // The simple cases and known locales don't need liblangtag processing,
+ // which also avoids loading liblangtag data on startup.
+ if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
+ {
+ bool bTemporaryLocale = false;
+ bool bTemporaryLangID = false;
+ if (!mbInitializedLocale && !mbInitializedLangID)
+ {
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ mbInitializedLangID = true;
+ }
+ else
+ {
+ // Now this is getting funny.. we only have some BCP47 string
+ // and want to determine if parsing it would be possible
+ // without using liblangtag just to see if it is a simple known
+ // locale.
+ OUString aLanguage, aScript, aCountry;
+ Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
+ if (eExt != EXTRACTED_NONE)
+ {
+ if (eExt == EXTRACTED_LSC && aScript.isEmpty())
+ {
+ maLocale.Language = aLanguage;
+ maLocale.Country = aCountry;
+ }
+ else
+ {
+ maLocale.Language = ISO639_LANGUAGE_TAG;
+ maLocale.Country = aCountry;
+ maLocale.Variant = maBcp47;
+ }
+ bTemporaryLocale = mbInitializedLocale = true;
+ }
+ }
+ }
+ if (mbInitializedLangID && !mbInitializedLocale)
+ {
+ // Do not call getLocale() here because that prefers
+ // convertBcp47ToLocale() which would end up in recursion via
+ // isIsoLocale()!
+
+ // Prepare to verify that we have a known locale, not just an
+ // arbitrary MS-LangID.
+ convertLangToLocale();
+ }
+ if (mbInitializedLocale)
+ {
+ if (maLocale.Variant.isEmpty())
+ meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
+ else
+ {
+ if (!mbInitializedLangID)
+ {
+ convertLocaleToLang();
+ if (bTemporaryLocale)
+ bTemporaryLangID = true;
+ }
+ if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
+ meIsLiblangtagNeeded = DECISION_NO; // known locale
+ }
+ }
+ if (bTemporaryLocale) // locale was only built for probing above, discard it again
+ {
+ mbInitializedLocale = false;
+ maLocale = lang::Locale();
+ }
+ if (bTemporaryLangID) // LangID was only derived from the probing locale, discard it again
+ {
+ mbInitializedLangID = false;
+ mnLangID = LANGUAGE_DONTKNOW;
+ }
+ }
+ if (meIsLiblangtagNeeded == DECISION_NO)
+ {
+ meIsValid = DECISION_YES; // really, known must be valid ...
+ return true; // that's it
+ }
+ meIsLiblangtagNeeded = DECISION_YES; // neither simple nor known, liblangtag has to decide
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);
+
+ if (!mpImplLangtag)
+ {
+ theDataRef::get().incRef();
+ mpImplLangtag = lt_tag_new();
+ }
+
+ myLtError aError;
+
+ if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
+ {
+ char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
+ SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47);
+ if (pTag)
+ {
+ OUString aOld( maBcp47);
+ maBcp47 = OUString::createFromAscii( pTag);
+ // Make the lt_tag_t follow the new string if different, which
+ // removes default script and such.
+ if (maBcp47 != aOld)
+ {
+ if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
+ {
+ SAL_WARN( "i18nlangtag", "LanguageTag::canonicalize: could not reparse " << maBcp47);
+ free( pTag);
+ meIsValid = DECISION_NO;
+ return false;
+ }
+ }
+ free( pTag);
+ meIsValid = DECISION_YES;
+ return true;
+ }
+ }
+ else
+ {
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: could not parse " << maBcp47);
+ }
+ meIsValid = DECISION_NO;
+ return false;
+}
+
+
+/** Derives maBcp47 from maLocale and flags the BCP 47 string as initialized.
+
+    For a system locale without initialized Locale the Locale is obtained from
+    the LangID first. A Locale whose Language is ISO639_LANGUAGE_TAG carries
+    the full tag in its Variant and is marked as not being an ISO locale.
+ */
+void LanguageTag::convertLocaleToBcp47()
+{
+    if (mbSystemLocale && !mbInitializedLocale)
+        convertLangToLocale();
+
+    if (maLocale.Language == ISO639_LANGUAGE_TAG)
+    {
+        maBcp47 = maLocale.Variant;
+        meIsIsoLocale = DECISION_NO;
+    }
+    else if (maLocale.Country.isEmpty())
+    {
+        // Language only.
+        maBcp47 = maLocale.Language;
+    }
+    else
+    {
+        /* XXX NOTE: most legacy code never evaluated the Variant field, so
+         * for now language and country are simply joined with a hyphen. If
+         * variant aware code shows up this needs to be taken care of. */
+        const sal_Int32 nCapacity = maLocale.Language.getLength() + 1 + maLocale.Country.getLength();
+        OUStringBuffer aBuf( nCapacity);
+        aBuf.append( maLocale.Language);
+        aBuf.append( '-');
+        aBuf.append( maLocale.Country);
+        maBcp47 = aBuf.makeStringAndClear();
+    }
+    mbInitializedBcp47 = true;
+}
+
+
+/** Derives mnLangID from maLocale (or from the resolved system language for
+    a system locale) and flags the LangID as initialized. */
+void LanguageTag::convertLocaleToLang()
+{
+    if (!mbSystemLocale)
+    {
+        /* FIXME: this is temporary until the code base is converted to not
+         * use MsLangId::convert...() anymore. After that, a proper new method
+         * has to be implemented to allow ISO639_LANGUAGE_TAG and script tag
+         * and such. */
+        mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
+    }
+    else
+    {
+        mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+    }
+    mbInitializedLangID = true;
+}
+
+
+/** Derives maLocale from the BCP 47 tag and flags the Locale as initialized.
+
+    An ISO locale is stored as plain Language / Country with empty Variant.
+    Anything else is stored with Language set to ISO639_LANGUAGE_TAG, the ISO
+    country (if any) in Country and the full tag in Variant.
+ */
+void LanguageTag::convertBcp47ToLocale()
+{
+    if (!isIsoLocale())
+    {
+        maLocale.Language = ISO639_LANGUAGE_TAG;
+        maLocale.Country = getCountry();
+        maLocale.Variant = maBcp47;
+    }
+    else
+    {
+        maLocale.Language = getLanguageFromLangtag();
+        maLocale.Country = getRegionFromLangtag();
+        maLocale.Variant = OUString();
+    }
+    mbInitializedLocale = true;
+}
+
+
+/** Derives mnLangID from the BCP 47 tag, going through maLocale, and flags
+    the LangID as initialized. A system locale resolves directly to the real
+    system language. */
+void LanguageTag::convertBcp47ToLang()
+{
+    if (!mbSystemLocale)
+    {
+        /* FIXME: this is temporary. If we support locales that consist not
+         * only of language and country, e.g. added script, this probably
+         * needs to be adapted. */
+        if (!mbInitializedLocale)
+            convertBcp47ToLocale();
+        convertLocaleToLang();
+    }
+    else
+    {
+        mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+    }
+    mbInitializedLangID = true;
+}
+
+
+/** Derives maLocale from mnLangID and flags the Locale as initialized.
+
+    For a system locale whose LangID is not yet initialized the real system
+    language is obtained first.
+ */
+void LanguageTag::convertLangToLocale()
+{
+    const bool bNeedSystemLangID = mbSystemLocale && !mbInitializedLangID;
+    if (bNeedSystemLangID)
+    {
+        mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+        mbInitializedLangID = true;
+    }
+
+    /* FIXME: this is temporary until the code base is converted to not use
+     * MsLangId::convert...() anymore. After that, a proper new method has to
+     * be implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
+    // The 'true' argument asks the conversion to resolve system here.
+    maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
+    mbInitializedLocale = true;
+}
+
+
+/** Derives maBcp47 from mnLangID by way of maLocale and flags the BCP 47
+    string as initialized. */
+void LanguageTag::convertLangToBcp47()
+{
+    /* FIXME: this is temporary. If we support locales that consist not only
+     * of language and country, e.g. added script, this probably needs to be
+     * adapted. */
+    if (!mbInitializedLocale)
+    {
+        convertLangToLocale();
+    }
+    convertLocaleToBcp47();
+
+    mbInitializedBcp47 = true;
+}
+
+
+/** Post-processes a Locale that was taken over from an rtl_Locale.
+
+    The rtl_Locale follows the Open Group Base Specification,
+    8.2 Internationalization Variables
+        language[_territory][.codeset][@modifier]
+    On GNU/Linux systems these usually are glibc locales.
+    sal/osl/unx/nlsupport.c _parse_locale() parses them into
+        Language: language               2 or 3 alpha code
+        Country: [territory]             2 alpha code
+        Variant: [.codeset][@modifier]
+    Variant effectively contains anything that follows the territory, not
+    looking for '.' dot delimiter or '@' modifier content.
+
+    Nothing happens if Variant is empty. Otherwise the pieces are joined to
+    a Unix locale string that is mapped to an MS-LangID (falling back to
+    LANGUAGE_ENGLISH_US if unknown), and the Locale is discarded.
+ */
+void LanguageTag::convertFromRtlLocale()
+{
+    if (maLocale.Variant.isEmpty())
+        return;
+
+    OString aUnxLocale = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
+            RTL_TEXTENCODING_UTF8);
+    /* FIXME: let liblangtag parse this entirely with
+     * lt_tag_convert_from_locale() but that needs a patch to pass the
+     * string. */
+#if 0
+    myLtError aError;
+    theDataRef::get().incRef();
+    mpImplLangtag = lt_tag_convert_from_locale( aUnxLocale.getStr(), &aError.p);
+    maBcp47 = OStringToOUString( lt_tag_get_string( MPLANGTAG), RTL_TEXTENCODING_UTF8);
+    mbInitializedBcp47 = true;
+#else
+    mnLangID = MsLangId::convertUnxByteStringToLanguage( aUnxLocale);
+    if (mnLangID == LANGUAGE_DONTKNOW)
+    {
+        SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aUnxLocale);
+        mnLangID = LANGUAGE_ENGLISH_US;     // we need _something_ here
+    }
+    mbInitializedLangID = true;
+#endif
+    // The Locale as received is of no further use, drop it.
+    maLocale = lang::Locale();
+    mbInitializedLocale = false;
+}
+
+
+/** Returns the BCP 47 string, converting from Locale or LangID on demand.
+
+    For a system locale with bResolveSystem==false the shared empty string is
+    returned instead.
+ */
+const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
+{
+    if (mbSystemLocale && !bResolveSystem)
+        return theEmptyBcp47::get();
+
+    if (mbInitializedBcp47)
+        return maBcp47;
+
+    // Lazy conversion modifies members, hence the const_cast.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedLocale)
+        pThis->convertLocaleToBcp47();
+    else
+        pThis->convertLangToBcp47();
+    return maBcp47;
+}
+
+
+/** Obtains the language subtag, either from the liblangtag object if one
+    exists or from the cached simple language/script/country extraction.
+
+    @return the language subtag, or an empty string if maBcp47 is empty or
+            nothing could be obtained.
+ */
+OUString LanguageTag::getLanguageFromLangtag()
+{
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return OUString();
+
+    if (!mpImplLangtag)
+    {
+        // Simple case without liblangtag, use (or fill) the cache.
+        if (mbCachedLanguage || cacheSimpleLSC())
+            return maCachedLanguage;
+        return OUString();
+    }
+
+    const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
+    SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+    if (!pLangT)
+        return OUString();
+
+    const char* pLang = lt_lang_get_tag( pLangT);
+    SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+    if (!pLang)
+        return OUString();
+    return OUString::createFromAscii( pLang);
+}
+
+
+/** Obtains the script subtag, either from the liblangtag object if one
+    exists or from the cached simple language/script/country extraction.
+
+    @return the script subtag, or an empty string if maBcp47 is empty or the
+            tag has no script.
+ */
+OUString LanguageTag::getScriptFromLangtag()
+{
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return OUString();
+
+    if (!mpImplLangtag)
+    {
+        // Simple case without liblangtag, use (or fill) the cache.
+        if (mbCachedScript || cacheSimpleLSC())
+            return maCachedScript;
+        return OUString();
+    }
+
+    const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
+    // pScriptT==NULL is valid for default scripts, so no warning here.
+    if (!pScriptT)
+        return OUString();
+
+    const char* pScript = lt_script_get_tag( pScriptT);
+    SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+    if (!pScript)
+        return OUString();
+    return OUString::createFromAscii( pScript);
+}
+
+
+/** Obtains the region subtag, either from the liblangtag object if one
+    exists or from the cached simple language/script/country extraction.
+
+    @return the region subtag, or an empty string if maBcp47 is empty or the
+            tag has no region.
+ */
+OUString LanguageTag::getRegionFromLangtag()
+{
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return OUString();
+
+    if (!mpImplLangtag)
+    {
+        // Simple case without liblangtag, use (or fill) the cache.
+        if (mbCachedCountry || cacheSimpleLSC())
+            return maCachedCountry;
+        return OUString();
+    }
+
+    const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
+    // pRegionT==NULL is valid for language only tags. The check is rough and
+    // does not take sophisticated tags into account that actually should
+    // have a region: lengths matching ll, lll, ll-Ssss and lll-Ssss stay
+    // silent so that ll-CC and lll-CC without region do warn.
+    SAL_WARN_IF( !pRegionT &&
+            maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
+            maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
+            "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+    if (!pRegionT)
+        return OUString();
+
+    const char* pRegion = lt_region_get_tag( pRegionT);
+    SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+    if (!pRegion)
+        return OUString();
+    return OUString::createFromAscii( pRegion);
+}
+
+
+/** Returns the UNO Locale, converting from BCP 47 or LangID on demand.
+
+    For a system locale with bResolveSystem==false the shared empty Locale is
+    returned instead.
+ */
+const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
+{
+    if (mbSystemLocale && !bResolveSystem)
+        return theEmptyLocale::get();
+
+    if (mbInitializedLocale)
+        return maLocale;
+
+    // Lazy conversion modifies members, hence the const_cast.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedBcp47)
+        pThis->convertBcp47ToLocale();
+    else
+        pThis->convertLangToLocale();
+    return maLocale;
+}
+
+
+/** Returns the MS-LangID, converting from BCP 47 or Locale on demand.
+
+    For a system locale with bResolveSystem==false LANGUAGE_SYSTEM is
+    returned instead.
+ */
+LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
+{
+    if (mbSystemLocale && !bResolveSystem)
+        return LANGUAGE_SYSTEM;
+
+    if (mbInitializedLangID)
+        return mnLangID;
+
+    // Lazy conversion modifies members, hence the const_cast.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedBcp47)
+        pThis->convertBcp47ToLang();
+    else
+        pThis->convertLocaleToLang();
+    return mnLangID;
+}
+
+
+/** Hands out ISO language and country codes if this is an ISO locale,
+    otherwise sets both output strings to empty. */
+void LanguageTag::getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const
+{
+    if (isIsoLocale())
+    {
+        // After isIsoLocale() it's safe to call getLanguage() for ISO code.
+        rLanguage = getLanguage();
+        rCountry = getCountry();
+    }
+    else
+    {
+        rLanguage = OUString();
+        rCountry = OUString();
+    }
+}
+
+
+namespace
+{
+
+/** True if c is one of the ASCII letters 'a' to 'z'. */
+bool isLowerAscii( sal_Unicode c )
+{
+    return c >= 'a' && c <= 'z';
+}
+
+/** True if c is one of the ASCII letters 'A' to 'Z'. */
+bool isUpperAscii( sal_Unicode c )
+{
+    return c >= 'A' && c <= 'Z';
+}
+
+}
+
+
+/** Checks whether rLanguage consists of exactly two or three lower case
+    ASCII letters. Strings of that length containing upper case ASCII letters
+    are rejected with a warning. */
+// static
+bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    const sal_Int32 nLen = rLanguage.getLength();
+    if (nLen != 2 && nLen != 3)
+        return false;
+
+    if (isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
+            (nLen == 2 || isLowerAscii( rLanguage[2])))
+        return true;
+
+    SAL_WARN_IF( isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]) ||
+            (nLen == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
+            "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
+    return false;
+}
+
+
+/** Checks whether rRegion is empty or consists of exactly two upper case
+    ASCII letters. Two character strings containing lower case ASCII letters
+    are rejected with a warning. */
+// static
+bool LanguageTag::isIsoCountry( const OUString& rRegion )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    if (rRegion.isEmpty())
+        return true;
+    if (rRegion.getLength() != 2)
+        return false;
+
+    if (isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1]))
+        return true;
+
+    SAL_WARN_IF( isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1]),
+            "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
+    return false;
+}
+
+
+/** Checks whether rScript is empty or consists of exactly four ASCII
+    letters in title case (Ssss). Four character strings with mismatching
+    case are rejected with a warning. */
+// static
+bool LanguageTag::isIsoScript( const OUString& rScript )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    if (rScript.isEmpty())
+        return true;
+    if (rScript.getLength() != 4)
+        return false;
+
+    if (isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
+            isLowerAscii( rScript[2]) && isLowerAscii( rScript[3]))
+        return true;
+
+    SAL_WARN_IF( isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
+            isUpperAscii( rScript[2]) || isUpperAscii( rScript[3]),
+            "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
+    return false;
+}
+
+
+/** Returns the language subtag, obtaining and caching it on first use. */
+OUString LanguageTag::getLanguage() const
+{
+    if (mbCachedLanguage)
+        return maCachedLanguage;
+
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    maCachedLanguage = pThis->getLanguageFromLangtag();
+    mbCachedLanguage = true;
+    return maCachedLanguage;
+}
+
+
+/** Returns the script subtag, obtaining and caching it on first use. */
+OUString LanguageTag::getScript() const
+{
+    if (mbCachedScript)
+        return maCachedScript;
+
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    maCachedScript = pThis->getScriptFromLangtag();
+    mbCachedScript = true;
+    return maCachedScript;
+}
+
+
+/** Returns "language-script" if the tag has a script, else just the
+    language. */
+OUString LanguageTag::getLanguageAndScript() const
+{
+    const OUString aLanguage( getLanguage());
+    const OUString aScript( getScript());
+    if (aScript.isEmpty())
+        return aLanguage;
+
+    OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength());
+    aBuf.append( aLanguage);
+    aBuf.append( '-');
+    aBuf.append( aScript);
+    return aBuf.makeStringAndClear();
+}
+
+
+/** Returns the region subtag if it passes isIsoCountry(), else an empty
+    string. The result is cached on first use. */
+OUString LanguageTag::getCountry() const
+{
+    if (mbCachedCountry)
+        return maCachedCountry;
+
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    maCachedCountry = pThis->getRegionFromLangtag();
+    // Regions that are not ISO country codes are not handed out as country.
+    if (!isIsoCountry( maCachedCountry))
+        maCachedCountry = OUString();
+    mbCachedCountry = true;
+    return maCachedCountry;
+}
+
+
+/** Returns the region subtag as obtained by getRegionFromLangtag(), without
+    the ISO country check that getCountry() applies. */
+OUString LanguageTag::getRegion() const
+{
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    return pThis->getRegionFromLangtag();
+}
+
+
+/** Builds a glibc style locale string language[_COUNTRY] followed by
+    rEncoding. Returns an empty string if this is not an ISO locale. */
+OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
+{
+    OUString aRet;
+    if (!isIsoLocale())
+    {
+        /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
+         * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle
+         * it. So far no code was prepared for anything else than a simple
+         * language_country locale so we don't lose anything here right now.
+         * */
+        return aRet;
+    }
+
+    const OUString aCountry( getCountry());
+    if (aCountry.isEmpty())
+        aRet = getLanguage() + rEncoding;
+    else
+        aRet = getLanguage() + "_" + aCountry + rEncoding;
+    return aRet;
+}
+
+
+/** Whether the tag has a non-empty script subtag. Fills the script cache if
+    that has not happened yet. */
+bool LanguageTag::hasScript() const
+{
+    const bool bNeedsCaching = !mbCachedScript;
+    if (bNeedsCaching)
+    {
+        getScript();
+    }
+    return !maCachedScript.isEmpty();
+}
+
+
+/** Tries to split maBcp47 into language, script and country with
+    simpleExtract() and, on success, stores all three in the cache.
+
+    @return true if the extraction yielded EXTRACTED_LSC and the cache was
+            filled, false otherwise (cache untouched).
+ */
+bool LanguageTag::cacheSimpleLSC()
+{
+    OUString aLanguage, aScript, aCountry;
+    if (simpleExtract( maBcp47, aLanguage, aScript, aCountry) != EXTRACTED_LSC)
+        return false;
+
+    maCachedLanguage = aLanguage;
+    maCachedScript = aScript;
+    maCachedCountry = aCountry;
+    mbCachedLanguage = true;
+    mbCachedScript = true;
+    mbCachedCountry = true;
+    return true;
+}
+
+
+/** Whether the tag is a plain ISO locale, i.e. at most ll-CC or lll-CC.
+    The decision is made once and remembered in meIsIsoLocale. */
+bool LanguageTag::isIsoLocale() const
+{
+    if (meIsIsoLocale != DECISION_DONTKNOW)
+        return meIsIsoLocale == DECISION_YES;
+
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        const_cast<LanguageTag*>(this)->canonicalize();
+
+    // An empty tag counts as ISO locale. Otherwise it must be at most ll-CC
+    // or lll-CC.
+    // Do not use getCountry() here, use getRegion() instead.
+    bool bIso = maBcp47.isEmpty();
+    if (!bIso)
+    {
+        bIso = maBcp47.getLength() <= 6 &&
+            isIsoLanguage( getLanguage()) && isIsoCountry( getRegion());
+    }
+    meIsIsoLocale = (bIso ? DECISION_YES : DECISION_NO);
+    return bIso;
+}
+
+
+/** Whether the tag can be expressed with ISO language, ISO country and an
+    optional ISO script only, i.e. lll-CC or lll-Ssss-CC. The decision is
+    made once and remembered in meIsIsoODF. */
+bool LanguageTag::isIsoODF() const
+{
+    if (meIsIsoODF != DECISION_DONTKNOW)
+        return meIsIsoODF == DECISION_YES;
+
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        const_cast<LanguageTag*>(this)->canonicalize();
+
+    bool bIsoODF;
+    if (!isIsoScript( getScript()))
+    {
+        bIsoODF = false;
+    }
+    else if (isIsoLocale())
+    {
+        // The usual case is lll-CC so that is checked first.
+        bIsoODF = true;
+    }
+    else
+    {
+        // If this is not ISO locale for which script must not exist it can
+        // still be ISO locale plus ISO script lll-Ssss-CC
+        bIsoODF = maBcp47.getLength() <= 11 &&
+            isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript());
+    }
+    meIsIsoODF = (bIsoODF ? DECISION_YES : DECISION_NO);
+    return bIsoODF;
+}
+
+
+/** Whether the tag was determined to be valid. If that is not known yet and
+    canonicalize() is still due, it is run to find out. */
+bool LanguageTag::isValidBcp47() const
+{
+    if (meIsValid != DECISION_DONTKNOW)
+        return meIsValid == DECISION_YES;
+
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+        const_cast<LanguageTag*>(this)->canonicalize();
+    SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
+            "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
+
+    return meIsValid == DECISION_YES;
+}
+
+
+bool LanguageTag::isSystemLocale() const // Whether this instance was set up as system locale; plain accessor for mbSystemLocale.
+{
+ return mbSystemLocale;
+}
+
+
+/** Replaces this tag by its fallback as looked up through MsLangId, unless
+    that has been done already.
+
+    The lookup works on the LangID if that is initialized, else on the
+    Locale. The instance is reset only if the lookup yields something
+    different.
+
+    @return *this
+ */
+LanguageTag & LanguageTag::makeFallback()
+{
+    if (mbIsFallback)
+        return *this;
+
+    if (mbInitializedLangID)
+    {
+        const LanguageType nOrigLang = getLanguageType();
+        const LanguageType nFallbackLang = MsLangId::Conversion::lookupFallbackLanguage( nOrigLang);
+        if (nFallbackLang != nOrigLang)
+            reset( nFallbackLang);
+    }
+    else
+    {
+        const lang::Locale& rOrigLocale = getLocale();
+        lang::Locale aFallbackLocale( MsLangId::Conversion::lookupFallbackLocale( rOrigLocale));
+        const bool bSame =
+            rOrigLocale.Language == aFallbackLocale.Language &&
+            rOrigLocale.Country == aFallbackLocale.Country &&
+            rOrigLocale.Variant == aFallbackLocale.Variant;
+        if (!bSame)
+            reset( aFallbackLocale);
+    }
+    // Set after a possible reset().
+    mbIsFallback = true;
+    return *this;
+}
+
+
+/** Builds the list of fallback tag strings, most specific first.
+
+    For an ISO locale that is [language-country,] language. Otherwise the
+    list starts with the full BCP 47 string, followed by
+    language-script-country, language-script, language-country and language,
+    each only if applicable and different from the full string.
+ */
+::std::vector< OUString > LanguageTag::getFallbackStrings() const
+{
+    ::std::vector< OUString > aVec;
+    const OUString aLanguage( getLanguage());
+    const OUString aCountry( getCountry());
+    const bool bHasCountry = !aCountry.isEmpty();
+
+    if (isIsoLocale())
+    {
+        if (bHasCountry)
+            aVec.push_back( aLanguage + "-" + aCountry);
+        aVec.push_back( aLanguage);
+        return aVec;
+    }
+
+    // Keep a copy of the full tag for comparison, the vector may reallocate.
+    const OUString aFull( getBcp47());
+    aVec.push_back( aFull);
+
+    if (hasScript())
+    {
+        const OUString aLangScript( aLanguage + "-" + getScript());
+        if (bHasCountry)
+        {
+            const OUString aLangScriptCountry( aLangScript + "-" + aCountry);
+            if (aLangScriptCountry != aFull)
+                aVec.push_back( aLangScriptCountry);
+        }
+        if (aLangScript != aFull)
+            aVec.push_back( aLangScript);
+    }
+
+    if (bHasCountry)
+    {
+        const OUString aLangCountry( aLanguage + "-" + aCountry);
+        if (aLangCountry != aFull)
+            aVec.push_back( aLangCountry);
+    }
+
+    if (aLanguage != aFull)
+        aVec.push_back( aLanguage);
+    return aVec;
+}
+
+
+/** Compares two tags, optionally resolving the system locale.
+
+    If the system locale is not to be resolved, or both tags agree on being
+    (or not being) system locale, this is the same as operator==().
+    Otherwise the resolved BCP 47 strings are compared.
+ */
+bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
+{
+    const bool bSameKind = (isSystemLocale() == rLanguageTag.isSystemLocale());
+    if (bSameKind || !bResolveSystem)
+        return operator==( rLanguageTag);   // can use its optimization
+
+    // Exactly one of them is SYSTEM, compare full language tag strings.
+    const OUString& rThis = getBcp47( bResolveSystem);
+    const OUString& rOther = rLanguageTag.getBcp47( bResolveSystem);
+    return rThis == rOther;
+}
+
+
+/** Equality without resolving the system locale: two system locale tags
+    are equal, otherwise LangIDs are compared if both are available, else
+    the unresolved BCP 47 strings. */
+bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
+{
+    const bool bThisSystem = isSystemLocale();
+    const bool bOtherSystem = rLanguageTag.isSystemLocale();
+
+    if (bThisSystem && bOtherSystem)
+        return true;    // both SYSTEM
+
+    if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
+    {
+        // No need to convert to BCP47 if both Lang-IDs are available. Equal
+        // if same ID and no SYSTEM is involved or both are SYSTEM.
+        return mnLangID == rLanguageTag.mnLangID && bThisSystem == bOtherSystem;
+    }
+
+    // Compare full language tag strings but SYSTEM unresolved.
+    const OUString& rThis = getBcp47( false);
+    const OUString& rOther = rLanguageTag.getBcp47( false);
+    return rThis == rOther;
+}
+
+
+/** Negation of operator==(). */
+bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
+{
+    return !(*this == rLanguageTag);
+}
+
+
+/** Splits a BCP 47 string by position only, without liblangtag.
+
+    Recognized shapes are "*", "x-...", ll, lll, ll-CC, lll-CC, ll-Ssss-CC
+    and lll-Ssss-CC. Only lengths and hyphen positions are inspected, the
+    characters of the subtags are not validated.
+
+    @return EXTRACTED_X_JOKER for "*" and EXTRACTED_X for "x-..." (output
+            strings untouched in both cases), EXTRACTED_LSC if language,
+            script and country were assigned, EXTRACTED_NONE otherwise
+            (output strings cleared).
+ */
+// static
+LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
+        OUString& rLanguage, OUString& rScript, OUString& rCountry )
+{
+    const sal_Int32 nLen = rBcp47.getLength();
+    const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
+
+    // * the dreaded jolly joker, unfortunate but it needs to be recognized.
+    if (nLen == 1 && rBcp47[0] == '*')
+        return EXTRACTED_X_JOKER;
+
+    // x-... privateuse tags MUST be known to us by definition.
+    if (nHyph1 == 1 && rBcp47[0] == 'x')
+        return EXTRACTED_X;
+
+    // ll or lll
+    if (nHyph1 < 0 && (nLen == 2 || nLen == 3))
+    {
+        rLanguage = rBcp47;
+        rScript = rCountry = OUString();
+        return EXTRACTED_LSC;
+    }
+
+    // ll-CC or lll-CC
+    if ((nLen == 5 && nHyph1 == 2) || (nLen == 6 && nHyph1 == 3))
+    {
+        rLanguage = rBcp47.copy( 0, nHyph1);
+        rCountry = rBcp47.copy( nHyph1 + 1, 2);
+        rScript = OUString();
+        return EXTRACTED_LSC;
+    }
+
+    // ll-Ssss-CC or lll-Ssss-CC, provided the second hyphen is in place
+    if ((nHyph1 == 2 && nLen == 10) || (nHyph1 == 3 && nLen == 11))
+    {
+        const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
+        if (nHyph2 == nHyph1 + 5)
+        {
+            rLanguage = rBcp47.copy( 0, nHyph1);
+            rScript = rBcp47.copy( nHyph1 + 1, 4);
+            rCountry = rBcp47.copy( nHyph2 + 1, 2);
+            return EXTRACTED_LSC;
+        }
+    }
+
+    rLanguage = rScript = rCountry = OUString();
+    return EXTRACTED_NONE;
+}
+
+
+/** Searches rList for the best match of rReference.
+
+    Tried in this order: an exact match of rReference, the fallback strings
+    of a LanguageTag constructed from rReference (most specific first),
+    then "en-US", "en", "x-default" and "x-no-translate".
+
+    @param rList
+        Candidate tag strings.
+    @param rReference
+        The tag string to find a match for.
+    @return Iterator to the matching element. If nothing matches, an
+            iterator to the first element. rList.end() only if rList is
+            empty.
+ */
+// static
+::std::vector< OUString >::const_iterator LanguageTag::getFallback(
+        const ::std::vector< OUString > & rList, const OUString & rReference )
+{
+    if (rList.empty())
+        return rList.end();
+
+    ::std::vector< OUString >::const_iterator it;
+
+    // Try the simple case first without constructing fallbacks.
+    for (it = rList.begin(); it != rList.end(); ++it)
+    {
+        if (*it == rReference)
+            return it;  // exact match
+    }
+
+    ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings());
+    // The first fallback usually is the full tag, which was checked above.
+    // Drop it only if it really is the string that was checked: the tag
+    // derived from rReference may differ from rReference itself (e.g. an
+    // empty reference resolves to the system locale), and then that most
+    // specific candidate has not been compared yet.
+    if (!aFallbacks.empty() && aFallbacks.front() == rReference)
+        aFallbacks.erase( aFallbacks.begin());
+    if (rReference != "en-US")
+    {
+        aFallbacks.push_back( "en-US");
+        if (rReference != "en")
+            aFallbacks.push_back( "en");
+    }
+    if (rReference != "x-default")
+        aFallbacks.push_back( "x-default");
+    if (rReference != "x-no-translate")
+        aFallbacks.push_back( "x-no-translate");
+    /* TODO: the original comphelper::Locale::getFallback() code had
+     * "x-notranslate" instead of "x-no-translate", but all .xcu files use
+     * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
+     * Did that ever work? Was it supposed to work at all like this? */
+
+    for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
+    {
+        for (it = rList.begin(); it != rList.end(); ++it)
+        {
+            if (*it == *fb)
+                return it;  // fallback found
+        }
+    }
+
+    // Did not find anything so return something of the list, the first value
+    // will do as well as any other as none did match any of the possible
+    // fallbacks.
+    return rList.begin();
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/languagetag/simple-langtag.cxx b/i18nlangtag/source/languagetag/simple-langtag.cxx
new file mode 100644
index 000000000000..d96f721dbeef
--- /dev/null
+++ b/i18nlangtag/source/languagetag/simple-langtag.cxx
@@ -0,0 +1,400 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+/** Cheap and cheesy replacement code for liblangtag on systems that do not
+ allow / want LGPL code or dependencies on glib.
+
+ XXX NOTE: This code does not check language tags for validity or if they
+ are registered with IANA, does not canonicalize or strip default script
+ tags if included nor does it do any other fancy stuff that liblangtag is
+ capable of. It just makes depending code work without.
+ */
+
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+namespace {
+
+typedef int lt_bool_t;
+
+struct lt_error_t { // Stand-in for the liblangtag error type so that depending code compiles.
+ void *something; // opaque payload, initialized to NULL and released by lt_error_unref()
+ lt_error_t() : something(NULL) {}
+};
+
+static void* g_malloc(size_t s) // Local replacement for the glib function of the same name: plain malloc(), result may be NULL.
+{
+ return malloc(s);
+}
+
+/** Local replacement for the glib function of the same name. Releases
+    memory obtained from malloc() / strdup(); a NULL pointer is ignored. */
+static void g_free(void* p)
+{
+    // free() itself is a no-op for NULL.
+    free(p);
+}
+
+static void lt_error_unref(lt_error_t *error)
+{
+ if (error)
+ {
+ g_free( error->something);
+ g_free( error);
+ }
+}
+
+/** Minimal intrusive reference count. Starts at 1; the object deletes
+    itself when decRef() brings the count to 0. */
+struct my_ref
+{
+    sal_uInt32 mnRef;
+
+    explicit my_ref() : mnRef(1) {}
+    virtual ~my_ref() {}
+
+    void incRef()
+    {
+        ++mnRef;
+    }
+
+    void decRef()
+    {
+        --mnRef;
+        if (mnRef == 0)
+            delete this;
+    }
+};
+
+/** Reference counted holder of one heap allocated, NUL terminated string.
+
+    mpStr is either NULL (no value) or owned by this object and released
+    with g_free(). Copies duplicate the string and start with a fresh
+    reference count.
+ */
+struct my_t_impl : public my_ref
+{
+    char* mpStr;
+    explicit my_t_impl() : my_ref(), mpStr(NULL) {}
+    virtual ~my_t_impl() { g_free( mpStr); }
+    explicit my_t_impl( const my_t_impl& r )
+        :
+            my_ref(),
+            mpStr(r.mpStr ? strdup( r.mpStr) : NULL)
+    {
+    }
+    my_t_impl& operator=( const my_t_impl& r )
+    {
+        if (this == &r)
+            return *this;
+        g_free( mpStr);
+        mpStr = (r.mpStr ? strdup( r.mpStr) : NULL);
+        return *this;
+    }
+    /** Replaces the value by a copy of str, or by NULL if str is NULL. */
+    void assign( const char* str )
+    {
+        // Duplicate before releasing the old buffer so that str may point
+        // to (or into) the current value.
+        char* pNew = (str ? strdup( str) : NULL);
+        g_free( mpStr);
+        mpStr = pNew;
+    }
+    /** Replaces the value by a copy of the range [str,stop), or by NULL if
+        the range is empty or invalid, or if memory could not be
+        allocated. */
+    void assign( const char* str, const char* stop )
+    {
+        char* pNew = NULL;
+        if (str && str < stop)
+        {
+            const size_t nLen = stop - str;
+            pNew = static_cast<char*>(g_malloc( nLen + 1));
+            if (pNew)
+            {
+                memcpy( pNew, str, nLen);
+                pNew[nLen] = 0;
+            }
+        }
+        // Release only after copying, the range may lie within mpStr.
+        g_free( mpStr);
+        mpStr = pNew;
+    }
+    /** Appends the range [str,stop) to the value. Does nothing if the range
+        is empty or invalid; the old value is kept if memory could not be
+        allocated. */
+    void append( const char* str, const char* stop )
+    {
+        if (!str || !(str < stop))
+            return;
+        const size_t nOld = mpStr ? strlen( mpStr) : 0;
+        const size_t nAdd = stop - str;
+        char* p = static_cast<char*>(g_malloc( nOld + nAdd + 1));
+        if (!p)
+            return;
+        if (nOld)
+            memcpy( p, mpStr, nOld);
+        memcpy( p + nOld, str, nAdd);
+        p[nOld + nAdd] = 0;
+        g_free( mpStr);
+        mpStr = p;
+    }
+    /** Drops the value. */
+    void zero()
+    {
+        g_free( mpStr);
+        mpStr = NULL;
+    }
+};
+
+struct lt_lang_t : public my_t_impl // Language subtag holder; a distinct type that adds nothing to my_t_impl.
+{
+ explicit lt_lang_t() : my_t_impl() {}
+ virtual ~lt_lang_t() {}
+};
+
+struct lt_script_t : public my_t_impl // Script subtag holder; a distinct type that adds nothing to my_t_impl.
+{
+ explicit lt_script_t() : my_t_impl() {}
+ virtual ~lt_script_t() {}
+};
+
+struct lt_region_t : public my_t_impl // Region subtag holder; a distinct type that adds nothing to my_t_impl.
+{
+ explicit lt_region_t() : my_t_impl() {}
+ virtual ~lt_region_t() {}
+};
+
+/** A language tag: the full tag string (in the my_t_impl base) plus the
+    language, script and region subtags. */
+struct lt_tag_t : public my_t_impl
+{
+    lt_lang_t   maLanguage;
+    lt_script_t maScript;
+    lt_region_t maRegion;
+
+    explicit lt_tag_t() : my_t_impl(), maLanguage(), maScript(), maRegion() {}
+    virtual ~lt_tag_t() {}
+
+    explicit lt_tag_t( const lt_tag_t& r )
+        : my_t_impl( r), maLanguage( r.maLanguage), maScript( r.maScript), maRegion( r.maRegion)
+    {
+    }
+
+    lt_tag_t& operator=( const lt_tag_t& r )
+    {
+        if (this != &r)
+        {
+            my_t_impl::operator=( r);
+            maLanguage = r.maLanguage;
+            maScript = r.maScript;
+            maRegion = r.maRegion;
+        }
+        return *this;
+    }
+
+    /** Sets a new full tag string and drops all subtags. */
+    void assign( const char* str )
+    {
+        maLanguage.zero();
+        maScript.zero();
+        maRegion.zero();
+        my_t_impl::assign( str);
+    }
+};
+
+static void lt_db_initialize() { } // no-op stub, this replacement code has no data base to load
+static void lt_db_finalize() { } // no-op stub, counterpart of lt_db_initialize()
+static void lt_db_set_datadir( const char* /* dir */ ) { } // no-op stub, the directory is ignored
+
+static lt_tag_t* lt_tag_new(void) // Creates an empty tag with reference count 1; release with lt_tag_unref().
+{
+ return new lt_tag_t;
+}
+
+/** Creates a new tag as a copy of the given one, or returns NULL if tag is
+    NULL. */
+static lt_tag_t* lt_tag_copy(lt_tag_t *tag)
+{
+    if (!tag)
+        return NULL;
+    return new lt_tag_t( *tag);
+}
+
+/** Drops one reference of the tag; a NULL pointer is ignored. */
+static void lt_tag_unref(lt_tag_t *tag)
+{
+    if (!tag)
+        return;
+    tag->decRef();
+}
+
+/** Splits a language tag string into language, script and region subtags.
+
+    See http://tools.ietf.org/html/rfc5646
+
+    We are simply ignorant of grandfathered (irregular and regular) subtags and
+    may either bail out or accept them, sorry (or not). However, we do accept
+    any i-* irregular and x-* privateuse. Subtags are not checked for validity
+    (alpha, digit, registered, ...).
+
+    The full string is stored in the tag in any case. Parsing stops at the
+    first variant or extension subtag, which is not an error; whatever was
+    recognized up to there is kept.
+
+    @param tag
+        The tag to fill. Previously stored subtags are dropped.
+    @param tag_string
+        The language tag string to parse.
+    @param error
+        Ignored.
+    @return non-zero if the string could be processed, 0 if tag or
+            tag_string is NULL or a subtag has an unexpected length.
+ */
+static lt_bool_t lt_tag_parse(lt_tag_t *tag,
+                              const char *tag_string,
+                              lt_error_t **error)
+{
+    (void) error;
+    if (!tag)
+        return 0;
+    tag->assign( tag_string);
+    if (!tag_string)
+        return 0;
+    // In case we supported other subtags this would get more complicated.
+    my_t_impl* aSubtags[] = { &tag->maLanguage, &tag->maScript, &tag->maRegion, NULL };
+    my_t_impl** ppSub = &aSubtags[0];
+    const char* pStart = tag_string;
+    const char* p = pStart;
+    const char* pEnd = pStart + strlen( pStart);   // scanning includes \0
+    bool bStartLang = true;
+    bool bPrivate = false;
+    // pStart..p is the current subtag whenever p hits a hyphen or the end.
+    // *ppSub is the field that subtag is tried for. Where a subtag turns out
+    // not to belong to the current field, ppSub is advanced and p stepped
+    // back by one so that the same subtag is looked at again.
+    for ( ; p <= pEnd && ppSub && *ppSub; ++p)
+    {
+        if (p == pEnd || *p == '-')
+        {
+            size_t nLen = p - pStart;
+            if (*ppSub == &tag->maLanguage)
+            {
+                if (bStartLang)
+                {
+                    bStartLang = false;
+                    switch (nLen)
+                    {
+                        case 1:     // irregular or privateuse
+                            if (*pStart == 'i' || *pStart == 'x')
+                            {
+                                (*ppSub)->assign( pStart, p);
+                                bPrivate = true;
+                            }
+                            else
+                                return 0;   // bad
+                            break;
+                        case 2:     // ISO 639 alpha-2
+                        case 3:     // ISO 639 alpha-3
+                            (*ppSub)->assign( pStart, p);
+                            break;
+                        case 4:     // reserved for future use
+                            return 0;   // bad
+                        case 5:
+                        case 6:
+                        case 7:
+                        case 8:     // registered language subtag
+                            (*ppSub++)->assign( pStart, p);
+                            break;
+                        default:
+                            return 0;   // bad
+                    }
+                }
+                else
+                {
+                    if (nLen > 8)
+                        return 0;   // bad
+                    if (bPrivate)
+                    {
+                        // Any combination of  "x" 1*("-" (2*8alphanum))
+                        // allowed, store first as language and return ok.
+                        // For i-* simply assume the same.
+                        (*ppSub)->append( pStart-1, p);
+                        return !0;  // ok
+                    }
+                    else if (nLen == 3)
+                    {
+                        // extlang subtag, 1 to 3 allowed we don't check that.
+                        // But if it's numeric it's a region UN M.49 code
+                        // instead and no script subtag is present, so advance.
+                        if ('0' <= *pStart && *pStart <= '9')
+                        {
+                            ppSub += 2; // &tag->maRegion XXX watch this when inserting fields
+                            --p;
+                            continue;   // for
+                        }
+                        else
+                            (*ppSub)->append( pStart-1, p);
+                    }
+                    else
+                    {
+                        // Not part of language subtag, advance.
+                        ++ppSub;
+                        --p;
+                        continue;   // for
+                    }
+                }
+            }
+            else if (*ppSub == &tag->maScript)
+            {
+                switch (nLen)
+                {
+                    case 4:
+                        // script subtag, or a (DIGIT 3alphanum) variant with
+                        // no script and no region in which case we stop
+                        // parsing.
+                        if ('0' <= *pStart && *pStart <= '9')
+                            ppSub = NULL;
+                        else
+                            (*ppSub++)->assign( pStart, p);
+                        break;
+                    case 3:
+                        // This may be a region UN M.49 code if 3DIGIT and no
+                        // script code present. Just check first character and
+                        // advance.
+                        if ('0' <= *pStart && *pStart <= '9')
+                        {
+                            ++ppSub;
+                            --p;
+                            continue;   // for
+                        }
+                        else
+                            return 0;   // bad
+                    case 2:
+                        // script omitted, region subtag, advance.
+                        ++ppSub;
+                        --p;
+                        continue;   // for
+                    case 1:
+                        // script omitted, region omitted, extension subtag
+                        // with singleton, stop parsing
+                        ppSub = NULL;
+                        break;
+                    case 5:
+                    case 6:
+                    case 7:
+                    case 8:
+                        // script omitted, region omitted, variant subtag, stop
+                        // parsing. Must not fall through to the error case,
+                        // a tag like ll-variant is fine.
+                        ppSub = NULL;
+                        break;
+                    default:
+                        return 0;   // bad
+                }
+            }
+            else if (*ppSub == &tag->maRegion)
+            {
+                if (nLen == 2 || nLen == 3)
+                    (*ppSub++)->assign( pStart, p);
+                else
+                    return 0;   // bad
+            }
+            pStart = p+1;
+        }
+    }
+    return !0;
+}
+
+/** Returns a newly allocated copy of the tag's string as is, no actual
+    canonicalization takes place. The caller has to free() the result.
+
+    @return the copy, or NULL if tag is NULL or has no string.
+ */
+static char* lt_tag_canonicalize(lt_tag_t *tag,
+                                 lt_error_t **error)
+{
+    (void) error;
+    if (!tag || !tag->mpStr)
+        return NULL;
+    return strdup( tag->mpStr);
+}
+
+/** Returns the tag's language subtag, or NULL if tag is NULL or no
+    language is set. */
+static const lt_lang_t* lt_tag_get_language(const lt_tag_t *tag)
+{
+    if (!tag || !tag->maLanguage.mpStr)
+        return NULL;
+    return &tag->maLanguage;
+}
+
+/** Returns the tag's script subtag, or NULL if tag is NULL or no script is
+    set. */
+static const lt_script_t *lt_tag_get_script(const lt_tag_t *tag)
+{
+    if (!tag || !tag->maScript.mpStr)
+        return NULL;
+    return &tag->maScript;
+}
+
+/** Returns the tag's region subtag, or NULL if tag is NULL or no region is
+    set. */
+static const lt_region_t *lt_tag_get_region(const lt_tag_t *tag)
+{
+    if (!tag || !tag->maRegion.mpStr)
+        return NULL;
+    return &tag->maRegion;
+}
+
+/** Returns the language subtag's string, or NULL if lang is NULL. */
+static const char *lt_lang_get_tag(const lt_lang_t *lang)
+{
+    if (!lang)
+        return NULL;
+    return lang->mpStr;
+}
+
+/** Returns the script subtag's string, or NULL if script is NULL. */
+static const char *lt_script_get_tag(const lt_script_t *script)
+{
+    if (!script)
+        return NULL;
+    return script->mpStr;
+}
+
+/** Returns the region subtag's string, or NULL if region is NULL. */
+static const char *lt_region_get_tag(const lt_region_t *region)
+{
+    if (!region)
+        return NULL;
+    return region->mpStr;
+}
+
+#ifdef erDEBUG
+/** Debug aid: writes the tag string and its subtags to stderr. Unset
+    values are printed as "(null)"; a NULL tag prints nothing. */
+static void lt_tag_dump(const lt_tag_t *tag)
+{
+    if (!tag)
+        return;
+    // Script and region are unset for many tags, and passing a NULL pointer
+    // for %s is undefined behaviour, so substitute a placeholder.
+    const char* const pNone = "(null)";
+    fprintf( stderr, "\n");
+    fprintf( stderr, "SimpleLangtag langtag: %s\n", tag->mpStr ? tag->mpStr : pNone);
+    fprintf( stderr, "SimpleLangtag language: %s\n", tag->maLanguage.mpStr ? tag->maLanguage.mpStr : pNone);
+    fprintf( stderr, "SimpleLangtag script: %s\n", tag->maScript.mpStr ? tag->maScript.mpStr : pNone);
+    fprintf( stderr, "SimpleLangtag region: %s\n", tag->maRegion.mpStr ? tag->maRegion.mpStr : pNone);
+}
+#endif
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */