summaryrefslogtreecommitdiff
path: root/i18nlangtag/source/languagetag/languagetag.cxx
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2013-04-05 18:40:39 +0200
committerEike Rathke <erack@redhat.com>2013-04-05 19:10:48 +0200
commit876c619b944dfbc88464045f1400c549a01a1164 (patch)
treef15f930fe100bda4c0a0503728654801ac977fcd /i18nlangtag/source/languagetag/languagetag.cxx
parent8ef9e38aa84675c57b331a796d900b3c10e04f44 (diff)
new module i18nlangtag
Moved portions from module i18npool, all of former i18nisolang1 library that now is i18nlangtag. Included are languagetag, isolang and mslangid. This i18nlangtag code is now even used by module comphelper, so disentangling i18npool and making this an own module was needed to not create circular module dependencies. Change-Id: Ib887c3d6dde667403fd22d382310ba5f1a9b0015
Diffstat (limited to 'i18nlangtag/source/languagetag/languagetag.cxx')
-rw-r--r--i18nlangtag/source/languagetag/languagetag.cxx1254
1 files changed, 1254 insertions, 0 deletions
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
new file mode 100644
index 000000000000..e1eea3b75f04
--- /dev/null
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -0,0 +1,1254 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "i18nlangtag/languagetag.hxx"
+#include "i18nlangtag/mslangid.hxx"
+#include <rtl/ustrbuf.hxx>
+#include <rtl/bootstrap.hxx>
+#include <osl/file.hxx>
+#include <rtl/instance.hxx>
+#include <rtl/locale.h>
+
+//#define erDEBUG
+
+#if defined(ENABLE_LIBLANGTAG)
+#include <liblangtag/langtag.h>
+#else
+/* Replacement code for LGPL phobic and Android systems.
+ * For iOS we could probably use NSLocale instead, that should have more or
+ * less required functionality. If it is good enough, it could be used for Mac
+ * OS X, too.
+ */
+#include "simple-langtag.cxx"
+#endif
+
+using rtl::OUString;
+using rtl::OString;
+using rtl::OUStringBuffer;
+using namespace com::sun::star;
+
+// The actual pointer type of mpImplLangtag that is declared void* to not
+// pollute the entire code base with liblangtag.
+#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
+#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
+
+/** Convention to signal presence of BCP 47 language tag in a Locale's Variant
+ field. The Locale's Language field then will contain this ISO 639-2
+ reserved for local use code. */
+#define ISO639_LANGUAGE_TAG "qlt"
+
+
+// Helper to ensure lt_error_t is free'd
+struct myLtError
+{
+ lt_error_t* p;
+ myLtError() : p(NULL) {}
+ ~myLtError() { if (p) lt_error_unref( p); }
+};
+
+
+// "statics" to be returned as const reference to an empty locale and string.
+namespace {
+struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
+struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
+}
+
+
+/** A reference holder for liblangtag data de/initialization, one static
+ instance. Currently implemented such that the first "ref" inits and dtor
+ (our library deinitialized) tears down.
+*/
+class LiblantagDataRef
+{
+public:
+ LiblantagDataRef();
+ ~LiblantagDataRef();
+ inline void incRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && !mnRef++)
+ setup();
+ }
+ inline void decRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
+ teardown();
+ }
+private:
+ rtl::OString maDataPath; // path to liblangtag data, "|" if system
+ sal_uInt32 mnRef;
+
+ void setupDataPath();
+ void setup();
+ void teardown();
+};
+
+namespace {
+struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
+}
+
+LiblantagDataRef::LiblantagDataRef()
+ :
+ mnRef(0)
+{
+}
+
+LiblantagDataRef::~LiblantagDataRef()
+{
+ // When destructed we're tearing down unconditionally.
+ if (mnRef)
+ mnRef = 1;
+ decRef();
+}
+
+void LiblantagDataRef::setup()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
+ if (maDataPath.isEmpty())
+ setupDataPath();
+ lt_db_initialize();
+ // Hold ref eternally.
+ mnRef = SAL_MAX_UINT32;
+}
+
+void LiblantagDataRef::teardown()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
+ lt_db_finalize();
+}
+
+void LiblantagDataRef::setupDataPath()
+{
+ // maDataPath is assumed to be empty here.
+ OUString aURL("$BRAND_BASE_DIR/share/liblangtag");
+ rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
+
+ // Check if data is in our own installation, else assume system
+ // installation.
+ OUString aData( aURL);
+ aData += "/language-subtag-registry.xml";
+ osl::DirectoryItem aDirItem;
+ if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
+ {
+ OUString aPath;
+ if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
+ maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
+ }
+ if (maDataPath.isEmpty())
+ maDataPath = "|"; // assume system
+ else
+ lt_db_set_datadir( maDataPath.getStr());
+}
+
+LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+ :
+ maBcp47( rBcp47LanguageTag),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rBcp47LanguageTag.isEmpty()),
+ mbInitializedBcp47( !mbSystemLocale),
+ mbInitializedLocale( false),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
+ :
+ maLocale( rLocale),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( LanguageType nLanguage )
+ :
+ mpImplLangtag( NULL),
+ mnLangID( nLanguage),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( false),
+ mbInitializedLangID( !mbSystemLocale),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( const OUString& rLanguage, const OUString& rCountry )
+ :
+ maLocale( rLanguage, rCountry, ""),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLanguage.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( const rtl_Locale & rLocale )
+ :
+ maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( maLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ convertFromRtlLocale();
+}
+
+
+LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
+ :
+ maLocale( rLanguageTag.maLocale),
+ maBcp47( rLanguageTag.maBcp47),
+ maCachedLanguage( rLanguageTag.maCachedLanguage),
+ maCachedScript( rLanguageTag.maCachedScript),
+ maCachedCountry( rLanguageTag.maCachedCountry),
+ mpImplLangtag( rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
+ mnLangID( rLanguageTag.mnLangID),
+ meIsValid( rLanguageTag.meIsValid),
+ meIsIsoLocale( rLanguageTag.meIsIsoLocale),
+ meIsIsoODF( rLanguageTag.meIsIsoODF),
+ meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
+ mbSystemLocale( rLanguageTag.mbSystemLocale),
+ mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
+ mbInitializedLocale( rLanguageTag.mbInitializedLocale),
+ mbInitializedLangID( rLanguageTag.mbInitializedLangID),
+ mbCachedLanguage( rLanguageTag.mbCachedLanguage),
+ mbCachedScript( rLanguageTag.mbCachedScript),
+ mbCachedCountry( rLanguageTag.mbCachedCountry),
+ mbIsFallback( rLanguageTag.mbIsFallback)
+{
+ if (mpImplLangtag)
+ theDataRef::get().incRef();
+}
+
+
+LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
+{
+ maLocale = rLanguageTag.maLocale;
+ maBcp47 = rLanguageTag.maBcp47;
+ maCachedLanguage = rLanguageTag.maCachedLanguage;
+ maCachedScript = rLanguageTag.maCachedScript;
+ maCachedCountry = rLanguageTag.maCachedCountry;
+ mpImplLangtag = rLanguageTag.mpImplLangtag;
+ mpImplLangtag = rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
+ mnLangID = rLanguageTag.mnLangID;
+ meIsValid = rLanguageTag.meIsValid;
+ meIsIsoLocale = rLanguageTag.meIsIsoLocale;
+ meIsIsoODF = rLanguageTag.meIsIsoODF;
+ meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
+ mbSystemLocale = rLanguageTag.mbSystemLocale;
+ mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
+ mbInitializedLocale = rLanguageTag.mbInitializedLocale;
+ mbInitializedLangID = rLanguageTag.mbInitializedLangID;
+ mbCachedLanguage = rLanguageTag.mbCachedLanguage;
+ mbCachedScript = rLanguageTag.mbCachedScript;
+ mbCachedCountry = rLanguageTag.mbCachedCountry;
+ mbIsFallback = rLanguageTag.mbIsFallback;
+ if (mpImplLangtag)
+ theDataRef::get().incRef();
+ return *this;
+}
+
+
+LanguageTag::~LanguageTag()
+{
+ if (mpImplLangtag)
+ {
+ lt_tag_unref( MPLANGTAG);
+ theDataRef::get().decRef();
+ }
+}
+
+
+void LanguageTag::resetVars()
+{
+ if (mpImplLangtag)
+ {
+ lt_tag_unref( MPLANGTAG);
+ mpImplLangtag = NULL;
+ theDataRef::get().decRef();
+ }
+
+ maLocale = lang::Locale();
+ if (!maBcp47.isEmpty())
+ maBcp47 = OUString();
+ if (!maCachedLanguage.isEmpty())
+ maCachedLanguage= OUString();
+ if (!maCachedScript.isEmpty())
+ maCachedScript = OUString();
+ if (!maCachedCountry.isEmpty())
+ maCachedCountry = OUString();
+ mnLangID = LANGUAGE_DONTKNOW;
+ meIsValid = DECISION_DONTKNOW;
+ meIsIsoLocale = DECISION_DONTKNOW;
+ meIsIsoODF = DECISION_DONTKNOW;
+ meIsLiblangtagNeeded= DECISION_DONTKNOW;
+ mbSystemLocale = true;
+ mbInitializedBcp47 = false;
+ mbInitializedLocale = false;
+ mbInitializedLangID = false;
+ mbCachedLanguage = false;
+ mbCachedScript = false;
+ mbCachedCountry = false;
+ mbIsFallback = false;
+}
+
+
+void LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+{
+ resetVars();
+ maBcp47 = rBcp47LanguageTag;
+ mbSystemLocale = rBcp47LanguageTag.isEmpty();
+ mbInitializedBcp47 = !mbSystemLocale;
+
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
+{
+ resetVars();
+ maLocale = rLocale;
+ mbSystemLocale = rLocale.Language.isEmpty();
+ mbInitializedLocale = !mbSystemLocale;
+}
+
+
+void LanguageTag::reset( LanguageType nLanguage )
+{
+ resetVars();
+ mnLangID = nLanguage;
+ mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
+ mbInitializedLangID = !mbSystemLocale;
+}
+
+
+void LanguageTag::reset( const rtl_Locale & rLocale )
+{
+ reset( lang::Locale( rLocale.Language, rLocale.Country, rLocale.Variant));
+ convertFromRtlLocale();
+}
+
+
+bool LanguageTag::canonicalize()
+{
+#ifdef erDEBUG
+ // dump once
+ struct dumper
+ {
+ void** mpp;
+ dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
+ ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
+ };
+ dumper aDumper( &mpImplLangtag);
+#endif
+
+ // Side effect: have maBcp47 in any case, resolved system.
+ // Some methods calling canonicalize() (or not calling it due to
+ // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
+ // meIsLiblangtagNeeded anywhere else than hereafter.
+ getBcp47( true );
+
+ // The simple cases and known locales don't need liblangtag processing,
+ // which also avoids loading liblangtag data on startup.
+ if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
+ {
+ bool bTemporaryLocale = false;
+ bool bTemporaryLangID = false;
+ if (!mbInitializedLocale && !mbInitializedLangID)
+ {
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ mbInitializedLangID = true;
+ }
+ else
+ {
+ // Now this is getting funny.. we only have some BCP47 string
+ // and want to determine if parsing it would be possible
+ // without using liblangtag just to see if it is a simple known
+ // locale.
+ OUString aLanguage, aScript, aCountry;
+ Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
+ if (eExt != EXTRACTED_NONE)
+ {
+ if (eExt == EXTRACTED_LSC && aScript.isEmpty())
+ {
+ maLocale.Language = aLanguage;
+ maLocale.Country = aCountry;
+ }
+ else
+ {
+ maLocale.Language = ISO639_LANGUAGE_TAG;
+ maLocale.Country = aCountry;
+ maLocale.Variant = maBcp47;
+ }
+ bTemporaryLocale = mbInitializedLocale = true;
+ }
+ }
+ }
+ if (mbInitializedLangID && !mbInitializedLocale)
+ {
+ // Do not call getLocale() here because that prefers
+ // convertBcp47ToLocale() which would end up in recursion via
+ // isIsoLocale()!
+
+ // Prepare to verify that we have a known locale, not just an
+ // arbitrary MS-LangID.
+ convertLangToLocale();
+ }
+ if (mbInitializedLocale)
+ {
+ if (maLocale.Variant.isEmpty())
+ meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
+ else
+ {
+ if (!mbInitializedLangID)
+ {
+ convertLocaleToLang();
+ if (bTemporaryLocale)
+ bTemporaryLangID = true;
+ }
+ if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
+ meIsLiblangtagNeeded = DECISION_NO; // known locale
+ }
+ }
+ if (bTemporaryLocale)
+ {
+ mbInitializedLocale = false;
+ maLocale = lang::Locale();
+ }
+ if (bTemporaryLangID)
+ {
+ mbInitializedLangID = false;
+ mnLangID = LANGUAGE_DONTKNOW;
+ }
+ }
+ if (meIsLiblangtagNeeded == DECISION_NO)
+ {
+ meIsValid = DECISION_YES; // really, known must be valid ...
+ return true; // that's it
+ }
+ meIsLiblangtagNeeded = DECISION_YES;
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);
+
+ if (!mpImplLangtag)
+ {
+ theDataRef::get().incRef();
+ mpImplLangtag = lt_tag_new();
+ }
+
+ myLtError aError;
+
+ if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
+ {
+ char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
+ SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47);
+ if (pTag)
+ {
+ OUString aOld( maBcp47);
+ maBcp47 = OUString::createFromAscii( pTag);
+ // Make the lt_tag_t follow the new string if different, which
+ // removes default script and such.
+ if (maBcp47 != aOld)
+ {
+ if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
+ {
+ SAL_WARN( "i18nlangtag", "LanguageTag::canonicalize: could not reparse " << maBcp47);
+ free( pTag);
+ meIsValid = DECISION_NO;
+ return false;
+ }
+ }
+ free( pTag);
+ meIsValid = DECISION_YES;
+ return true;
+ }
+ }
+ else
+ {
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: could not parse " << maBcp47);
+ }
+ meIsValid = DECISION_NO;
+ return false;
+}
+
+
+void LanguageTag::convertLocaleToBcp47()
+{
+ if (mbSystemLocale && !mbInitializedLocale)
+ convertLangToLocale();
+
+ if (maLocale.Language == ISO639_LANGUAGE_TAG)
+ {
+ maBcp47 = maLocale.Variant;
+ meIsIsoLocale = DECISION_NO;
+ }
+ else
+ {
+ /* XXX NOTE: most legacy code never evaluated the Variant field, so for
+ * now just concatenate language and country. In case we stumbled over
+ * variant aware code we'd have to take care of that. */
+ if (maLocale.Country.isEmpty())
+ maBcp47 = maLocale.Language;
+ else
+ {
+ OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength());
+ aBuf.append( maLocale.Language).append( '-').append( maLocale.Country);
+ maBcp47 = aBuf.makeStringAndClear();
+ }
+ }
+ mbInitializedBcp47 = true;
+}
+
+
+void LanguageTag::convertLocaleToLang()
+{
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ }
+ else
+ {
+ /* FIXME: this is temporary until code base is converted to not use
+ * MsLangId::convert...() anymore. After that, proper new method has to
+ * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */
+ mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
+ }
+ mbInitializedLangID = true;
+}
+
+
+void LanguageTag::convertBcp47ToLocale()
+{
+ bool bIso = isIsoLocale();
+ if (bIso)
+ {
+ maLocale.Language = getLanguageFromLangtag();
+ maLocale.Country = getRegionFromLangtag();
+ maLocale.Variant = OUString();
+ }
+ else
+ {
+ maLocale.Language = ISO639_LANGUAGE_TAG;
+ maLocale.Country = getCountry();
+ maLocale.Variant = maBcp47;
+ }
+ mbInitializedLocale = true;
+}
+
+
+void LanguageTag::convertBcp47ToLang()
+{
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ }
+ else
+ {
+ /* FIXME: this is temporary. If we support locales that consist not
+ * only of language and country, e.g. added script, this probably needs
+ * to be adapted. */
+ if (!mbInitializedLocale)
+ convertBcp47ToLocale();
+ convertLocaleToLang();
+ }
+ mbInitializedLangID = true;
+}
+
+
+void LanguageTag::convertLangToLocale()
+{
+ if (mbSystemLocale && !mbInitializedLangID)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ mbInitializedLangID = true;
+ }
+ /* FIXME: this is temporary until code base is converted to not use
+ * MsLangId::convert...() anymore. After that, proper new method has to be
+ * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
+ // Resolve system here!
+ maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
+ mbInitializedLocale = true;
+}
+
+
+void LanguageTag::convertLangToBcp47()
+{
+ /* FIXME: this is temporary. If we support locales that consist not only of
+ * language and country, e.g. added script, this probably needs to be
+ * adapted. */
+ if (!mbInitializedLocale)
+ convertLangToLocale();
+ convertLocaleToBcp47();
+ mbInitializedBcp47 = true;
+}
+
+
+void LanguageTag::convertFromRtlLocale()
+{
+ // The rtl_Locale follows the Open Group Base Specification,
+ // 8.2 Internationalization Variables
+ // language[_territory][.codeset][@modifier]
+ // On GNU/Linux systems usually being glibc locales.
+ // sal/osl/unx/nlsupport.c _parse_locale() parses them into
+ // Language: language 2 or 3 alpha code
+ // Country: [territory] 2 alpha code
+ // Variant: [.codeset][@modifier]
+ // Variant effectively contains anything that follows the territory, not
+ // looking for '.' dot delimiter or '@' modifier content.
+ if (!maLocale.Variant.isEmpty())
+ {
+ OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
+ RTL_TEXTENCODING_UTF8);
+ /* FIXME: let liblangtag parse this entirely with
+ * lt_tag_convert_from_locale() but that needs a patch to pass the
+ * string. */
+#if 0
+ myLtError aError;
+ theDataRef::get().incRef();
+ mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
+ maBcp47 = OStringToOUString( lt_tag_get_string( MPLANGTAG), RTL_TEXTENCODING_UTF8);
+ mbInitializedBcp47 = true;
+#else
+ mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
+ if (mnLangID == LANGUAGE_DONTKNOW)
+ {
+ SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
+ mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
+ }
+ mbInitializedLangID = true;
+#endif
+ maLocale = lang::Locale();
+ mbInitializedLocale = false;
+ }
+}
+
+
+const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return theEmptyBcp47::get();
+ if (!mbInitializedBcp47)
+ {
+ if (mbInitializedLocale)
+ const_cast<LanguageTag*>(this)->convertLocaleToBcp47();
+ else
+ const_cast<LanguageTag*>(this)->convertLangToBcp47();
+ }
+ return maBcp47;
+}
+
+
+OUString LanguageTag::getLanguageFromLangtag()
+{
+ OUString aLanguage;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aLanguage;
+ if (mpImplLangtag)
+ {
+ const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
+ SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+ if (!pLangT)
+ return aLanguage;
+ const char* pLang = lt_lang_get_tag( pLangT);
+ SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+ if (pLang)
+ aLanguage = OUString::createFromAscii( pLang);
+ }
+ else
+ {
+ if (mbCachedLanguage || cacheSimpleLSC())
+ aLanguage = maCachedLanguage;
+ }
+ return aLanguage;
+}
+
+
+OUString LanguageTag::getScriptFromLangtag()
+{
+ OUString aScript;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aScript;
+ if (mpImplLangtag)
+ {
+ const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
+ // pScriptT==NULL is valid for default scripts
+ if (!pScriptT)
+ return aScript;
+ const char* pScript = lt_script_get_tag( pScriptT);
+ SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+ if (pScript)
+ aScript = OUString::createFromAscii( pScript);
+ }
+ else
+ {
+ if (mbCachedScript || cacheSimpleLSC())
+ aScript = maCachedScript;
+ }
+ return aScript;
+}
+
+
+OUString LanguageTag::getRegionFromLangtag()
+{
+ OUString aRegion;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aRegion;
+ if (mpImplLangtag)
+ {
+ const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
+ // pRegionT==NULL is valid for language only tags, rough check here
+ // that does not take sophisticated tags into account that actually
+ // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
+ // that ll-CC and lll-CC actually fail.
+ SAL_WARN_IF( !pRegionT &&
+ maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
+ maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
+ "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+ if (!pRegionT)
+ return aRegion;
+ const char* pRegion = lt_region_get_tag( pRegionT);
+ SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+ if (pRegion)
+ aRegion = OUString::createFromAscii( pRegion);
+ }
+ else
+ {
+ if (mbCachedCountry || cacheSimpleLSC())
+ aRegion = maCachedCountry;
+ }
+ return aRegion;
+}
+
+
+const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return theEmptyLocale::get();
+ if (!mbInitializedLocale)
+ {
+ if (mbInitializedBcp47)
+ const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
+ else
+ const_cast<LanguageTag*>(this)->convertLangToLocale();
+ }
+ return maLocale;
+}
+
+
+LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return LANGUAGE_SYSTEM;
+ if (!mbInitializedLangID)
+ {
+ if (mbInitializedBcp47)
+ const_cast<LanguageTag*>(this)->convertBcp47ToLang();
+ else
+ const_cast<LanguageTag*>(this)->convertLocaleToLang();
+ }
+ return mnLangID;
+}
+
+
+void LanguageTag::getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const
+{
+ if (!isIsoLocale())
+ {
+ rLanguage = OUString();
+ rCountry = OUString();
+ return;
+ }
+ // After isIsoLocale() it's safe to call getLanguage() for ISO code.
+ rLanguage = getLanguage();
+ rCountry = getCountry();
+}
+
+
+namespace
+{
+
+bool isLowerAscii( sal_Unicode c )
+{
+ return 'a' <= c && c <= 'z';
+}
+
+bool isUpperAscii( sal_Unicode c )
+{
+ return 'A' <= c && c <= 'Z';
+}
+
+}
+
+
+// static
+bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ bool b2chars;
+ if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
+ isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
+ (b2chars || isLowerAscii( rLanguage[2])))
+ return true;
+ SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
+ (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
+ (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
+ "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
+ return false;
+}
+
+
+// static
+bool LanguageTag::isIsoCountry( const OUString& rRegion )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ if (rRegion.isEmpty() ||
+ (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
+ return true;
+ SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
+ "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
+ return false;
+}
+
+
+// static
+bool LanguageTag::isIsoScript( const OUString& rScript )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ if (rScript.isEmpty() ||
+ (rScript.getLength() == 4 &&
+ isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
+ isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
+ return true;
+ SAL_WARN_IF( rScript.getLength() == 4 &&
+ (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
+ isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
+ "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
+ return false;
+}
+
+
+OUString LanguageTag::getLanguage() const
+{
+ if (!mbCachedLanguage)
+ {
+ maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag();
+ mbCachedLanguage = true;
+ }
+ return maCachedLanguage;
+}
+
+
+OUString LanguageTag::getScript() const
+{
+ if (!mbCachedScript)
+ {
+ maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag();
+ mbCachedScript = true;
+ }
+ return maCachedScript;
+}
+
+
+OUString LanguageTag::getLanguageAndScript() const
+{
+ OUString aLanguageScript( getLanguage());
+ OUString aScript( getScript());
+ if (!aScript.isEmpty())
+ {
+ OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength());
+ aBuf.append( aLanguageScript).append( '-').append( aScript);
+ aLanguageScript = aBuf.makeStringAndClear();
+ }
+ return aLanguageScript;
+}
+
+
+OUString LanguageTag::getCountry() const
+{
+ if (!mbCachedCountry)
+ {
+ maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag();
+ if (!isIsoCountry( maCachedCountry))
+ maCachedCountry = OUString();
+ mbCachedCountry = true;
+ }
+ return maCachedCountry;
+}
+
+
+OUString LanguageTag::getRegion() const
+{
+ return const_cast<LanguageTag*>(this)->getRegionFromLangtag();
+}
+
+
+OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
+{
+ OUString aRet;
+ if (isIsoLocale())
+ {
+ OUString aCountry( getCountry());
+ if (aCountry.isEmpty())
+ aRet = getLanguage() + rEncoding;
+ else
+ aRet = getLanguage() + "_" + aCountry + rEncoding;
+ }
+ else
+ {
+ /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
+ * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
+ * So far no code was prepared for anything else than a simple
+ * language_country locale so we don't loose anything here right now.
+ * */
+ }
+ return aRet;
+}
+
+
+bool LanguageTag::hasScript() const
+{
+ if (!mbCachedScript)
+ getScript();
+ return !maCachedScript.isEmpty();
+}
+
+
+bool LanguageTag::cacheSimpleLSC()
+{
+ OUString aLanguage, aScript, aCountry;
+ bool bRet = (simpleExtract( maBcp47, aLanguage, aScript, aCountry) == EXTRACTED_LSC);
+ if (bRet)
+ {
+ maCachedLanguage = aLanguage;
+ maCachedScript = aScript;
+ maCachedCountry = aCountry;
+ mbCachedLanguage = mbCachedScript = mbCachedCountry = true;
+ }
+ return bRet;
+}
+
+
+bool LanguageTag::isIsoLocale() const
+{
+ if (meIsIsoLocale == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ // It must be at most ll-CC or lll-CC
+ // Do not use getCountry() here, use getRegion() instead.
+ meIsIsoLocale = ((maBcp47.isEmpty() ||
+ (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ?
+ DECISION_YES : DECISION_NO);
+ }
+ return meIsIsoLocale == DECISION_YES;
+}
+
+
+bool LanguageTag::isIsoODF() const
+{
+ if (meIsIsoODF == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ if (!isIsoScript( getScript()))
+ return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
+ // The usual case is lll-CC so simply check that first.
+ if (isIsoLocale())
+ return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
+ // If this is not ISO locale for which script must not exist it can
+ // still be ISO locale plus ISO script lll-Ssss-CC
+ meIsIsoODF = ((maBcp47.getLength() <= 11 &&
+ isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ?
+ DECISION_YES : DECISION_NO);
+ }
+ return meIsIsoODF == DECISION_YES;
+}
+
+
+bool LanguageTag::isValidBcp47() const
+{
+ if (meIsValid == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
+ "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
+ }
+ return meIsValid == DECISION_YES;
+}
+
+
+bool LanguageTag::isSystemLocale() const
+{
+ return mbSystemLocale;
+}
+
+
+LanguageTag & LanguageTag::makeFallback()
+{
+ if (!mbIsFallback)
+ {
+ if (mbInitializedLangID)
+ {
+ LanguageType nLang1 = getLanguageType();
+ LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1);
+ if (nLang1 != nLang2)
+ reset( nLang2);
+ }
+ else
+ {
+ const lang::Locale& rLocale1 = getLocale();
+ lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
+ if ( rLocale1.Language != aLocale2.Language ||
+ rLocale1.Country != aLocale2.Country ||
+ rLocale1.Variant != aLocale2.Variant)
+ reset( aLocale2);
+ }
+ mbIsFallback = true;
+ }
+ return *this;
+}
+
+
+::std::vector< OUString > LanguageTag::getFallbackStrings() const
+{
+ ::std::vector< OUString > aVec;
+ OUString aLanguage( getLanguage());
+ OUString aCountry( getCountry());
+ if (isIsoLocale())
+ {
+ if (!aCountry.isEmpty())
+ aVec.push_back( aLanguage + "-" + aCountry);
+ aVec.push_back( aLanguage);
+ return aVec;
+ }
+ aVec.push_back( getBcp47());
+ OUString aTmp;
+ if (hasScript())
+ {
+ OUString aScript( getScript());
+ if (!aCountry.isEmpty())
+ {
+ aTmp = aLanguage + "-" + aScript + "-" + aCountry;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ aTmp = aLanguage + "-" + aScript;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ if (!aCountry.isEmpty())
+ {
+ aTmp = aLanguage + "-" + aCountry;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ aTmp = aLanguage;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ return aVec;
+}
+
+
+bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
+{
+ // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
+ // can use the operator==() optimization.
+ if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
+ return operator==( rLanguageTag);
+
+ // Compare full language tag strings.
+ return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
+}
+
+
+bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
+{
+ if (isSystemLocale() && rLanguageTag.isSystemLocale())
+ return true; // both SYSTEM
+
+ // No need to convert to BCP47 if both Lang-IDs are available.
+ if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
+ {
+ // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
+ return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
+ }
+
+ // Compare full language tag strings but SYSTEM unresolved.
+ return getBcp47( false) == rLanguageTag.getBcp47( false);
+}
+
+
+bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
+{
+ return !operator==( rLanguageTag);
+}
+
+
+// static
+LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
+ OUString& rLanguage, OUString& rScript, OUString& rCountry )
+{
+ Extraction eRet = EXTRACTED_NONE;
+ const sal_Int32 nLen = rBcp47.getLength();
+ const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
+ if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
+ {
+ // It's f*d up but we need to recognize this.
+ eRet = EXTRACTED_X_JOKER;
+ }
+ else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
+ {
+ // x-... privateuse tags MUST be known to us by definition.
+ eRet = EXTRACTED_X;
+ }
+ else if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll
+ {
+ rLanguage = rBcp47;
+ rScript = rCountry = OUString();
+ eRet = EXTRACTED_LSC;
+ }
+ else if ( (nLen == 5 && nHyph1 == 2) // ll-CC
+ || (nLen == 6 && nHyph1 == 3)) // lll-CC
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1);
+ rCountry = rBcp47.copy( nHyph1 + 1, 2);
+ rScript = OUString();
+ eRet = EXTRACTED_LSC;
+ }
+ else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check
+ || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check
+ {
+ const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
+ if (nHyph2 == nHyph1 + 5)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1);
+ rScript = rBcp47.copy( nHyph1 + 1, 4);
+ rCountry = rBcp47.copy( nHyph2 + 1, 2);
+ eRet = EXTRACTED_LSC;
+ }
+ }
+ if (eRet == EXTRACTED_NONE)
+ rLanguage = rScript = rCountry = OUString();
+ return eRet;
+}
+
+
+// static
+::std::vector< OUString >::const_iterator LanguageTag::getFallback(
+ const ::std::vector< OUString > & rList, const OUString & rReference )
+{
+ if (rList.empty())
+ return rList.end();
+
+ ::std::vector< OUString >::const_iterator it;
+
+ // Try the simple case first without constructing fallbacks.
+ for (it = rList.begin(); it != rList.end(); ++it)
+ {
+ if (*it == rReference)
+ return it; // exact match
+ }
+
+ ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings());
+ aFallbacks.erase( aFallbacks.begin()); // first is full BCP47, we already checked that
+ if (rReference != "en-US")
+ {
+ aFallbacks.push_back( "en-US");
+ if (rReference != "en")
+ aFallbacks.push_back( "en");
+ }
+ if (rReference != "x-default")
+ aFallbacks.push_back( "x-default");
+ if (rReference != "x-no-translate")
+ aFallbacks.push_back( "x-no-translate");
+ /* TODO: the original comphelper::Locale::getFallback() code had
+ * "x-notranslate" instead of "x-no-translate", but all .xcu files use
+ * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
+ * Did that ever work? Was it supposed to work at all like this? */
+
+ for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
+ {
+ for (it = rList.begin(); it != rList.end(); ++it)
+ {
+ if (*it == *fb)
+ return it; // fallback found
+ }
+ }
+
+ // Did not find anything so return something of the list, the first value
+ // will do as well as any other as none did match any of the possible
+ // fallbacks.
+ return rList.begin();
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */