summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--RepositoryExternal.mk61
-rw-r--r--i18npool/Library_i18nisolang1.mk6
-rw-r--r--i18npool/inc/i18npool/languagetag.hxx168
-rw-r--r--i18npool/prj/build.lst2
-rw-r--r--i18npool/source/languagetag/languagetag.cxx760
5 files changed, 996 insertions, 1 deletions
diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index 26676a786d9a..5efe5eb9f67e 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -427,6 +427,67 @@ endef
endif # SYSTEM_LIBXSLT
+ifeq ($(SYSTEM_GLIB),YES)
+
+# System GLib variant: extends the include flags of link target $(1) with
+# $(GLIB_CFLAGS) and adds $(GLIB_LIBS) to the libraries it links against.
+define gb_LinkTarget__use_glib
+$(call gb_LinkTarget_set_include,$(1),\
+ $$(INCLUDE) \
+ $(GLIB_CFLAGS) \
+)
+
+$(call gb_LinkTarget_add_libs,$(1),$(GLIB_LIBS))
+
+endef
+
+else # !SYSTEM_GLIB
+
+# Register the internally built GLib under the same name that
+# gb_LinkTarget__use_glib passes to gb_LinkTarget_use_libraries (glib-2.0);
+# the registered name and the used name have to match.
+$(eval $(call gb_Helper_register_libraries,PLAINLIBS_OOO, \
+ glib-2.0 \
+))
+
+# Internal GLib variant: adds the delivered glib-2.0 headers under
+# $(OUTDIR)/inc/external to the include flags of link target $(1) and links
+# it against the glib-2.0 library.
+define gb_LinkTarget__use_glib
+$(call gb_LinkTarget_set_include,$(1),\
+ $$(INCLUDE) \
+ -I$(OUTDIR)/inc/external/glib-2.0 \
+)
+
+$(call gb_LinkTarget_use_libraries,$(1),\
+ glib-2.0 \
+)
+
+endef
+
+endif # SYSTEM_GLIB
+
+
+ifeq ($(SYSTEM_LIBLANGTAG),YES)
+
+# System liblangtag variant: extends the include flags of link target $(1)
+# with $(LIBLANGTAG_CFLAGS) and adds $(LIBLANGTAG_LIBS) to its libraries.
+define gb_LinkTarget__use_liblangtag
+$(call gb_LinkTarget_set_include,$(1),\
+ $$(INCLUDE) \
+ $(LIBLANGTAG_CFLAGS) \
+)
+
+$(call gb_LinkTarget_add_libs,$(1),$(LIBLANGTAG_LIBS))
+
+endef
+
+else # !SYSTEM_LIBLANGTAG
+
+# Register the internally built liblangtag under the name "langtag", which is
+# the name gb_LinkTarget__use_liblangtag hands to gb_LinkTarget_use_libraries.
+$(eval $(call gb_Helper_register_libraries,PLAINLIBS_OOO, \
+ langtag \
+))
+
+# Internal liblangtag variant: links target $(1) against the "langtag"
+# library. No include path is added here.
+define gb_LinkTarget__use_liblangtag
+$(call gb_LinkTarget_use_libraries,$(1),\
+ langtag \
+)
+
+endef
+
+endif # SYSTEM_LIBLANGTAG
+
+
ifeq ($(SYSTEM_NEON),YES)
define gb_LinkTarget__use_neon
diff --git a/i18npool/Library_i18nisolang1.mk b/i18npool/Library_i18nisolang1.mk
index c41f71e635b7..db0cdf244594 100644
--- a/i18npool/Library_i18nisolang1.mk
+++ b/i18npool/Library_i18nisolang1.mk
@@ -31,6 +31,7 @@ $(eval $(call gb_Library_Library,i18nisolang1))
$(eval $(call gb_Library_use_package,i18nisolang1,i18npool_inc))
$(eval $(call gb_Library_set_include,i18nisolang1,\
+ -I$(SRCDIR)/i18npool/inc \
$$(INCLUDE) \
))
@@ -49,6 +50,11 @@ $(eval $(call gb_Library_add_exception_objects,i18nisolang1,\
i18npool/source/isolang/insys \
i18npool/source/isolang/isolang \
i18npool/source/isolang/mslangid \
+ i18npool/source/languagetag/languagetag \
))
+$(eval $(call gb_Library_use_external,i18nisolang1,glib))
+
+$(eval $(call gb_Library_use_external,i18nisolang1,liblangtag))
+
# vim: set noet sw=4 ts=4:
diff --git a/i18npool/inc/i18npool/languagetag.hxx b/i18npool/inc/i18npool/languagetag.hxx
new file mode 100644
index 000000000000..1dcc213e1745
--- /dev/null
+++ b/i18npool/inc/i18npool/languagetag.hxx
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+#define INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+
+#include <sal/config.h>
+#include <rtl/ustring.hxx>
+#include <com/sun/star/lang/Locale.hpp>
+#include <i18npool/lang.h>
+
+
+/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
+ conversions in between.
+
+ Note that member variables are mutable and may change their values even in
+ const methods. Getter methods return either the original value or matching
+ converted values.
+ */
+class LanguageTag
+{
+public:
+
+    /** Init LanguageTag with existing BCP 47 language tag string.
+
+        @param rBcp47LanguageTag
+               The BCP 47 language tag string to wrap.
+
+        @param bCanonicalize
+               If TRUE, canonicalize tag and reparse, the resulting tag string may
+               be different.
+               If FALSE, the tag is simply stored and can be retrieved with
+               getBcp47().
+
+        Note that conversions to ISO codes, locales or LanguageType or
+        obtaining language or script will canonicalize the tag string anyway,
+        so specifying bCanonicalize=false is not a guarantee that the tag will
+        stay identical to what was passed.
+     */
+    explicit LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize = false );
+
+    /** Init LanguageTag with Locale. */
+    explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
+
+    /** Init LanguageTag with LanguageType MS-LangID. */
+    explicit LanguageTag( LanguageType nLanguage );
+
+    /** Init LanguageTag with language and country strings.
+
+        This is a convenience ctor for places that so far use only language and
+        country to replace the MsLangId::convert...IsoNames...() calls. Avoid
+        use in new code.
+     */
+    explicit LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry );
+
+    /** Copy ctor.
+
+        Deliberately not declared explicit: an explicit copy ctor would
+        disable copy-initialization, pass-by-value and return-by-value of
+        LanguageTag objects.
+     */
+    LanguageTag( const LanguageTag & rLanguageTag );
+    ~LanguageTag();
+    LanguageTag& operator=( const LanguageTag & rLanguageTag );
+
+    /** Obtain BCP 47 language tag. */
+    rtl::OUString getBcp47() const;
+
+    /** Obtain language tag as Locale.
+
+        As a convention, language tags that can not be expressed as "pure"
+        com::sun::star::lang::Locale content using Language and Country fields
+        store "qlt" (ISO 639 reserved for local use) in the Language field and
+        the entire BCP 47 language tag in the Variant field. The Country field
+        contains the corresponding ISO 3166 country code _if_ there is one, or
+        otherwise is empty.
+     */
+    com::sun::star::lang::Locale getLocale() const;
+
+    /** Obtain mapping to MS-LangID. */
+    LanguageType getLanguageType() const;
+
+    /** Get ISO 639 language code, or BCP 47 language. */
+    rtl::OUString getLanguage() const;
+
+    /** Get ISO 15924 script code, if not the default script according to
+        BCP 47. For default script an empty string is returned.
+     */
+    rtl::OUString getScript() const;
+
+    /** Get combined language and script code, separated by '-' if
+        non-default script, if default script only language.
+     */
+    rtl::OUString getLanguageAndScript() const;
+
+    /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
+        region not expressible as 2 character country code.
+     */
+    rtl::OUString getCountry() const;
+
+    /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
+        any other BCP 47 region tag.
+     */
+    rtl::OUString getRegion() const;
+
+    /** If language tag is a locale that can be expressed using only ISO 639
+        language codes and ISO 3166 country codes, thus is convertible to a
+        conforming Locale struct without using extension mechanisms. Note that
+        an empty language tag or empty Locale::Language field or LanguageType
+        LANGUAGE_SYSTEM is treated as a valid ISO locale.
+     */
+    bool isIsoLocale() const;
+
+    /** If language tag is a locale that can be expressed using only ISO 639
+        language codes and ISO 15924 script codes and ISO 3166 country codes,
+        thus can be stored in an ODF document using only fo:language, fo:script
+        and fo:country attributes. If this is FALSE, the locale must be stored
+        as a <*:rfc-language-tag> element.
+     */
+    bool isIsoODF() const;
+
+    /** If this is a valid BCP 47 language tag. */
+    bool isValidBcp47() const;
+
+private:
+
+    /** Tri-state used to cache the result of lazily evaluated predicates. */
+    enum Decision
+    {
+        DECISION_DONTKNOW,
+        DECISION_NO,
+        DECISION_YES
+    };
+
+    // All data members are mutable because const getters convert and cache
+    // on demand.
+    mutable com::sun::star::lang::Locale    maLocale;
+    mutable rtl::OUString                   maBcp47;
+    mutable rtl::OUString                   maCachedLanguage;   ///< cache getLanguage()
+    mutable rtl::OUString                   maCachedScript;     ///< cache getScript()
+    mutable rtl::OUString                   maCachedCountry;    ///< cache getCountry()
+    mutable void*                           mpImplLangtag;      ///< actually lt_tag_t pointer, encapsulated
+    mutable LanguageType                    mnLangID;
+    mutable Decision                        meIsValid;          ///< cache isValidBcp47()
+    mutable Decision                        meIsIsoLocale;      ///< cache isIsoLocale()
+    mutable Decision                        meIsIsoODF;         ///< cache isIsoODF()
+    mutable bool                            mbInitializedBcp47  : 1;    ///< maBcp47 holds a value
+    mutable bool                            mbInitializedLocale : 1;    ///< maLocale holds a value
+    mutable bool                            mbInitializedLangID : 1;    ///< mnLangID holds a value
+    mutable bool                            mbCachedLanguage    : 1;    ///< maCachedLanguage is filled
+    mutable bool                            mbCachedScript      : 1;    ///< maCachedScript is filled
+    mutable bool                            mbCachedCountry     : 1;    ///< maCachedCountry is filled
+
+    // Conversions between the three representations; each sets the
+    // mbInitialized... flag of its target representation.
+    void    convertLocaleToBcp47();
+    void    convertLocaleToLang();
+    void    convertBcp47ToLocale();
+    void    convertBcp47ToLang();
+    void    convertLangToLocale();
+    void    convertLangToBcp47();
+
+    /** Parse and canonicalize maBcp47 using liblangtag, sets meIsValid.
+
+        @return TRUE if the tag is empty or could be parsed and canonicalized.
+     */
+    bool    canonicalize() const;
+
+    // Uncached accessors reading directly from the liblangtag tag.
+    rtl::OUString   getLanguageFromLangtag() const;
+    rtl::OUString   getScriptFromLangtag() const;
+    rtl::OUString   getRegionFromLangtag() const;
+
+    // Purely syntactical checks on length and ASCII letter case.
+    static bool     isIsoLanguage( const rtl::OUString& rLanguage );
+    static bool     isIsoScript( const rtl::OUString& rScript );
+    static bool     isIsoCountry( const rtl::OUString& rRegion );
+};
+
+#endif // INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/prj/build.lst b/i18npool/prj/build.lst
index 562549343643..831d29f5e168 100644
--- a/i18npool/prj/build.lst
+++ b/i18npool/prj/build.lst
@@ -1,2 +1,2 @@
-inp i18npool : bridges sax stoc comphelper CPPUNIT:cppunit ICU:icu i18nutil regexp cpputools LIBXSLT:libxslt udkapi offapi ure unotest NULL
+inp i18npool : bridges sax stoc comphelper CPPUNIT:cppunit ICU:icu i18nutil regexp cpputools LIBXSLT:libxslt LIBXML2:libxml2 LIBLANGTAG:liblangtag udkapi offapi ure unotest NULL
inp i18npool\prj nmake - all inp_prj NULL
diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx
new file mode 100644
index 000000000000..d5cfacdc3e02
--- /dev/null
+++ b/i18npool/source/languagetag/languagetag.cxx
@@ -0,0 +1,760 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "i18npool/languagetag.hxx"
+#include "i18npool/mslangid.hxx"
+#include <rtl/ustrbuf.hxx>
+#include <rtl/bootstrap.hxx>
+#include <osl/file.hxx>
+#include <liblangtag/langtag.h>
+
+//#define erDEBUG
+
+using rtl::OUString;
+using rtl::OString;
+using rtl::OUStringBuffer;
+using namespace com::sun::star;
+
+// The actual pointer type of mpImplLangtag that is declared void* to not
+// pollute the entire code base with liblangtag.
+#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
+#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
+
+/** Convention to signal presence of BCP 47 language tag in a Locale's Variant
+ field. The Locale's Language field then will contain this ISO 639-2
+ reserved for local use code. */
+#define ISO639_LANGUAGE_TAG "qlt"
+
+
+/** A reference holder for liblangtag data de/initialization, one static
+ instance. Currently implemented such that the first "ref" inits and dtor
+ (our library deinitialized) tears down.
+*/
+class LiblantagDataRef
+{
+public:
+    LiblantagDataRef();
+    ~LiblantagDataRef();
+
+    /** Take a reference; the transition from zero triggers setup(). A count
+        of SAL_MAX_UINT32 is a sentinel and is never modified here. */
+    void incRef()
+    {
+        if (mnRef == SAL_MAX_UINT32)
+            return;
+        const bool bWasUnreferenced = (mnRef == 0);
+        ++mnRef;
+        if (bWasUnreferenced)
+            setup();
+    }
+
+    /** Release a reference; the transition to zero triggers teardown(). A
+        count of zero or of SAL_MAX_UINT32 is left untouched. */
+    void decRef()
+    {
+        if (mnRef == SAL_MAX_UINT32 || mnRef == 0)
+            return;
+        --mnRef;
+        if (mnRef == 0)
+            teardown();
+    }
+
+private:
+    rtl::OString maDataPath;    // path to liblangtag data, "|" if system
+    sal_uInt32   mnRef;         // reference count, SAL_MAX_UINT32 if pinned
+
+    void setupDataPath();
+    void setup();
+    void teardown();
+};
+
+// The one file-local instance: every LanguageTag constructor calls incRef()
+// on it and the LanguageTag destructor calls decRef().
+static LiblantagDataRef theDataRef;
+
+// Starts unreferenced; the liblangtag database is initialized lazily by the
+// first incRef().
+LiblantagDataRef::LiblantagDataRef() : mnRef(0) {}
+
+LiblantagDataRef::~LiblantagDataRef()
+{
+    // Destruction tears down unconditionally: collapse whatever count is held
+    // (including the SAL_MAX_UINT32 sentinel) to a single reference, so the
+    // decRef() below reaches zero. Nothing happens if setup() never ran.
+    if (mnRef != 0)
+    {
+        mnRef = 1;
+    }
+    decRef();
+}
+
+void LiblantagDataRef::setup()
+{
+    // Determine the data directory once, before the database is initialized.
+    const bool bNeedDataPath = maDataPath.isEmpty();
+    if (bNeedDataPath)
+    {
+        setupDataPath();
+    }
+    lt_db_initialize();
+
+    // Pin the count to the sentinel so incRef()/decRef() become no-ops and
+    // the database stays initialized until this object is destructed.
+    mnRef = SAL_MAX_UINT32;
+}
+
+// Counterpart of setup(): finalizes the liblangtag database.
+void LiblantagDataRef::teardown()
+{
+    lt_db_finalize();
+}
+
+void LiblantagDataRef::setupDataPath()
+{
+    // Precondition: maDataPath is still empty.
+    OUString aDataDir;
+    if (rtl::Bootstrap::get( "BRAND_BASE_DIR", aDataDir))
+    {
+        // Use the data shipped with our own installation if the registry file
+        // exists there, otherwise fall back to the system installation.
+        aDataDir += "/share/liblangtag";
+        OUString aRegistryFile( aDataDir);
+        aRegistryFile += "/language-subtag-registry.xml";
+        osl::DirectoryItem aItem;
+        if (osl::DirectoryItem::get( aRegistryFile, aItem) == osl::DirectoryItem::E_None)
+            maDataPath = OUStringToOString( aDataDir, RTL_TEXTENCODING_UTF8);
+    }
+    else
+    {
+        OSL_FAIL( "LiblantagDataRef: can't get BRAND_BASE_DIR");
+    }
+
+    if (maDataPath.isEmpty())
+    {
+        // "|" marks "use system data"; it also keeps setup() from calling
+        // this method again.
+        maDataPath = "|";
+        return;
+    }
+    lt_db_set_datadir( maDataPath.getStr());
+}
+
+
+/** Construct from a BCP 47 language tag string.
+
+ Only the BCP 47 representation is marked initialized; Locale and LangID are
+ derived on demand. No lt_tag_t is allocated unless bCanonicalize is set, in
+ which case canonicalize() is run immediately.
+ */
+LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
+ :
+ maBcp47( rBcp47LanguageTag),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ mbInitializedBcp47( true),
+ mbInitializedLocale( false),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false)
+{
+ // Makes sure the liblangtag data is set up before any lt_*() call.
+ theDataRef.incRef();
+
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+/** Construct from a Locale.
+
+ Only the Locale representation is marked initialized; the BCP 47 string and
+ the LangID are derived on demand.
+ */
+LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
+ :
+ maLocale( rLocale),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( true),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false)
+{
+ // Makes sure the liblangtag data is set up before any lt_*() call.
+ theDataRef.incRef();
+}
+
+
+/** Construct from an MS-LangID.
+
+ Only the LangID representation is marked initialized; maLocale and maBcp47
+ are default-constructed and derived on demand.
+ */
+LanguageTag::LanguageTag( LanguageType nLanguage )
+ :
+ mpImplLangtag( NULL),
+ mnLangID( nLanguage),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( false),
+ mbInitializedLangID( true),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false)
+{
+ // Makes sure the liblangtag data is set up before any lt_*() call.
+ theDataRef.incRef();
+}
+
+
+/** Construct from separate language and country strings.
+
+ Builds a Locale with an empty Variant from the two strings and marks only
+ the Locale representation as initialized.
+ */
+LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry )
+ :
+ maLocale( rLanguage, rCountry, ""),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( true),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false)
+{
+ // Makes sure the liblangtag data is set up before any lt_*() call.
+ theDataRef.incRef();
+}
+
+
+/** Copy constructor.
+
+ Copies all representations, caches and flags of rLanguageTag. If the source
+ holds an lt_tag_t it is duplicated with lt_tag_copy() so that each object
+ passes its own tag to lt_tag_unref() in the destructor.
+ */
+LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
+ :
+ maLocale( rLanguageTag.maLocale),
+ maBcp47( rLanguageTag.maBcp47),
+ maCachedLanguage( rLanguageTag.maCachedLanguage),
+ maCachedScript( rLanguageTag.maCachedScript),
+ maCachedCountry( rLanguageTag.maCachedCountry),
+ mpImplLangtag( rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
+ mnLangID( rLanguageTag.mnLangID),
+ meIsValid( rLanguageTag.meIsValid),
+ meIsIsoLocale( rLanguageTag.meIsIsoLocale),
+ meIsIsoODF( rLanguageTag.meIsIsoODF),
+ mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
+ mbInitializedLocale( rLanguageTag.mbInitializedLocale),
+ mbInitializedLangID( rLanguageTag.mbInitializedLangID),
+ mbCachedLanguage( rLanguageTag.mbCachedLanguage),
+ mbCachedScript( rLanguageTag.mbCachedScript),
+ mbCachedCountry( rLanguageTag.mbCachedCountry)
+{
+ // One reference per constructed object, released in the destructor.
+ theDataRef.incRef();
+}
+
+
+/** Copy assignment.
+
+    Takes over all representations, caches and flags of rLanguageTag. The
+    lt_tag_t of the source, if any, is duplicated; the lt_tag_t previously
+    held by this object is released.
+
+    @return reference to this object.
+ */
+LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
+{
+    // Self-assignment is a no-op; it must not reach the release below.
+    if (this == &rLanguageTag)
+        return *this;
+
+    // No theDataRef.incRef() here: both objects took their reference when
+    // they were constructed and each destructor releases exactly one.
+
+    // Duplicate the source's tag first, then drop the one held so far so it
+    // is not leaked when the pointer is overwritten.
+    void* pNewLangtag = rLanguageTag.mpImplLangtag ?
+        lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
+    if (mpImplLangtag)
+        lt_tag_unref( MPLANGTAG);
+    mpImplLangtag       = pNewLangtag;
+
+    maLocale            = rLanguageTag.maLocale;
+    maBcp47             = rLanguageTag.maBcp47;
+    maCachedLanguage    = rLanguageTag.maCachedLanguage;
+    maCachedScript      = rLanguageTag.maCachedScript;
+    maCachedCountry     = rLanguageTag.maCachedCountry;
+    mnLangID            = rLanguageTag.mnLangID;
+    meIsValid           = rLanguageTag.meIsValid;
+    meIsIsoLocale       = rLanguageTag.meIsIsoLocale;
+    meIsIsoODF          = rLanguageTag.meIsIsoODF;
+    mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
+    mbInitializedLocale = rLanguageTag.mbInitializedLocale;
+    mbInitializedLangID = rLanguageTag.mbInitializedLangID;
+    mbCachedLanguage    = rLanguageTag.mbCachedLanguage;
+    mbCachedScript      = rLanguageTag.mbCachedScript;
+    mbCachedCountry     = rLanguageTag.mbCachedCountry;
+    return *this;
+}
+
+
+LanguageTag::~LanguageTag()
+{
+    // Release our lt_tag_t (the pointer may still be NULL if the tag was
+    // never parsed), then give back the data reference taken by the ctor.
+    lt_tag_unref( LANGTAGCAST( mpImplLangtag));
+    theDataRef.decRef();
+}
+
+
+/** Parse maBcp47 with liblangtag and replace it with its canonical form.
+
+ Makes sure maBcp47 is available (converting from Locale or LangID if
+ needed), allocates mpImplLangtag on first use for a non-empty tag, and
+ records the outcome in meIsValid.
+
+ @return true if the tag is empty (system locale) or was parsed and
+ canonicalized; false if parsing, canonicalizing or reparsing failed.
+ */
+bool LanguageTag::canonicalize() const
+{
+#ifdef erDEBUG
+ // dump once
+ struct dumper
+ {
+ void** mpp;
+ dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
+ ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
+ };
+ dumper aDumper( &mpImplLangtag);
+#endif
+
+ // g_error_free() complains about NULL, so free the GError on scope exit
+ // only if one was actually set.
+ struct myerror
+ {
+ GError* p;
+ myerror() : p(NULL) {}
+ ~myerror() { if (p) g_error_free( p); }
+ } aError;
+
+ getBcp47(); // side effect: have maBcp47 in any case
+ // Checking empty for system locale before having allocated mpImplLangtag
+ // may result in multiple calls of this method because that serves as flag
+ // whether this was canonicalized, but that's better than allocating
+ // lt_tag_t for all those system locales.
+ if (maBcp47.isEmpty())
+ {
+ meIsValid = DECISION_YES;
+ return true;
+ }
+ if (!mpImplLangtag)
+ mpImplLangtag = lt_tag_new();
+ if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
+ {
+ // pTag is released with g_free() on every path below that obtained it.
+ gchar* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
+ SAL_WARN_IF( !pTag || aError.p, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize, " <<
+ (aError.p ? aError.p->message : ""));
+ if (pTag)
+ {
+ OUString aOld( maBcp47);
+ maBcp47 = OUString::createFromAscii( pTag);
+ // Make the lt_tag_t follow the new string if different, which
+ // removes default script and such.
+ if (maBcp47 != aOld)
+ {
+ if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
+ {
+ SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse, " <<
+ (aError.p ? aError.p->message : ""));
+ g_free( pTag);
+ meIsValid = DECISION_NO;
+ return false;
+ }
+ }
+ g_free( pTag);
+ meIsValid = DECISION_YES;
+ return true;
+ }
+ // Parsed but no canonical form obtained: falls through to the
+ // failure exit at the end.
+ }
+ else
+ {
+ SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not parse, " <<
+ (aError.p ? aError.p->message : ""));
+ }
+ // Common failure exit. mpImplLangtag stays allocated, so callers testing
+ // !mpImplLangtag will not retry.
+ meIsValid = DECISION_NO;
+ return false;
+}
+
+
+/** Derive maBcp47 from maLocale and mark the BCP 47 string as initialized. */
+void LanguageTag::convertLocaleToBcp47()
+{
+    if (maLocale.Language.isEmpty())
+    {
+        // System locale: represented by an empty tag.
+        maBcp47 = OUString();
+        meIsIsoLocale = DECISION_YES;
+    }
+    else if (maLocale.Language == ISO639_LANGUAGE_TAG)
+    {
+        // By convention the Variant field carries the complete tag.
+        maBcp47 = maLocale.Variant;
+        meIsIsoLocale = DECISION_NO;
+    }
+    else if (maLocale.Country.isEmpty())
+    {
+        // Language only.
+        maBcp47 = maLocale.Language;
+    }
+    else
+    {
+        /* XXX NOTE: most legacy code never evaluated the Variant field, so
+         * only language and country are joined here. Variant aware callers
+         * would need extra handling. */
+        const sal_Int32 nCapacity = maLocale.Language.getLength() + 1 + maLocale.Country.getLength();
+        OUStringBuffer aTag( nCapacity);
+        aTag.append( maLocale.Language);
+        aTag.append( '-');
+        aTag.append( maLocale.Country);
+        maBcp47 = aTag.makeStringAndClear();
+    }
+    mbInitializedBcp47 = true;
+}
+
+
+/** Derive mnLangID from maLocale and mark the LangID as initialized. */
+void LanguageTag::convertLocaleToLang()
+{
+    /* FIXME: temporary, until the code base no longer relies on
+     * MsLangId::convert...(). Afterwards a proper implementation is needed
+     * that copes with ISO639_LANGUAGE_TAG, script tags and the like. */
+    const LanguageType nConverted = MsLangId::convertLocaleToLanguage( maLocale);
+    mnLangID = nConverted;
+    mbInitializedLangID = true;
+}
+
+
+/** Derive maLocale from maBcp47 and mark the Locale as initialized. */
+void LanguageTag::convertBcp47ToLocale()
+{
+    if (maBcp47.isEmpty())
+    {
+        // System locale: all Locale fields empty.
+        maLocale = lang::Locale();
+        meIsIsoLocale = DECISION_YES;
+    }
+    else if (isIsoLocale())
+    {
+        // Plain language / country pair, no Variant needed.
+        maLocale.Language = getLanguageFromLangtag();
+        maLocale.Country = getRegionFromLangtag();
+        maLocale.Variant = OUString();
+    }
+    else
+    {
+        // Not expressible with language and country alone: flag with the
+        // reserved language code and carry the full tag in Variant.
+        maLocale.Language = ISO639_LANGUAGE_TAG;
+        maLocale.Country = getCountry();
+        maLocale.Variant = maBcp47;
+    }
+    mbInitializedLocale = true;
+}
+
+
+/** Derive mnLangID from maBcp47 by way of the Locale representation. */
+void LanguageTag::convertBcp47ToLang()
+{
+    /* FIXME: temporary. Needs adapting once locales with more than language
+     * and country, e.g. an added script, are supported. */
+    if (!mbInitializedLocale)
+    {
+        convertBcp47ToLocale();
+    }
+    convertLocaleToLang();
+    mbInitializedLangID = true;
+}
+
+
+/** Derive maLocale from mnLangID and mark the Locale as initialized. */
+void LanguageTag::convertLangToLocale()
+{
+    /* FIXME: temporary, until the code base no longer relies on
+     * MsLangId::convert...(). Afterwards a proper implementation is needed
+     * that copes with ISO639_LANGUAGE_TAG, script tags and the like. */
+    // The system LangID must not be resolved here.
+    const bool bResolveSystem = false;
+    maLocale = MsLangId::convertLanguageToLocale( mnLangID, bResolveSystem);
+    mbInitializedLocale = true;
+}
+
+
+/** Derive maBcp47 from mnLangID by way of the Locale representation. */
+void LanguageTag::convertLangToBcp47()
+{
+    /* FIXME: temporary. Needs adapting once locales with more than language
+     * and country, e.g. an added script, are supported. */
+    if (!mbInitializedLocale)
+    {
+        convertLangToLocale();
+    }
+    convertLocaleToBcp47();
+    mbInitializedBcp47 = true;
+}
+
+
+rtl::OUString LanguageTag::getBcp47() const
+{
+    if (mbInitializedBcp47)
+        return maBcp47;
+
+    // Lazily derive the tag, preferring the Locale over the LangID.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedLocale)
+        pThis->convertLocaleToBcp47();
+    else
+        pThis->convertLangToBcp47();
+    return maBcp47;
+}
+
+
+/** Read the language subtag from the liblangtag tag, empty if unavailable. */
+rtl::OUString LanguageTag::getLanguageFromLangtag() const
+{
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return rtl::OUString();
+
+    const lt_lang_t* pLanguage = lt_tag_get_language( MPLANGTAG);
+    SAL_WARN_IF( !pLanguage, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+    if (!pLanguage)
+        return rtl::OUString();
+
+    const gchar* pSubtag = lt_lang_get_tag( pLanguage);
+    SAL_WARN_IF( !pSubtag, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+    if (!pSubtag)
+        return rtl::OUString();
+    return OUString::createFromAscii( pSubtag);
+}
+
+
+/** Read the script subtag from the liblangtag tag, empty if unavailable. */
+rtl::OUString LanguageTag::getScriptFromLangtag() const
+{
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return rtl::OUString();
+
+    // A missing script is legitimate (default script), hence no warning.
+    const lt_script_t* pScriptSubtag = lt_tag_get_script( MPLANGTAG);
+    if (!pScriptSubtag)
+        return rtl::OUString();
+
+    const gchar* pSubtag = lt_script_get_tag( pScriptSubtag);
+    SAL_WARN_IF( !pSubtag, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+    if (!pSubtag)
+        return rtl::OUString();
+    return OUString::createFromAscii( pSubtag);
+}
+
+
+/** Read the region subtag from the liblangtag tag, empty if unavailable. */
+rtl::OUString LanguageTag::getRegionFromLangtag() const
+{
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return rtl::OUString();
+
+    const lt_region_t* pRegionSubtag = lt_tag_get_region( MPLANGTAG);
+    SAL_WARN_IF( !pRegionSubtag, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+    if (!pRegionSubtag)
+        return rtl::OUString();
+
+    const gchar* pSubtag = lt_region_get_tag( pRegionSubtag);
+    SAL_WARN_IF( !pSubtag, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+    if (!pSubtag)
+        return rtl::OUString();
+    return OUString::createFromAscii( pSubtag);
+}
+
+
+com::sun::star::lang::Locale LanguageTag::getLocale() const
+{
+    if (mbInitializedLocale)
+        return maLocale;
+
+    // Lazily derive the Locale, preferring the BCP 47 tag over the LangID.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedBcp47)
+        pThis->convertBcp47ToLocale();
+    else
+        pThis->convertLangToLocale();
+    return maLocale;
+}
+
+
+LanguageType LanguageTag::getLanguageType() const
+{
+    if (mbInitializedLangID)
+        return mnLangID;
+
+    // Lazily derive the LangID, preferring the BCP 47 tag over the Locale.
+    LanguageTag* pThis = const_cast<LanguageTag*>(this);
+    if (mbInitializedBcp47)
+        pThis->convertBcp47ToLang();
+    else
+        pThis->convertLocaleToLang();
+    return mnLangID;
+}
+
+
+namespace
+{
+
+/// True if c is one of the ASCII letters 'a'..'z'.
+bool isLowerAscii( sal_Unicode c )
+{
+    return c >= 'a' && c <= 'z';
+}
+
+/// True if c is one of the ASCII letters 'A'..'Z'.
+bool isUpperAscii( sal_Unicode c )
+{
+    return c >= 'A' && c <= 'Z';
+}
+
+}
+
+
+// static
+/** True if rLanguage consists of exactly two or three lower case ASCII
+    letters. Upper case input of that length is rejected with a warning. */
+bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    const sal_Int32 nLen = rLanguage.getLength();
+    const bool bLenFits = (nLen == 2 || nLen == 3);
+    if (bLenFits &&
+            isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
+            (nLen == 2 || isLowerAscii( rLanguage[2])))
+        return true;
+    SAL_WARN_IF( (bLenFits && (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
+            (nLen == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag",
+            "LanguageTag::isIsoLanguage: rejecting upper case");
+    return false;
+}
+
+
+// static
+/** True if rRegion is empty or consists of exactly two upper case ASCII
+    letters. Two-letter input containing lower case is rejected with a
+    warning.
+
+    An empty region is accepted, in line with isIsoScript(): a tag without
+    region such as "de" has to pass the isIsoLocale() and isIsoODF() checks
+    instead of being treated as not expressible by language and country.
+ */
+bool LanguageTag::isIsoCountry( const rtl::OUString& rRegion )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    if (rRegion.isEmpty() ||
+            (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
+        return true;
+    SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
+            "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case");
+    return false;
+}
+
+
+// static
+/** True if rScript is empty or has the form "Xxxx": one upper case ASCII
+    letter followed by three lower case ones. Four-letter input with other
+    casing is rejected with a warning. */
+bool LanguageTag::isIsoScript( const rtl::OUString& rScript )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    if (rScript.isEmpty())
+        return true;
+    const bool bFourChars = (rScript.getLength() == 4);
+    if (bFourChars &&
+            isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
+            isLowerAscii( rScript[2]) && isLowerAscii( rScript[3]))
+        return true;
+    SAL_WARN_IF( bFourChars &&
+            (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
+             isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
+            "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch");
+    return false;
+}
+
+
+rtl::OUString LanguageTag::getLanguage() const
+{
+    // Serve from the cache once it has been filled.
+    if (mbCachedLanguage)
+        return maCachedLanguage;
+
+    maCachedLanguage = getLanguageFromLangtag();
+    mbCachedLanguage = true;
+    return maCachedLanguage;
+}
+
+
+rtl::OUString LanguageTag::getScript() const
+{
+    // Serve from the cache once it has been filled.
+    if (mbCachedScript)
+        return maCachedScript;
+
+    maCachedScript = getScriptFromLangtag();
+    mbCachedScript = true;
+    return maCachedScript;
+}
+
+
+rtl::OUString LanguageTag::getLanguageAndScript() const
+{
+    const OUString aLang( getLanguage());
+    const OUString aScript( getScript());
+
+    // Default script: the language alone is the result.
+    if (aScript.isEmpty())
+        return aLang;
+
+    // Otherwise join as "language-Script".
+    OUStringBuffer aJoined( aLang.getLength() + 1 + aScript.getLength());
+    aJoined.append( aLang);
+    aJoined.append( '-');
+    aJoined.append( aScript);
+    return aJoined.makeStringAndClear();
+}
+
+
+rtl::OUString LanguageTag::getCountry() const
+{
+    if (mbCachedCountry)
+        return maCachedCountry;
+
+    // Only a region that passes the ISO country check is reported; anything
+    // else yields an empty country.
+    maCachedCountry = getRegionFromLangtag();
+    const bool bIsCountry = isIsoCountry( maCachedCountry);
+    if (!bIsCountry)
+    {
+        maCachedCountry = OUString();
+    }
+    mbCachedCountry = true;
+    return maCachedCountry;
+}
+
+
+rtl::OUString LanguageTag::getRegion() const
+{
+    // Not cached, unlike getCountry(): read straight from the langtag.
+    const rtl::OUString aRegion( getRegionFromLangtag());
+    return aRegion;
+}
+
+
+bool LanguageTag::isIsoLocale() const
+{
+    if (meIsIsoLocale == DECISION_DONTKNOW)
+    {
+        if (!mpImplLangtag)
+            canonicalize();
+        // An empty tag (system locale) qualifies right away. Otherwise the
+        // tag must be at most ll-CC or lll-CC.
+        bool bIso = maBcp47.isEmpty();
+        if (!bIso && maBcp47.getLength() <= 6)
+        {
+            // getRegion() on purpose, not getCountry().
+            bIso = isIsoLanguage( getLanguage()) && isIsoCountry( getRegion());
+        }
+        meIsIsoLocale = bIso ? DECISION_YES : DECISION_NO;
+    }
+    return meIsIsoLocale == DECISION_YES;
+}
+
+
+bool LanguageTag::isIsoODF() const
+{
+    // Already decided earlier.
+    if (meIsIsoODF != DECISION_DONTKNOW)
+        return meIsIsoODF == DECISION_YES;
+
+    if (!mpImplLangtag)
+        canonicalize();
+
+    // A script that is not in ISO form rules out ODF attributes.
+    if (!isIsoScript( getScript()))
+    {
+        meIsIsoODF = DECISION_NO;
+        return false;
+    }
+
+    // The usual case is lll-CC, so check that first.
+    if (isIsoLocale())
+    {
+        meIsIsoODF = DECISION_YES;
+        return true;
+    }
+
+    // Not an ISO locale, for which a script must not exist; it can still be
+    // ISO locale plus ISO script, lll-Ssss-CC.
+    const bool bWithScript = maBcp47.getLength() <= 11 &&
+        isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript());
+    meIsIsoODF = bWithScript ? DECISION_YES : DECISION_NO;
+    return bWithScript;
+}
+
+
+bool LanguageTag::isValidBcp47() const
+{
+    // Already decided earlier.
+    if (meIsValid != DECISION_DONTKNOW)
+        return meIsValid == DECISION_YES;
+
+    // canonicalize() is expected to settle meIsValid; warn if it did not.
+    if (!mpImplLangtag)
+        canonicalize();
+    SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag",
+            "LanguageTag::isValidBcp47: canonicalize() doesn't set meIsValid");
+    return meIsValid == DECISION_YES;
+}
+
+
+#ifdef erDEBUG
+/** Debug-only helper, compiled only if erDEBUG is defined.
+
+ Constructs LanguageTag objects from a canonicalized tag, an empty Locale,
+ LANGUAGE_SYSTEM, an empty tag string and the string "wrong-and-bad", and
+ calls the public getters and predicates on each. All return values are
+ discarded; the calls are there to be stepped through or to trigger the
+ SAL_WARN output.
+ */
+void dbg_languagetag()
+{
+ // Tag string, canonicalized on construction.
+ LanguageTag de_DE( "de-Latn-DE", true);
+ de_DE.getBcp47();
+ de_DE.getLocale();
+ de_DE.getLanguageType();
+ de_DE.getLanguage();
+ de_DE.getLanguageAndScript();
+ de_DE.getScript();
+ de_DE.getCountry();
+ de_DE.getRegion();
+ de_DE.isIsoLocale();
+ de_DE.isIsoODF();
+
+ // System locale given as empty Locale.
+ LanguageTag SystemLocale( lang::Locale("","",""));
+ SystemLocale.getBcp47();
+ SystemLocale.getLocale();
+ SystemLocale.getLanguageType();
+ SystemLocale.getLanguage();
+ SystemLocale.getLanguageAndScript();
+ SystemLocale.getScript();
+ SystemLocale.getCountry();
+ SystemLocale.getRegion();
+ SystemLocale.isIsoLocale();
+ SystemLocale.isIsoODF();
+ SystemLocale.isValidBcp47();
+
+ // System locale given as LangID.
+ LanguageTag SystemLang( LANGUAGE_SYSTEM);
+ SystemLang.getBcp47();
+ SystemLang.getLocale();
+ SystemLang.getLanguageType();
+ SystemLang.getLanguage();
+ SystemLang.getLanguageAndScript();
+ SystemLang.getScript();
+ SystemLang.getCountry();
+ SystemLang.getRegion();
+ SystemLang.isIsoLocale();
+ SystemLang.isIsoODF();
+ SystemLang.isValidBcp47();
+
+ // System locale given as empty tag string.
+ LanguageTag SystemBcp47( "");
+ SystemBcp47.getBcp47();
+ SystemBcp47.getLocale();
+ SystemBcp47.getLanguageType();
+ SystemBcp47.getLanguage();
+ SystemBcp47.getLanguageAndScript();
+ SystemBcp47.getScript();
+ SystemBcp47.getCountry();
+ SystemBcp47.getRegion();
+ SystemBcp47.isIsoLocale();
+ SystemBcp47.isIsoODF();
+ SystemBcp47.isValidBcp47();
+
+ // Deliberately bogus tag string, not canonicalized on construction.
+ LanguageTag wab( "wrong-and-bad");
+ wab.getBcp47();
+ wab.getLocale();
+ wab.getLanguageType();
+ wab.getLanguage();
+ wab.getLanguageAndScript();
+ wab.getScript();
+ wab.getCountry();
+ wab.getRegion();
+ wab.isIsoLocale();
+ wab.isIsoODF();
+ wab.isValidBcp47();
+}
+#endif
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */