From a774a4826323ae2027e432da5d1a2a9d3356ad0d Mon Sep 17 00:00:00 2001 From: Eike Rathke Date: Mon, 6 Aug 2012 19:22:43 +0200 Subject: implemented LanguageTag wrapper --- i18npool/Library_i18nisolang1.mk | 6 + i18npool/inc/i18npool/languagetag.hxx | 168 ++++++ i18npool/prj/build.lst | 2 +- i18npool/source/languagetag/languagetag.cxx | 760 ++++++++++++++++++++++++++++ 4 files changed, 935 insertions(+), 1 deletion(-) create mode 100644 i18npool/inc/i18npool/languagetag.hxx create mode 100644 i18npool/source/languagetag/languagetag.cxx (limited to 'i18npool') diff --git a/i18npool/Library_i18nisolang1.mk b/i18npool/Library_i18nisolang1.mk index c41f71e635b7..db0cdf244594 100644 --- a/i18npool/Library_i18nisolang1.mk +++ b/i18npool/Library_i18nisolang1.mk @@ -31,6 +31,7 @@ $(eval $(call gb_Library_Library,i18nisolang1)) $(eval $(call gb_Library_use_package,i18nisolang1,i18npool_inc)) $(eval $(call gb_Library_set_include,i18nisolang1,\ + -I$(SRCDIR)/i18npool/inc \ $$(INCLUDE) \ )) @@ -49,6 +50,11 @@ $(eval $(call gb_Library_add_exception_objects,i18nisolang1,\ i18npool/source/isolang/insys \ i18npool/source/isolang/isolang \ i18npool/source/isolang/mslangid \ + i18npool/source/languagetag/languagetag \ )) +$(eval $(call gb_Library_use_external,i18nisolang1,glib)) + +$(eval $(call gb_Library_use_external,i18nisolang1,liblangtag)) + # vim: set noet sw=4 ts=4: diff --git a/i18npool/inc/i18npool/languagetag.hxx b/i18npool/inc/i18npool/languagetag.hxx new file mode 100644 index 000000000000..1dcc213e1745 --- /dev/null +++ b/i18npool/inc/i18npool/languagetag.hxx @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */
+
+#ifndef INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+#define INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+
+// NOTE(review): the include targets were lost when this patch was extracted;
+// restored from the types used below (rtl::OUString, lang::Locale,
+// LanguageType) - confirm against the repository.
+#include <sal/config.h>
+#include <rtl/ustring.hxx>
+#include <com/sun/star/lang/Locale.hpp>
+#include <i18npool/lang.h>
+
+
+/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
+    conversions in between.
+
+    Note that member variables are mutable and may change their values even in
+    const methods. Getter methods return either the original value or matching
+    converted values.
+ */
+class LanguageTag
+{
+public:
+
+    /** Init LanguageTag with existing BCP 47 language tag string.
+
+        @param bCanonicalize
+               If TRUE, canonicalize tag and reparse, the resulting tag string may
+               be different.
+               If FALSE, the tag is simply stored and can be retrieved with
+               getBcp47().
+
+        Note that conversions to ISO codes, locales or LanguageType or
+        obtaining language or script will canonicalize the tag string anyway,
+        so specifying bCanonicalize=false is not a guarantee that the tag will
+        stay identical to what was passed.
+     */
+    explicit LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize = false );
+
+    /** Init LanguageTag with Locale. */
+    explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
+
+    /** Init LanguageTag with LanguageType MS-LangID. */
+    explicit LanguageTag( LanguageType nLanguage );
+
+    /** Init LanguageTag with language and country strings.
+
+        This is a convenience ctor for places that so far use only language and
+        country to replace the MsLangId::convert...IsoNames...() calls. Avoid
+        use in new code.
+     */
+    explicit LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry );
+
+    /** Copy ctor, deliberately not explicit so that a LanguageTag can be
+        copy-initialized, passed and returned by value. */
+    LanguageTag( const LanguageTag & rLanguageTag );
+    ~LanguageTag();
+    LanguageTag& operator=( const LanguageTag & rLanguageTag );
+
+    /** Obtain BCP 47 language tag. */
+    rtl::OUString                   getBcp47() const;
+
+    /** Obtain language tag as Locale.
+
+        As a convention, language tags that can not be expressed as "pure"
+        com::sun::star::lang::Locale content using Language and Country fields
+        store "qlt" (ISO 639 reserved for local use) in the Language field and
+        the entire BCP 47 language tag in the Variant field. The Country field
+        contains the corresponding ISO 3166 country code _if_ there is one, or
+        otherwise is empty.
+     */
+    com::sun::star::lang::Locale    getLocale() const;
+
+    /** Obtain mapping to MS-LangID. */
+    LanguageType                    getLanguageType() const;
+
+    /** Get ISO 639 language code, or BCP 47 language. */
+    rtl::OUString                   getLanguage() const;
+
+    /** Get ISO 15924 script code, if not the default script according to
+        BCP 47. For default script an empty string is returned.
+     */
+    rtl::OUString                   getScript() const;
+
+    /** Get combined language and script code, separated by '-' if
+        non-default script, if default script only language.
+     */
+    rtl::OUString                   getLanguageAndScript() const;
+
+    /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
+        region not expressible as 2 character country code.
+     */
+    rtl::OUString                   getCountry() const;
+
+    /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
+        any other BCP 47 region tag.
+     */
+    rtl::OUString                   getRegion() const;
+
+    /** If language tag is a locale that can be expressed using only ISO 639
+        language codes and ISO 3166 country codes, thus is convertible to a
+        conforming Locale struct without using extension mechanisms. Note that
+        an empty language tag or empty Locale::Language field or LanguageType
+        LANGUAGE_SYSTEM is treated as a valid ISO locale.
+     */
+    bool                            isIsoLocale() const;
+
+    /** If language tag is a locale that can be expressed using only ISO 639
+        language codes and ISO 15924 script codes and ISO 3166 country codes,
+        thus can be stored in an ODF document using only fo:language, fo:script
+        and fo:country attributes. If this is FALSE, the locale must be stored
+        as a <*:rfc-language-tag> element.
+     */
+    bool                            isIsoODF() const;
+
+    /** If this is a valid BCP 47 language tag. */
+    bool                            isValidBcp47() const;
+
+private:
+
+    /** Tri-state for lazily evaluated properties. */
+    enum Decision
+    {
+        DECISION_DONTKNOW,
+        DECISION_NO,
+        DECISION_YES
+    };
+
+    mutable com::sun::star::lang::Locale    maLocale;
+    mutable rtl::OUString                   maBcp47;
+    mutable rtl::OUString                   maCachedLanguage;   ///< cache getLanguage()
+    mutable rtl::OUString                   maCachedScript;     ///< cache getScript()
+    mutable rtl::OUString                   maCachedCountry;    ///< cache getCountry()
+    mutable void*                           mpImplLangtag;      ///< actually lt_tag_t pointer, encapsulated
+    mutable LanguageType                    mnLangID;
+    mutable Decision                        meIsValid;
+    mutable Decision                        meIsIsoLocale;
+    mutable Decision                        meIsIsoODF;
+    mutable bool                            mbInitializedBcp47  : 1;
+    mutable bool                            mbInitializedLocale : 1;
+    mutable bool                            mbInitializedLangID : 1;
+    mutable bool                            mbCachedLanguage    : 1;
+    mutable bool                            mbCachedScript      : 1;
+    mutable bool                            mbCachedCountry     : 1;
+
+    // Conversions between the three representations; each one fills the
+    // target member and sets the matching mbInitialized... flag.
+    void    convertLocaleToBcp47();
+    void    convertLocaleToLang();
+    void    convertBcp47ToLocale();
+    void    convertBcp47ToLang();
+    void    convertLangToLocale();
+    void    convertLangToBcp47();
+
+    /** Parse and canonicalize maBcp47 via liblangtag, sets meIsValid. */
+    bool    canonicalize() const;
+
+    rtl::OUString   getLanguageFromLangtag() const;
+    rtl::OUString   getScriptFromLangtag() const;
+    rtl::OUString   getRegionFromLangtag() const;
+
+    static bool     isIsoLanguage( const rtl::OUString& rLanguage );
+    static bool     isIsoScript( const rtl::OUString& rScript );
+    static bool     isIsoCountry( const rtl::OUString& rRegion );
+};
+
+#endif  // INCLUDED_I18NPOOL_LANGUAGETAG_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/prj/build.lst b/i18npool/prj/build.lst
index 562549343643..831d29f5e168 100644
--- a/i18npool/prj/build.lst
+++ b/i18npool/prj/build.lst
@@ -1,2 +1,2 @@
-inp i18npool : bridges sax stoc comphelper CPPUNIT:cppunit ICU:icu i18nutil regexp cpputools LIBXSLT:libxslt udkapi offapi ure unotest NULL
+inp i18npool : bridges sax stoc comphelper CPPUNIT:cppunit ICU:icu i18nutil regexp cpputools LIBXSLT:libxslt LIBXML2:libxml2 
LIBLANGTAG:liblangtag udkapi offapi ure unotest NULL
 inp i18npool\prj nmake - all inp_prj NULL
diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx
new file mode 100644
index 000000000000..d5cfacdc3e02
--- /dev/null
+++ b/i18npool/source/languagetag/languagetag.cxx
@@ -0,0 +1,760 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "i18npool/languagetag.hxx"
+#include "i18npool/mslangid.hxx"
+// NOTE(review): the four include targets below were lost when this patch was
+// extracted; restored from the names this file uses (OUStringBuffer,
+// rtl::Bootstrap, osl::DirectoryItem, lt_tag_*) - confirm against the
+// repository.
+#include <rtl/ustrbuf.hxx>
+#include <rtl/bootstrap.hxx>
+#include <osl/file.hxx>
+#include <liblangtag/langtag.h>
+
+//#define erDEBUG
+
+using rtl::OUString;
+using rtl::OString;
+using rtl::OUStringBuffer;
+using namespace com::sun::star;
+
+// The actual pointer type of mpImplLangtag that is declared void* to not
+// pollute the entire code base with liblangtag.
+#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
+#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
+
+/** Convention to signal presence of BCP 47 language tag in a Locale's Variant
+    field. The Locale's Language field then will contain this ISO 639-2
+    reserved for local use code. */
+#define ISO639_LANGUAGE_TAG "qlt"
+
+
+/** A reference holder for liblangtag data de/initialization, one static
+    instance. Currently implemented such that the first "ref" inits and dtor
+    (our library deinitialized) tears down.
+*/
+class LiblantagDataRef
+{
+public:
+    LiblantagDataRef();
+    ~LiblantagDataRef();
+    /// First reference initializes the liblangtag database.
+    inline void incRef()
+    {
+        if (mnRef != SAL_MAX_UINT32 && !mnRef++)
+            setup();
+    }
+    /// Last reference tears down, unless the count is pinned to
+    /// SAL_MAX_UINT32 by setup().
+    inline void decRef()
+    {
+        if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
+            teardown();
+    }
+private:
+    rtl::OString maDataPath;   // path to liblangtag data, "|" if system
+    sal_uInt32   mnRef;
+
+    void setupDataPath();
+    void setup();
+    void teardown();
+};
+
+static LiblantagDataRef theDataRef;
+
+LiblantagDataRef::LiblantagDataRef()
+    :
+        mnRef(0)
+{
+}
+
+LiblantagDataRef::~LiblantagDataRef()
+{
+    // When destructed we're tearing down unconditionally.
+    if (mnRef)
+        mnRef = 1;
+    decRef();
+}
+
+void LiblantagDataRef::setup()
+{
+    if (maDataPath.isEmpty())
+        setupDataPath();
+    lt_db_initialize();
+    // Hold ref eternally.
+    mnRef = SAL_MAX_UINT32;
+}
+
+void LiblantagDataRef::teardown()
+{
+    lt_db_finalize();
+}
+
+/** Determine where the liblangtag registry data lives. If it is found below
+    BRAND_BASE_DIR that directory is handed to liblangtag, otherwise
+    maDataPath is set to "|" and liblangtag's own default is used.
+ */
+void LiblantagDataRef::setupDataPath()
+{
+    // maDataPath is assumed to be empty here.
+    OUString aURL;
+    if (!rtl::Bootstrap::get( "BRAND_BASE_DIR", aURL))
+        OSL_FAIL( "LiblantagDataRef: can't get BRAND_BASE_DIR");
+    else
+    {
+        // Check if data is in our own installation, else assume system
+        // installation.
+        aURL += "/share/liblangtag";
+        OUString aData( aURL);
+        aData += "/language-subtag-registry.xml";
+        osl::DirectoryItem aDirItem;
+        if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
+        {
+            // osl::DirectoryItem works on file URLs whereas liblangtag is a
+            // plain C library that needs a file system path, so convert
+            // instead of passing the URL through.
+            OUString aPath;
+            if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
+                maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
+        }
+    }
+    if (maDataPath.isEmpty())
+        maDataPath = "|";   // assume system
+    else
+        lt_db_set_datadir( maDataPath.getStr());
+}
+
+
+LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
+    :
+        maBcp47( rBcp47LanguageTag),
+        mpImplLangtag( NULL),
+        mnLangID( LANGUAGE_DONTKNOW),
+        meIsValid( DECISION_DONTKNOW),
+        meIsIsoLocale( DECISION_DONTKNOW),
+        meIsIsoODF( DECISION_DONTKNOW),
+        mbInitializedBcp47( true),
+        mbInitializedLocale( false),
+        mbInitializedLangID( false),
+        mbCachedLanguage( false),
+        mbCachedScript( false),
+        mbCachedCountry( false)
+{
+    theDataRef.incRef();
+
+    if (bCanonicalize)
+        canonicalize();
+}
+
+
+LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
+    :
+        maLocale( rLocale),
+        mpImplLangtag( NULL),
+        mnLangID( LANGUAGE_DONTKNOW),
+        meIsValid( DECISION_DONTKNOW),
+        meIsIsoLocale( DECISION_DONTKNOW),
+        meIsIsoODF( DECISION_DONTKNOW),
+        mbInitializedBcp47( false),
+        mbInitializedLocale( true),
+        mbInitializedLangID( false),
+        mbCachedLanguage( false),
+        mbCachedScript( false),
+        mbCachedCountry( false)
+{
+    theDataRef.incRef();
+}
+
+
+LanguageTag::LanguageTag( LanguageType nLanguage )
+    :
+        mpImplLangtag( NULL),
+        mnLangID( nLanguage),
+        meIsValid( DECISION_DONTKNOW),
+        meIsIsoLocale( DECISION_DONTKNOW),
+        meIsIsoODF( DECISION_DONTKNOW),
+        mbInitializedBcp47( false),
+        mbInitializedLocale( false),
+        mbInitializedLangID( true),
+        mbCachedLanguage( false),
+        mbCachedScript( false),
+        mbCachedCountry( false)
+{
+    theDataRef.incRef();
+}
+
+
+LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry )
+    :
+        maLocale( rLanguage, rCountry, ""),
+        mpImplLangtag( NULL),
+        mnLangID( LANGUAGE_DONTKNOW),
+
meIsValid( DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + mbInitializedBcp47( false), + mbInitializedLocale( true), + mbInitializedLangID( false), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false) +{ + theDataRef.incRef(); +} + + +LanguageTag::LanguageTag( const LanguageTag & rLanguageTag ) + : + maLocale( rLanguageTag.maLocale), + maBcp47( rLanguageTag.maBcp47), + maCachedLanguage( rLanguageTag.maCachedLanguage), + maCachedScript( rLanguageTag.maCachedScript), + maCachedCountry( rLanguageTag.maCachedCountry), + mpImplLangtag( rLanguageTag.mpImplLangtag ? + lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL), + mnLangID( rLanguageTag.mnLangID), + meIsValid( rLanguageTag.meIsValid), + meIsIsoLocale( rLanguageTag.meIsIsoLocale), + meIsIsoODF( rLanguageTag.meIsIsoODF), + mbInitializedBcp47( rLanguageTag.mbInitializedBcp47), + mbInitializedLocale( rLanguageTag.mbInitializedLocale), + mbInitializedLangID( rLanguageTag.mbInitializedLangID), + mbCachedLanguage( rLanguageTag.mbCachedLanguage), + mbCachedScript( rLanguageTag.mbCachedScript), + mbCachedCountry( rLanguageTag.mbCachedCountry) +{ + theDataRef.incRef(); +} + + +LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag ) +{ + theDataRef.incRef(); + + maLocale = rLanguageTag.maLocale; + maBcp47 = rLanguageTag.maBcp47; + maCachedLanguage = rLanguageTag.maCachedLanguage; + maCachedScript = rLanguageTag.maCachedScript; + maCachedCountry = rLanguageTag.maCachedCountry; + mpImplLangtag = rLanguageTag.mpImplLangtag; + mpImplLangtag = rLanguageTag.mpImplLangtag ? 
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL; + mnLangID = rLanguageTag.mnLangID; + meIsValid = rLanguageTag.meIsValid; + meIsIsoLocale = rLanguageTag.meIsIsoLocale; + meIsIsoODF = rLanguageTag.meIsIsoODF; + mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47; + mbInitializedLocale = rLanguageTag.mbInitializedLocale; + mbInitializedLangID = rLanguageTag.mbInitializedLangID; + mbCachedLanguage = rLanguageTag.mbCachedLanguage; + mbCachedScript = rLanguageTag.mbCachedScript; + mbCachedCountry = rLanguageTag.mbCachedCountry; + return *this; +} + + +LanguageTag::~LanguageTag() +{ + lt_tag_unref( MPLANGTAG); + + theDataRef.decRef(); +} + + +bool LanguageTag::canonicalize() const +{ +#ifdef erDEBUG + // dump once + struct dumper + { + void** mpp; + dumper( void** pp ) : mpp( *pp ? NULL : pp) {} + ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); } + }; + dumper aDumper( &mpImplLangtag); +#endif + + // g_error_free() mocks about NULL, so ... + struct myerror + { + GError* p; + myerror() : p(NULL) {} + ~myerror() { if (p) g_error_free( p); } + } aError; + + getBcp47(); // side effect: have maBcp47 in any case + // Checking empty for system locale before having allocated mpImplLangtag + // may result in multiple calls of this method because that serves as flag + // whether this was canonicalized, but that's better than allocating + // lt_tag_t for all those system locales. + if (maBcp47.isEmpty()) + { + meIsValid = DECISION_YES; + return true; + } + if (!mpImplLangtag) + mpImplLangtag = lt_tag_new(); + if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p)) + { + gchar* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p); + SAL_WARN_IF( !pTag || aError.p, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize, " << + (aError.p ? 
aError.p->message : ""));
+        if (pTag)
+        {
+            OUString aOld( maBcp47);
+            maBcp47 = OUString::createFromAscii( pTag);
+            // Make the lt_tag_t follow the new string if different, which
+            // removes default script and such.
+            if (maBcp47 != aOld)
+            {
+                if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
+                {
+                    SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse, " <<
+                            (aError.p ? aError.p->message : ""));
+                    // pTag was allocated by liblangtag, free on every exit path.
+                    g_free( pTag);
+                    meIsValid = DECISION_NO;
+                    return false;
+                }
+            }
+            g_free( pTag);
+            meIsValid = DECISION_YES;
+            return true;
+        }
+    }
+    else
+    {
+        SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not parse, " <<
+                (aError.p ? aError.p->message : ""));
+    }
+    // Reached if parsing failed or no canonical string was obtained.
+    meIsValid = DECISION_NO;
+    return false;
+}
+
+
+// Build maBcp47 from maLocale: empty for the system locale, the Variant field
+// for "qlt" locales, else language or language-country.
+void LanguageTag::convertLocaleToBcp47()
+{
+    if (maLocale.Language.isEmpty())
+    {
+        // Special case system locale.
+        maBcp47 = OUString();
+        meIsIsoLocale = DECISION_YES;
+    }
+    else if (maLocale.Language == ISO639_LANGUAGE_TAG)
+    {
+        maBcp47 = maLocale.Variant;
+        meIsIsoLocale = DECISION_NO;
+    }
+    else
+    {
+        /* XXX NOTE: most legacy code never evaluated the Variant field, so for
+         * now just concatenate language and country. In case we stumbled over
+         * variant aware code we'd have to take care of that. */
+        if (maLocale.Country.isEmpty())
+            maBcp47 = maLocale.Language;
+        else
+        {
+            OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength());
+            aBuf.append( maLocale.Language).append( '-').append( maLocale.Country);
+            maBcp47 = aBuf.makeStringAndClear();
+        }
+    }
+    mbInitializedBcp47 = true;
+}
+
+
+// Derive mnLangID from maLocale.
+void LanguageTag::convertLocaleToLang()
+{
+    /* FIXME: this is temporary until code base is converted to not use
+     * MsLangId::convert...() anymore. After that, proper new method has to be
+     * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
+    mnLangID = MsLangId::convertLocaleToLanguage( maLocale);
+    mbInitializedLangID = true;
+}
+
+
+// Derive maLocale from maBcp47. Tags that are not a plain ISO locale are
+// stored with Language "qlt" and the full tag in Variant.
+void LanguageTag::convertBcp47ToLocale()
+{
+    if (maBcp47.isEmpty())
+    {
+        // Special case system locale.
+        maLocale = lang::Locale();
+        meIsIsoLocale = DECISION_YES;
+    }
+    else
+    {
+        bool bIso = isIsoLocale();
+        if (bIso)
+        {
+            maLocale.Language = getLanguageFromLangtag();
+            maLocale.Country = getRegionFromLangtag();
+            maLocale.Variant = OUString();
+        }
+        else
+        {
+            maLocale.Language = ISO639_LANGUAGE_TAG;
+            maLocale.Country = getCountry();
+            maLocale.Variant = maBcp47;
+        }
+    }
+    mbInitializedLocale = true;
+}
+
+
+// Derive mnLangID from maBcp47, going through maLocale.
+void LanguageTag::convertBcp47ToLang()
+{
+    /* FIXME: this is temporary. If we support locales that consist not only of
+     * language and country, e.g. added script, this probably needs to be
+     * adapted. */
+    if (!mbInitializedLocale)
+        convertBcp47ToLocale();
+    convertLocaleToLang();
+    mbInitializedLangID = true;
+}
+
+
+// Derive maLocale from mnLangID.
+void LanguageTag::convertLangToLocale()
+{
+    /* FIXME: this is temporary until code base is converted to not use
+     * MsLangId::convert...() anymore. After that, proper new method has to be
+     * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
+    // Do not resolve system here!
+    maLocale = MsLangId::convertLanguageToLocale( mnLangID, false);
+    mbInitializedLocale = true;
+}
+
+
+// Derive maBcp47 from mnLangID, going through maLocale.
+void LanguageTag::convertLangToBcp47()
+{
+    /* FIXME: this is temporary. If we support locales that consist not only of
+     * language and country, e.g. added script, this probably needs to be
+     * adapted. 
*/
+    if (!mbInitializedLocale)
+        convertLangToLocale();
+    convertLocaleToBcp47();
+    mbInitializedBcp47 = true;
+}
+
+
+/** Obtain the BCP 47 string, converting lazily from whichever representation
+    this instance was initialized with. */
+rtl::OUString LanguageTag::getBcp47() const
+{
+    if (!mbInitializedBcp47)
+    {
+        // The members are mutable caches, the conversions are non-const.
+        if (mbInitializedLocale)
+            const_cast<LanguageTag*>(this)->convertLocaleToBcp47();
+        else
+            const_cast<LanguageTag*>(this)->convertLangToBcp47();
+    }
+    return maBcp47;
+}
+
+
+/** Language subtag as reported by liblangtag, empty for an empty tag or if
+    liblangtag yields none. */
+rtl::OUString LanguageTag::getLanguageFromLangtag() const
+{
+    rtl::OUString aLanguage;
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return aLanguage;
+    const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
+    SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+    if (!pLangT)
+        return aLanguage;
+    const gchar* pLang = lt_lang_get_tag( pLangT);
+    SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+    if (pLang)
+        aLanguage = OUString::createFromAscii( pLang);
+    return aLanguage;
+}
+
+
+/** Script subtag as reported by liblangtag, empty for an empty tag or if
+    the tag carries no script. */
+rtl::OUString LanguageTag::getScriptFromLangtag() const
+{
+    rtl::OUString aScript;
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return aScript;
+    const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
+    // pScriptT==NULL is valid for default scripts
+    if (!pScriptT)
+        return aScript;
+    const gchar* pScript = lt_script_get_tag( pScriptT);
+    SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+    if (pScript)
+        aScript = OUString::createFromAscii( pScript);
+    return aScript;
+}
+
+
+/** Region subtag as reported by liblangtag, empty for an empty tag or if
+    the tag carries no region. */
+rtl::OUString LanguageTag::getRegionFromLangtag() const
+{
+    rtl::OUString aRegion;
+    if (!mpImplLangtag)
+        canonicalize();
+    if (maBcp47.isEmpty())
+        return aRegion;
+    const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
+    // pRegionT==NULL is valid for language only tags, the region subtag is
+    // optional in BCP 47, so do not warn here.
+    if (!pRegionT)
+        return aRegion;
+    const gchar* pRegion = lt_region_get_tag( pRegionT);
+    SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+    if (pRegion)
+        aRegion = OUString::createFromAscii( pRegion);
+    return aRegion;
+}
+
+
+/** Obtain the Locale, converting lazily from BCP 47 string or LangID. */
+com::sun::star::lang::Locale LanguageTag::getLocale() const
+{
+    if (!mbInitializedLocale)
+    {
+        if (mbInitializedBcp47)
+            const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
+        else
+            const_cast<LanguageTag*>(this)->convertLangToLocale();
+    }
+    return maLocale;
+}
+
+
+/** Obtain the MS-LangID, converting lazily from BCP 47 string or Locale. */
+LanguageType LanguageTag::getLanguageType() const
+{
+    if (!mbInitializedLangID)
+    {
+        if (mbInitializedBcp47)
+            const_cast<LanguageTag*>(this)->convertBcp47ToLang();
+        else
+            const_cast<LanguageTag*>(this)->convertLocaleToLang();
+    }
+    return mnLangID;
+}
+
+
+namespace
+{
+
+bool isLowerAscii( sal_Unicode c )
+{
+    return 'a' <= c && c <= 'z';
+}
+
+bool isUpperAscii( sal_Unicode c )
+{
+    return 'A' <= c && c <= 'Z';
+}
+
+}
+
+
+// Accepts exactly two or three lower case ASCII letters.
+// static
+bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    bool b2chars;
+    if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
+            isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
+            (b2chars || isLowerAscii( rLanguage[2])))
+        return true;
+    SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
+                (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
+            (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag",
+            "LanguageTag::isIsoLanguage: rejecting upper case");
+    return false;
+}
+
+
+// static
+bool LanguageTag::isIsoCountry( const rtl::OUString& rRegion )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. 
*/
+    if (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1]))
+        return true;
+    SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
+            "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case");
+    return false;
+}
+
+
+// Accepts an empty string or one upper case ASCII letter followed by three
+// lower case ASCII letters.
+// static
+bool LanguageTag::isIsoScript( const rtl::OUString& rScript )
+{
+    /* TODO: ignore case? For now let's see where rubbish is used. */
+    if (rScript.isEmpty() ||
+            (rScript.getLength() == 4 &&
+             isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
+             isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
+        return true;
+    SAL_WARN_IF( rScript.getLength() == 4 &&
+            (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
+             isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
+            "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch");
+    return false;
+}
+
+
+rtl::OUString LanguageTag::getLanguage() const
+{
+    if (!mbCachedLanguage)
+    {
+        maCachedLanguage = getLanguageFromLangtag();
+        mbCachedLanguage = true;
+    }
+    return maCachedLanguage;
+}
+
+
+rtl::OUString LanguageTag::getScript() const
+{
+    if (!mbCachedScript)
+    {
+        maCachedScript = getScriptFromLangtag();
+        mbCachedScript = true;
+    }
+    return maCachedScript;
+}
+
+
+rtl::OUString LanguageTag::getLanguageAndScript() const
+{
+    OUString aLanguageScript( getLanguage());
+    OUString aScript( getScript());
+    if (!aScript.isEmpty())
+    {
+        OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength());
+        aBuf.append( aLanguageScript).append( '-').append( aScript);
+        aLanguageScript = aBuf.makeStringAndClear();
+    }
+    return aLanguageScript;
+}
+
+
+rtl::OUString LanguageTag::getCountry() const
+{
+    if (!mbCachedCountry)
+    {
+        // Only a two letter upper case region is a country, else empty.
+        maCachedCountry = getRegionFromLangtag();
+        if (!isIsoCountry( maCachedCountry))
+            maCachedCountry = OUString();
+        mbCachedCountry = true;
+    }
+    return maCachedCountry;
+}
+
+
+rtl::OUString LanguageTag::getRegion() const
+{
+    return getRegionFromLangtag();
+}
+
+
+/** Whether the tag is empty, a bare ISO 639 language (ll or lll) or language
+    plus ISO 3166 country (ll-CC or lll-CC). The result is cached in
+    meIsIsoLocale.
+ */
+bool LanguageTag::isIsoLocale() const
+{
+    if (meIsIsoLocale == DECISION_DONTKNOW)
+    {
+        if (!mpImplLangtag)
+            canonicalize();
+        bool bIso = maBcp47.isEmpty();
+        if (!bIso)
+        {
+            // It must be at most ll-CC or lll-CC
+            // Do not use getCountry() here, use getRegion() instead.
+            const OUString aLanguage( getLanguage());
+            const OUString aRegion( getRegion());
+            if (isIsoLanguage( aLanguage))
+            {
+                if (aRegion.isEmpty())
+                {
+                    // Language only tag, which convertLocaleToBcp47()
+                    // produces for a Locale with empty Country. Require the
+                    // tag to be nothing but the language so that trailing
+                    // variant, extension or private use subtags do not slip
+                    // through.
+                    bIso = (maBcp47 == aLanguage);
+                }
+                else
+                    bIso = (maBcp47.getLength() <= 6 && isIsoCountry( aRegion));
+            }
+        }
+        meIsIsoLocale = (bIso ? DECISION_YES : DECISION_NO);
+    }
+    return meIsIsoLocale == DECISION_YES;
+}
+
+
+bool LanguageTag::isIsoODF() const
+{
+    if (meIsIsoODF == DECISION_DONTKNOW)
+    {
+        if (!mpImplLangtag)
+            canonicalize();
+        if (!isIsoScript( getScript()))
+            return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
+        // The usual case is lll-CC so simply check that first.
+        if (isIsoLocale())
+            return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
+        // If this is not ISO locale for which script must not exist it can
+        // still be ISO locale plus ISO script lll-Ssss-CC
+        meIsIsoODF = ((maBcp47.getLength() <= 11 &&
+                    isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ? 
+                DECISION_YES : DECISION_NO);
+    }
+    return meIsIsoODF == DECISION_YES;
+}
+
+
+// Whether canonicalize() accepted the tag; runs canonicalize() if that has
+// not been decided yet and no lt_tag_t was allocated so far.
+bool LanguageTag::isValidBcp47() const
+{
+    if (meIsValid == DECISION_DONTKNOW)
+    {
+        if (!mpImplLangtag)
+           canonicalize();
+        SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag",
+                "LanguageTag::isValidBcp47: canonicalize() doesn't set meIsValid");
+    }
+    return meIsValid == DECISION_YES;
+}
+
+
+#ifdef erDEBUG
+// Debug aid, only compiled with erDEBUG defined: calls the getters on a
+// handful of tags so they can be stepped through; results are discarded.
+void dbg_languagetag()
+{
+    // Non-default tag with script, canonicalized in the ctor.
+    LanguageTag de_DE( "de-Latn-DE", true);
+    de_DE.getBcp47();
+    de_DE.getLocale();
+    de_DE.getLanguageType();
+    de_DE.getLanguage();
+    de_DE.getLanguageAndScript();
+    de_DE.getScript();
+    de_DE.getCountry();
+    de_DE.getRegion();
+    de_DE.isIsoLocale();
+    de_DE.isIsoODF();
+
+    // System locale given as empty Locale.
+    LanguageTag SystemLocale( lang::Locale("","",""));
+    SystemLocale.getBcp47();
+    SystemLocale.getLocale();
+    SystemLocale.getLanguageType();
+    SystemLocale.getLanguage();
+    SystemLocale.getLanguageAndScript();
+    SystemLocale.getScript();
+    SystemLocale.getCountry();
+    SystemLocale.getRegion();
+    SystemLocale.isIsoLocale();
+    SystemLocale.isIsoODF();
+    SystemLocale.isValidBcp47();
+
+    // System locale given as LangID.
+    LanguageTag SystemLang( LANGUAGE_SYSTEM);
+    SystemLang.getBcp47();
+    SystemLang.getLocale();
+    SystemLang.getLanguageType();
+    SystemLang.getLanguage();
+    SystemLang.getLanguageAndScript();
+    SystemLang.getScript();
+    SystemLang.getCountry();
+    SystemLang.getRegion();
+    SystemLang.isIsoLocale();
+    SystemLang.isIsoODF();
+    SystemLang.isValidBcp47();
+
+    // System locale given as empty BCP 47 string.
+    LanguageTag SystemBcp47( "");
+    SystemBcp47.getBcp47();
+    SystemBcp47.getLocale();
+    SystemBcp47.getLanguageType();
+    SystemBcp47.getLanguage();
+    SystemBcp47.getLanguageAndScript();
+    SystemBcp47.getScript();
+    SystemBcp47.getCountry();
+    SystemBcp47.getRegion();
+    SystemBcp47.isIsoLocale();
+    SystemBcp47.isIsoODF();
+    SystemBcp47.isValidBcp47();
+
+    // Deliberately bad input.
+    LanguageTag wab( "wrong-and-bad");
+    wab.getBcp47();
+    wab.getLocale();
+    wab.getLanguageType();
+    wab.getLanguage();
+    wab.getLanguageAndScript();
+    wab.getScript();
+    wab.getCountry();
+    wab.getRegion();
+    wab.isIsoLocale();
+    wab.isIsoODF();
+    wab.isValidBcp47();
+}
+#endif
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- cgit