diff options
Diffstat (limited to 'i18nlangtag')
20 files changed, 5962 insertions, 0 deletions
diff --git a/i18nlangtag/CppunitTest_i18nlangtag_test_languagetag.mk b/i18nlangtag/CppunitTest_i18nlangtag_test_languagetag.mk new file mode 100644 index 000000000000..0bed7f56468f --- /dev/null +++ b/i18nlangtag/CppunitTest_i18nlangtag_test_languagetag.mk @@ -0,0 +1,49 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_CppunitTest_CppunitTest,i18nlangtag_test_languagetag)) + +$(eval $(call gb_CppunitTest_use_api,i18nlangtag_test_languagetag,\ + udkapi \ + offapi \ +)) + +$(eval $(call gb_CppunitTest_use_library_objects,i18nlangtag_test_languagetag,i18nlangtag)) + +$(eval $(call gb_CppunitTest_use_libraries,i18nlangtag_test_languagetag,\ + cppu \ + cppuhelper \ + sal \ + $(gb_UWINAPI) \ +)) + +ifeq ($(ENABLE_LIBLANGTAG),YES) +$(eval $(call gb_CppunitTest_use_externals,i18nlangtag_test_languagetag,\ + liblangtag \ + libxml2 \ +)) +$(eval $(call gb_CppunitTest_add_defs,i18nlangtag_test_languagetag,-DENABLE_LIBLANGTAG)) + +ifeq ($(SYSTEM_LIBLANGTAG),YES) +$(eval $(call gb_CppunitTest_add_defs,i18nlangtag_test_languagetag,-DSYSTEM_LIBLANGTAG)) +else +$(eval $(call gb_CppunitTest_use_package,i18nlangtag_test_languagetag,langtag_data)) +endif +endif + +$(eval $(call gb_CppunitTest_set_include,i18nlangtag_test_languagetag,\ + -I$(SRCDIR)/i18nlangtag/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_CppunitTest_add_exception_objects,i18nlangtag_test_languagetag,\ + i18nlangtag/qa/cppunit/test_languagetag \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/i18nlangtag/Library_i18nlangtag.mk b/i18nlangtag/Library_i18nlangtag.mk new file mode 100644 index 000000000000..2f5efa0d7d55 --- /dev/null +++ b/i18nlangtag/Library_i18nlangtag.mk @@ -0,0 +1,71 @@ +# -*- Mode: makefile-gmake; 
tab-width: 4; indent-tabs-mode: t -*- +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Initial Developer of the Original Code is +# Matúš Kukan <matus.kukan@gmail.com> +# Portions created by the Initial Developer are Copyright (C) 2011 the +# Initial Developer. All Rights Reserved. +# +# Major Contributor(s): +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. 
+ +$(eval $(call gb_Library_Library,i18nlangtag)) + +$(eval $(call gb_Library_use_packages,i18nlangtag,\ + cppu_odk_headers \ + i18nlangtag_inc \ +)) + +$(eval $(call gb_Library_set_include,i18nlangtag,\ + -I$(SRCDIR)/i18nlangtag/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Library_use_sdk_api,i18nlangtag)) + +$(eval $(call gb_Library_add_defs,i18nlangtag,\ + -DI18NLANGTAG_DLLIMPLEMENTATION \ +)) + +$(eval $(call gb_Library_use_libraries,i18nlangtag,\ + sal \ + $(gb_UWINAPI) \ +)) + +$(eval $(call gb_Library_add_exception_objects,i18nlangtag,\ + i18nlangtag/source/isolang/insys \ + i18nlangtag/source/isolang/isolang \ + i18nlangtag/source/isolang/mslangid \ + i18nlangtag/source/languagetag/languagetag \ +)) + + +ifeq ($(ENABLE_LIBLANGTAG),YES) +$(eval $(call gb_Library_add_defs,i18nlangtag,-DENABLE_LIBLANGTAG)) +$(eval $(call gb_Library_use_external,i18nlangtag,liblangtag)) +$(eval $(call gb_Library_use_external,i18nlangtag,libxml2)) +$(eval $(call gb_Library_use_system_win32_libs,i18nlangtag,\ + $(if $(filter $(COM),MSC), \ + kernel32 \ + ) \ +)) +endif + +# vim: set noet sw=4 ts=4: diff --git a/i18nlangtag/Makefile b/i18nlangtag/Makefile new file mode 100644 index 000000000000..0997e628485b --- /dev/null +++ b/i18nlangtag/Makefile @@ -0,0 +1,14 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/i18nlangtag/Module_i18nlangtag.mk b/i18nlangtag/Module_i18nlangtag.mk new file mode 100644 index 000000000000..c16f50f4576a --- /dev/null +++ b/i18nlangtag/Module_i18nlangtag.mk @@ -0,0 +1,20 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +$(eval $(call gb_Module_Module,i18nlangtag)) + +$(eval $(call gb_Module_add_targets,i18nlangtag,\ + Library_i18nlangtag \ + Package_inc \ +)) + +$(eval $(call gb_Module_add_check_targets,i18nlangtag,\ + CppunitTest_i18nlangtag_test_languagetag \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/i18nlangtag/Package_inc.mk b/i18nlangtag/Package_inc.mk new file mode 100644 index 000000000000..984faf4e21b3 --- /dev/null +++ b/i18nlangtag/Package_inc.mk @@ -0,0 +1,36 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Initial Developer of the Original Code is +# Matúš Kukan <matus.kukan@gmail.com> +# Portions created by the Initial Developer are Copyright (C) 2011 the +# Initial Developer. 
All Rights Reserved. +# +# Major Contributor(s): +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. + +$(eval $(call gb_Package_Package,i18nlangtag_inc,$(SRCDIR)/i18nlangtag/inc)) + +$(eval $(call gb_Package_add_file,i18nlangtag_inc,inc/i18nlangtag/i18nlangtagdllapi.h,i18nlangtag/i18nlangtagdllapi.h)) +$(eval $(call gb_Package_add_file,i18nlangtag_inc,inc/i18nlangtag/lang.h,i18nlangtag/lang.h)) +$(eval $(call gb_Package_add_file,i18nlangtag_inc,inc/i18nlangtag/languagetag.hxx,i18nlangtag/languagetag.hxx)) +$(eval $(call gb_Package_add_file,i18nlangtag_inc,inc/i18nlangtag/mslangid.hxx,i18nlangtag/mslangid.hxx)) + +# vim: set noet sw=4 ts=4: diff --git a/i18nlangtag/README b/i18nlangtag/README new file mode 100644 index 000000000000..88ca43bb965b --- /dev/null +++ b/i18nlangtag/README @@ -0,0 +1,3 @@ +Code for language tags, LanguageTag wrapper for liblangtag and converter between BCP47 language tags, Locale(Language,Country,Variant) and MS-LangIDs. + +Basic functionality used by almost every other module, including comphelper, so don't even use its string helpers in this code, to avoid creating circular dependencies. Stick with sal! diff --git a/i18nlangtag/inc/i18nlangtag/i18nlangtagdllapi.h b/i18nlangtag/inc/i18nlangtag/i18nlangtagdllapi.h new file mode 100644 index 000000000000..ba9af35c08fe --- /dev/null +++ b/i18nlangtag/inc/i18nlangtag/i18nlangtagdllapi.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_I18NLANGTAG_I18NLANGTAGDLLAPI_H +#define INCLUDED_I18NLANGTAG_I18NLANGTAGDLLAPI_H + +#include "sal/types.h" + +#if defined(I18NLANGTAG_DLLIMPLEMENTATION) +#define I18NLANGTAG_DLLPUBLIC SAL_DLLPUBLIC_EXPORT +#else +#define I18NLANGTAG_DLLPUBLIC SAL_DLLPUBLIC_IMPORT +#endif +#define I18NLANGTAG_DLLPRIVATE SAL_DLLPRIVATE + +#endif /* INCLUDED_I18NLANGTAG_I18NLANGTAGDLLAPI_H */ + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/inc/i18nlangtag/lang.h b/i18nlangtag/inc/i18nlangtag/lang.h new file mode 100644 index 000000000000..d4f5ca7a61b3 --- /dev/null +++ b/i18nlangtag/inc/i18nlangtag/lang.h @@ -0,0 +1,568 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_I18NLANGTAG_LANG_H +#define INCLUDED_I18NLANGTAG_LANG_H + +/** + These are MS LANGIDs, the lower 10 bits (mask 0x03ff, values below 0x0400 + aren't real locale IDs) represent the primary language ID, the upper 6 bits + represent the sublanguage ID, which in most cases together with the primary + language simply designates a country. A LANGID is constructed by + UINT16 nLangID = ((((UINT16)(SubLangId)) << 10) | (UINT16)(PriLangId)); + + A user-defined primary language ID is a value in the range 0x0200 to 0x03ff. + All other values are reserved for Windows system use. + + A user-defined sublanguage identifier is a value in the range 0x20 to 0x3f. + All other values are reserved for Windows system use. + If there is no sublanguage ID for a primary language ID, use SUBLANG_DEFAULT + (0x01, which shifted to the left by 10 bits results in the frequently seen + 0x0400). OR'ed with a 0x0200 primary results in 0x0600. + + Values added as of 2006-04-18, a helper script: ../../source/isolang/lcid.awk + Utility to compare MS-LANGID definitions with those defined in this file. + + For further information about MS-LANGIDs please see include/winnt.h of a + recent MSDEV version and the following web pages. + + + The once complete list, not necessarily supported by Windows: + List of Locale ID (LCID) Values as Assigned by Microsoft + http://www.microsoft.com/globaldev/reference/lcid-all.mspx + + As a complete list is never complete, some more that came with WinXP SP2: + Windows XP/Server 2003 - List of Locale IDs, Input Locale, and Language Collection + http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx + + And of course 2 lists aren't enough, so Windows Vista needs an extra one. 
+ Which at least seems to include values of other versions of Windows. + Language Identifier Constants and Strings + http://msdn2.microsoft.com/en-us/library/ms776294.aspx + + Hey, yet another list, maybe this one will not move around? It seems to be + quite complete.. + Language Identifier Constants and Strings (Windows) + http://msdn.microsoft.com/en-us/library/dd318693(VS.85).aspx + + List of supported locale identifiers in Word + http://support.microsoft.com/default.aspx?scid=KB;en-us;q221435 + */ + + +/* It must be safe to include this file in plain C code, so only C style + * comments are used. Do NOT use // C++ style comments. */ + +/* disable typedef for usage in svtools/source/misc/langtab.src */ +#ifndef RSC_RESOURCE_USAGE +typedef unsigned short LanguageType; +#endif + +#define LANGUAGE_MASK_PRIMARY 0x03ff + +#ifdef __cplusplus +/* Please use the methods provided in mslangid.hxx for type-safety! */ +#else +#define MSLANGID_MAKELANGID( nSubLangId, nPriLangId ) \ + (((nSubLangId) << 10) | (nPriLangId)) +#define MSLANGID_GETPRIMARYLANGUAGE( nLangID ) \ + ((nLangID) & LANGUAGE_MASK_PRIMARY) +#define MSLANGID_GETSUBLANGUAGE( nLangID ) \ + (((nLangID) & ~LANGUAGE_MASK_PRIMARY) >> 10) +#endif + + +#define LANGUAGE_DONTKNOW 0x03FF /* yes, the mask */ +#define LANGUAGE_NONE 0x00FF +#define LANGUAGE_HID_HUMAN_INTERFACE_DEVICE 0x04FF +#define LANGUAGE_SYSTEM 0x0000 /* OOo/SO definition */ + +/* The Invariant Locale (Locale ID = 0x007f) is a locale that can be used by + * applications when a consistent and locale-independent result is required. + * The invariant locale can be used, for example, when comparing character + * strings using the CompareString() API and a consistent result regardless of + * the User Locale is expected. + * The settings of the Invariant Locale are similar to US-English international + * standards, but should not be used to display formatted data. */ +/* NOTE: this is taken from the MS documentation! Not supported by OOo/SO! 
*/ +#define LANGUAGE_INVARIANT 0x007F + +#define LANGUAGE_AFRIKAANS 0x0436 +#define LANGUAGE_ALBANIAN 0x041C +#define LANGUAGE_ALSATIAN_FRANCE 0x0484 +#define LANGUAGE_AMHARIC_ETHIOPIA 0x045E +#define LANGUAGE_ARABIC_ALGERIA 0x1401 +#define LANGUAGE_ARABIC_BAHRAIN 0x3C01 +#define LANGUAGE_ARABIC_EGYPT 0x0C01 +#define LANGUAGE_ARABIC_IRAQ 0x0801 +#define LANGUAGE_ARABIC_JORDAN 0x2C01 +#define LANGUAGE_ARABIC_KUWAIT 0x3401 +#define LANGUAGE_ARABIC_LEBANON 0x3001 +#define LANGUAGE_ARABIC_LIBYA 0x1001 +#define LANGUAGE_ARABIC_MOROCCO 0x1801 +#define LANGUAGE_ARABIC_OMAN 0x2001 +#define LANGUAGE_ARABIC_QATAR 0x4001 +#define LANGUAGE_ARABIC_SAUDI_ARABIA 0x0401 +#define LANGUAGE_ARABIC_SYRIA 0x2801 +#define LANGUAGE_ARABIC_TUNISIA 0x1C01 +#define LANGUAGE_ARABIC_UAE 0x3801 +#define LANGUAGE_ARABIC_YEMEN 0x2401 +#define LANGUAGE_ARABIC_PRIMARY_ONLY 0x0001 /* primary only, not a locale! */ +#define LANGUAGE_ARMENIAN 0x042B +#define LANGUAGE_ASSAMESE 0x044D +#define LANGUAGE_AZERI 0x002C /* primary only, not a locale! */ +#define LANGUAGE_AZERI_CYRILLIC 0x082C +#define LANGUAGE_AZERI_LATIN 0x042C +#define LANGUAGE_BASHKIR_RUSSIA 0x046D +#define LANGUAGE_BASQUE 0x042D +#define LANGUAGE_BELARUSIAN 0x0423 +#define LANGUAGE_BENGALI 0x0445 /* in India */ +#define LANGUAGE_BENGALI_BANGLADESH 0x0845 +#define LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA 0x141A +#define LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x201A +#define LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA /* TODO: remove, only for langtab.src & localize.sdf compatibility */ +#define LANGUAGE_BRETON_FRANCE 0x047E /* obsoletes LANGUAGE_USER_BRETON 0x0629 */ +#define LANGUAGE_BULGARIAN 0x0402 +#define LANGUAGE_BURMESE 0x0455 +#define LANGUAGE_CATALAN 0x0403 +#define LANGUAGE_CHEROKEE_UNITED_STATES 0x045C +#define LANGUAGE_CHINESE 0x0004 /* primary only, not a locale! 
*/ +#define LANGUAGE_CHINESE_HONGKONG 0x0C04 +#define LANGUAGE_CHINESE_MACAU 0x1404 +#define LANGUAGE_CHINESE_SIMPLIFIED 0x0804 +#define LANGUAGE_CHINESE_SINGAPORE 0x1004 +#define LANGUAGE_CHINESE_TRADITIONAL 0x0404 +/* #define LANGUAGE_CHINESE_SIMPLIFIED 0x0004 */ /* artificial political? Defined as 'zh-CHS' by MS. Primary only! */ +/* #define LANGUAGE_CHINESE_TRADITIONAL 0x7C04 */ /* artificial political? Defined as 'zh-CHT' by MS. */ +#define LANGUAGE_CORSICAN_FRANCE 0x0483 +#define LANGUAGE_CROATIAN 0x041A +#define LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA 0x101A +#define LANGUAGE_CZECH 0x0405 +#define LANGUAGE_DANISH 0x0406 +#define LANGUAGE_DARI_AFGHANISTAN 0x048C /* AKA Zoroastrian Dari */ +#define LANGUAGE_DHIVEHI 0x0465 /* AKA Divehi */ +#define LANGUAGE_DUTCH 0x0413 +#define LANGUAGE_DUTCH_BELGIAN 0x0813 +#define LANGUAGE_EDO 0x0466 +#define LANGUAGE_ENGLISH 0x0009 /* primary only, not a locale! */ +#define LANGUAGE_ENGLISH_AUS 0x0C09 +#define LANGUAGE_ENGLISH_BELIZE 0x2809 +#define LANGUAGE_ENGLISH_CAN 0x1009 +#define LANGUAGE_ENGLISH_CARRIBEAN 0x2409 +#define LANGUAGE_ENGLISH_EIRE 0x1809 +#define LANGUAGE_ENGLISH_HONG_KONG_SAR 0x3C09 +#define LANGUAGE_ENGLISH_INDIA 0x4009 +#define LANGUAGE_ENGLISH_INDONESIA 0x3809 +#define LANGUAGE_ENGLISH_JAMAICA 0x2009 +#define LANGUAGE_ENGLISH_MALAYSIA 0x4409 +#define LANGUAGE_ENGLISH_NZ 0x1409 +#define LANGUAGE_ENGLISH_PHILIPPINES 0x3409 +#define LANGUAGE_ENGLISH_SAFRICA 0x1C09 +#define LANGUAGE_ENGLISH_SINGAPORE 0x4809 +#define LANGUAGE_ENGLISH_TRINIDAD 0x2C09 +#define LANGUAGE_ENGLISH_UK 0x0809 +#define LANGUAGE_ENGLISH_US 0x0409 +#define LANGUAGE_ENGLISH_ZIMBABWE 0x3009 +#define LANGUAGE_ESTONIAN 0x0425 +#define LANGUAGE_FAEROESE 0x0438 +#define LANGUAGE_FARSI 0x0429 +#define LANGUAGE_FILIPINO 0x0464 +#define LANGUAGE_FINNISH 0x040B +#define LANGUAGE_FRENCH 0x040C +#define LANGUAGE_FRENCH_BELGIAN 0x080C +#define LANGUAGE_FRENCH_CAMEROON 0x2C0C +#define LANGUAGE_FRENCH_CANADIAN 0x0C0C +#define 
LANGUAGE_FRENCH_COTE_D_IVOIRE 0x300C + +#define LANGUAGE_FRENCH_HAITI 0x3C0C +#define LANGUAGE_FRENCH_LUXEMBOURG 0x140C +#define LANGUAGE_FRENCH_MALI 0x340C +#define LANGUAGE_FRENCH_MONACO 0x180C +#define LANGUAGE_FRENCH_MOROCCO 0x380C +#define LANGUAGE_FRENCH_NORTH_AFRICA 0xE40C +#define LANGUAGE_FRENCH_REUNION 0x200C +#define LANGUAGE_FRENCH_SENEGAL 0x280C +#define LANGUAGE_FRENCH_SWISS 0x100C +#define LANGUAGE_FRENCH_WEST_INDIES 0x1C0C +#define LANGUAGE_FRENCH_ZAIRE 0x240C +#define LANGUAGE_FRISIAN_NETHERLANDS 0x0462 +#define LANGUAGE_FULFULDE_NIGERIA 0x0467 +#define LANGUAGE_GAELIC_IRELAND 0x083C +#define LANGUAGE_GAELIC_SCOTLAND 0x043C +#define LANGUAGE_GALICIAN 0x0456 +#define LANGUAGE_GEORGIAN 0x0437 +#define LANGUAGE_GERMAN 0x0407 +#define LANGUAGE_GERMAN_AUSTRIAN 0x0C07 +#define LANGUAGE_GERMAN_LIECHTENSTEIN 0x1407 +#define LANGUAGE_GERMAN_LUXEMBOURG 0x1007 +#define LANGUAGE_GERMAN_SWISS 0x0807 +#define LANGUAGE_GREEK 0x0408 +#define LANGUAGE_GUARANI_PARAGUAY 0x0474 +#define LANGUAGE_GUJARATI 0x0447 +#define LANGUAGE_HAUSA_NIGERIA 0x0468 +#define LANGUAGE_HAWAIIAN_UNITED_STATES 0x0475 +#define LANGUAGE_HEBREW 0x040D +#define LANGUAGE_HINDI 0x0439 +#define LANGUAGE_HUNGARIAN 0x040E +#define LANGUAGE_IBIBIO_NIGERIA 0x0469 +#define LANGUAGE_ICELANDIC 0x040F +#define LANGUAGE_IGBO_NIGERIA 0x0470 +#define LANGUAGE_INDONESIAN 0x0421 +#define LANGUAGE_INUKTITUT_SYLLABICS_CANADA 0x045D +#define LANGUAGE_INUKTITUT_LATIN_CANADA 0x085D +#define LANGUAGE_ITALIAN 0x0410 +#define LANGUAGE_ITALIAN_SWISS 0x0810 +#define LANGUAGE_JAPANESE 0x0411 +#define LANGUAGE_KALAALLISUT_GREENLAND 0x046F /* obsoletes LANGUAGE_USER_KALAALLISUT 0x062A */ +#define LANGUAGE_KANNADA 0x044B +#define LANGUAGE_KANURI_NIGERIA 0x0471 +#define LANGUAGE_KASHMIRI 0x0460 +#define LANGUAGE_KASHMIRI_INDIA 0x0860 +#define LANGUAGE_KAZAKH 0x043F +#define LANGUAGE_KHMER 0x0453 +#define LANGUAGE_KICHE_GUATEMALA 0x0486 /* AKA K'iche', West Central Quiche, */ +#define LANGUAGE_KINYARWANDA_RWANDA 0x0487 /* 
obsoletes LANGUAGE_USER_KINYARWANDA 0x0621 */ +#define LANGUAGE_KIRGHIZ 0x0440 /* AKA Kyrgyz */ +#define LANGUAGE_KONKANI 0x0457 +#define LANGUAGE_KOREAN 0x0412 +#define LANGUAGE_KOREAN_JOHAB 0x0812 +#define LANGUAGE_LAO 0x0454 +#define LANGUAGE_LATIN 0x0476 /* obsoletes LANGUAGE_USER_LATIN 0x0610 */ +#define LANGUAGE_LATVIAN 0x0426 +#define LANGUAGE_LITHUANIAN 0x0427 +#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 +#define LANGUAGE_LUXEMBOURGISH_LUXEMBOURG 0x046E /* obsoletes LANGUAGE_USER_LUXEMBOURGISH 0x0630 */ +#define LANGUAGE_MACEDONIAN 0x042F +#define LANGUAGE_MALAY 0x003E /* primary only, not a locale! */ +#define LANGUAGE_MALAYALAM 0x044C /* in India */ +#define LANGUAGE_MALAY_BRUNEI_DARUSSALAM 0x083E +#define LANGUAGE_MALAY_MALAYSIA 0x043E +#define LANGUAGE_MALTESE 0x043A +#define LANGUAGE_MANIPURI 0x0458 +#define LANGUAGE_MAORI_NEW_ZEALAND 0x0481 /* obsoletes LANGUAGE_USER_MAORI 0x0620 */ +#define LANGUAGE_MAPUDUNGUN_CHILE 0x047A /* AKA Araucanian */ +#define LANGUAGE_MARATHI 0x044E +#define LANGUAGE_MOHAWK_CANADA 0x047C +#define LANGUAGE_MONGOLIAN 0x0450 /* Cyrillic script */ +#define LANGUAGE_MONGOLIAN_MONGOLIAN 0x0850 +#define LANGUAGE_NEPALI 0x0461 +#define LANGUAGE_NEPALI_INDIA 0x0861 +#define LANGUAGE_NORWEGIAN 0x0014 /* primary only, not a locale! 
*/ +#define LANGUAGE_NORWEGIAN_BOKMAL 0x0414 +#define LANGUAGE_NORWEGIAN_NYNORSK 0x0814 +#define LANGUAGE_OCCITAN_FRANCE 0x0482 /* obsoletes LANGUAGE_USER_OCCITAN 0x0625 */ +#define LANGUAGE_ORIYA 0x0448 +#define LANGUAGE_OROMO 0x0472 +#define LANGUAGE_PAPIAMENTU 0x0479 +#define LANGUAGE_PASHTO 0x0463 +#define LANGUAGE_POLISH 0x0415 +#define LANGUAGE_PORTUGUESE 0x0816 +#define LANGUAGE_PORTUGUESE_BRAZILIAN 0x0416 +#define LANGUAGE_PUNJABI 0x0446 +#define LANGUAGE_PUNJABI_PAKISTAN 0x0846 +#define LANGUAGE_QUECHUA_BOLIVIA 0x046B +#define LANGUAGE_QUECHUA_ECUADOR 0x086B +#define LANGUAGE_QUECHUA_PERU 0x0C6B +#define LANGUAGE_RHAETO_ROMAN 0x0417 +#define LANGUAGE_ROMANIAN 0x0418 +#define LANGUAGE_ROMANIAN_MOLDOVA 0x0818 +#define LANGUAGE_RUSSIAN 0x0419 +#define LANGUAGE_RUSSIAN_MOLDOVA 0x0819 +#define LANGUAGE_SAMI_NORTHERN_NORWAY 0x043B +#define LANGUAGE_SAMI_LAPPISH LANGUAGE_SAMI_NORTHERN_NORWAY /* the old MS definition */ +#define LANGUAGE_SAMI_INARI 0x243B +#define LANGUAGE_SAMI_LULE_NORWAY 0x103B +#define LANGUAGE_SAMI_LULE_SWEDEN 0x143B +#define LANGUAGE_SAMI_NORTHERN_FINLAND 0x0C3B +#define LANGUAGE_SAMI_NORTHERN_SWEDEN 0x083B +#define LANGUAGE_SAMI_SKOLT 0x203B +#define LANGUAGE_SAMI_SOUTHERN_NORWAY 0x183B +#define LANGUAGE_SAMI_SOUTHERN_SWEDEN 0x1C3B +#define LANGUAGE_SANSKRIT 0x044F +#define LANGUAGE_SEPEDI 0x046C +#define LANGUAGE_NORTHERNSOTHO LANGUAGE_SEPEDI /* just an alias for the already existing localization */ +#define LANGUAGE_SERBIAN 0x001A /* primary only, not a locale! */ +#define LANGUAGE_SERBIAN_CYRILLIC 0x0C1A /* MS lists this as Serbian (Cyrillic, Serbia) 'sr-Cyrl-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! */ +#define LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x1C1A +#define LANGUAGE_SERBIAN_LATIN 0x081A /* MS lists this as Serbian (Latin, Serbia) 'sr-Latn-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! 
*/ +#define LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA 0x181A +#define LANGUAGE_SERBIAN_LATIN_NEUTRAL 0x7C1A /* MS lists this as 'sr' only. What a mess. */ +#define LANGUAGE_SESOTHO 0x0430 /* also called Sutu now by MS */ +#define LANGUAGE_SINDHI 0x0459 +#define LANGUAGE_SINDHI_PAKISTAN 0x0859 +#define LANGUAGE_SINHALESE_SRI_LANKA 0x045B +#define LANGUAGE_SLOVAK 0x041B +#define LANGUAGE_SLOVENIAN 0x0424 +#define LANGUAGE_SOMALI 0x0477 +#define LANGUAGE_UPPER_SORBIAN_GERMANY 0x042E /* obsoletes LANGUAGE_USER_UPPER_SORBIAN 0x0623 */ +#define LANGUAGE_LOWER_SORBIAN_GERMANY 0x082E /* obsoletes LANGUAGE_USER_LOWER_SORBIAN 0x0624. NOTE: the primary ID is identical to Upper Sorbian, which is not quite correct because they're distinct languages */ +#define LANGUAGE_SORBIAN LANGUAGE_USER_UPPER_SORBIAN /* a strange MS definition */ +#define LANGUAGE_SPANISH_DATED 0x040A /* old collation, not supported, see #i94435# */ +#define LANGUAGE_SPANISH_ARGENTINA 0x2C0A +#define LANGUAGE_SPANISH_BOLIVIA 0x400A +#define LANGUAGE_SPANISH_CHILE 0x340A +#define LANGUAGE_SPANISH_COLOMBIA 0x240A +#define LANGUAGE_SPANISH_COSTARICA 0x140A +#define LANGUAGE_SPANISH_DOMINICAN_REPUBLIC 0x1C0A +#define LANGUAGE_SPANISH_ECUADOR 0x300A +#define LANGUAGE_SPANISH_EL_SALVADOR 0x440A +#define LANGUAGE_SPANISH_GUATEMALA 0x100A +#define LANGUAGE_SPANISH_HONDURAS 0x480A +#define LANGUAGE_SPANISH_LATIN_AMERICA 0xE40A /* no locale possible */ +#define LANGUAGE_SPANISH_MEXICAN 0x080A +#define LANGUAGE_SPANISH_MODERN 0x0C0A +#define LANGUAGE_SPANISH_NICARAGUA 0x4C0A +#define LANGUAGE_SPANISH_PANAMA 0x180A +#define LANGUAGE_SPANISH_PARAGUAY 0x3C0A +#define LANGUAGE_SPANISH_PERU 0x280A +#define LANGUAGE_SPANISH_PUERTO_RICO 0x500A +#define LANGUAGE_SPANISH_UNITED_STATES 0x540A +#define LANGUAGE_SPANISH_URUGUAY 0x380A +#define LANGUAGE_SPANISH_VENEZUELA 0x200A +#define LANGUAGE_SPANISH LANGUAGE_SPANISH_MODERN /* modern collation, see #i94435# */ +#define LANGUAGE_SWAHILI 0x0441 /* Kenya */ +#define 
LANGUAGE_SWEDISH 0x041D +#define LANGUAGE_SWEDISH_FINLAND 0x081D +#define LANGUAGE_SYRIAC 0x045A +#define LANGUAGE_TAJIK 0x0428 +#define LANGUAGE_TAMAZIGHT_ARABIC 0x045F +#define LANGUAGE_TAMAZIGHT_LATIN 0x085F +#define LANGUAGE_TAMAZIGHT_TIFINAGH 0x0C5F +#define LANGUAGE_TAMIL 0x0449 +#define LANGUAGE_TATAR 0x0444 +#define LANGUAGE_TELUGU 0x044A +#define LANGUAGE_THAI 0x041E +#define LANGUAGE_TIBETAN 0x0451 +#define LANGUAGE_DZONGKHA 0x0851 +#define LANGUAGE_TIBETAN_BHUTAN LANGUAGE_DZONGKHA /* a MS error, see #i53497# */ +#define LANGUAGE_TIGRIGNA_ERITREA 0x0873 +#define LANGUAGE_TIGRIGNA_ETHIOPIA 0x0473 +#define LANGUAGE_TSONGA 0x0431 +#define LANGUAGE_TSWANA 0x0432 /* AKA Setsuana, for South Africa */ +#define LANGUAGE_TURKISH 0x041F +#define LANGUAGE_TURKMEN 0x0442 +#define LANGUAGE_UIGHUR_CHINA 0x0480 +#define LANGUAGE_UKRAINIAN 0x0422 +#define LANGUAGE_URDU 0x0020 /* primary only, not a locale! */ +#define LANGUAGE_URDU_INDIA 0x0820 +#define LANGUAGE_URDU_PAKISTAN 0x0420 +#define LANGUAGE_UZBEK_CYRILLIC 0x0843 +#define LANGUAGE_UZBEK_LATIN 0x0443 +#define LANGUAGE_VENDA 0x0433 +#define LANGUAGE_VIETNAMESE 0x042A +#define LANGUAGE_WELSH 0x0452 +#define LANGUAGE_WOLOF_SENEGAL 0x0488 +#define LANGUAGE_XHOSA 0x0434 /* AKA isiXhosa */ +#define LANGUAGE_YAKUT_RUSSIA 0x0485 +#define LANGUAGE_YI 0x0478 /* Sichuan Yi */ +#define LANGUAGE_YIDDISH 0x043D +#define LANGUAGE_YORUBA 0x046A +#define LANGUAGE_ZULU 0x0435 + +/*! use only for import/export of MS documents, number formatter maps it to + *! LANGUAGE_SYSTEM and then to effective system language */ +#define LANGUAGE_SYSTEM_DEFAULT 0x0800 + +/*! use only for import/export of MS documents, number formatter maps it to + *! LANGUAGE_SYSTEM and then to effective system language */ +#define LANGUAGE_PROCESS_OR_USER_DEFAULT 0x0400 + +/* And now the extensions we define, valid from + * 0x0610 to 0x07FF with sublanguage ID 0x01 (default) + * 0x0A00 to 0x0BFF with sublanguage ID 0x02 + * ... 
+ * 0x8200 to 0x83FF with sublanguage ID 0x20 + * 0x8600 to 0x87FF with sublanguage ID 0x21 + * ... + * 0xFA00 to 0xFBFF with sublanguage ID 0x3E + * 0xFE00 to 0xFFFF with sublanguage ID 0x3F + * + * Obsolete OOo user defines now have other values assigned by MS, and + * different name. Mapping an obsolete value to ISO code should work provided + * that such a mapping exists in i18nlangtag/source/isolang/isolang.cxx, but + * mapping ISO back to LANGID will return the new value. + */ +#define LANGUAGE_OBSOLETE_USER_LATIN 0x0610 +#define LANGUAGE_USER_LATIN LANGUAGE_LATIN +#define LANGUAGE_USER_ESPERANTO 0x0611 /* no locale possible */ +#define LANGUAGE_USER_INTERLINGUA 0x0612 /* no locale, but conventions */ +#define LANGUAGE_OBSOLETE_USER_MAORI 0x0620 +#define LANGUAGE_USER_MAORI LANGUAGE_MAORI_NEW_ZEALAND +#define LANGUAGE_OBSOLETE_USER_KINYARWANDA 0x0621 +#define LANGUAGE_USER_KINYARWANDA LANGUAGE_KINYARWANDA_RWANDA +/* was reserved for Northern Sotho but never used: 0x0622 */ /* obsoleted by LANGUAGE_SEPEDI */ +#define LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN 0x0623 +#define LANGUAGE_USER_UPPER_SORBIAN LANGUAGE_UPPER_SORBIAN_GERMANY +#define LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN 0x0624 +#define LANGUAGE_USER_LOWER_SORBIAN LANGUAGE_LOWER_SORBIAN_GERMANY +#define LANGUAGE_OBSOLETE_USER_OCCITAN 0x0625 +#define LANGUAGE_USER_OCCITAN LANGUAGE_OCCITAN_FRANCE /* reserved to languedocian */ + +#define LANGUAGE_USER_KOREAN_NORTH 0x8012 /* North Korean as opposed to South Korean, makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_KOREAN)) */ +#define LANGUAGE_USER_KURDISH_TURKEY 0x0626 /* sublang 0x01, Latin script */ +#define LANGUAGE_USER_KURDISH_SYRIA 0x0A26 /* sublang 0x02, Latin script */ +#define LANGUAGE_USER_KURDISH_IRAQ 0x0E26 /* sublang 0x03, Arabic script */ +#define LANGUAGE_USER_KURDISH_IRAN 0x1226 /* sublang 0x04, Arabic script */ +#define LANGUAGE_USER_SARDINIAN 0x0627 +/* was reserved for Dzongkha but turned down with #i53497#: 0x0628 */ /* obsoleted by 
LANGUAGE_DZONGKHA */ +#define LANGUAGE_USER_SWAHILI_TANZANIA 0x8041 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SWAHILI)) */ +#define LANGUAGE_OBSOLETE_USER_BRETON 0x0629 +#define LANGUAGE_USER_BRETON LANGUAGE_BRETON_FRANCE +#define LANGUAGE_OBSOLETE_USER_KALAALLISUT 0x062A +#define LANGUAGE_USER_KALAALLISUT LANGUAGE_KALAALLISUT_GREENLAND +#define LANGUAGE_USER_SWAZI 0x062B +#define LANGUAGE_USER_NDEBELE_SOUTH 0x062C +#define LANGUAGE_USER_TSWANA_BOTSWANA 0x8032 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_TSWANA)) */ +#define LANGUAGE_USER_MOORE 0x062D +#define LANGUAGE_USER_BAMBARA 0x062E +#define LANGUAGE_USER_AKAN 0x062F +#define LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH 0x0630 +#define LANGUAGE_USER_LUXEMBOURGISH LANGUAGE_LUXEMBOURGISH_LUXEMBOURG +#define LANGUAGE_USER_FRIULIAN 0x0631 +#define LANGUAGE_USER_FIJIAN 0x0632 +#define LANGUAGE_USER_AFRIKAANS_NAMIBIA 0x8036 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_AFRIKAANS)) */ +#define LANGUAGE_USER_ENGLISH_NAMIBIA 0x8009 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_ENGLISH_US)) */ +#define LANGUAGE_USER_WALLOON 0x0633 +#define LANGUAGE_USER_COPTIC 0x0634 +#define LANGUAGE_USER_CHUVASH 0x0635 +#define LANGUAGE_USER_GASCON 0x0636 /* Gascon France */ +#define LANGUAGE_USER_GERMAN_BELGIUM 0x8007 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_GERMAN)) */ +#define LANGUAGE_USER_CATALAN_VALENCIAN 0x8003 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_CATALAN)) */ +#define LANGUAGE_USER_HAUSA_GHANA 0x8068 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_HAUSA_NIGERIA)) */ +#define LANGUAGE_USER_EWE_GHANA 0x0637 +#define LANGUAGE_USER_ENGLISH_GHANA 0x8409 /* makeLangID( 0x21, getPrimaryLanguage( LANGUAGE_ENGLISH_US)) */ +#define LANGUAGE_USER_TAGALOG 0x0638 +#define LANGUAGE_USER_LINGALA_DRCONGO 0x0639 +#define LANGUAGE_USER_SANGO 0x063A +#define LANGUAGE_USER_GANDA 0x063B +#define LANGUAGE_USER_LOW_GERMAN 0x063C +#define LANGUAGE_USER_HILIGAYNON 0x063D +#define LANGUAGE_USER_NYANJA 0x063E 
+#define LANGUAGE_USER_KASHUBIAN 0x063F +#define LANGUAGE_USER_SPANISH_CUBA 0x800A /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SPANISH)) */ +#define LANGUAGE_USER_TETUN 0x0640 +#define LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA 0x0641 +#define LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA 0x0642 +#define LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA 0x8C1A /* makeLangID( 0x20+0x03, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */ +#define LANGUAGE_USER_SERBIAN_LATIN_SERBIA 0x881A /* makeLangID( 0x20+0x02, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */ +#define LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO 0xCC1A /* makeLangID( 0x20+0x13, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */ +#define LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO 0xC81A /* makeLangID( 0x20+0x12, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */ +#define LANGUAGE_USER_SAMI_KILDIN_RUSSIA 0x803B /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SAMI_NORTHERN_NORWAY)) */ +#define LANGUAGE_USER_BODO_INDIA 0x0643 +#define LANGUAGE_USER_DOGRI_INDIA 0x0644 +#define LANGUAGE_USER_MAITHILI_INDIA 0x0645 +#define LANGUAGE_USER_SANTALI_INDIA 0x0646 +#define LANGUAGE_USER_TETUN_TIMOR_LESTE 0x0A40 /* makeLangID( 0x02, getPrimaryLanguage( LANGUAGE_USER_TETUN)) */ +#define LANGUAGE_USER_TOK_PISIN 0x0647 +#define LANGUAGE_USER_SHUSWAP 0x0648 +#define LANGUAGE_USER_ARABIC_CHAD 0x8001 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_COMOROS 0x8401 /* makeLangID( 0x21, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_DJIBOUTI 0x8801 /* makeLangID( 0x22, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_ERITREA 0x8C01 /* makeLangID( 0x23, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_ISRAEL 0x9001 /* makeLangID( 0x24, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_MAURITANIA 0x9401 /* makeLangID( 0x25, getPrimaryLanguage( 
LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_PALESTINE 0x9801 /* makeLangID( 0x26, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_SOMALIA 0x9C01 /* makeLangID( 0x27, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ARABIC_SUDAN 0xA001 /* makeLangID( 0x28, getPrimaryLanguage( LANGUAGE_ARABIC_SAUDI_ARABIA)) */ +#define LANGUAGE_USER_ANCIENT_GREEK 0x0649 +#define LANGUAGE_USER_ASTURIAN 0x064A +#define LANGUAGE_USER_LATGALIAN 0x064B +#define LANGUAGE_USER_MAORE 0x064C +#define LANGUAGE_USER_BUSHI 0x064D +#define LANGUAGE_USER_TAHITIAN 0x064E +#define LANGUAGE_USER_MALAGASY_PLATEAU 0x064F +#define LANGUAGE_USER_PAPIAMENTU_ARUBA 0x8079 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_PAPIAMENTU)) */ +#define LANGUAGE_USER_SARDINIAN_CAMPIDANESE 0x0650 +#define LANGUAGE_USER_SARDINIAN_GALLURESE 0x0651 +#define LANGUAGE_USER_SARDINIAN_LOGUDORESE 0x0652 +#define LANGUAGE_USER_SARDINIAN_SASSARESE 0x0653 +#define LANGUAGE_USER_BAFIA 0x0654 +#define LANGUAGE_USER_GIKUYU 0x0655 +#define LANGUAGE_USER_RUSYN_UKRAINE 0x0656 +#define LANGUAGE_USER_RUSYN_SLOVAKIA 0x8256 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_RUSYN_UKRAINE)) */ +#define LANGUAGE_USER_YIDDISH_US 0x083D /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_YIDDISH)) */ +#define LANGUAGE_USER_LIMBU 0x0657 +#define LANGUAGE_USER_LOJBAN 0x0658 /* no locale */ +#define LANGUAGE_OBSOLETE_USER_KABYLE 0x0659 +#define LANGUAGE_USER_KABYLE LANGUAGE_TAMAZIGHT_LATIN +#define LANGUAGE_USER_HAITIAN 0x065A +#define LANGUAGE_USER_BEEMBE 0x065B +#define LANGUAGE_USER_BEKWEL 0x065C +#define LANGUAGE_USER_KITUBA 0x065D +#define LANGUAGE_USER_LARI 0x065E +#define LANGUAGE_USER_MBOCHI 0x065F +#define LANGUAGE_USER_TEKE_IBALI 0x0660 +#define LANGUAGE_USER_TEKE_TYEE 0x0661 +#define LANGUAGE_USER_VILI 0x0662 +#define LANGUAGE_USER_PORTUGUESE_ANGOLA 0x8016 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_PORTUGUESE)) */ +#define LANGUAGE_USER_MANX 
0x0663 +#define LANGUAGE_USER_TEKE_EBOO 0x0664 +#define LANGUAGE_USER_ARAGONESE 0x0665 +#define LANGUAGE_USER_KEYID 0x0666 /* key id pseudolanguage */ +#define LANGUAGE_USER_PALI_LATIN 0x0667 +#define LANGUAGE_USER_KYRGYZ_CHINA 0x0668 /* not derived from LANGUAGE_KIRGHIZ as these may be different scripts, see http://www.omniglot.com/writing/kirghiz.htm */ +#define LANGUAGE_USER_KOMI_ZYRIAN 0x0669 +#define LANGUAGE_USER_KOMI_PERMYAK 0x066A +#define LANGUAGE_USER_PITJANTJATJARA 0x066B +#define LANGUAGE_USER_ENGLISH_MALAWI 0x8809 /* makeLangID( 0x22, getPrimaryLanguage( LANGUAGE_ENGLISH_UK)) */ +#define LANGUAGE_USER_ERZYA 0x066C +#define LANGUAGE_USER_MARI_MEADOW 0x066D +#define LANGUAGE_USER_KHANTY 0x066E +#define LANGUAGE_USER_LIVONIAN 0x066F +#define LANGUAGE_USER_MOKSHA 0x0670 +#define LANGUAGE_USER_MARI_HILL 0x0671 +#define LANGUAGE_USER_NGANASAN 0x0672 +#define LANGUAGE_USER_OLONETS 0x0673 +#define LANGUAGE_USER_VEPS 0x0674 +#define LANGUAGE_USER_VORO 0x0675 +#define LANGUAGE_USER_NENETS 0x0676 +#define LANGUAGE_USER_PAPIAMENTU_CURACAO 0x8479 /* makeLangID( 0x21, getPrimaryLanguage( LANGUAGE_PAPIAMENTU)) */ +#define LANGUAGE_USER_PAPIAMENTU_BONAIRE 0x8879 /* makeLangID( 0x22, getPrimaryLanguage( LANGUAGE_PAPIAMENTU)) */ +#define LANGUAGE_USER_AKA 0x0677 +#define LANGUAGE_USER_AKA_CONGO 0x8277 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_AKA)) */ +#define LANGUAGE_USER_DIBOLE 0x0678 +#define LANGUAGE_USER_DOONDO 0x0679 +#define LANGUAGE_USER_KAAMBA 0x067A +#define LANGUAGE_USER_KOONGO 0x067B +#define LANGUAGE_USER_KOONGO_CONGO 0x827B /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_KOONGO)) */ +#define LANGUAGE_USER_KUNYI 0x067C +#define LANGUAGE_USER_NGUNGWEL 0x067D +#define LANGUAGE_USER_NJYEM 0x067E +#define LANGUAGE_USER_NJYEM_CONGO 0x827E /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_NJYEM)) */ +#define LANGUAGE_USER_PUNU 0x067F +#define LANGUAGE_USER_PUNU_CONGO 0x827F /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_PUNU)) */ 
+#define LANGUAGE_USER_SUUNDI 0x0680 +#define LANGUAGE_USER_TEKE_KUKUYA 0x0681 +#define LANGUAGE_USER_TSAANGI 0x0682 +#define LANGUAGE_USER_YAKA 0x0683 +#define LANGUAGE_USER_YOMBE 0x0684 +#define LANGUAGE_USER_YOMBE_CONGO 0x8284 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_YOMBE)) */ +#define LANGUAGE_USER_SIDAMA 0x0685 + +#define LANGUAGE_USER_PRIV_JOKER 0xFFEB /* privateuse "*" (sic! bad! nasty!), primary 0x3eb, sub 0x3f */ +#define LANGUAGE_USER_PRIV_COMMENT 0xFFEC /* privateuse "x-comment", primary 0x3ec, sub 0x3f */ +#define LANGUAGE_USER_PRIV_DEFAULT 0xFFED /* privateuse "x-default", primary 0x3ed, sub 0x3f */ +#define LANGUAGE_USER_PRIV_NOTRANSLATE 0xFFEE /* privateuse "x-no-translate" (sic!), primary 0x3ee, sub 0x3f */ +#define LANGUAGE_MULTIPLE 0xFFEF /* multiple languages, primary 0x3ef, sub 0x3f */ +#define LANGUAGE_UNDETERMINED 0xFFF0 /* undetermined language, primary 0x3f0, sub 0x3f */ +#define LANGUAGE_USER_SYSTEM_CONFIG 0xFFFE /* not a locale, to be used only in configuration context to obtain system default, primary 0x3fe, sub 0x3f */ + +#endif /* INCLUDED_I18NLANGTAG_LANG_H */ + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/inc/i18nlangtag/languagetag.hxx b/i18nlangtag/inc/i18nlangtag/languagetag.hxx new file mode 100644 index 000000000000..e5ba011ab2ba --- /dev/null +++ b/i18nlangtag/inc/i18nlangtag/languagetag.hxx @@ -0,0 +1,400 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX +#define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX + +#include <sal/config.h> +#include <rtl/ustring.hxx> +#include <com/sun/star/lang/Locale.hpp> +#include <i18nlangtag/i18nlangtagdllapi.h> +#include <i18nlangtag/lang.h> + +#include <vector> + +typedef struct _rtl_Locale rtl_Locale; // as in rtl/locale.h + + +/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and + conversions in between. + + Note that member variables are mutable and may change their values even in + const methods. Getter methods return either the original value or matching + converted values. + */ +class I18NLANGTAG_DLLPUBLIC LanguageTag +{ +public: + + /** Init LanguageTag with existing BCP 47 language tag string. + + @param bCanonicalize + If TRUE, canonicalize tag and reparse, the resulting tag string may + be different. + If FALSE, the tag is simply stored and can be retrieved with + getBcp47(). + + Note that conversions to ISO codes, locales or LanguageType or + obtaining language or script will canonicalize the tag string anyway, + so specifying bCanonicalize=false is not a guarantee that the tag will + stay identical to what was passed. + */ + explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false ); + + /** Init LanguageTag with Locale. */ + explicit LanguageTag( const com::sun::star::lang::Locale & rLocale ); + + /** Init LanguageTag with LanguageType MS-LangID. */ + explicit LanguageTag( LanguageType nLanguage ); + + /** Init LanguageTag with language and country strings. + + This is a convenience ctor for places that so far use only language and + country to replace the MsLangId::convert...IsoNames...() calls. Avoid + use in new code. + */ + explicit LanguageTag( const OUString& rLanguage, const OUString& rCountry ); + + /** Init LanguageTag with rtl_Locale. + + This is a convenience ctor.
+ */ + explicit LanguageTag( const rtl_Locale & rLocale ); + + LanguageTag( const LanguageTag & rLanguageTag ); + ~LanguageTag(); + LanguageTag& operator=( const LanguageTag & rLanguageTag ); + + /** Obtain BCP 47 language tag. + + @param bResolveSystem + If TRUE, resolve an empty language tag denoting the system + locale to the real locale used. + If FALSE, return an empty OUString for such a tag. + */ + const OUString & getBcp47( bool bResolveSystem = true ) const; + + /** Obtain language tag as Locale. + + As a convention, language tags that can not be expressed as "pure" + com::sun::star::lang::Locale content using Language and Country fields + store "qlt" (ISO 639 reserved for local use) in the Language field and + the entire BCP 47 language tag in the Variant field. The Country field + contains the corresponding ISO 3166 country code _if_ there is one, or + otherwise is empty. + + @param bResolveSystem + If TRUE, resolve an empty language tag denoting the system + locale to the real locale used. + If FALSE, return an empty Locale for such a tag. + */ + const com::sun::star::lang::Locale & getLocale( bool bResolveSystem = true ) const; + + /** Obtain mapping to MS-LangID. + + @param bResolveSystem + If TRUE, resolve an empty language tag denoting the system + locale to the real locale used. + If FALSE, return LANGUAGE_SYSTEM for such a tag. + */ + LanguageType getLanguageType( bool bResolveSystem = true ) const; + + /** Obtain ISO strings for language and country. + + This is a convenience method for places that so far use only language and + country to replace the MsLangId::convert...IsoNames...() calls. Avoid + use in new code. + + ATTENTION! May return empty strings if the language tag is not + expressible in valid ISO codes! + + @see isIsoLocale() + + Always resolves an empty tag to the system locale. + */ + void getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const; + + /** Get ISO 639 language code, or BCP 47 language.
+ + Always resolves an empty tag to the system locale. + */ + OUString getLanguage() const; + + /** Get ISO 15924 script code, if not the default script according to + BCP 47. For default script an empty string is returned. + + @see hasScript() + + Always resolves an empty tag to the system locale. + */ + OUString getScript() const; + + /** Get combined language and script code, separated by '-' if + non-default script, if default script only language. + + @see hasScript() + + Always resolves an empty tag to the system locale. + */ + OUString getLanguageAndScript() const; + + /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a + region not expressible as 2 character country code. + + Always resolves an empty tag to the system locale. + */ + OUString getCountry() const; + + /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or + any other BCP 47 region tag. + + Always resolves an empty tag to the system locale. + */ + OUString getRegion() const; + + /** Get a GLIBC locale string. + + Always resolves an empty tag to the system locale. + + @param rEncoding + An encoding to be appended to language_country, for example + ".UTF-8" including the dot. + + @return The resulting GLIBC locale string if it could be constructed, + if not an empty string is returned. + */ + OUString getGlibcLocaleString( const OUString & rEncoding ) const; + + /** If language tag has a non-default script specified. + */ + bool hasScript() const; + + /** If language tag is a locale that can be expressed using only ISO 639 + language codes and ISO 3166 country codes, thus is convertible to a + conforming Locale struct without using extension mechanisms. + + Note that an empty language tag or empty Locale::Language field or + LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in + some context, but here is not. If you want that ask for + aTag.isSystemLocale() || aTag.isIsoLocale() + + Always resolves an empty tag to the system locale.
+ */ + bool isIsoLocale() const; + + /** If language tag is a locale that can be expressed using only ISO 639 + language codes and ISO 15924 script codes and ISO 3166 country codes, + thus can be stored in an ODF document using only fo:language, fo:script + and fo:country attributes. If this is FALSE, the locale must be stored + as a <*:rfc-language-tag> element. + + Always resolves an empty tag to the system locale. + */ + bool isIsoODF() const; + + /** If this is a valid BCP 47 language tag. + + Always resolves an empty tag to the system locale. + */ + bool isValidBcp47() const; + + /** If this tag was constructed as an empty tag denoting the system locale. + */ + bool isSystemLocale() const; + + + /** Reset with existing BCP 47 language tag string. See ctor. */ + void reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false ); + + /** Reset with Locale. */ + void reset( const com::sun::star::lang::Locale & rLocale ); + + /** Reset with LanguageType MS-LangID. */ + void reset( LanguageType nLanguage ); + + /** Reset with rtl_Locale. */ + void reset( const rtl_Locale & rLocale ); + + + /** Fall back to a known locale. + + If the current tag does not represent a known (by us) locale, fall back + to the most likely locale possible known. + If the current tag is known, no change occurs. + */ + LanguageTag & makeFallback(); + + /** Return a vector of fall-back strings. + + In order: + full BCP 47 tag, same as getBcp47() + lll-Ssss-CC + lll-Ssss + lll-CC + lll + + Only strings that differ from a higher order are included, for example + if there is no script the elements will be bcp47, lll-CC, lll; if the + bcp47 string is identical to lll-CC then only lll-CC, lll. + + Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166 + alpha code. If the region can not be expressed as ISO 3166 then no -CC + tags are included.
+ */ + ::std::vector< OUString > getFallbackStrings() const; + + + /** @short search for an equal or at least for a similar locale in a list + of possible ones. + + @descr First search for a locale that is equal to the reference + locale. (means: same BCP47 string) + + If the reference locale could not be located, check for + "similar" locales, in the same order as obtained by + getFallbackStrings(). + + If no similar locale could be located, we search for a locale + "en-US" inside the given locale list. + + If "en-US" could not be located, we search for a locale "en" + inside the given list. + + If no "same" nor any "similar" locale could be found, we try + "x-default" and "x-no-translate" explicitly. Sometimes + variables don't use real localization. For example, in case the + localized value is a fixed product name. + + If no locale matched until then, we use any other locale that + exists inside the set of given ones, namely the first + encountered! + + @param rList + the vector of possible locales as BCP47 strings. + + @param rReference + the reference locale, BCP47 string. + + @return An iterator that points to the found element inside the given + locale list. If no matching locale could be found it points to + the end of the list. + */ + static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList, + const OUString & rReference ); + + + /** Test equality of two LanguageTag, possibly resolving system locale. + + @param bResolveSystem + If TRUE, resolve empty language tags denoting the system + locale to the real locale used before comparing. + If FALSE, the behavior is identical to operator==(), system + locales are not resolved first. + */ + bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const; + + /** Test equality of two LanguageTag. + + Does NOT resolve system, i.e. if the system locale is en-US + LanguageTag("")==LanguageTag("en-US") returns false!
Use + equals(...,true) instead if system locales shall be resolved. + */ + bool operator==( const LanguageTag & rLanguageTag ) const; + + /** Test inequality of two LanguageTag. + + Does NOT resolve system, i.e. if the system locale is en-US + LanguageTag("")!=LanguageTag("en-US") returns true! Use + !equals(...,true) instead if system locales shall be resolved. + */ + bool operator!=( const LanguageTag & rLanguageTag ) const; + +private: + + enum Decision + { + DECISION_DONTKNOW, + DECISION_NO, + DECISION_YES + }; + + mutable com::sun::star::lang::Locale maLocale; + mutable OUString maBcp47; + mutable OUString maCachedLanguage; ///< cache getLanguage() + mutable OUString maCachedScript; ///< cache getScript() + mutable OUString maCachedCountry; ///< cache getCountry() + mutable void* mpImplLangtag; ///< actually lt_tag_t pointer, encapsulated + mutable LanguageType mnLangID; + mutable Decision meIsValid; + mutable Decision meIsIsoLocale; + mutable Decision meIsIsoODF; + mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed + bool mbSystemLocale : 1; + mutable bool mbInitializedBcp47 : 1; + mutable bool mbInitializedLocale : 1; + mutable bool mbInitializedLangID : 1; + mutable bool mbCachedLanguage : 1; + mutable bool mbCachedScript : 1; + mutable bool mbCachedCountry : 1; + bool mbIsFallback : 1; + + void convertLocaleToBcp47(); + void convertLocaleToLang(); + void convertBcp47ToLocale(); + void convertBcp47ToLang(); + void convertLangToLocale(); + void convertLangToBcp47(); + + void convertFromRtlLocale(); + + bool canonicalize(); + + OUString getLanguageFromLangtag(); + OUString getScriptFromLangtag(); + OUString getRegionFromLangtag(); + + void resetVars(); + + /** Obtain Language, Script and Country via simpleExtract() and assign them + to the cached variables if successful.
+ + @return return of simpleExtract() + */ + bool cacheSimpleLSC(); + + static bool isIsoLanguage( const OUString& rLanguage ); + static bool isIsoScript( const OUString& rScript ); + static bool isIsoCountry( const OUString& rRegion ); + + enum Extraction + { + EXTRACTED_NONE, + EXTRACTED_LSC, + EXTRACTED_X, + EXTRACTED_X_JOKER + }; + + /** Of a simple language tag of the form lll[-Ssss][-CC] (i.e. one that + would fulfill the isIsoODF() condition) extract the portions. + + Does not check case or content! + + @return EXTRACTED_LSC if simple tag was detected, EXTRACTED_X if x-... + privateuse tag was detected, EXTRACTED_X_JOKER if "*" joker was + detected, else EXTRACTED_NONE. + */ + static Extraction simpleExtract( const OUString& rBcp47, + OUString& rLanguage, + OUString& rScript, + OUString& rCountry ); +}; + +#endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/inc/i18nlangtag/mslangid.hxx b/i18nlangtag/inc/i18nlangtag/mslangid.hxx new file mode 100644 index 000000000000..8148d1760de0 --- /dev/null +++ b/i18nlangtag/inc/i18nlangtag/mslangid.hxx @@ -0,0 +1,309 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License.
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_I18NLANGTAG_MSLANGID_HXX +#define INCLUDED_I18NLANGTAG_MSLANGID_HXX + +#include <sal/config.h> + +#include "i18nlangtag/i18nlangtagdllapi.h" +#include "i18nlangtag/lang.h" +#include <com/sun/star/lang/Locale.hpp> + +/** Methods related to Microsoft language IDs. For details about MS-LANGIDs + please see lang.h */ +class I18NLANGTAG_DLLPUBLIC MsLangId +{ +public: + + /// Create a LangID from a primary and a sublanguage. + static inline LanguageType makeLangID( LanguageType nSubLangId, LanguageType nPriLangId) + { + return (nSubLangId << 10) | nPriLangId; + } + + /// Get the primary language of a LangID. + static inline LanguageType getPrimaryLanguage( LanguageType nLangID) + { + return nLangID & LANGUAGE_MASK_PRIMARY; + } + + /// Get the sublanguage of a LangID. + static inline LanguageType getSubLanguage( LanguageType nLangID) + { + return (nLangID & ~LANGUAGE_MASK_PRIMARY) >> 10; + } + + /** Language/locale of category LC_CTYPE (on Unix, else the system + language). + Evaluation order: LC_ALL, LC_CTYPE, LANG */ + static LanguageType getSystemLanguage(); + + /** Language/locale of category LC_MESSAGES (on Unix, else same as + getSystemLanguage()). + Evaluation order: LANGUAGE, LC_ALL, LC_MESSAGES, LANG */ + static LanguageType getSystemUILanguage(); + + + /** @short: A proper language/locale if the nLang parameter designates some + special value. + + @descr: NOTE: The "system" values may be overridden by the + application's configuration.
+ + @returns + case LANGUAGE_PROCESS_OR_USER_DEFAULT : configured or system language + case LANGUAGE_SYSTEM_DEFAULT : configured or system language + case LANGUAGE_SYSTEM : configured or system language + case LANGUAGE_HID_HUMAN_INTERFACE_DEVICE : configured or system UI language + case LANGUAGE_DONTKNOW : LANGUAGE_ENGLISH_US + else: nLang + + In case the configured language is LANGUAGE_SYSTEM, which is also + the initial default, the system language is obtained. In case the + configured or resulting system language is LANGUAGE_DONTKNOW, + LANGUAGE_ENGLISH_US is returned instead. + */ + static LanguageType getRealLanguage( LanguageType nLang ); + + + // TODO: refactor to LanguageTag? Used only in + // i18npool/source/localedata/localedata.cxx + + /** Get fall-back Locale for Locale with handling of an empty language name + designating the SYSTEM language. Returns the same Locale if an exact + match was found. + */ + static ::com::sun::star::lang::Locale getFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ); + + + // TODO: refactor to LanguageTag, used only in + // i18npool/source/isolang/inunx.cxx to convert Unix locale string + + static LanguageType convertUnxByteStringToLanguage( const OString& rString ); + + + static LanguageType resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType ); + + + /** Whether locale has a Right-To-Left orientation.
*/ + static bool isRightToLeft( LanguageType nLang ); + + /** Whether locale is a CJK locale */ + static bool isCJK( LanguageType nLang ); + + /** Whether locale is a Chinese locale */ + static bool isChinese( LanguageType nLang ); + + /** Whether locale is a simplified Chinese locale */ + static bool isSimplifiedChinese( LanguageType nLang ); + + /** Whether locale is a traditional Chinese locale */ + static bool isTraditionalChinese( LanguageType nLang ); + + /** Whether locale is a Korean locale */ + static bool isKorean( LanguageType nLang ); + + /** Whether locale is a simplified Chinese locale */ + static bool isSimplifiedChinese( const ::com::sun::star::lang::Locale & rLocale ); + + /** Whether locale is a traditional Chinese locale */ + static bool isTraditionalChinese( const ::com::sun::star::lang::Locale & rLocale ); + + /** Whether locale is one where family name comes first, e.g. Japan, Hungary, Samoa */ + static bool isFamilyNameFirst( LanguageType nLang ); + + /** Whether there are "forbidden characters at start or end of line" in + this locale. CJK locales. + + @see offapi/com/sun/star/i18n/ForbiddenCharacters.idl + */ + static bool hasForbiddenCharacters( LanguageType nLang ); + + + /** Whether locale needs input sequence checking. CTL locales. */ + static bool needsSequenceChecking( LanguageType nLang ); + + + /** Get ::com::sun::star::i18n::ScriptType of locale. */ + static sal_Int16 getScriptType( LanguageType nLang ); + + + /** Map an obsolete user defined LANGID (see lang.h + LANGUAGE_OBSOLETE_USER_...) to the new value defined by MS in the + meantime. + + Also used to map UI localizations using reserved ISO codes to something + "official" but not identical in order to not pollute documents with + invalid ISO codes. + + @param bUserInterfaceSelection + If TRUE, don't replace such UI-only locale. Only use for + Tools->Options->LanguageSettings->UserInterface listbox. + If FALSE, do replace.
+ */ + static LanguageType getReplacementForObsoleteLanguage( LanguageType nLang, + bool bUserInterfaceSelection = false ); + + + /** @ATTENTION: these are _ONLY_ to be called by the application's + configuration! */ + static void setConfiguredSystemLanguage( LanguageType nLang ); + static void setConfiguredSystemUILanguage( LanguageType nLang ); + static void setConfiguredWesternFallback( LanguageType nLang ); + static void setConfiguredComplexFallback( LanguageType nLang ); + static void setConfiguredAsianFallback( LanguageType nLang ); + +// --------------------------------------------------------------------------- + + /** @internal - Access to fields of an element of the simple conversion table. + For resource compiler build environment usage only! */ + struct IsoLangEntry + { + LanguageType mnLang; + sal_Char maLangStr[4]; + sal_Char maCountry[3]; + + /** Obtain a language tag string with '-' separator. */ + I18NLANGTAG_DLLPUBLIC OUString getTagString() const; + }; + + /** @internal - Return a pointer to the IsoLangEntry of the underlying table, + matching the offset passed by nIndex. Only meaningful for the resource + compiler to build a list of known languages. + + @returns address of IsoLangEntry, or NULL pointer if nIndex exceeds the + table elements' count. + */ + static const IsoLangEntry* getIsoLangEntry( size_t nIndex ); + +// --------------------------------------------------------------------------- + + /** Encapsulated conversion methods used by LanguageTag and conversions, + not to be used by anything else. + */ + class Conversion + { + private: + + friend class LanguageTag; + + friend ::com::sun::star::lang::Locale MsLangId::getFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ); + + friend LanguageType MsLangId::convertUnxByteStringToLanguage( + const OString& rString ); + + + /** Convert a Locale to a LanguageType with handling of an empty + language name designating LANGUAGE_SYSTEM.
+ */ + I18NLANGTAG_DLLPRIVATE static LanguageType convertLocaleToLanguage( + const ::com::sun::star::lang::Locale & rLocale ); + + /** Convert x-... privateuse, used by convertLocaleToLanguage(Locale) */ + I18NLANGTAG_DLLPRIVATE static LanguageType convertPrivateUseToLanguage( + const OUString& rPriv ); + + /** Used by convertLocaleToLanguage(Locale) */ + I18NLANGTAG_DLLPRIVATE static LanguageType convertIsoNamesToLanguage( + const OUString& rLang, const OUString& rCountry ); + + + /** Used by convertUnxByteStringToLanguage(OString) */ + I18NLANGTAG_DLLPRIVATE static LanguageType convertIsoNamesToLanguage( + const OString& rLang, const OString& rCountry ); + + + /** Convert a LanguageType to a Locale. + + @param bResolveSystem + If bResolveSystem==true, a LANGUAGE_SYSTEM is resolved. + If bResolveSystem==false, a LANGUAGE_SYSTEM results in an + empty Locale. + */ + I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale convertLanguageToLocale( + LanguageType nLang, bool bResolveSystem ); + + /** Convert a LanguageType to a Locale, resolving LANGUAGE_SYSTEM. + + Used by convertLanguageToLocale(LanguageType,bool) + */ + I18NLANGTAG_DLLPRIVATE static void convertLanguageToLocale( + LanguageType nLang, ::com::sun::star::lang::Locale & rLocale ); + + /** Used by convertLanguageToLocale(LanguageType,Locale) */ + I18NLANGTAG_DLLPRIVATE static void convertLanguageToIsoNames( + LanguageType nLang, OUString& rLangStr, OUString& rCountry ); + + + I18NLANGTAG_DLLPRIVATE static LanguageType lookupFallbackLanguage( LanguageType nLang ); + + I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale lookupFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ); + + + /** Convert a LanguageType to a Locale, resolving LANGUAGE_SYSTEM, + falling back to a default locale if no exact match was found.
+ + Used by getFallbackLocale(Locale) + */ + I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale convertLanguageToLocaleWithFallback( + LanguageType nLang ); + + /** Used by convertLanguageToLocaleWithFallback(LanguageType) */ + I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale lookupFallbackLocale( + LanguageType nLang ); + }; + +private: + + static LanguageType nConfiguredSystemLanguage; + static LanguageType nConfiguredSystemUILanguage; + + static LanguageType nConfiguredWesternFallback; + static LanguageType nConfiguredAsianFallback; + static LanguageType nConfiguredComplexFallback; + + static LanguageType getPlatformSystemLanguage(); + static LanguageType getPlatformSystemUILanguage(); + + // Substitute LANGUAGE_SYSTEM for LANGUAGE_SYSTEM_DEFAULT and + // LANGUAGE_PROCESS_OR_USER_DEFAULT, other values aren't touched. + I18NLANGTAG_DLLPRIVATE static inline LanguageType simplifySystemLanguages( LanguageType nLang ); +}; + + +// static +inline LanguageType MsLangId::getSystemLanguage() +{ + return getPlatformSystemLanguage(); +} + + +// static +inline LanguageType MsLangId::getSystemUILanguage() +{ + return getPlatformSystemUILanguage(); +} + +#endif // INCLUDED_I18NLANGTAG_MSLANGID_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx new file mode 100644 index 000000000000..c64d199fc0e4 --- /dev/null +++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx @@ -0,0 +1,353 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <sal/config.h>

#include <cppunit/TestFixture.h>
#include <cppunit/TestAssert.h>
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/plugin/TestPlugIn.h>

#include <i18nlangtag/mslangid.hxx>
#include <i18nlangtag/languagetag.hxx>

#include <rtl/ustring.hxx>
#include <rtl/ustrbuf.hxx>
#include <osl/file.hxx>

#include <com/sun/star/lang/Locale.hpp>

using namespace com::sun::star;

// To test the replacement code add '&& 0' and also in
// source/languagetag/languagetag.cxx
//
// USE_LIBLANGTAG selects which set of expectations the tests below compile
// in: 1 when ENABLE_LIBLANGTAG is defined, 0 otherwise.
#if defined(ENABLE_LIBLANGTAG)
#define USE_LIBLANGTAG 1
#else
#define USE_LIBLANGTAG 0
#endif

namespace {

// CppUnit fixture exercising LanguageTag conversions between BCP 47 strings,
// lang::Locale and LanguageType, plus a sweep over the MsLangId ISO table.
class TestLanguageTag : public CppUnit::TestFixture
{
public:
    TestLanguageTag() {}
    virtual ~TestLanguageTag() {}

    void testAllTags();
    void testAllIsoLangEntries();

    CPPUNIT_TEST_SUITE(TestLanguageTag);
    CPPUNIT_TEST(testAllTags);
    CPPUNIT_TEST(testAllIsoLangEntries);
    CPPUNIT_TEST_SUITE_END();
};

// Checks a hand-picked list of tags. Each scope constructs one LanguageTag
// and asserts what getBcp47(), getLocale(), getLanguageType() and the
// is*/get* accessors report for it.
// NOTE(review): the second constructor argument 'true' presumably requests
// canonicalization (the assert messages speak of "after canonicalize") --
// confirm against the LanguageTag declaration.
void TestLanguageTag::testAllTags()
{
    // "de-Latn-DE": with liblangtag the script is expected to be dropped;
    // without it the tag is expected to be kept verbatim and carried in
    // Locale.Variant under the "qlt" language.
    {
        OUString s_de_Latn_DE( "de-Latn-DE" );
        LanguageTag de_DE( s_de_Latn_DE, true );
        OUString aBcp47 = de_DE.getBcp47();
        lang::Locale aLocale = de_DE.getLocale();
        LanguageType nLanguageType = de_DE.getLanguageType();
#if USE_LIBLANGTAG
        CPPUNIT_ASSERT_MESSAGE("Default script should be stripped after canonicalize.", aBcp47 == "de-DE" );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( nLanguageType == LANGUAGE_GERMAN );
        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
        CPPUNIT_ASSERT( de_DE.getScript() == "" );
        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" );
#else
        // The simple replacement code doesn't do any fancy stuff.
        CPPUNIT_ASSERT_MESSAGE("Default script was stripped after canonicalize!?!", aBcp47 == s_de_Latn_DE );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "de-Latn-DE" );
        CPPUNIT_ASSERT( nLanguageType == LANGUAGE_SYSTEM ); // XXX not resolved!
        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
        CPPUNIT_ASSERT( de_DE.getScript() == "Latn" );
        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de-Latn" );
#endif
    }

    // "i-klingon": expected to become "tlh" with liblangtag, and to be kept
    // as-is (in Locale.Variant) without it.
    {
        OUString s_klingon( "i-klingon" );
        LanguageTag klingon( s_klingon, true );
        lang::Locale aLocale = klingon.getLocale();
#if USE_LIBLANGTAG
        CPPUNIT_ASSERT( klingon.getBcp47() == "tlh" );
        CPPUNIT_ASSERT( aLocale.Language == "tlh" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( klingon.getLanguageType() == LANGUAGE_SYSTEM );
        CPPUNIT_ASSERT( klingon.isValidBcp47() == true );
        CPPUNIT_ASSERT( klingon.isIsoLocale() == true );
        CPPUNIT_ASSERT( klingon.isIsoODF() == true );
#else
        CPPUNIT_ASSERT( klingon.getBcp47() == s_klingon );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == s_klingon );
        CPPUNIT_ASSERT( klingon.getLanguageType() == LANGUAGE_SYSTEM );
        CPPUNIT_ASSERT( klingon.isValidBcp47() == true );
        CPPUNIT_ASSERT( klingon.isIsoLocale() == false );
        CPPUNIT_ASSERT( klingon.isIsoODF() == false );
#endif
    }

    // "sr-RS": plain language-country tag, same expectations in both builds.
    {
        OUString s_sr_RS( "sr-RS" );
        LanguageTag sr_RS( s_sr_RS, true );
        lang::Locale aLocale = sr_RS.getLocale();
        CPPUNIT_ASSERT( sr_RS.getBcp47() == s_sr_RS );
        CPPUNIT_ASSERT( aLocale.Language == "sr" );
        CPPUNIT_ASSERT( aLocale.Country == "RS" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( sr_RS.getLanguageType() == LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA );
        CPPUNIT_ASSERT( sr_RS.isValidBcp47() == true );
        CPPUNIT_ASSERT( sr_RS.isIsoLocale() == true );
        CPPUNIT_ASSERT( sr_RS.isIsoODF() == true );
    }

    // "sr-Latn-RS": the script is expected to survive, so the Locale uses
    // "qlt" with the full tag in Variant.
    {
        OUString s_sr_Latn_RS( "sr-Latn-RS" );
        LanguageTag sr_RS( s_sr_Latn_RS, true );
        lang::Locale aLocale = sr_RS.getLocale();
        CPPUNIT_ASSERT( sr_RS.getBcp47() == s_sr_Latn_RS );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "RS" );
        CPPUNIT_ASSERT( aLocale.Variant == s_sr_Latn_RS );
        /* TODO: conversion doesn't know this yet, once it does activate test. */
#if 0
        CPPUNIT_ASSERT( sr_RS.getLanguageType() == LANGUAGE_USER_SERBIAN_LATIN_SERBIA );
#else
        CPPUNIT_ASSERT( sr_RS.getLanguageType() == LANGUAGE_SYSTEM );
#endif
        CPPUNIT_ASSERT( sr_RS.isValidBcp47() == true );
        CPPUNIT_ASSERT( sr_RS.isIsoLocale() == false );
        CPPUNIT_ASSERT( sr_RS.isIsoODF() == true );
        CPPUNIT_ASSERT( sr_RS.getLanguage() == "sr" );
        CPPUNIT_ASSERT( sr_RS.getCountry() == "RS" );
        CPPUNIT_ASSERT( sr_RS.getScript() == "Latn" );
        CPPUNIT_ASSERT( sr_RS.getLanguageAndScript() == "sr-Latn" );
    }

    // "de-DE" constructed from a BCP 47 string.
    {
        OUString s_de_DE( "de-DE" );
        LanguageTag de_DE( s_de_DE, true );
        lang::Locale aLocale = de_DE.getLocale();
        CPPUNIT_ASSERT( de_DE.getBcp47() == s_de_DE );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( de_DE.getLanguageType() == LANGUAGE_GERMAN );
        CPPUNIT_ASSERT( de_DE.isValidBcp47() == true );
        CPPUNIT_ASSERT( de_DE.isIsoLocale() == true );
        CPPUNIT_ASSERT( de_DE.isIsoODF() == true );
        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
        CPPUNIT_ASSERT( de_DE.getScript() == "" );
        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" );
    }

    // "de-DE" constructed from a lang::Locale.
    {
        OUString s_de_DE( "de-DE" );
        LanguageTag de_DE( lang::Locale( "de", "DE", "" ) );
        lang::Locale aLocale = de_DE.getLocale();
        CPPUNIT_ASSERT( de_DE.getBcp47() == s_de_DE );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( de_DE.getLanguageType() == LANGUAGE_GERMAN );
    }

    // "de-DE" constructed from a LanguageType.
    {
        OUString s_de_DE( "de-DE" );
        LanguageTag de_DE( LANGUAGE_GERMAN );
        lang::Locale aLocale = de_DE.getLocale();
        CPPUNIT_ASSERT( de_DE.getBcp47() == s_de_DE );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( de_DE.getLanguageType() == LANGUAGE_GERMAN );
    }

    // 'qtz' is a local use known pseudolocale for key ID resource
    {
        OUString s_qtz( "qtz" );
        LanguageTag qtz( s_qtz );
        lang::Locale aLocale = qtz.getLocale();
        CPPUNIT_ASSERT( qtz.getBcp47() == s_qtz );
        CPPUNIT_ASSERT( aLocale.Language == "qtz" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( qtz.getLanguageType() == LANGUAGE_USER_KEYID );
    }

    // 'qty' is a local use unknown locale
    {
        OUString s_qty( "qty" );
        LanguageTag qty( s_qty );
        lang::Locale aLocale = qty.getLocale();
        CPPUNIT_ASSERT( qty.getBcp47() == s_qty );
        CPPUNIT_ASSERT( aLocale.Language == "qty" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( qty.getLanguageType() == LANGUAGE_SYSTEM );
    }

    // 'x-comment' is a privateuse known "locale"
    {
        OUString s_xcomment( "x-comment" );
        LanguageTag xcomment( s_xcomment );
        lang::Locale aLocale = xcomment.getLocale();
        CPPUNIT_ASSERT( xcomment.getBcp47() == s_xcomment );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "x-comment" );
        CPPUNIT_ASSERT( xcomment.getLanguageType() == LANGUAGE_USER_PRIV_COMMENT );
    }

    // 'x-foobar' is a privateuse unknown "locale"
    {
        OUString s_xfoobar( "x-foobar" );
        LanguageTag xfoobar( s_xfoobar );
        lang::Locale aLocale = xfoobar.getLocale();
        CPPUNIT_ASSERT( xfoobar.getBcp47() == s_xfoobar );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "x-foobar" );
        CPPUNIT_ASSERT( xfoobar.getLanguageType() == LANGUAGE_SYSTEM );
    }

    // '*' the dreaded jolly joker is a "privateuse" known "locale"
    {
        OUString s_joker( "*" );
        LanguageTag joker( s_joker );
        lang::Locale aLocale = joker.getLocale();
        CPPUNIT_ASSERT( joker.getBcp47() == s_joker );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "*" );
        CPPUNIT_ASSERT( joker.getLanguageType() == LANGUAGE_USER_PRIV_JOKER );

        // Same expectations when the tag is re-initialized from the ID.
        joker.reset( LANGUAGE_USER_PRIV_JOKER );
        aLocale = joker.getLocale();
        CPPUNIT_ASSERT( joker.getBcp47() == s_joker );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == "*" );
        CPPUNIT_ASSERT( joker.getLanguageType() == LANGUAGE_USER_PRIV_JOKER );
    }

    // test reset() methods
    // One tag object is re-initialized in turn from a LanguageType, a string
    // and a lang::Locale; each reset must fully replace the previous state.
    {
        LanguageTag aTag( LANGUAGE_DONTKNOW );
        lang::Locale aLocale;

        aTag.reset( LANGUAGE_GERMAN );
        aLocale = aTag.getLocale();
        CPPUNIT_ASSERT( aTag.getBcp47() == "de-DE" );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( aTag.getLanguageType() == LANGUAGE_GERMAN );

        aTag.reset( "en-US" );
        aLocale = aTag.getLocale();
        CPPUNIT_ASSERT( aTag.getBcp47() == "en-US" );
        CPPUNIT_ASSERT( aLocale.Language == "en" );
        CPPUNIT_ASSERT( aLocale.Country == "US" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( aTag.getLanguageType() == LANGUAGE_ENGLISH_US );

        aTag.reset( lang::Locale( "de", "DE", "" ) );
        aLocale = aTag.getLocale();
        CPPUNIT_ASSERT( aTag.getBcp47() == "de-DE" );
        CPPUNIT_ASSERT( aLocale.Language == "de" );
        CPPUNIT_ASSERT( aLocale.Country == "DE" );
        CPPUNIT_ASSERT( aLocale.Variant == "" );
        CPPUNIT_ASSERT( aTag.getLanguageType() == LANGUAGE_GERMAN );
    }

    // "unreg-and-bad": expected to be reported as not valid BCP 47 while
    // still round-tripping the original string.
    {
        OUString s_uab( "unreg-and-bad" );
        LanguageTag uab( s_uab, true );
        lang::Locale aLocale = uab.getLocale();
        CPPUNIT_ASSERT( uab.getBcp47() == s_uab );
        CPPUNIT_ASSERT( aLocale.Language == "qlt" );
        CPPUNIT_ASSERT( aLocale.Country == "" );
        CPPUNIT_ASSERT( aLocale.Variant == s_uab );
        CPPUNIT_ASSERT( uab.getLanguageType() == LANGUAGE_SYSTEM );
        CPPUNIT_ASSERT( uab.isValidBcp47() == false );
        CPPUNIT_ASSERT( uab.isIsoLocale() == false );
        CPPUNIT_ASSERT( uab.isIsoODF() == false );
    }
}

// Walks the MsLangId ISO table via getIsoLangEntry() until it returns NULL or
// an entry whose mnLang is LANGUAGE_DONTKNOW. For every entry:
//  - a LanguageTag built from the entry's tag string must report that same
//    string from getBcp47();
//  - if a LanguageTag built from the entry's LanguageType yields a different
//    string, converting that string back must give the same LanguageType as
//    the entry's own string does.
// The failure message is only assembled when a mismatch was detected.
void TestLanguageTag::testAllIsoLangEntries()
{
    const MsLangId::IsoLangEntry* pLangEntry;
    sal_Int32 nIndex = 0;
    while (((pLangEntry = MsLangId::getIsoLangEntry( nIndex++ )) != NULL) && (pLangEntry->mnLang != LANGUAGE_DONTKNOW))
    {
        LanguageTag aTagString( pLangEntry->getTagString(), true);
        LanguageTag aTagID( pLangEntry->mnLang);
        if (pLangEntry->getTagString() != aTagString.getBcp47())
        {
            OString aMessage( OUStringToOString( pLangEntry->getTagString(), RTL_TEXTENCODING_ASCII_US));
            aMessage += " -> " + OUStringToOString( aTagString.getBcp47(), RTL_TEXTENCODING_ASCII_US);
            CPPUNIT_ASSERT_MESSAGE( aMessage.getStr(), pLangEntry->getTagString() == aTagString.getBcp47());
        }
        if (pLangEntry->getTagString() != aTagID.getBcp47())
        {
            // There are multiple mappings, ID must be equal after conversions.
            LanguageTag aTagBack( aTagID.getBcp47(), true);
            if (aTagString.getLanguageType() != aTagBack.getLanguageType())
            {
                OString aMessage( OUStringToOString( pLangEntry->getTagString(), RTL_TEXTENCODING_ASCII_US));
                aMessage += " " + OString::number( aTagString.getLanguageType(), 16)
                    + " -> " + OString::number( aTagBack.getLanguageType(), 16);
                CPPUNIT_ASSERT_MESSAGE( aMessage.getStr(), aTagString.getLanguageType() == aTagBack.getLanguageType());
            }
        }
#if 0
        // This does not hold, there are cases like 'ar'
        // LANGUAGE_ARABIC_PRIMARY_ONLY that when mapped back results in
        // 'ar-SA' as default locale.
        if (pLangEntry->mnLang != aTagString.getLanguageType())
        {
            // There are multiple mappings, string must be equal after conversions.
            LanguageTag aTagBack( aTagString.getLanguageType());
            if (aTagID.getBcp47() != aTagBack.getBcp47())
            {
                OString aMessage( OUStringToOString( pLangEntry->getTagString(), RTL_TEXTENCODING_ASCII_US));
                aMessage += " " + OUStringToOString( aTagID.getBcp47(), RTL_TEXTENCODING_ASCII_US)
                    + " -> " + OUStringToOString( aTagBack.getBcp47(), RTL_TEXTENCODING_ASCII_US);
                CPPUNIT_ASSERT_MESSAGE( aMessage.getStr(), aTagID.getBcp47() == aTagBack.getBcp47());
            }
        }
#endif
    }

}

CPPUNIT_TEST_SUITE_REGISTRATION( TestLanguageTag );

}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/insys.cxx b/i18nlangtag/source/isolang/insys.cxx new file mode 100644 index 000000000000..a0f1ab496c4d --- /dev/null +++ b/i18nlangtag/source/isolang/insys.cxx @@ -0,0 +1,35 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


// Platform dispatch: textually includes exactly one platform implementation
// file, so this translation unit gets the WNT or the UNX definitions of
// MsLangId::getPlatformSystemLanguage() / getPlatformSystemUILanguage().
// Building with neither WNT nor UNX defined is a hard compile error.
#if defined( WNT )

#include "inwnt.cxx"

#elif defined( UNX )

#include "inunx.cxx"

#else

#error unknown platform

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/inunx.cxx b/i18nlangtag/source/isolang/inunx.cxx new file mode 100644 index 000000000000..f47bfa4b67c2 --- /dev/null +++ b/i18nlangtag/source/isolang/inunx.cxx @@ -0,0 +1,139 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */ + +#include <stdlib.h> // for getenv() +#include <stdio.h> + +#ifdef MACOSX +#include <osl/process.h> +#include <rtl/locale.h> +#include <rtl/ustring.hxx> + +#else // MACOSX +#include <rtl/string.hxx> + +#endif // MACOSX +#include <rtl/instance.hxx> +#include "i18nlangtag/languagetag.hxx" +#include "i18nlangtag/mslangid.hxx" + +// ======================================================================= + +static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; +static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; + +// ----------------------------------------------------------------------- + +// Get locale of category LC_CTYPE of environment variables +static const sal_Char* getLangFromEnvironment() +{ + static const sal_Char* pFallback = "C"; + const sal_Char *pLang = NULL; + + pLang = getenv ( "LC_ALL" ); + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_CTYPE" ); + if (! pLang || pLang[0] == 0) + pLang = getenv( "LANG" ); + if (! pLang || pLang[0] == 0) + pLang = pFallback; + + return pLang; +} + +// ----------------------------------------------------------------------- + +// Get locale of category LC_MESSAGES of environment variables +static const sal_Char* getUILangFromEnvironment() +{ + static const sal_Char* pFallback = "C"; + const sal_Char *pLang = NULL; + + pLang = getenv ( "LANGUAGE" ); // respect the GNU extension + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_ALL" ); + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_MESSAGES" ); + if (! pLang || pLang[0] == 0) + pLang = getenv( "LANG" ); + if (! 
pLang || pLang[0] == 0) + pLang = pFallback; + + return pLang; +} + +// ----------------------------------------------------------------------- + +typedef const sal_Char * (*getLangFromEnv)(); + +static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage, + getLangFromEnv pGetLangFromEnv ) +{ + /* get the language from the user environment */ + LanguageType nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { + ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex()); + nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { +#ifdef MACOSX + rtl_Locale *procLocale; + (void) pGetLangFromEnv; /* unused */ + + if ( osl_getProcessLocale(&procLocale) == osl_Process_E_None ) + { + nLang = LanguageTag( *procLocale ).getLanguageType(); + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + rSystemLanguage = nLang; +#ifdef DEBUG + if ( rSystemLanguage == LANGUAGE_DONTKNOW ) + fprintf( stderr, "intnunx.cxx: failed to convert osl_getProcessLocale() language to system language.\n" ); +#endif + } +#else /* MACOSX */ + rtl::OString aUnxLang( (pGetLangFromEnv)() ); + nLang = MsLangId::convertUnxByteStringToLanguage( aUnxLang ); + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + rSystemLanguage = nLang; +#endif /* MACOSX */ + } + else { + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + } + } +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemLanguage() +{ + getPlatformSystemLanguageImpl( nImplSystemLanguage, &getLangFromEnvironment); + return nImplSystemLanguage; +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemUILanguage() +{ + getPlatformSystemLanguageImpl( nImplSystemUILanguage, &getUILangFromEnvironment); + return nImplSystemUILanguage; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/source/isolang/inwnt.cxx b/i18nlangtag/source/isolang/inwnt.cxx new file mode 100644 
index 000000000000..c0971673d1f3 --- /dev/null +++ b/i18nlangtag/source/isolang/inwnt.cxx @@ -0,0 +1,104 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <sal/config.h>

#define WINVER 0x0500

#ifdef _MSC_VER
#pragma warning(push,1) // disable warnings within system headers
#endif
#include <windef.h> // needed by winnls.h
#include <winbase.h> // needed by winnls.h
#include <winnls.h>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include <rtl/instance.hxx>
#include "i18nlangtag/mslangid.hxx"

// Per-process caches filled on first use by getPlatformSystemLanguageImpl();
// LANGUAGE_DONTKNOW means "not determined yet".
static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW;
static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW;

// =======================================================================

// Converts a Windows LANGID to a LanguageType by keeping its low 16 bits;
// no table lookup is involved.
static LanguageType GetSVLang( LANGID nWinLangId )
{
    // No Translation, we work with the original MS code without the SORT_ID.
    // So we can get never LANG-ID's from MS, which are currently not defined
    // by us.
    return LanguageType( static_cast<sal_uInt16>(nWinLangId & 0xffff));
}

// -----------------------------------------------------------------------

typedef LANGID (WINAPI *getLangFromEnv)();

// Determine the platform language once and cache it in rSystemLanguage.
//
// rSystemLanguage    in/out cache; only written while it still holds
//                    LANGUAGE_DONTKNOW.
// pGetUserDefault    queried first.
// pGetSystemDefault  queried only if the user default converts to
//                    LANGUAGE_DONTKNOW.
//
// The cache is checked without the lock first, then re-checked under the
// global mutex, and OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER() is invoked
// before the result is stored. The statement order is significant; do not
// reorder.
static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage,
        getLangFromEnv pGetUserDefault, getLangFromEnv pGetSystemDefault )
{
    LanguageType nLang = rSystemLanguage;
    if ( nLang == LANGUAGE_DONTKNOW )
    {
        ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex());
        nLang = rSystemLanguage;
        if ( nLang == LANGUAGE_DONTKNOW )
        {
            LANGID nLangId;

            nLangId = (pGetUserDefault)();
            nLang = GetSVLang( nLangId );

            if ( nLang == LANGUAGE_DONTKNOW )
            {
                nLangId = (pGetSystemDefault)();
                nLang = GetSVLang( nLangId );
            }
            OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
            rSystemLanguage = nLang;
        }
        else
        {
            OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
        }
    }
}

// -----------------------------------------------------------------------

// Returns the cached system language, determined on first call from
// GetUserDefaultLangID() with GetSystemDefaultLangID() as fallback.
LanguageType MsLangId::getPlatformSystemLanguage()
{
    getPlatformSystemLanguageImpl( nImplSystemLanguage,
            &GetUserDefaultLangID, &GetSystemDefaultLangID);
    return nImplSystemLanguage;
}

// -----------------------------------------------------------------------

// Returns the cached system UI language, determined on first call from
// GetUserDefaultUILanguage() with GetSystemDefaultUILanguage() as fallback.
LanguageType MsLangId::getPlatformSystemUILanguage()
{
    // TODO: this could be distinguished, #if(WINVER >= 0x0500)
    // needs _run_ time differentiation though, not at compile time.
    getPlatformSystemLanguageImpl( nImplSystemUILanguage,
            &GetUserDefaultUILanguage, &GetSystemDefaultUILanguage);
    return nImplSystemUILanguage;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx new file mode 100644 index 000000000000..a6a467ce4671 --- /dev/null +++ b/i18nlangtag/source/isolang/isolang.cxx @@ -0,0 +1,1085 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <rtl/ustring.hxx>
#include <rtl/string.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/strbuf.hxx>

#include "i18nlangtag/mslangid.hxx"

// =======================================================================

// Table row pairing a LanguageType with a short country string.
// NOTE(review): the 3-byte buffer presumably holds a two-letter country code
// plus terminator; the table using this struct is not in view -- confirm.
struct IsoLangEngEntry
{
    LanguageType mnLang;
    sal_Char maCountry[3];
};

// Table row pairing a LanguageType with fixed-size language and country
// strings.
// NOTE(review): buffer sizes suggest up to 3 language and up to 8 country
// characters plus terminator; usage is not in view -- confirm.
struct IsoLangNoneStdEntry
{
    LanguageType mnLang;
    sal_Char maLangStr[4];
    sal_Char maCountry[9];
};

// Table row pairing a LanguageType with a pointer to a language string of
// arbitrary length.
// NOTE(review): presumably points at a string literal; usage is not in
// view -- confirm.
struct IsoLangOtherEntry
{
    LanguageType mnLang;
    const sal_Char* mpLangStr;
};

// -----------------------------------------------------------------------

// Entries for languages are lower case, for countries upper case, as
// recommended by rfc4646 (obsoletes rfc3066 (obsoletes rfc1766)).
// convertIsoNamesToLanguage() is case insensitive
//
// Sort order: Most used first.
//
// The default entry for a LangID <-> ISO mapping has to be first. For
// conversion of legacy mappings one LangID can map to multiple ISO codes, and
// one ISO code combination can map to multiple LangIDs. For compatibility with
// already existing calls it can also be a sequence as follows:

// LANGUAGE_ENGLISH, "en", ""
// LANGUAGE_ENGLISH_US, "en", "US"

// Here, in a convertIsoNamesToLanguage() call "en-US" is converted to
// LANGUAGE_ENGLISH_US and "en" is converted to LANGUAGE_ENGLISH. A call with
// "en-ZZ" (not in table) would result in LANGUAGE_ENGLISH because the first
// entry matching the language and not having a country is returned, regardless
// of whether being sorted before or after other entries of the same language
// with some country. To obtain a _locale_ (not language only) in the order
// given, convertLocaleToLanguageWithFallback() must be called.
+ +// If the sequence instead was + +// LANGUAGE_ENGLISH_US, "en", "US" +// LANGUAGE_ENGLISH, "en", "" + +// in a convertIsoNamesToLanguage() call "en-US" is still converted to +// LANGUAGE_ENGLISH_US, but "en" is _also_ converted to LANGUAGE_ENGLISH_US +// because no country was passed and it is the first entry to match the +// language, see code. A call with "en-ZZ" (not in table) would still result in +// LANGUAGE_ENGLISH. + +/* erAck: 2007-07-05T20:01+0200 TODO: The entire suite's "primary language + * only" usage and locale fall back should be cleaned up and made consistent. I + * strongly doubt that most callers exactly expect the behavior described. + * Currently these primary LangIDs are used literally in OOo code: + * LANGUAGE_ENGLISH LANGUAGE_CHINESE LANGUAGE_MALAY + * LANGUAGE_AZERI LANGUAGE_URDU LANGUAGE_KASHMIRI + */ + +static MsLangId::IsoLangEntry const aImplIsoLangEntries[] = +{ + // MS-LANGID codes ISO639-1/2/3 ISO3166 + { LANGUAGE_ENGLISH, "en", "" }, + { LANGUAGE_ENGLISH_US, "en", "US" }, + { LANGUAGE_ENGLISH_UK, "en", "GB" }, + { LANGUAGE_ENGLISH_AUS, "en", "AU" }, + { LANGUAGE_ENGLISH_CAN, "en", "CA" }, + { LANGUAGE_FRENCH, "fr", "FR" }, + { LANGUAGE_FRENCH, "fr", "" }, + { LANGUAGE_GERMAN, "de", "DE" }, + { LANGUAGE_ITALIAN, "it", "IT" }, + { LANGUAGE_DUTCH, "nl", "NL" }, + { LANGUAGE_SPANISH_MODERN, "es", "ES" }, + { LANGUAGE_SPANISH_DATED, "es", "ES" }, + { LANGUAGE_PORTUGUESE, "pt", "PT" }, + { LANGUAGE_PORTUGUESE_BRAZILIAN, "pt", "BR" }, + { LANGUAGE_DANISH, "da", "DK" }, + { LANGUAGE_GREEK, "el", "GR" }, + { LANGUAGE_CHINESE, "zh", "" }, + { LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN" }, + { LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW" }, + { LANGUAGE_CHINESE_HONGKONG, "zh", "HK" }, + { LANGUAGE_CHINESE_SINGAPORE, "zh", "SG" }, + { LANGUAGE_CHINESE_MACAU, "zh", "MO" }, + { LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK" }, + { LANGUAGE_JAPANESE, "ja", "JP" }, + { LANGUAGE_KOREAN, "ko", "KR" }, + { LANGUAGE_KOREAN_JOHAB, "ko", "KR" }, + { 
LANGUAGE_USER_KOREAN_NORTH, "ko", "KP" }, + { LANGUAGE_SWEDISH, "sv", "SE" }, + { LANGUAGE_SWEDISH_FINLAND, "sv", "FI" }, + { LANGUAGE_FINNISH, "fi", "FI" }, + { LANGUAGE_RUSSIAN, "ru", "RU" }, + { LANGUAGE_TATAR, "tt", "RU" }, + { LANGUAGE_ENGLISH_NZ, "en", "NZ" }, + { LANGUAGE_ENGLISH_EIRE, "en", "IE" }, + { LANGUAGE_DUTCH_BELGIAN, "nl", "BE" }, + { LANGUAGE_FRENCH_BELGIAN, "fr", "BE" }, + { LANGUAGE_FRENCH_CANADIAN, "fr", "CA" }, + { LANGUAGE_FRENCH_SWISS, "fr", "CH" }, + { LANGUAGE_GERMAN_SWISS, "de", "CH" }, + { LANGUAGE_GERMAN_AUSTRIAN, "de", "AT" }, + { LANGUAGE_ITALIAN_SWISS, "it", "CH" }, + { LANGUAGE_ALBANIAN, "sq", "AL" }, + { LANGUAGE_ARABIC_SAUDI_ARABIA, "ar", "SA" }, + { LANGUAGE_ARABIC_EGYPT, "ar", "EG" }, + { LANGUAGE_ARABIC_UAE, "ar", "AE" }, + { LANGUAGE_ARABIC_IRAQ, "ar", "IQ" }, + { LANGUAGE_ARABIC_LIBYA, "ar", "LY" }, + { LANGUAGE_ARABIC_ALGERIA, "ar", "DZ" }, + { LANGUAGE_ARABIC_MOROCCO, "ar", "MA" }, + { LANGUAGE_ARABIC_TUNISIA, "ar", "TN" }, + { LANGUAGE_ARABIC_OMAN, "ar", "OM" }, + { LANGUAGE_ARABIC_YEMEN, "ar", "YE" }, + { LANGUAGE_ARABIC_SYRIA, "ar", "SY" }, + { LANGUAGE_ARABIC_JORDAN, "ar", "JO" }, + { LANGUAGE_ARABIC_LEBANON, "ar", "LB" }, + { LANGUAGE_ARABIC_KUWAIT, "ar", "KW" }, + { LANGUAGE_ARABIC_BAHRAIN, "ar", "BH" }, + { LANGUAGE_ARABIC_QATAR, "ar", "QA" }, + { LANGUAGE_USER_ARABIC_CHAD, "ar", "TD" }, + { LANGUAGE_USER_ARABIC_COMOROS, "ar", "KM" }, + { LANGUAGE_USER_ARABIC_DJIBOUTI, "ar", "DJ" }, + { LANGUAGE_USER_ARABIC_ERITREA, "ar", "ER" }, + { LANGUAGE_USER_ARABIC_ISRAEL, "ar", "IL" }, + { LANGUAGE_USER_ARABIC_MAURITANIA, "ar", "MR" }, + { LANGUAGE_USER_ARABIC_PALESTINE, "ar", "PS" }, + { LANGUAGE_USER_ARABIC_SOMALIA, "ar", "SO" }, + { LANGUAGE_USER_ARABIC_SUDAN, "ar", "SD" }, + { LANGUAGE_ARABIC_PRIMARY_ONLY, "ar", "" }, + { LANGUAGE_BASQUE, "eu", "" }, + { LANGUAGE_BULGARIAN, "bg", "BG" }, + { LANGUAGE_CZECH, "cs", "CZ" }, + { LANGUAGE_CZECH, "cz", "" }, + { LANGUAGE_ENGLISH_JAMAICA, "en", "JM" }, + { 
LANGUAGE_ENGLISH_CARRIBEAN, "en", "BS" }, // not 100%, because AG is Bahamas + { LANGUAGE_ENGLISH_BELIZE, "en", "BZ" }, + { LANGUAGE_ENGLISH_TRINIDAD, "en", "TT" }, + { LANGUAGE_ENGLISH_ZIMBABWE, "en", "ZW" }, + { LANGUAGE_ENGLISH_INDONESIA, "en", "ID" }, + { LANGUAGE_ESTONIAN, "et", "EE" }, + { LANGUAGE_FAEROESE, "fo", "FO" }, + { LANGUAGE_FARSI, "fa", "IR" }, + { LANGUAGE_FRENCH_LUXEMBOURG, "fr", "LU" }, + { LANGUAGE_FRENCH_MONACO, "fr", "MC" }, + { LANGUAGE_GERMAN_LUXEMBOURG, "de", "LU" }, + { LANGUAGE_GERMAN_LIECHTENSTEIN, "de", "LI" }, + { LANGUAGE_HEBREW, "he", "IL" }, // new: old was "iw" + { LANGUAGE_HEBREW, "iw", "IL" }, // old: new is "he" + { LANGUAGE_HUNGARIAN, "hu", "HU" }, + { LANGUAGE_ICELANDIC, "is", "IS" }, + { LANGUAGE_INDONESIAN, "id", "ID" }, // new: old was "in" + { LANGUAGE_INDONESIAN, "in", "ID" }, // old: new is "id" + { LANGUAGE_NORWEGIAN, "no", "NO" }, + { LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO" }, + { LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO" }, + { LANGUAGE_POLISH, "pl", "PL" }, + { LANGUAGE_RHAETO_ROMAN, "rm", "CH" }, + { LANGUAGE_ROMANIAN, "ro", "RO" }, + { LANGUAGE_ROMANIAN_MOLDOVA, "ro", "MD" }, + { LANGUAGE_SLOVAK, "sk", "SK" }, + { LANGUAGE_SLOVENIAN, "sl", "SI" }, + { LANGUAGE_SPANISH_MEXICAN, "es", "MX" }, + { LANGUAGE_SPANISH_GUATEMALA, "es", "GT" }, + { LANGUAGE_SPANISH_COSTARICA, "es", "CR" }, + { LANGUAGE_SPANISH_PANAMA, "es", "PA" }, + { LANGUAGE_SPANISH_DOMINICAN_REPUBLIC, "es", "DO" }, + { LANGUAGE_SPANISH_VENEZUELA, "es", "VE" }, + { LANGUAGE_SPANISH_COLOMBIA, "es", "CO" }, + { LANGUAGE_SPANISH_PERU, "es", "PE" }, + { LANGUAGE_SPANISH_ARGENTINA, "es", "AR" }, + { LANGUAGE_SPANISH_ECUADOR, "es", "EC" }, + { LANGUAGE_SPANISH_CHILE, "es", "CL" }, + { LANGUAGE_SPANISH_URUGUAY, "es", "UY" }, + { LANGUAGE_SPANISH_PARAGUAY, "es", "PY" }, + { LANGUAGE_SPANISH_BOLIVIA, "es", "BO" }, + { LANGUAGE_SPANISH_EL_SALVADOR, "es", "SV" }, + { LANGUAGE_SPANISH_HONDURAS, "es", "HN" }, + { LANGUAGE_SPANISH_NICARAGUA, "es", "NI" }, + { 
LANGUAGE_SPANISH_PUERTO_RICO, "es", "PR" }, + { LANGUAGE_SPANISH_UNITED_STATES, "es", "US" }, + { LANGUAGE_SPANISH_LATIN_AMERICA, "es", "" }, + { LANGUAGE_TURKISH, "tr", "TR" }, + { LANGUAGE_UKRAINIAN, "uk", "UA" }, + { LANGUAGE_VIETNAMESE, "vi", "VN" }, + { LANGUAGE_LATVIAN, "lv", "LV" }, + { LANGUAGE_MACEDONIAN, "mk", "MK" }, + { LANGUAGE_MALAY, "ms", "" }, + { LANGUAGE_MALAY_MALAYSIA, "ms", "MY" }, + { LANGUAGE_MALAY_BRUNEI_DARUSSALAM, "ms", "BN" }, + { LANGUAGE_ENGLISH_MALAYSIA, "en", "MY" }, + { LANGUAGE_THAI, "th", "TH" }, + { LANGUAGE_LITHUANIAN, "lt", "LT" }, + { LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT" }, + { LANGUAGE_CROATIAN, "hr", "HR" }, // Croatian in Croatia + { LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA" }, + { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA" }, +// { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_AND_HERZEGOVINA, "bs", "BA" }, // script codes not supported yet + { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS" }, // Serbian Cyrillic in Serbia + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU" }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_CS instead, sr_CS not supported by ICU 2.6 (3.4 does) + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS" }, // alias to be able to integrate localizations, rsc needs it + { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME" }, + { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA" }, + { LANGUAGE_SERBIAN, "sr", "" }, // SERBIAN is only LID, MS-LCID not defined (was dupe of CROATIAN) + { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS" }, // Serbian Latin in Serbia; kludge, needed to be sr_Latn_RS instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN, "sh", "YU" }, // legacy Serbian Latin in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_Latn_CS instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN, "sh", "CS" }, // Serbian Latin in Serbia and Montenegro; kludge, needed to be sr_Latn_CS instead, script codes not 
supported yet + { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME" }, // Serbian Latin in Montenegro; kludge, needed to be sr_Latn_ME instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA" }, + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" }, // kludge, needed to be sr_Latn instead, script codes not supported yet + { LANGUAGE_ARMENIAN, "hy", "AM" }, + { LANGUAGE_AZERI, "az", "" }, + { LANGUAGE_AZERI_LATIN, "az", "AZ" }, +// { LANGUAGE_AZERI_CYRILLIC, "az", "AZ" }, // script codes not supported yet + { LANGUAGE_UZBEK_LATIN, "uz", "UZ" }, +// { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ" }, // script codes not supported yet + { LANGUAGE_BENGALI_BANGLADESH, "bn", "BD" }, + { LANGUAGE_BENGALI, "bn", "IN" }, + { LANGUAGE_BURMESE, "my", "MM" }, + { LANGUAGE_KAZAKH, "kk", "KZ" }, + { LANGUAGE_ENGLISH_INDIA, "en", "IN" }, + { LANGUAGE_URDU, "ur", "" }, + { LANGUAGE_URDU_INDIA, "ur", "IN" }, + { LANGUAGE_URDU_PAKISTAN, "ur", "PK" }, + { LANGUAGE_HINDI, "hi", "IN" }, + { LANGUAGE_GUJARATI, "gu", "IN" }, + { LANGUAGE_KANNADA, "kn", "IN" }, + { LANGUAGE_ASSAMESE, "as", "IN" }, + { LANGUAGE_KASHMIRI, "ks", "" }, + { LANGUAGE_KASHMIRI_INDIA, "ks", "IN" }, + { LANGUAGE_MALAYALAM, "ml", "IN" }, + { LANGUAGE_MANIPURI, "mni", "IN" }, + { LANGUAGE_MARATHI, "mr", "IN" }, + { LANGUAGE_KONKANI, "kok", "IN" }, + { LANGUAGE_NEPALI, "ne", "NP" }, + { LANGUAGE_NEPALI_INDIA, "ne", "IN" }, + { LANGUAGE_ORIYA, "or", "IN" }, + { LANGUAGE_PUNJABI, "pa", "IN" }, + { LANGUAGE_SANSKRIT, "sa", "IN" }, + { LANGUAGE_SINDHI, "sd", "IN" }, + { LANGUAGE_TAMIL, "ta", "IN" }, + { LANGUAGE_TELUGU, "te", "IN" }, + { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK" }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB + { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK" }, + { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK" }, + { LANGUAGE_BELARUSIAN, "be", "BY" }, + { LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default) + { LANGUAGE_CATALAN, "ca", "AD" 
}, // Andorra + { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content! + { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content! +// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back. + { LANGUAGE_FRENCH_CAMEROON, "fr", "CM" }, + { LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" }, + { LANGUAGE_FRENCH_MALI, "fr", "ML" }, + { LANGUAGE_FRENCH_SENEGAL, "fr", "SN" }, + { LANGUAGE_FRENCH_ZAIRE, "fr", "CD" }, // Democratic Republic Of Congo + { LANGUAGE_FRENCH_MOROCCO, "fr", "MA" }, + { LANGUAGE_FRENCH_REUNION, "fr", "RE" }, + { LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" }, + { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" }, // unknown ISO country code + { LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL" }, + { LANGUAGE_GAELIC_IRELAND, "ga", "IE" }, + { LANGUAGE_GAELIC_SCOTLAND, "gd", "GB" }, + { LANGUAGE_GALICIAN, "gl", "ES" }, + { LANGUAGE_GEORGIAN, "ka", "GE" }, + { LANGUAGE_KHMER, "km", "KH" }, + { LANGUAGE_KIRGHIZ, "ky", "KG" }, + { LANGUAGE_LAO, "lo", "LA" }, + { LANGUAGE_MALTESE, "mt", "MT" }, + { LANGUAGE_MONGOLIAN, "mn", "MN" }, // Cyrillic script + { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn", "MN" }, + { LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD" }, + { LANGUAGE_SWAHILI, "sw", "KE" }, + { LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ" }, + { LANGUAGE_TAJIK, "tg", "TJ" }, + { LANGUAGE_TIBETAN, "bo", "CN" }, // CN politically correct? 
+ { LANGUAGE_DZONGKHA, "dz", "BT" }, + { LANGUAGE_TURKMEN, "tk", "TM" }, + { LANGUAGE_WELSH, "cy", "GB" }, + { LANGUAGE_SESOTHO, "st", "ZA" }, + { LANGUAGE_SEPEDI, "nso", "ZA" }, + { LANGUAGE_SEPEDI, "ns", "ZA" }, // fake "ns" for compatibility with existing OOo1.1.x localization to be able to read those documents + { LANGUAGE_TSONGA, "ts", "ZA" }, + { LANGUAGE_TSWANA, "tn", "ZA" }, + { LANGUAGE_ENGLISH_SAFRICA, "en", "ZA" }, + { LANGUAGE_AFRIKAANS, "af", "ZA" }, + { LANGUAGE_VENDA, "ve", "ZA" }, // default 639-1 + { LANGUAGE_VENDA, "ven", "ZA" }, // 639-2 may have been used temporarily since 2004-07-23 + { LANGUAGE_XHOSA, "xh", "ZA" }, + { LANGUAGE_ZULU, "zu", "ZA" }, + { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC" }, + { LANGUAGE_QUECHUA_PERU, "qu", "PE" }, + { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO" }, // macro: quh-BO, qul-BO + { LANGUAGE_PASHTO, "ps", "AF" }, + { LANGUAGE_OROMO, "om", "ET" }, + { LANGUAGE_DHIVEHI, "dv", "MV" }, + { LANGUAGE_UIGHUR_CHINA, "ug", "CN" }, + { LANGUAGE_TIGRIGNA_ETHIOPIA, "ti", "ET" }, + { LANGUAGE_TIGRIGNA_ERITREA, "ti", "ER" }, + { LANGUAGE_AMHARIC_ETHIOPIA, "am", "ET" }, + { LANGUAGE_GUARANI_PARAGUAY, "gug", "PY" }, + { LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US" }, + { LANGUAGE_EDO, "bin", "NG" }, + { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG" }, + { LANGUAGE_HAUSA_NIGERIA, "ha", "NG" }, + { LANGUAGE_USER_HAUSA_GHANA, "ha", "GH" }, + { LANGUAGE_IGBO_NIGERIA, "ig", "NG" }, + { LANGUAGE_KANURI_NIGERIA, "kr", "NG" }, + { LANGUAGE_YORUBA, "yo", "NG" }, + { LANGUAGE_SOMALI, "so", "SO" }, + { LANGUAGE_PAPIAMENTU, "pap", "AN" }, + { LANGUAGE_USER_PAPIAMENTU_ARUBA, "pap", "AW" }, + { LANGUAGE_USER_PAPIAMENTU_CURACAO, "pap", "CW" }, + { LANGUAGE_USER_PAPIAMENTU_BONAIRE, "pap", "BQ" }, + { LANGUAGE_ENGLISH_SINGAPORE, "en", "SG" }, + { LANGUAGE_USER_YIDDISH_US, "yi", "US" }, + { LANGUAGE_YIDDISH, "yi", "IL" }, // new: old was "ji" + { LANGUAGE_YIDDISH, "ji", "IL" }, // old: new is "yi" + { LANGUAGE_SYRIAC, "syr", "TR" }, // "TR" according to 
http://www.ethnologue.com/show_language.asp?code=SYC + { LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK" }, + { LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US" }, + { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA" }, +// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA" }, // script codes not supported yet + { LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO" }, + { LANGUAGE_SAMI_INARI, "smn", "FI" }, + { LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO" }, + { LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE" }, + { LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI" }, + { LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE" }, + { LANGUAGE_SAMI_SKOLT, "sms", "FI" }, + { LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO" }, + { LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE" }, + { LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU" }, + { LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL" }, + { LANGUAGE_CORSICAN_FRANCE, "co", "FR" }, + { LANGUAGE_ALSATIAN_FRANCE, "gsw", "FR" }, // in fact 'gsw' is Schwyzerduetsch (Swiss German), which is a dialect of Alemannic German, as is Alsatian. They aren't distinct languages and share this code. + { LANGUAGE_YAKUT_RUSSIA, "sah", "RU" }, + { LANGUAGE_MOHAWK_CANADA, "moh", "CA" }, + { LANGUAGE_BASHKIR_RUSSIA, "ba", "RU" }, + { LANGUAGE_KICHE_GUATEMALA, "qut", "GT" }, + { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF" }, + { LANGUAGE_WOLOF_SENEGAL, "wo", "SN" }, + { LANGUAGE_FILIPINO, "fil", "PH" }, + { LANGUAGE_USER_TAGALOG, "tl", "PH" }, + { LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH" }, +// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG" }, // ISO "nic" is only a collective language code + { LANGUAGE_YI, "ii", "CN" }, + { LANGUAGE_TAMAZIGHT_LATIN, "kab", "DZ" }, // In practice Kabyle is the language used for this + { LANGUAGE_OBSOLETE_USER_KABYLE, "kab", "DZ" }, + { LANGUAGE_TAMAZIGHT_LATIN, "ber", "DZ" }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there. 
+ { LANGUAGE_TAMAZIGHT_TIFINAGH, "ber", "MA" }, // Morocco is officially using Tifinagh for its Berber languages so store it to distinguish explicitly from LANGUAGE_TAMAZIGHT_LATIN, even though as a collective language its not of much use +// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" }, // ISO "ber" only collective! + { LANGUAGE_LATIN, "la", "VA" }, + { LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA" }, + { LANGUAGE_USER_ESPERANTO, "eo", "" }, + { LANGUAGE_USER_INTERLINGUA, "ia", "" }, + { LANGUAGE_MAORI_NEW_ZEALAND, "mi", "NZ" }, + { LANGUAGE_OBSOLETE_USER_MAORI, "mi", "NZ" }, + { LANGUAGE_KINYARWANDA_RWANDA, "rw", "RW" }, + { LANGUAGE_OBSOLETE_USER_KINYARWANDA, "rw", "RW" }, + { LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE" }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN + { LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE" }, + { LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE" }, // MS maps this to 'wee-DE', which is nonsense. 
'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE + { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE" }, + { LANGUAGE_OCCITAN_FRANCE, "oc", "FR" }, + { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR" }, + { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR" }, + { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY" }, + { LANGUAGE_USER_KURDISH_IRAQ, "ku", "IQ" }, + { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR" }, + { LANGUAGE_USER_SARDINIAN, "sc", "IT" }, // macrolanguage code + { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT" }, + { LANGUAGE_USER_SARDINIAN_GALLURESE, "sdn", "IT" }, + { LANGUAGE_USER_SARDINIAN_LOGUDORESE, "src", "IT" }, + { LANGUAGE_USER_SARDINIAN_SASSARESE, "sdc", "IT" }, + { LANGUAGE_BRETON_FRANCE, "br", "FR" }, + { LANGUAGE_OBSOLETE_USER_BRETON, "br", "FR" }, + { LANGUAGE_KALAALLISUT_GREENLAND, "kl", "GL" }, + { LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL" }, + { LANGUAGE_USER_SWAZI, "ss", "ZA" }, + { LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA" }, + { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW" }, + { LANGUAGE_USER_MOORE, "mos", "BF" }, + { LANGUAGE_USER_BAMBARA, "bm", "ML" }, + { LANGUAGE_USER_AKAN, "ak", "GH" }, + { LANGUAGE_LUXEMBOURGISH_LUXEMBOURG, "lb", "LU" }, + { LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH, "lb", "LU" }, + { LANGUAGE_USER_FRIULIAN, "fur", "IT" }, + { LANGUAGE_USER_FIJIAN, "fj", "FJ" }, + { LANGUAGE_USER_AFRIKAANS_NAMIBIA, "af", "NA" }, + { LANGUAGE_USER_ENGLISH_NAMIBIA, "en", "NA" }, + { LANGUAGE_USER_WALLOON, "wa", "BE" }, + { LANGUAGE_USER_COPTIC, "cop", "EG" }, + { LANGUAGE_USER_GASCON, "gsc", "FR" }, + { LANGUAGE_USER_GERMAN_BELGIUM, "de", "BE" }, + { LANGUAGE_USER_CHUVASH, "cv", "RU" }, + { LANGUAGE_USER_EWE_GHANA, "ee", "GH" }, + { LANGUAGE_USER_ENGLISH_GHANA, "en", "GH" }, + { LANGUAGE_USER_SANGO, "sg", "CF" }, + { LANGUAGE_USER_GANDA, "lg", "UG" }, + { LANGUAGE_USER_LINGALA_DRCONGO, "ln", "CD" }, + { LANGUAGE_USER_LOW_GERMAN, "nds", "DE" }, + { LANGUAGE_USER_HILIGAYNON, "hil", "PH" }, + { LANGUAGE_USER_ENGLISH_MALAWI, 
"en", "MW" }, /* en default for MW */ + { LANGUAGE_USER_NYANJA, "ny", "MW" }, + { LANGUAGE_USER_KASHUBIAN, "csb", "PL" }, + { LANGUAGE_USER_SPANISH_CUBA, "es", "CU" }, + { LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA, "qul", "BO" }, + { LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA, "quh", "BO" }, + { LANGUAGE_USER_BODO_INDIA, "brx", "IN" }, + { LANGUAGE_USER_DOGRI_INDIA, "dgo", "IN" }, + { LANGUAGE_USER_MAITHILI_INDIA, "mai", "IN" }, + { LANGUAGE_USER_SANTALI_INDIA, "sat", "IN" }, + { LANGUAGE_USER_TETUN, "tet", "ID" }, + { LANGUAGE_USER_TETUN_TIMOR_LESTE, "tet", "TL" }, + { LANGUAGE_USER_TOK_PISIN, "tpi", "PG" }, + { LANGUAGE_USER_SHUSWAP, "shs", "CA" }, + { LANGUAGE_USER_ANCIENT_GREEK, "grc", "GR" }, + { LANGUAGE_USER_ASTURIAN, "ast", "ES" }, + { LANGUAGE_USER_LATGALIAN, "ltg", "LV" }, + { LANGUAGE_USER_MAORE, "swb", "YT" }, + { LANGUAGE_USER_BUSHI, "buc", "YT" }, + { LANGUAGE_USER_TAHITIAN, "ty", "PF" }, + { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" }, + { LANGUAGE_USER_MALAGASY_PLATEAU, "mg", "MG" }, + { LANGUAGE_USER_BAFIA, "ksf", "CM" }, + { LANGUAGE_USER_GIKUYU, "ki", "KE" }, + { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" }, + { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" }, + { LANGUAGE_USER_LIMBU, "lif", "NP" }, + { LANGUAGE_USER_LOJBAN, "jbo", "" }, + { LANGUAGE_USER_HAITIAN, "ht", "HT" }, + { LANGUAGE_FRENCH_HAITI, "fr", "HT" }, + { LANGUAGE_USER_BEEMBE, "beq", "CG" }, + { LANGUAGE_USER_BEKWEL, "bkw", "CG" }, + { LANGUAGE_USER_KITUBA, "mkw", "CG" }, + { LANGUAGE_USER_LARI, "ldi", "CG" }, + { LANGUAGE_USER_MBOCHI, "mdw", "CG" }, + { LANGUAGE_USER_TEKE_EBOO, "ebo", "CG" }, + { LANGUAGE_USER_TEKE_IBALI, "tek", "CG" }, + { LANGUAGE_USER_TEKE_TYEE, "tyx", "CG" }, + { LANGUAGE_USER_VILI, "vif", "CG" }, + { LANGUAGE_USER_PORTUGUESE_ANGOLA, "pt", "AO" }, + { LANGUAGE_USER_MANX, "gv", "GB" }, + { LANGUAGE_USER_ARAGONESE, "an", "ES" }, + { LANGUAGE_USER_KEYID, "qtz", "" }, // key id pseudolanguage used for UI testing + { LANGUAGE_USER_PALI_LATIN, "pli", "" }, // Pali with Latin 
script + { LANGUAGE_USER_KYRGYZ_CHINA, "ky", "CN" }, + { LANGUAGE_USER_KOMI_ZYRIAN, "kpv", "RU" }, + { LANGUAGE_USER_KOMI_PERMYAK, "koi", "RU" }, + { LANGUAGE_USER_PITJANTJATJARA, "pjt", "AU" }, + { LANGUAGE_USER_ERZYA, "myv", "RU" }, + { LANGUAGE_USER_MARI_MEADOW, "mhr", "RU" }, + { LANGUAGE_USER_KHANTY, "kca", "RU" }, + { LANGUAGE_USER_LIVONIAN, "liv", "RU" }, + { LANGUAGE_USER_MOKSHA, "mdf", "RU" }, + { LANGUAGE_USER_MARI_HILL, "mrj", "RU" }, + { LANGUAGE_USER_NGANASAN, "nio", "RU" }, + { LANGUAGE_USER_OLONETS, "olo", "RU" }, + { LANGUAGE_USER_VEPS, "vep", "RU" }, + { LANGUAGE_USER_VORO, "vro", "EE" }, + { LANGUAGE_USER_NENETS, "yrk", "RU" }, + { LANGUAGE_USER_AKA, "axk", "CF" }, + { LANGUAGE_USER_AKA_CONGO, "axk", "CG" }, + { LANGUAGE_USER_DIBOLE, "bvx", "CG" }, + { LANGUAGE_USER_DOONDO, "dde", "CG" }, + { LANGUAGE_USER_KAAMBA, "xku", "CG" }, + { LANGUAGE_USER_KOONGO, "kng", "CD" }, + { LANGUAGE_USER_KOONGO_CONGO, "kng", "CG" }, + { LANGUAGE_USER_KUNYI, "njx", "CG" }, + { LANGUAGE_USER_NGUNGWEL, "ngz", "CG" }, + { LANGUAGE_USER_NJYEM, "njy", "CM" }, + { LANGUAGE_USER_NJYEM_CONGO, "njy", "CG" }, + { LANGUAGE_USER_PUNU, "puu", "GA" }, + { LANGUAGE_USER_PUNU_CONGO, "puu", "CG" }, + { LANGUAGE_USER_SUUNDI, "sdj", "CG" }, + { LANGUAGE_USER_TEKE_KUKUYA, "kkw", "CG" }, + { LANGUAGE_USER_TSAANGI, "tsa", "CG" }, + { LANGUAGE_USER_YAKA, "iyx", "CG" }, + { LANGUAGE_USER_YOMBE, "yom", "CD" }, + { LANGUAGE_USER_YOMBE_CONGO, "yom", "CG" }, + { LANGUAGE_USER_SIDAMA, "sid", "ET" }, + { LANGUAGE_MULTIPLE, "mul", "" }, // multiple languages, many languages are used + { LANGUAGE_UNDETERMINED, "und", "" }, // undetermined language, language cannot be identified + { LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information + { LANGUAGE_DONTKNOW, "", "" } // marks end of table +}; + +static MsLangId::IsoLangEntry aLastResortFallbackEntry = +{ LANGUAGE_ENGLISH_US, "en", "US" }; + +OUString 
MsLangId::IsoLangEntry::getTagString() const +{ + if (maCountry[0]) + return OUString( OUString::createFromAscii( maLangStr) + "-" + OUString::createFromAscii( maCountry)); + else + return OUString::createFromAscii( maLangStr); +} + +// ----------------------------------------------------------------------- + +// In this table are the countries which should mapped to a specific +// english language +static IsoLangEngEntry const aImplIsoLangEngEntries[] = +{ + { LANGUAGE_ENGLISH_UK, "AO" }, // Angola + { LANGUAGE_ENGLISH_UK, "BJ" }, // Benin + { LANGUAGE_ENGLISH_UK, "BW" }, // Botswana + { LANGUAGE_ENGLISH_UK, "BI" }, // Burundi + { LANGUAGE_ENGLISH_UK, "CM" }, // Cameroon + { LANGUAGE_ENGLISH_UK, "GA" }, // Gabon + { LANGUAGE_ENGLISH_UK, "GM" }, // Gambia + { LANGUAGE_ENGLISH_UK, "GH" }, // Ghana + { LANGUAGE_ENGLISH_UK, "GN" }, // Guinea + { LANGUAGE_ENGLISH_UK, "LS" }, // Lesotho + { LANGUAGE_ENGLISH_UK, "MW" }, // Malawi + { LANGUAGE_ENGLISH_UK, "MT" }, // Malta + { LANGUAGE_ENGLISH_UK, "NA" }, // Namibia + { LANGUAGE_ENGLISH_UK, "NG" }, // Nigeria + { LANGUAGE_ENGLISH_UK, "UG" }, // Uganda + { LANGUAGE_ENGLISH_UK, "ZM" }, // Zambia + { LANGUAGE_ENGLISH_UK, "ZW" }, // Zimbabwe + { LANGUAGE_ENGLISH_UK, "SZ" }, // Swaziland + { LANGUAGE_ENGLISH_UK, "NG" }, // Sierra Leone + { LANGUAGE_ENGLISH_UK, "KN" }, // Saint Kitts and Nevis + { LANGUAGE_ENGLISH_UK, "SH" }, // St. 
Helena + { LANGUAGE_ENGLISH_UK, "IO" }, // British Indian Oceanic Territory + { LANGUAGE_ENGLISH_UK, "FK" }, // Falkland Islands + { LANGUAGE_ENGLISH_UK, "GI" }, // Gibraltar + { LANGUAGE_ENGLISH_UK, "KI" }, // Kiribati + { LANGUAGE_ENGLISH_UK, "VG" }, // Virgin Islands + { LANGUAGE_ENGLISH_UK, "MU" }, // Mauritius + { LANGUAGE_ENGLISH_UK, "FJ" }, // Fiji + { LANGUAGE_ENGLISH_US, "KI" }, // Kiribati + { LANGUAGE_ENGLISH_US, "LR" }, // Liberia + { LANGUAGE_ENGLISH_US, "GU" }, // Guam + { LANGUAGE_ENGLISH_US, "MH" }, // Marshall Islands + { LANGUAGE_ENGLISH_US, "PW" }, // Palau + { LANGUAGE_ENGLISH_CARRIBEAN, "AI" }, // Anguilla + { LANGUAGE_ENGLISH_CARRIBEAN, "AG" }, // Antigua and Barbuda + { LANGUAGE_ENGLISH_CARRIBEAN, "BS" }, // Bahamas + { LANGUAGE_ENGLISH_CARRIBEAN, "BB" }, // Barbedos + { LANGUAGE_ENGLISH_CARRIBEAN, "BM" }, // Bermuda + { LANGUAGE_ENGLISH_CARRIBEAN, "KY" }, // Cayman Islands + { LANGUAGE_ENGLISH_CARRIBEAN, "GD" }, // Grenada + { LANGUAGE_ENGLISH_CARRIBEAN, "DM" }, // Dominica + { LANGUAGE_ENGLISH_CARRIBEAN, "HT" }, // Haiti + { LANGUAGE_ENGLISH_CARRIBEAN, "MS" }, // Montserrat + { LANGUAGE_ENGLISH_CARRIBEAN, "FM" }, // Micronesia + { LANGUAGE_ENGLISH_CARRIBEAN, "VC" }, // St. 
Vincent / Grenadines + { LANGUAGE_ENGLISH_CARRIBEAN, "LC" }, // Saint Lucia + { LANGUAGE_ENGLISH_CARRIBEAN, "TC" }, // Turks & Caicos Islands + { LANGUAGE_ENGLISH_CARRIBEAN, "GY" }, // Guyana + { LANGUAGE_ENGLISH_CARRIBEAN, "TT" }, // Trinidad and Tobago + { LANGUAGE_ENGLISH_AUS, "CX" }, // Christmas Islands + { LANGUAGE_ENGLISH_AUS, "CC" }, // Cocos (Keeling) Islands + { LANGUAGE_ENGLISH_AUS, "NF" }, // Norfolk Island + { LANGUAGE_ENGLISH_AUS, "PG" }, // Papua New Guinea + { LANGUAGE_ENGLISH_AUS, "SB" }, // Solomon Islands + { LANGUAGE_ENGLISH_AUS, "TV" }, // Tuvalu + { LANGUAGE_ENGLISH_AUS, "NR" }, // Nauru + { LANGUAGE_ENGLISH_NZ, "CK" }, // Cook Islands + { LANGUAGE_ENGLISH_NZ, "NU" }, // Niue + { LANGUAGE_ENGLISH_NZ, "TK" }, // Tokelau + { LANGUAGE_ENGLISH_NZ, "TO" }, // Tonga + { LANGUAGE_DONTKNOW, "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] = +{ + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766 + { LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766 + { LANGUAGE_SERBIAN_LATIN, "sr", "latin" }, + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" }, + { LANGUAGE_AZERI_LATIN, "az", "latin" }, + { LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" }, + { LANGUAGE_DONTKNOW, "", "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +// in this table are only names to find the best language +static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries2[] = +{ + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmaal" }, + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmal" }, + { LANGUAGE_NORWEGIAN_NYNORSK, "no", "nynorsk" }, + { LANGUAGE_DONTKNOW, "", "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +// in this table are only names to find the best language +static IsoLangOtherEntry 
const aImplOtherEntries[] = +{ + { LANGUAGE_ENGLISH_US, "c" }, + { LANGUAGE_CHINESE, "chinese" }, + { LANGUAGE_GERMAN, "german" }, + { LANGUAGE_JAPANESE, "japanese" }, + { LANGUAGE_KOREAN, "korean" }, + { LANGUAGE_ENGLISH_US, "posix" }, + { LANGUAGE_CHINESE_TRADITIONAL, "tchinese" }, + { LANGUAGE_DONTKNOW, NULL } // marks end of table +}; + + +// in this table are only privateuse names +static IsoLangOtherEntry const aImplPrivateUseEntries[] = +{ + { LANGUAGE_USER_PRIV_NOTRANSLATE, "x-no-translate" }, //! not BCP47 but legacy in .xcu configmgr + { LANGUAGE_USER_PRIV_DEFAULT, "x-default" }, + { LANGUAGE_USER_PRIV_COMMENT, "x-comment" }, + { LANGUAGE_USER_PRIV_JOKER, "*" }, //! not BCP47 but transferable in configmgr + { LANGUAGE_DONTKNOW, NULL } // marks end of table +}; + +// ======================================================================= + +// static +void MsLangId::Conversion::convertLanguageToIsoNames( LanguageType nLang, + OUString& rLangStr, OUString& rCountry ) +{ + if ( nLang == LANGUAGE_SYSTEM ) + nLang = MsLangId::getSystemLanguage(); + + // Search for LangID (in this table we find only defined ISO combinations) + const IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if ( pEntry->mnLang == nLang ) + { + rLangStr = OUString::createFromAscii( pEntry->maLangStr ); + rCountry = OUString::createFromAscii( pEntry->maCountry ); + return; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Search for LangID if we didn't find a specific ISO combination. + // All entries in this table are allowed for mime specifications, + // but not defined ISO combinations. 
+ const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; + do + { + if ( pNoneStdEntry->mnLang == nLang ) + { + rLangStr = OUString::createFromAscii( pNoneStdEntry->maLangStr ); + rCountry = OUString::createFromAscii( pNoneStdEntry->maCountry ); + return; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Look for privateuse definitions. + const IsoLangOtherEntry* pPrivateEntry = aImplPrivateUseEntries; + do + { + if ( pPrivateEntry->mnLang == nLang ) + { + rLangStr = OUString::createFromAscii( pPrivateEntry->mpLangStr ); + rCountry = OUString(); + return; + } + ++pPrivateEntry; + } + while ( pPrivateEntry->mnLang != LANGUAGE_DONTKNOW ); + + // not found + rLangStr = OUString(); + rCountry = OUString(); +} + +// ----------------------------------------------------------------------- + +// ----------------------------------------------------------------------- + +static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( LanguageType nLang ) +{ + LanguageType nPrimary = MsLangId::getPrimaryLanguage( nLang); + + // Search for LangID and remember first lang-only. + const MsLangId::IsoLangEntry* pFirstPrimary = NULL; + const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if (pEntry->mnLang == nLang) + { + if (*pEntry->maCountry) + return *pEntry; + switch (nLang) + { + // These are known to have no country assigned. + case LANGUAGE_BASQUE: + case LANGUAGE_USER_ESPERANTO: + case LANGUAGE_USER_INTERLINGUA: + case LANGUAGE_USER_LOJBAN: + return *pEntry; + default: + ; // nothing + } + } + if (!pFirstPrimary && + MsLangId::getPrimaryLanguage( pEntry->mnLang) == nPrimary) + pFirstPrimary = pEntry; + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Language not found at all => use default. + if (!pFirstPrimary) + return aLastResortFallbackEntry; + + // Search for first entry of primary language with any country. 
+ pEntry = pFirstPrimary; + do + { + if (MsLangId::getPrimaryLanguage( pEntry->mnLang) == nLang) + { + if (*pEntry->maCountry) + return *pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + return aLastResortFallbackEntry; +} + +// static +LanguageType MsLangId::Conversion::lookupFallbackLanguage( LanguageType nLang ) +{ + return lcl_lookupFallbackEntry( nLang).mnLang; +} + + +// static +::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale( LanguageType nLang ) +{ + const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( nLang); + return ::com::sun::star::lang::Locale( + OUString::createFromAscii( rEntry.maLangStr), + OUString::createFromAscii( rEntry.maCountry), + OUString()); +} + +// ----------------------------------------------------------------------- + +static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( + const ::com::sun::star::lang::Locale & rLocale ) +{ + // language is lower case in table + OUString aLowerLang = rLocale.Language.toAsciiLowerCase(); + // country is upper case in table + OUString aUpperCountry = rLocale.Country.toAsciiUpperCase(); + sal_Int32 nCountryLen = aUpperCountry.getLength(); + + // Search for locale and remember first lang-only. + const MsLangId::IsoLangEntry* pFirstLang = NULL; + const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if (aLowerLang.equalsAscii( pEntry->maLangStr)) + { + if (*pEntry->maCountry) + { + if (nCountryLen && aUpperCountry.equalsAscii( pEntry->maCountry)) + return *pEntry; + } + else + { + switch (pEntry->mnLang) + { + // These are known to have no country assigned. + case LANGUAGE_BASQUE: + case LANGUAGE_USER_ESPERANTO: + case LANGUAGE_USER_INTERLINGUA: + case LANGUAGE_USER_LOJBAN: + return *pEntry; + default: + ; // nothing + } + } + if (!pFirstLang) + pFirstLang = pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Language not found at all => use default. 
+ if (!pFirstLang) + return aLastResortFallbackEntry; + + // Search for first entry of language with any country. + pEntry = pFirstLang; + do + { + if (aLowerLang.equalsAscii( pEntry->maLangStr)) + { + if (*pEntry->maCountry) + return *pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + return aLastResortFallbackEntry; +} + + +// static +::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ) +{ + const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( rLocale); + return ::com::sun::star::lang::Locale( + OUString::createFromAscii( rEntry.maLangStr), + OUString::createFromAscii( rEntry.maCountry), + OUString()); +} + +// ======================================================================= + +// static +LanguageType MsLangId::Conversion::convertPrivateUseToLanguage( const OUString& rPriv ) +{ + const IsoLangOtherEntry* pPrivateEntry = aImplPrivateUseEntries; + do + { + if ( rPriv.equalsIgnoreAsciiCaseAscii( pPrivateEntry->mpLangStr ) ) + return pPrivateEntry->mnLang; + ++pPrivateEntry; + } while ( pPrivateEntry->mnLang != LANGUAGE_DONTKNOW ); + return LANGUAGE_DONTKNOW; +} + + +// static +LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OUString& rLang, + const OUString& rCountry ) +{ + // language is lower case in table + OUString aLowerLang = rLang.toAsciiLowerCase(); + // country is upper case in table + OUString aUpperCountry = rCountry.toAsciiUpperCase(); + + // first look for exact match + const IsoLangEntry* pFirstLang = NULL; + const IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if ( aLowerLang.equalsAscii( pEntry->maLangStr ) ) + { + if ( aUpperCountry.isEmpty() || + aUpperCountry.equalsAscii( pEntry->maCountry ) ) + return pEntry->mnLang; + if ( !pFirstLang ) + pFirstLang = pEntry; + else if ( !*pEntry->maCountry ) + pFirstLang = pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // some eng 
countries should be mapped to a specific english language + if ( aLowerLang == "en" ) + { + const IsoLangEngEntry* pEngEntry = aImplIsoLangEngEntries; + do + { + if ( aUpperCountry.equalsAscii( pEngEntry->maCountry ) ) + return pEngEntry->mnLang; + ++pEngEntry; + } + while ( pEngEntry->mnLang != LANGUAGE_DONTKNOW ); + } + + // test for specific languages which are not used standard ISO 3166 codes + const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; + do + { + if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) + { + // The countries in this table are not all in upper case + if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) + return pNoneStdEntry->mnLang; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + pNoneStdEntry = aImplIsoNoneStdLangEntries2; + do + { + if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) + { + // The countries in this table are not all in upper case + if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) + return pNoneStdEntry->mnLang; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + + // If the language is correct, than we return the default language + if ( pFirstLang ) + return pFirstLang->mnLang; + + // if only the country is set, look for any entry matching the country + // (to allow reading country and language in separate steps, in any order) + if ( !rCountry.isEmpty() && rLang.isEmpty() ) + { + const IsoLangEntry* pEntry2 = aImplIsoLangEntries; + do + { + if ( aUpperCountry.equalsAscii( pEntry2->maCountry ) ) + return pEntry2->mnLang; + ++pEntry2; + } + while ( pEntry2->mnLang != LANGUAGE_DONTKNOW ); + + aLowerLang = aUpperCountry.toAsciiLowerCase(); + } + + // Look for privateuse definitions. 
+ LanguageType nLang = convertPrivateUseToLanguage( aLowerLang); + if (nLang != LANGUAGE_DONTKNOW) + return nLang; + + // Now look for all other definitions, which are not standard + const IsoLangOtherEntry* pOtherEntry = aImplOtherEntries; + do + { + if ( aLowerLang.equalsAscii( pOtherEntry->mpLangStr ) ) + return pOtherEntry->mnLang; + ++pOtherEntry; + } + while ( pOtherEntry->mnLang != LANGUAGE_DONTKNOW ); + + return LANGUAGE_DONTKNOW; +} + +// ----------------------------------------------------------------------- + +// static +LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OString& rLang, + const OString& rCountry ) +{ + OUString aLang = OStringToOUString( rLang, RTL_TEXTENCODING_ASCII_US); + OUString aCountry = OStringToOUString( rCountry, RTL_TEXTENCODING_ASCII_US); + return convertIsoNamesToLanguage( aLang, aCountry); +} + +// ----------------------------------------------------------------------- + +struct IsoLangGLIBCModifiersEntry +{ + LanguageType mnLang; + sal_Char maLangStr[4]; + sal_Char maCountry[3]; + sal_Char maAtString[9]; +}; + +static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = +{ + // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier + { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, + { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia + { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro + { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, + { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, + { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, + { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table +}; + +// convert a unix locale string into LanguageType + +// static +LanguageType MsLangId::convertUnxByteStringToLanguage( + const OString& rString ) +{ + OString aLang; + OString aCountry; 
+ OString aAtString; + + sal_Int32 nLangSepPos = rString.indexOf( (sal_Char)'_' ); + sal_Int32 nCountrySepPos = rString.indexOf( (sal_Char)'.' ); + sal_Int32 nAtPos = rString.indexOf( (sal_Char)'@' ); + + if (nCountrySepPos < 0) + nCountrySepPos = nAtPos; + if (nCountrySepPos < 0) + nCountrySepPos = rString.getLength(); + + if (nAtPos >= 0) + aAtString = rString.copy( nAtPos+1 ); + + if ( ((nLangSepPos >= 0) && (nLangSepPos > nCountrySepPos)) + || ((nLangSepPos < 0)) ) + { + // eg. "el.sun_eu_greek", "tchinese", "es.ISO8859-15" + aLang = rString.copy( 0, nCountrySepPos ); + } + else if ( nLangSepPos >= 0 ) + { + // well formed iso names like "en_US.UTF-8", "sh_BA.ISO8859-2@bosnia" + aLang = rString.copy( 0, nLangSepPos ); + aCountry = rString.copy( nLangSepPos+1, nCountrySepPos - nLangSepPos - 1); + } + + // if there is a glibc modifier, first look for exact match in modifier table + if (!aAtString.isEmpty()) + { + // language is lower case in table + OString aLowerLang = aLang.toAsciiLowerCase(); + // country is upper case in table + OString aUpperCountry = aCountry.toAsciiUpperCase(); + const IsoLangGLIBCModifiersEntry* pGLIBCModifiersEntry = aImplIsoLangGLIBCModifiersEntries; + do + { // avoid embedded \0 warning + if (( aLowerLang.equals( static_cast< const char* >( pGLIBCModifiersEntry->maLangStr ))) && + ( aAtString.equals( static_cast< const char* >( pGLIBCModifiersEntry->maAtString )))) + { + if ( aUpperCountry.isEmpty() || + aUpperCountry.equals( static_cast< const char* >( pGLIBCModifiersEntry->maCountry ))) + { + return pGLIBCModifiersEntry->mnLang; + } + } + ++pGLIBCModifiersEntry; + } + while ( pGLIBCModifiersEntry->mnLang != LANGUAGE_DONTKNOW ); + } + + return Conversion::convertIsoNamesToLanguage( aLang, aCountry ); +} + +// ----------------------------------------------------------------------- +// pass one IsoLangEntry to the outer world of the resource compiler + +// static +const MsLangId::IsoLangEntry* MsLangId::getIsoLangEntry( size_t nIndex ) 
+{ + if (nIndex < SAL_N_ELEMENTS(aImplIsoLangEntries)) + return &aImplIsoLangEntries[ nIndex]; + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/source/isolang/langid.pl b/i18nlangtag/source/isolang/langid.pl new file mode 100755 index 000000000000..4504f08eabed --- /dev/null +++ b/i18nlangtag/source/isolang/langid.pl @@ -0,0 +1,409 @@ +: # -*- perl -*- vim: ft=perl +eval 'exec perl -w -S $0 ${1+"$@"}' +if 0; +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . +# + +# See Usage() below or invoke without arguments for short instructions. +# For long instructions use the source, Luke ;-) + +use strict; + +sub Usage() +{ + print STDERR + "\n", + "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n", + "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n", + "listbox entries, langlist.mk, file_ooo.scp registry name, languages.pm and\n", + "msi-encodinglist.txt\n\n", + + "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n", + + "A language string will be used as a generic string match in all searched files.\n", + "You may enclose the language string in word delimiters,\n", + "e.g. 
\\blanguage_german\\b for a specific match.\n", + "If the language string expression matches more than one define,\n", + "e.g. as in 'german', all matching defines will be processed.\n", + "If the language string does not match a define or an identifier in\n", + "langtab.src, a generic string match of the listbox entries will be tried.\n\n", + + "Numeric values of LangID,primarylanguage,sublanguage can be given\n", + "decimal, hexadecimal (leading 0x), octal (leading 0) or binary (leading 0b).\n", + "The exact language_define of an exact match will be used in remaining lookups.\n\n", + + "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n", + "for example: 'en-US' or 'de-' or '-CH',\n", + "xx and YY can be given case insensitive, will be lowered-uppered internally,\n", + "and xx and YY themselves may be regular expressions.\n", + "Also here a list of matches will be processed.\n\n", + + "If option --single is given, only the first match will be processed.\n\n"; +} + +my $SOLARVERSION = $ENV{"SOLARVERSION"}; +my $INPATH = $ENV{"INPATH"}; +my $SRC_ROOT = $ENV{"SRC_ROOT"}; +my $UPDMINOREXT = $ENV{"UPDMINOREXT"}; +if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT)) +{ + print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n"; + Usage(); + exit 1; +} +if (!defined($UPDMINOREXT)) { + $UPDMINOREXT = ''; +} +my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT"; + +my $LANGUAGE_MASK_PRIMARY = 0x03ff; + +sub getPrimaryLanguage($) +{ + my($lcid) = @_; + return $lcid & $LANGUAGE_MASK_PRIMARY; +} + +sub getSubLanguage($) +{ + my($lcid) = @_; + return $lcid >> 10; +} + +sub makeLangID($$) +{ + my( $sub, $pri) = @_; + return ($sub << 10) | $pri; +} + + +sub grepFile($$$$@) +{ + my( $regex, $path, $module, $name, @addregex) = @_; + my @result; + my $found = 0; + my $areopen = 0; + my $arecloser = ''; + my $file; + # Try module under current working directory first to catch local + # modifications. 
A Not yet delivered lang.h is a special case. + if ("$path/$module/$name" eq "$SOLENVINC/i18nlangtag/lang.h") { + $file = "./$module/inc/i18nlangtag/lang.h"; } + else { + $file = "./$module/$name"; } + if (!($found = open( IN, $file))) + { + # Then with the given path. + $file = "$path/$module/$name"; + if (!($found = open( IN, $file))) + { + print "No $file\n"; + $file = "$path/$module.lnk/$name"; + if (!($found = open( IN, $file))) { + print "No $file.\n"; + $file = "$path/$module.link/$name"; + if (!($found = open( IN, $file))) { + print "No $file either.\n"; } + } + } + } + if ($found) + { + $found = 0; + while (my $line = <IN>) + { + if ($line =~ /$regex/) + { + if (!$found) + { + $found = 1; + print "$file:\n"; + } + chomp( $line); + print "$line\n"; + push( @result, $line); + } + elsif (@addregex) + { + # By convention first element is opener, second element is closer. + if (!$areopen) + { + if ($line =~ /$addregex[0]/) + { + $areopen = 1; + $arecloser = $addregex[1]; + } + } + if ($areopen) + { + for (my $i = 2; $i < @addregex; ++$i) + { + if ($line =~ /$addregex[$i]/) + { + if (!$found) + { + $found = 1; + print "$file:\n"; + } + chomp( $line); + print "$line\n"; + push( @result, $line); + } + } + if ($line =~ /$arecloser/) + { + $areopen = 0; + } + } + } + } + close( IN); + } + if (!$found) { + print "Not found in $file\n"; + #print "Not found in $file for $regex @addregex\n"; + } + return @result; +} + + +sub main() +{ + my( $lcid, @parts, $grepdef, $options, $single); + $grepdef = 0; + $single = 0; + for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options) + { + if ($ARGV[$options] eq '--single') { $single = 1; } + else { print "Unknown option: $ARGV[$options]\n"; } + } + if (@ARGV == 1 + $options) + { + # 0x hex, 0b bin, 0 oct; convert the argument that follows any options + if ($ARGV[$options] =~ /^0/) { + $lcid = oct( $ARGV[$options]); } + elsif ($ARGV[$options] =~ /^[0-9]/) { + $lcid = $ARGV[$options]; } + else + { + $grepdef = $ARGV[$options]; + $lcid = 0; + } + $parts[0] = 
getPrimaryLanguage( $lcid); + $parts[1] = getSubLanguage( $lcid); + } + elsif (@ARGV == 2 + $options) + { + for (my $i = $options; $i < 2 + $options; ++$i) # parts[0] = primary, parts[1] = sub, independent of leading options + { + if ($ARGV[$i] =~ /^0/) { + $parts[$i - $options] = oct( $ARGV[$i]); } + else { + $parts[$i - $options] = $ARGV[$i]; } + } + $lcid = makeLangID( $parts[1], $parts[0]); + } + else + { + Usage(); + return 1; + } + my $modifier = "(?i)"; + my (@resultlist, @greplist, $result); + # If no string was given on the command line, but value(s) were, lookup the + # LangID value to obtain the define identifier. + if ($grepdef) + { + # #define LANGUAGE_AFRIKAANS 0x0436 + @resultlist = grepFile( + $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, + $SOLENVINC, "i18nlangtag", "lang.h", ()); + } + else + { + printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid, + $lcid, $parts[0], $parts[1]); + my $buf = sprintf( "0x%04X", $lcid); + @resultlist = grepFile( + '^\s*#\s*define\s+\w+\s+' . $buf, + $SOLENVINC, "i18nlangtag", "lang.h", ()); + } + for $result (@resultlist) + { + # #define LANGUAGE_AFRIKAANS 0x0436 + if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) + { + push( @greplist, '\b' . $1 . '\b'); + $modifier = ""; # complete identifier now case sensitive + if ($single) { + last; } + } + } + # If the string given is of the form xx-yy lookup a language,country pair + # to obtain the define identifier. xx and yy themselves may be regexps. + # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy' + if ($grepdef =~ /^(.*)-$/) { + $grepdef = $1 . "-.*"; } + if ($grepdef =~ /^-(.*)$/) { + $grepdef = ".*-" . $1; } + if ($grepdef =~ /^(.*)-(.*)$/) + { + my $lang = $1; + my $coun = $2; + $lang = lc($lang); + $coun = uc($coun); + # { LANGUAGE_AFRIKAANS, "af", "ZA" }, + @resultlist = grepFile( + '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . 
'\"\s*\}\s*,', + "$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ()); + for $result (@resultlist) + { + if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) + { + push( @greplist, '\b' . $1 . '\b'); + $modifier = ""; # complete identifier now case sensitive + if ($single) { + last; } + } + } + $grepdef = 0; + } + if (!@greplist && $grepdef) { + push( @greplist, $grepdef); } + for $grepdef (@greplist) + { + print "\nUsing: " . $grepdef . "\n"; + + # Decimal LCID, was needed for Langpack.ulf but isn't used anymore, + # keep just in case we'd need it again. + # #define LANGUAGE_AFRIKAANS 0x0436 + @resultlist = grepFile( + $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, + $SOLENVINC, "i18nlangtag", "lang.h", ()); + my @lcidlist; + for $result (@resultlist) + { + # #define LANGUAGE_AFRIKAANS 0x0436 + if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) + { + push( @lcidlist, oct( $2)); + } + } + + # { LANGUAGE_AFRIKAANS, "af", "ZA" }, + @resultlist = grepFile( + $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,', + "$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ()); + + my @langcoungreplist; + for $result (@resultlist) + { + if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) + { + my $lang = $1; + my $coun = $2; + my $loca; + if ($coun) + { + $loca = $lang . "_" . $coun; + push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); + } + else + { + $loca = $lang; + $coun = ""; + push( @langcoungreplist, '\b' . $lang . 
'\b'); + } + my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; + my $found; + if (!($found = open( LD, $file))) + { + $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml"; + if (!($found = open( LD, $file))) + { + $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml"; + $found = open( LD, $file); + } + } + if ($found) + { + print "Found $file:\n"; + my $on = 0; + while (my $line = <LD>) + { + if ($line =~ /<(Language|Country)>/) { + $on = 1; } + if ($on) { + print $line; } + if ($line =~ /<\/(Language|Country)>/) { + $on = 0; } + } + close( LD); + } + else { + print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; } + } + } + + # case LANGUAGE_ARABIC: + grepFile( + $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:', + "$SRC_ROOT", "i18nlangtag", "source/isolang/mslangid.cxx", ()); + + # With CWS 'langstatusbar' the language listbox resource file gets a new location. + my $module = "svx"; + my $name = "source/dialog/langtab.src"; + if (!(-e "$SRC_ROOT/$module/$name")) { + $module = "svtools"; + $name = "source/misc/langtab.src"; + } + # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ; + # lookup define + @resultlist = grepFile( + $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;', + "$SRC_ROOT", $module, $name, ()); + # lookup string + if (!@resultlist) { + grepFile( + $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;', + "$SRC_ROOT", $module, $name, ()); } + + for my $langcoun (@langcoungreplist) + { + # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu"; + grepFile( + '^\s*Name\s*\(' . $langcoun . '\)\s*=', + "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); + + # completelangiso=af ar as-IN ... zu + grepFile( + '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', + "$SRC_ROOT", "solenv", "inc/langlist.mk", + # needs a duplicated pair of backslashes to produce a literal \\ + ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . 
'\s*\\\\*$')); + + # af 1252 1078 # Afrikaans + grepFile( + '^\s*' . $langcoun . '', + "$SRC_ROOT", "l10ntools", "source/ulfconv/msi-encodinglist.txt", ()); + } + } + return 0; +} + +main(); diff --git a/i18nlangtag/source/isolang/lcid.awk b/i18nlangtag/source/isolang/lcid.awk new file mode 100644 index 000000000000..db1a48d57ed8 --- /dev/null +++ b/i18nlangtag/source/isolang/lcid.awk @@ -0,0 +1,187 @@ +#!/usr/bin/awk -f +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . +# +# Utility to compare MS-LANGID definitions with those defined in ../../inc/i18nlangtag/lang.h +# Run in i18nlangtag/source/isolang +# +# outputs new #define LANGUAGE_... 0x... and also some commented out substrings +# that were matched in already existing defines. +# +# ATTENTION! The sed filter in the command line examples below assures that a +# '|' border is drawn by html2text in data tables, and nowhere else, on which +# this awk script relies. This script also heavily relies on the column layout +# encountered. Should MS decide to change their layout or their CSS names +# ("data..."), this would probably break. Should html2text decide that the last +# border="..." attribute encountered wins instead of the first, this may break +# also. 
+# +# sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' +# +# After html2text best if file cleaned up to _only_ contain the table entries, +# but not necessary, entries are filtered. Check output. +# +# Expects input from the saved page of one of +# +# (1) +# http://www.microsoft.com/globaldev/reference/lcid-all.mspx +# filtered through ``html2text -nobs ...'', generated table: +# blank,name,hex,dec,blank fields: +# |Afrikaans_-_South_Africa___|0436___|1078___| +# +# complete command line: +# lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# +# (2) +# http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx +# filtered through ``html2text -nobs ...'', generated table: +# blank,name,hex,dec,inputlocales,collection,blank fields: +# |Afrikaans |0436 |1078 |0436:00000409, |Basic | +# +# complete command line: +# lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# +# (3) +# http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp +# filtered through ``html2text -nobs ...'', generated table: +# blank,hex,locale,name,blank fields: +# |0x0436___|af-ZA___|Afrikaans_(South_Africa)___| +# +# complete command line: +# lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' 
| html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# Author: Eike Rathke <erack@sun.com>, <er@openoffice.org> +# + +BEGIN { + while ((getline < "../../inc/i18nlangtag/lang.h") > 0) + { + if ($0 ~ /^#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/) + { + # lang[HEX]=NAME + lang[toupper(substr($3,3))] = toupper($2) + #print substr($3,3) "=" $2 + } + } + # html2text table follows + FS = "\|" + filetype = 0 + lcid_all = 1 + xp_lcid = 2 + nls_238z = 3 + filetypename[filetype] = "unknown" + filetypename[lcid_all] = "lcid_all" + filetypename[xp_lcid] = "xp_lcid" + filetypename[nls_238z] = "nls_238z" + namefield[lcid_all] = 2 + namefield[xp_lcid] = 2 + namefield[nls_238z] = 4 + hexfield[lcid_all] = 3 + hexfield[xp_lcid] = 3 + hexfield[nls_238z] = 2 + locfield[lcid_all] = 0 + locfield[xp_lcid] = 0 + locfield[nls_238z] = 3 +} + +(NF < 5) { next } + +!filetype { + if (NF == 5) + { + if ($2 ~ /^0x/) + filetype = nls_238z + else if ($2 ~ /^Afrikaans/) + filetype = lcid_all + } + else if (NF == 7) + filetype = xp_lcid + if (!filetype) + next + name = namefield[filetype] + hex = hexfield[filetype] + loc = locfield[filetype] +} + +{ + gsub( /^[^:]*:/, "", $name) + gsub( /\..*/, "", $name) + gsub( /(^[ _]+)|([ _]+$)/, "", $hex) + gsub( /(^[ _]+)|([ _]+$)/, "", $name) + if (loc) + gsub( /(^[ _]+)|([ _]+$)/, "", $loc) +} + +($hex ~ /^0x/) { $hex = substr( $hex, 3) } + +# if only 464 instead of 0464, make it match lang.h +(length($hex) < 4) { $hex = "0" $hex } + +($hex !~ /^[0-9a-fA-F][0-9a-fA-F]*$/) { filtered[$hex] = $0; next } + +# all[HEX]=string +{ all[toupper($hex)] = $name } + +(loc) { comment[toupper($hex)] = " /* " $loc " */" } + +# new hex: newlang[HEX]=string +!(toupper($hex) in lang) { newlang[toupper($hex)] = $name } + +END { + if (!filetype) + { + print "No file type recognized." 
>>"/dev/stderr" + exit(1) + } + print "// assuming " filetypename[filetype] " file" + # every new language + for (x in newlang) + { + printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", newlang[x], x, comment[x]) + n = split(newlang[x],arr,/[^A-Za-z0-9]/) + def = "" + for (i=1; i<=n; ++i) + { + if (length(arr[i])) + { + # each identifier word of the language name + if (def) + def = def "_" + aup = toupper(arr[i]) + def = def aup + for (l in lang) + { + # contained in already existing definitions? + if (lang[l] ~ aup) + printf( "// %-50s %s\n", arr[i] ": " lang[l], l) + } + } + } + printf( "#define LANGUAGE_%-26s 0x%s\n", def, x) + } + print "\n// --- reverse check follows ----------------------------------\n" + for (x in lang) + { + if (!(x in all)) + print "// not in input file: " x " " lang[x] + } + print "\n// --- filtered table entries follow (if any) -----------------\n" + for (x in filtered) + print "// filtered: " x " " filtered[x] +} diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx new file mode 100644 index 000000000000..8955f554007f --- /dev/null +++ b/i18nlangtag/source/isolang/mslangid.cxx @@ -0,0 +1,492 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <com/sun/star/i18n/ScriptType.hpp> + +#include "i18nlangtag/mslangid.hxx" + + +LanguageType MsLangId::nConfiguredSystemLanguage = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredSystemUILanguage = LANGUAGE_SYSTEM; + +LanguageType MsLangId::nConfiguredWesternFallback = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredAsianFallback = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredComplexFallback = LANGUAGE_SYSTEM; + +// static +void MsLangId::setConfiguredSystemLanguage( LanguageType nLang ) +{ + nConfiguredSystemLanguage = nLang; +} + + +// static +void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang ) +{ + nConfiguredSystemUILanguage = nLang; +} + +// static +void MsLangId::setConfiguredWesternFallback( LanguageType nLang ) +{ + nConfiguredWesternFallback = nLang; +} + +// static +void MsLangId::setConfiguredAsianFallback( LanguageType nLang ) +{ + nConfiguredAsianFallback = nLang; +} + +// static +void MsLangId::setConfiguredComplexFallback( LanguageType nLang ) +{ + nConfiguredComplexFallback = nLang; +} + +// static +inline LanguageType MsLangId::simplifySystemLanguages( LanguageType nLang ) +{ + switch (nLang) + { + case LANGUAGE_PROCESS_OR_USER_DEFAULT : + case LANGUAGE_SYSTEM_DEFAULT : + case LANGUAGE_SYSTEM : + nLang = LANGUAGE_SYSTEM; + break; + default: + ; // nothing + } + return nLang; +} + +// static +LanguageType MsLangId::getRealLanguage( LanguageType nLang ) +{ + switch (simplifySystemLanguages( nLang)) + { + case LANGUAGE_SYSTEM : + if (nConfiguredSystemLanguage == LANGUAGE_SYSTEM) + nLang = getSystemLanguage(); + else + nLang = nConfiguredSystemLanguage; + break; + case LANGUAGE_HID_HUMAN_INTERFACE_DEVICE : + if (nConfiguredSystemUILanguage == LANGUAGE_SYSTEM) + nLang = getSystemUILanguage(); + else + nLang = nConfiguredSystemUILanguage; + 
break; + default: + /* TODO: would this be useful here? */ + //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); + ; // nothing + } + if (nLang == LANGUAGE_DONTKNOW) + nLang = LANGUAGE_ENGLISH_US; + return nLang; +} + + +// static +LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType ) +{ + if (nLang == LANGUAGE_NONE) + return nLang; + + nLang = getRealLanguage(nLang); + if (nType != ::com::sun::star::i18n::ScriptType::WEAK && getScriptType(nLang) != nType) + { + switch(nType) + { + case ::com::sun::star::i18n::ScriptType::ASIAN: + if (nConfiguredAsianFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_CHINESE_SIMPLIFIED; + else + nLang = nConfiguredAsianFallback; + break; + case ::com::sun::star::i18n::ScriptType::COMPLEX: + if (nConfiguredComplexFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_HINDI; + else + nLang = nConfiguredComplexFallback; + break; + default: + if (nConfiguredWesternFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_ENGLISH_US; + else + nLang = nConfiguredWesternFallback; + break; + } + } + return nLang; +} + +// static +void MsLangId::Conversion::convertLanguageToLocale( LanguageType nLang, + ::com::sun::star::lang::Locale & rLocale ) +{ + if (!rLocale.Variant.isEmpty()) + rLocale.Variant = OUString(); + convertLanguageToIsoNames( nLang, rLocale.Language, rLocale.Country); + /* FIXME: this x-... is temporary until conversion will be moved up to + * LanguageTag. Also handle the nasty "*" joker as privateuse. 
*/ + if (rLocale.Language.startsWith( "x-") || (rLocale.Language == "*")) + { + rLocale.Variant = rLocale.Language; + rLocale.Language = "qlt"; + } +} + + +// static +::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocale( + LanguageType nLang, bool bResolveSystem ) +{ + ::com::sun::star::lang::Locale aLocale; + if (!bResolveSystem && simplifySystemLanguages( nLang) == LANGUAGE_SYSTEM) + ; // nothing => empty locale + else + { + // Still resolve LANGUAGE_DONTKNOW if resolving is not requested, + // but not LANGUAGE_SYSTEM or others. + if (bResolveSystem || nLang == LANGUAGE_DONTKNOW) + nLang = MsLangId::getRealLanguage( nLang); + convertLanguageToLocale( nLang, aLocale); + } + return aLocale; +} + + +// static +LanguageType MsLangId::Conversion::convertLocaleToLanguage( + const ::com::sun::star::lang::Locale& rLocale ) +{ + // empty language => LANGUAGE_SYSTEM + if (rLocale.Language.isEmpty()) + return LANGUAGE_SYSTEM; + + /* FIXME: this x-... is temporary until conversion will be moved up to + * LanguageTag. Also handle the nasty "*" joker as privateuse. */ + LanguageType nRet = ((!rLocale.Variant.isEmpty() && + (rLocale.Variant.startsWithIgnoreAsciiCase( "x-") || (rLocale.Variant == "*"))) ? 
+ convertPrivateUseToLanguage( rLocale.Variant) : + convertIsoNamesToLanguage( rLocale.Language, rLocale.Country)); + if (nRet == LANGUAGE_DONTKNOW) + nRet = LANGUAGE_SYSTEM; + + return nRet; +} + + +// static +::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocaleWithFallback( + LanguageType nLang ) +{ + return lookupFallbackLocale( MsLangId::getRealLanguage( nLang)); +} + + +// static +::com::sun::star::lang::Locale MsLangId::getFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ) +{ + // empty language => LANGUAGE_SYSTEM + if (rLocale.Language.isEmpty()) + return Conversion::convertLanguageToLocaleWithFallback( LANGUAGE_SYSTEM); + + return Conversion::lookupFallbackLocale( rLocale); +} + +// static +bool MsLangId::isRightToLeft( LanguageType nLang ) +{ + switch( nLang & LANGUAGE_MASK_PRIMARY ) + { + case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_HEBREW & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_YIDDISH & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_URDU & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_FARSI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_KASHMIRI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_SINDHI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_UIGHUR_CHINA & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_USER_KYRGYZ_CHINA & LANGUAGE_MASK_PRIMARY : + return true; + + default: + break; + } + return false; +} + +// static +bool MsLangId::isSimplifiedChinese( LanguageType nLang ) +{ + return isChinese(nLang) && !isTraditionalChinese(nLang); +} + +// static +bool MsLangId::isSimplifiedChinese( const ::com::sun::star::lang::Locale & rLocale ) +{ + return rLocale.Language == "zh" && !isTraditionalChinese(rLocale); +} + +// static +bool MsLangId::isTraditionalChinese( LanguageType nLang ) +{ + bool bRet = false; + switch (nLang) + { + case LANGUAGE_CHINESE_TRADITIONAL: + case LANGUAGE_CHINESE_HONGKONG: + case LANGUAGE_CHINESE_MACAU: + bRet = true; + default: + break; + } + return bRet; +} + +// static +bool 
MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLocale ) +{ + return rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO"); +} + +//static +bool MsLangId::isChinese( LanguageType nLang ) +{ + return MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_CHINESE; +} + +//static +bool MsLangId::isKorean( LanguageType nLang ) +{ + return MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_KOREAN; +} + +// static +bool MsLangId::isCJK( LanguageType nLang ) +{ + switch (nLang & LANGUAGE_MASK_PRIMARY) + { + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: + return true; + default: + break; + } + return false; +} + +// static +bool MsLangId::isFamilyNameFirst( LanguageType nLang ) +{ + return isCJK(nLang) || nLang == LANGUAGE_HUNGARIAN; +} + +// static +bool MsLangId::hasForbiddenCharacters( LanguageType nLang ) +{ + return isCJK(nLang); +} + + +// static +bool MsLangId::needsSequenceChecking( LanguageType nLang ) +{ + switch (nLang & LANGUAGE_MASK_PRIMARY) + { + case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_LAO & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_THAI & LANGUAGE_MASK_PRIMARY: + return true; + default: + break; + } + return false; +} + + +// static +sal_Int16 MsLangId::getScriptType( LanguageType nLang ) +{ + sal_Int16 nScript; + switch( nLang ) + { + // CJK + // all LANGUAGE_CHINESE_... are caught below + case LANGUAGE_JAPANESE: + case LANGUAGE_KOREAN: + case LANGUAGE_KOREAN_JOHAB: + case LANGUAGE_USER_KOREAN_NORTH: + nScript = ::com::sun::star::i18n::ScriptType::ASIAN; + break; + + // CTL + // all LANGUAGE_ARABIC_... 
are caught below + case LANGUAGE_AMHARIC_ETHIOPIA: + case LANGUAGE_ASSAMESE: + case LANGUAGE_BENGALI: + case LANGUAGE_BENGALI_BANGLADESH: + case LANGUAGE_BURMESE: + case LANGUAGE_FARSI: + case LANGUAGE_HEBREW: + case LANGUAGE_YIDDISH: + case LANGUAGE_USER_YIDDISH_US: + case LANGUAGE_MARATHI: + case LANGUAGE_PUNJABI: + case LANGUAGE_GUJARATI: + case LANGUAGE_HINDI: + case LANGUAGE_KANNADA: + case LANGUAGE_KASHMIRI: + case LANGUAGE_KASHMIRI_INDIA: + case LANGUAGE_KHMER: + case LANGUAGE_LAO: + case LANGUAGE_MALAYALAM: + case LANGUAGE_MANIPURI: + case LANGUAGE_MONGOLIAN_MONGOLIAN: + case LANGUAGE_NEPALI: + case LANGUAGE_NEPALI_INDIA: + case LANGUAGE_ORIYA: + case LANGUAGE_SANSKRIT: + case LANGUAGE_SINDHI: + case LANGUAGE_SINDHI_PAKISTAN: + case LANGUAGE_SINHALESE_SRI_LANKA: + case LANGUAGE_SYRIAC: + case LANGUAGE_TAMIL: + case LANGUAGE_TELUGU: + case LANGUAGE_THAI: + case LANGUAGE_TIBETAN: + case LANGUAGE_DZONGKHA: + case LANGUAGE_URDU: + case LANGUAGE_URDU_PAKISTAN: + case LANGUAGE_URDU_INDIA: + case LANGUAGE_USER_KURDISH_IRAQ: + case LANGUAGE_USER_KURDISH_IRAN: + case LANGUAGE_DHIVEHI: + case LANGUAGE_USER_BODO_INDIA: + case LANGUAGE_USER_DOGRI_INDIA: + case LANGUAGE_USER_MAITHILI_INDIA: + case LANGUAGE_UIGHUR_CHINA: + case LANGUAGE_USER_LIMBU: + case LANGUAGE_USER_KYRGYZ_CHINA: + nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; + break; + +// currently not knowing scripttype - defaulted to LATIN: +/* +#define LANGUAGE_ARMENIAN 0x042B +#define LANGUAGE_INDONESIAN 0x0421 +#define LANGUAGE_KAZAKH 0x043F +#define LANGUAGE_KONKANI 0x0457 +#define LANGUAGE_MACEDONIAN 0x042F +#define LANGUAGE_TATAR 0x0444 +*/ + + default: + switch ( nLang & LANGUAGE_MASK_PRIMARY ) + { + // CJK catcher + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + nScript = ::com::sun::star::i18n::ScriptType::ASIAN; + break; + // CTL catcher + case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY: + nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; + break; + // Western (actually not 
necessarily Latin but also Cyrillic, for example) + default: + nScript = ::com::sun::star::i18n::ScriptType::LATIN; + } + break; + } + return nScript; +} + + +// static +LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bool bUserInterfaceSelection ) +{ + switch (nLang) + { + default: + break; // nothing + case LANGUAGE_OBSOLETE_USER_LATIN: + nLang = LANGUAGE_LATIN; + break; + case LANGUAGE_OBSOLETE_USER_MAORI: + nLang = LANGUAGE_MAORI_NEW_ZEALAND; + break; + case LANGUAGE_OBSOLETE_USER_KINYARWANDA: + nLang = LANGUAGE_KINYARWANDA_RWANDA; + break; + case LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN: + nLang = LANGUAGE_UPPER_SORBIAN_GERMANY; + break; + case LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN: + nLang = LANGUAGE_LOWER_SORBIAN_GERMANY; + break; + case LANGUAGE_OBSOLETE_USER_OCCITAN: + nLang = LANGUAGE_OCCITAN_FRANCE; + break; + case LANGUAGE_OBSOLETE_USER_BRETON: + nLang = LANGUAGE_BRETON_FRANCE; + break; + case LANGUAGE_OBSOLETE_USER_KALAALLISUT: + nLang = LANGUAGE_KALAALLISUT_GREENLAND; + break; + case LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH: + nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG; + break; + case LANGUAGE_OBSOLETE_USER_KABYLE: + nLang = LANGUAGE_TAMAZIGHT_LATIN; + break; + + // The following are not strictly obsolete but should be mapped to a + // replacement locale when encountered. + + // no_NO is an alias for nb_NO + case LANGUAGE_NORWEGIAN: + nLang = LANGUAGE_NORWEGIAN_BOKMAL; + break; + + // #i94435# A Spanish variant that differs only in collation details we + // do not support. + case LANGUAGE_SPANISH_DATED: + nLang = LANGUAGE_SPANISH_MODERN; + break; + + // Do not use ca-XV for document content. + /* TODO: remove in case we implement BCP47 language tags. 
*/ + case LANGUAGE_USER_CATALAN_VALENCIAN: + if (!bUserInterfaceSelection) + nLang = LANGUAGE_CATALAN; + break; + } + return nLang; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx new file mode 100644 index 000000000000..e1eea3b75f04 --- /dev/null +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -0,0 +1,1254 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "i18nlangtag/languagetag.hxx" +#include "i18nlangtag/mslangid.hxx" +#include <rtl/ustrbuf.hxx> +#include <rtl/bootstrap.hxx> +#include <osl/file.hxx> +#include <rtl/instance.hxx> +#include <rtl/locale.h> + +//#define erDEBUG + +#if defined(ENABLE_LIBLANGTAG) +#include <liblangtag/langtag.h> +#else +/* Replacement code for LGPL phobic and Android systems. + * For iOS we could probably use NSLocale instead, that should have more or + * less required functionality. If it is good enough, it could be used for Mac + * OS X, too. + */ +#include "simple-langtag.cxx" +#endif + +using rtl::OUString; +using rtl::OString; +using rtl::OUStringBuffer; +using namespace com::sun::star; + +// The actual pointer type of mpImplLangtag that is declared void* to not +// pollute the entire code base with liblangtag. +#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p)) +#define MPLANGTAG LANGTAGCAST(mpImplLangtag) + +/** Convention to signal presence of BCP 47 language tag in a Locale's Variant + field. The Locale's Language field then will contain this ISO 639-2 + reserved for local use code. 
*/ +#define ISO639_LANGUAGE_TAG "qlt" + + +// Helper to ensure lt_error_t is free'd +struct myLtError +{ + lt_error_t* p; + myLtError() : p(NULL) {} + ~myLtError() { if (p) lt_error_unref( p); } +}; + + +// "statics" to be returned as const reference to an empty locale and string. +namespace { +struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {}; +struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {}; +} + + +/** A reference holder for liblangtag data de/initialization, one static + instance. Currently implemented such that the first "ref" inits and dtor + (our library deinitialized) tears down. +*/ +class LiblantagDataRef +{ +public: + LiblantagDataRef(); + ~LiblantagDataRef(); + inline void incRef() + { + if (mnRef != SAL_MAX_UINT32 && !mnRef++) + setup(); + } + inline void decRef() + { + if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef) + teardown(); + } +private: + rtl::OString maDataPath; // path to liblangtag data, "|" if system + sal_uInt32 mnRef; + + void setupDataPath(); + void setup(); + void teardown(); +}; + +namespace { +struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {}; +} + +LiblantagDataRef::LiblantagDataRef() + : + mnRef(0) +{ +} + +LiblantagDataRef::~LiblantagDataRef() +{ + // When destructed we're tearing down unconditionally. + if (mnRef) + mnRef = 1; + decRef(); +} + +void LiblantagDataRef::setup() +{ + SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database"); + if (maDataPath.isEmpty()) + setupDataPath(); + lt_db_initialize(); + // Hold ref eternally. + mnRef = SAL_MAX_UINT32; +} + +void LiblantagDataRef::teardown() +{ + SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database"); + lt_db_finalize(); +} + +void LiblantagDataRef::setupDataPath() +{ + // maDataPath is assumed to be empty here. 
+ OUString aURL("$BRAND_BASE_DIR/share/liblangtag"); + rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure + + // Check if data is in our own installation, else assume system + // installation. + OUString aData( aURL); + aData += "/language-subtag-registry.xml"; + osl::DirectoryItem aDirItem; + if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None) + { + OUString aPath; + if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None) + maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8); + } + if (maDataPath.isEmpty()) + maDataPath = "|"; // assume system + else + lt_db_set_datadir( maDataPath.getStr()); +} + +LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize ) + : + maBcp47( rBcp47LanguageTag), + mpImplLangtag( NULL), + mnLangID( LANGUAGE_DONTKNOW), + meIsValid( DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), + mbSystemLocale( rBcp47LanguageTag.isEmpty()), + mbInitializedBcp47( !mbSystemLocale), + mbInitializedLocale( false), + mbInitializedLangID( false), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false), + mbIsFallback( false) +{ + if (bCanonicalize) + canonicalize(); +} + + +LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale ) + : + maLocale( rLocale), + mpImplLangtag( NULL), + mnLangID( LANGUAGE_DONTKNOW), + meIsValid( DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), + mbSystemLocale( rLocale.Language.isEmpty()), + mbInitializedBcp47( false), + mbInitializedLocale( !mbSystemLocale), + mbInitializedLangID( false), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false), + mbIsFallback( false) +{ +} + + +LanguageTag::LanguageTag( LanguageType nLanguage ) + : + mpImplLangtag( NULL), + mnLangID( nLanguage), + meIsValid( 
DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), + mbSystemLocale( nLanguage == LANGUAGE_SYSTEM), + mbInitializedBcp47( false), + mbInitializedLocale( false), + mbInitializedLangID( !mbSystemLocale), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false), + mbIsFallback( false) +{ +} + + +LanguageTag::LanguageTag( const OUString& rLanguage, const OUString& rCountry ) + : + maLocale( rLanguage, rCountry, ""), + mpImplLangtag( NULL), + mnLangID( LANGUAGE_DONTKNOW), + meIsValid( DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), + mbSystemLocale( rLanguage.isEmpty()), + mbInitializedBcp47( false), + mbInitializedLocale( !mbSystemLocale), + mbInitializedLangID( false), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false), + mbIsFallback( false) +{ +} + + +LanguageTag::LanguageTag( const rtl_Locale & rLocale ) + : + maLocale( rLocale.Language, rLocale.Country, rLocale.Variant), + mpImplLangtag( NULL), + mnLangID( LANGUAGE_DONTKNOW), + meIsValid( DECISION_DONTKNOW), + meIsIsoLocale( DECISION_DONTKNOW), + meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), + mbSystemLocale( maLocale.Language.isEmpty()), + mbInitializedBcp47( false), + mbInitializedLocale( !mbSystemLocale), + mbInitializedLangID( false), + mbCachedLanguage( false), + mbCachedScript( false), + mbCachedCountry( false), + mbIsFallback( false) +{ + convertFromRtlLocale(); +} + + +LanguageTag::LanguageTag( const LanguageTag & rLanguageTag ) + : + maLocale( rLanguageTag.maLocale), + maBcp47( rLanguageTag.maBcp47), + maCachedLanguage( rLanguageTag.maCachedLanguage), + maCachedScript( rLanguageTag.maCachedScript), + maCachedCountry( rLanguageTag.maCachedCountry), + mpImplLangtag( rLanguageTag.mpImplLangtag ? 
+            lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
+        mnLangID( rLanguageTag.mnLangID),
+        meIsValid( rLanguageTag.meIsValid),
+        meIsIsoLocale( rLanguageTag.meIsIsoLocale),
+        meIsIsoODF( rLanguageTag.meIsIsoODF),
+        meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
+        mbSystemLocale( rLanguageTag.mbSystemLocale),
+        mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
+        mbInitializedLocale( rLanguageTag.mbInitializedLocale),
+        mbInitializedLangID( rLanguageTag.mbInitializedLangID),
+        mbCachedLanguage( rLanguageTag.mbCachedLanguage),
+        mbCachedScript( rLanguageTag.mbCachedScript),
+        mbCachedCountry( rLanguageTag.mbCachedCountry),
+        mbIsFallback( rLanguageTag.mbIsFallback)
+{
+    if (mpImplLangtag)
+        theDataRef::get().incRef();
+}
+
+
+/** Copy assignment.
+
+    Copies every member of rLanguageTag. The lt_tag_t held in mpImplLangtag is
+    owned per instance, so the one held so far is released and a private copy
+    of the source's tag (if any) is taken with lt_tag_copy().
+
+    @param rLanguageTag  the tag to copy from; assigning an object to itself
+                         is a no-op.
+    @return *this
+ */
+LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
+{
+    // Self-assignment must neither copy nor release the tag we already own.
+    if (this == &rLanguageTag)
+        return *this;
+
+    maLocale            = rLanguageTag.maLocale;
+    maBcp47             = rLanguageTag.maBcp47;
+    maCachedLanguage    = rLanguageTag.maCachedLanguage;
+    maCachedScript      = rLanguageTag.maCachedScript;
+    maCachedCountry     = rLanguageTag.maCachedCountry;
+
+    // Drop the tag owned so far, it was previously overwritten and leaked.
+    // Its data reference is given back only after the new one was taken
+    // below, so the reference count does not transiently drop to zero.
+    const bool bHadLangtag = (mpImplLangtag != NULL);
+    if (bHadLangtag)
+        lt_tag_unref( MPLANGTAG);
+    mpImplLangtag       = rLanguageTag.mpImplLangtag ?
+        lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
+
+    mnLangID            = rLanguageTag.mnLangID;
+    meIsValid           = rLanguageTag.meIsValid;
+    meIsIsoLocale       = rLanguageTag.meIsIsoLocale;
+    meIsIsoODF          = rLanguageTag.meIsIsoODF;
+    meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
+    mbSystemLocale      = rLanguageTag.mbSystemLocale;
+    mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
+    mbInitializedLocale = rLanguageTag.mbInitializedLocale;
+    mbInitializedLangID = rLanguageTag.mbInitializedLangID;
+    mbCachedLanguage    = rLanguageTag.mbCachedLanguage;
+    mbCachedScript      = rLanguageTag.mbCachedScript;
+    mbCachedCountry     = rLanguageTag.mbCachedCountry;
+    mbIsFallback        = rLanguageTag.mbIsFallback;
+    if (mpImplLangtag)
+        theDataRef::get().incRef();
+    if (bHadLangtag)
+        theDataRef::get().decRef();
+    return *this;
+}
+
+
+/** Releases the owned lt_tag_t, if any, together with its data reference. */
+LanguageTag::~LanguageTag()
+{
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        theDataRef::get().decRef();
+    }
+}
+
+
+/** Puts the instance back into the "uninitialized system locale" state:
+    releases the owned lt_tag_t, clears all strings and caches and resets all
+    decision and initialization flags. Used by the reset() overloads before
+    they store their new value.
+ */
+void LanguageTag::resetVars()
+{
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        mpImplLangtag = NULL;
+        theDataRef::get().decRef();
+    }
+
+    maLocale            = lang::Locale();
+    if (!maBcp47.isEmpty())
+        maBcp47         = OUString();
+    if (!maCachedLanguage.isEmpty())
+        maCachedLanguage= OUString();
+    if (!maCachedScript.isEmpty())
+        maCachedScript  = OUString();
+    if (!maCachedCountry.isEmpty())
+        maCachedCountry = OUString();
+    mnLangID            = LANGUAGE_DONTKNOW;
+    meIsValid           = DECISION_DONTKNOW;
+    meIsIsoLocale       = DECISION_DONTKNOW;
+    meIsIsoODF          = DECISION_DONTKNOW;
+    meIsLiblangtagNeeded= DECISION_DONTKNOW;
+    mbSystemLocale      = true;
+    mbInitializedBcp47  = false;
+    mbInitializedLocale = false;
+    mbInitializedLangID = false;
+    mbCachedLanguage    = false;
+    mbCachedScript      = false;
+    mbCachedCountry     = false;
+    mbIsFallback        = false;
+}
+
+
+/** Re-initializes the instance from a BCP 47 string.
+
+    @param rBcp47LanguageTag  the new tag; an empty string denotes the system
+                              locale.
+    @param bCanonicalize      if true, canonicalize() is run right away.
+ */
+void LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+{
+    resetVars();
+    maBcp47             = rBcp47LanguageTag;
+    mbSystemLocale      = rBcp47LanguageTag.isEmpty();
+    mbInitializedBcp47  = !mbSystemLocale;
+
+    if (bCanonicalize)
+        canonicalize();
+}
+
+
+void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale ) +{ + resetVars(); + maLocale = rLocale; + mbSystemLocale = rLocale.Language.isEmpty(); + mbInitializedLocale = !mbSystemLocale; +} + + +void LanguageTag::reset( LanguageType nLanguage ) +{ + resetVars(); + mnLangID = nLanguage; + mbSystemLocale = nLanguage == LANGUAGE_SYSTEM; + mbInitializedLangID = !mbSystemLocale; +} + + +void LanguageTag::reset( const rtl_Locale & rLocale ) +{ + reset( lang::Locale( rLocale.Language, rLocale.Country, rLocale.Variant)); + convertFromRtlLocale(); +} + + +bool LanguageTag::canonicalize() +{ +#ifdef erDEBUG + // dump once + struct dumper + { + void** mpp; + dumper( void** pp ) : mpp( *pp ? NULL : pp) {} + ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); } + }; + dumper aDumper( &mpImplLangtag); +#endif + + // Side effect: have maBcp47 in any case, resolved system. + // Some methods calling canonicalize() (or not calling it due to + // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set + // meIsLiblangtagNeeded anywhere else than hereafter. + getBcp47( true ); + + // The simple cases and known locales don't need liblangtag processing, + // which also avoids loading liblangtag data on startup. + if (meIsLiblangtagNeeded == DECISION_DONTKNOW) + { + bool bTemporaryLocale = false; + bool bTemporaryLangID = false; + if (!mbInitializedLocale && !mbInitializedLangID) + { + if (mbSystemLocale) + { + mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); + mbInitializedLangID = true; + } + else + { + // Now this is getting funny.. we only have some BCP47 string + // and want to determine if parsing it would be possible + // without using liblangtag just to see if it is a simple known + // locale. 
+ OUString aLanguage, aScript, aCountry; + Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry); + if (eExt != EXTRACTED_NONE) + { + if (eExt == EXTRACTED_LSC && aScript.isEmpty()) + { + maLocale.Language = aLanguage; + maLocale.Country = aCountry; + } + else + { + maLocale.Language = ISO639_LANGUAGE_TAG; + maLocale.Country = aCountry; + maLocale.Variant = maBcp47; + } + bTemporaryLocale = mbInitializedLocale = true; + } + } + } + if (mbInitializedLangID && !mbInitializedLocale) + { + // Do not call getLocale() here because that prefers + // convertBcp47ToLocale() which would end up in recursion via + // isIsoLocale()! + + // Prepare to verify that we have a known locale, not just an + // arbitrary MS-LangID. + convertLangToLocale(); + } + if (mbInitializedLocale) + { + if (maLocale.Variant.isEmpty()) + meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC] + else + { + if (!mbInitializedLangID) + { + convertLocaleToLang(); + if (bTemporaryLocale) + bTemporaryLangID = true; + } + if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM) + meIsLiblangtagNeeded = DECISION_NO; // known locale + } + } + if (bTemporaryLocale) + { + mbInitializedLocale = false; + maLocale = lang::Locale(); + } + if (bTemporaryLangID) + { + mbInitializedLangID = false; + mnLangID = LANGUAGE_DONTKNOW; + } + } + if (meIsLiblangtagNeeded == DECISION_NO) + { + meIsValid = DECISION_YES; // really, known must be valid ... 
+ return true; // that's it + } + meIsLiblangtagNeeded = DECISION_YES; + SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47); + + if (!mpImplLangtag) + { + theDataRef::get().incRef(); + mpImplLangtag = lt_tag_new(); + } + + myLtError aError; + + if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p)) + { + char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p); + SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47); + if (pTag) + { + OUString aOld( maBcp47); + maBcp47 = OUString::createFromAscii( pTag); + // Make the lt_tag_t follow the new string if different, which + // removes default script and such. + if (maBcp47 != aOld) + { + if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p)) + { + SAL_WARN( "i18nlangtag", "LanguageTag::canonicalize: could not reparse " << maBcp47); + free( pTag); + meIsValid = DECISION_NO; + return false; + } + } + free( pTag); + meIsValid = DECISION_YES; + return true; + } + } + else + { + SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: could not parse " << maBcp47); + } + meIsValid = DECISION_NO; + return false; +} + + +void LanguageTag::convertLocaleToBcp47() +{ + if (mbSystemLocale && !mbInitializedLocale) + convertLangToLocale(); + + if (maLocale.Language == ISO639_LANGUAGE_TAG) + { + maBcp47 = maLocale.Variant; + meIsIsoLocale = DECISION_NO; + } + else + { + /* XXX NOTE: most legacy code never evaluated the Variant field, so for + * now just concatenate language and country. In case we stumbled over + * variant aware code we'd have to take care of that. 
*/ + if (maLocale.Country.isEmpty()) + maBcp47 = maLocale.Language; + else + { + OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength()); + aBuf.append( maLocale.Language).append( '-').append( maLocale.Country); + maBcp47 = aBuf.makeStringAndClear(); + } + } + mbInitializedBcp47 = true; +} + + +void LanguageTag::convertLocaleToLang() +{ + if (mbSystemLocale) + { + mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); + } + else + { + /* FIXME: this is temporary until code base is converted to not use + * MsLangId::convert...() anymore. After that, proper new method has to + * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */ + mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale); + } + mbInitializedLangID = true; +} + + +void LanguageTag::convertBcp47ToLocale() +{ + bool bIso = isIsoLocale(); + if (bIso) + { + maLocale.Language = getLanguageFromLangtag(); + maLocale.Country = getRegionFromLangtag(); + maLocale.Variant = OUString(); + } + else + { + maLocale.Language = ISO639_LANGUAGE_TAG; + maLocale.Country = getCountry(); + maLocale.Variant = maBcp47; + } + mbInitializedLocale = true; +} + + +void LanguageTag::convertBcp47ToLang() +{ + if (mbSystemLocale) + { + mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); + } + else + { + /* FIXME: this is temporary. If we support locales that consist not + * only of language and country, e.g. added script, this probably needs + * to be adapted. */ + if (!mbInitializedLocale) + convertBcp47ToLocale(); + convertLocaleToLang(); + } + mbInitializedLangID = true; +} + + +void LanguageTag::convertLangToLocale() +{ + if (mbSystemLocale && !mbInitializedLangID) + { + mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); + mbInitializedLangID = true; + } + /* FIXME: this is temporary until code base is converted to not use + * MsLangId::convert...() anymore. 
After that, proper new method has to be + * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */ + // Resolve system here! + maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true); + mbInitializedLocale = true; +} + + +void LanguageTag::convertLangToBcp47() +{ + /* FIXME: this is temporary. If we support locales that consist not only of + * language and country, e.g. added script, this probably needs to be + * adapted. */ + if (!mbInitializedLocale) + convertLangToLocale(); + convertLocaleToBcp47(); + mbInitializedBcp47 = true; +} + + +void LanguageTag::convertFromRtlLocale() +{ + // The rtl_Locale follows the Open Group Base Specification, + // 8.2 Internationalization Variables + // language[_territory][.codeset][@modifier] + // On GNU/Linux systems usually being glibc locales. + // sal/osl/unx/nlsupport.c _parse_locale() parses them into + // Language: language 2 or 3 alpha code + // Country: [territory] 2 alpha code + // Variant: [.codeset][@modifier] + // Variant effectively contains anything that follows the territory, not + // looking for '.' dot delimiter or '@' modifier content. + if (!maLocale.Variant.isEmpty()) + { + OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant, + RTL_TEXTENCODING_UTF8); + /* FIXME: let liblangtag parse this entirely with + * lt_tag_convert_from_locale() but that needs a patch to pass the + * string. 
*/ +#if 0 + myLtError aError; + theDataRef::get().incRef(); + mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p); + maBcp47 = OStringToOUString( lt_tag_get_string( MPLANGTAG), RTL_TEXTENCODING_UTF8); + mbInitializedBcp47 = true; +#else + mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr); + if (mnLangID == LANGUAGE_DONTKNOW) + { + SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr); + mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here + } + mbInitializedLangID = true; +#endif + maLocale = lang::Locale(); + mbInitializedLocale = false; + } +} + + +const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const +{ + if (!bResolveSystem && mbSystemLocale) + return theEmptyBcp47::get(); + if (!mbInitializedBcp47) + { + if (mbInitializedLocale) + const_cast<LanguageTag*>(this)->convertLocaleToBcp47(); + else + const_cast<LanguageTag*>(this)->convertLangToBcp47(); + } + return maBcp47; +} + + +OUString LanguageTag::getLanguageFromLangtag() +{ + OUString aLanguage; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + canonicalize(); + if (maBcp47.isEmpty()) + return aLanguage; + if (mpImplLangtag) + { + const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG); + SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL"); + if (!pLangT) + return aLanguage; + const char* pLang = lt_lang_get_tag( pLangT); + SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL"); + if (pLang) + aLanguage = OUString::createFromAscii( pLang); + } + else + { + if (mbCachedLanguage || cacheSimpleLSC()) + aLanguage = maCachedLanguage; + } + return aLanguage; +} + + +OUString LanguageTag::getScriptFromLangtag() +{ + OUString aScript; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + canonicalize(); + if (maBcp47.isEmpty()) + return aScript; + if (mpImplLangtag) + { + const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG); + // pScriptT==NULL is 
valid for default scripts + if (!pScriptT) + return aScript; + const char* pScript = lt_script_get_tag( pScriptT); + SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL"); + if (pScript) + aScript = OUString::createFromAscii( pScript); + } + else + { + if (mbCachedScript || cacheSimpleLSC()) + aScript = maCachedScript; + } + return aScript; +} + + +OUString LanguageTag::getRegionFromLangtag() +{ + OUString aRegion; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + canonicalize(); + if (maBcp47.isEmpty()) + return aRegion; + if (mpImplLangtag) + { + const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG); + // pRegionT==NULL is valid for language only tags, rough check here + // that does not take sophisticated tags into account that actually + // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so + // that ll-CC and lll-CC actually fail. + SAL_WARN_IF( !pRegionT && + maBcp47.getLength() != 2 && maBcp47.getLength() != 3 && + maBcp47.getLength() != 7 && maBcp47.getLength() != 8, + "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL"); + if (!pRegionT) + return aRegion; + const char* pRegion = lt_region_get_tag( pRegionT); + SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL"); + if (pRegion) + aRegion = OUString::createFromAscii( pRegion); + } + else + { + if (mbCachedCountry || cacheSimpleLSC()) + aRegion = maCachedCountry; + } + return aRegion; +} + + +const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const +{ + if (!bResolveSystem && mbSystemLocale) + return theEmptyLocale::get(); + if (!mbInitializedLocale) + { + if (mbInitializedBcp47) + const_cast<LanguageTag*>(this)->convertBcp47ToLocale(); + else + const_cast<LanguageTag*>(this)->convertLangToLocale(); + } + return maLocale; +} + + +LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const +{ + if (!bResolveSystem && mbSystemLocale) + return 
LANGUAGE_SYSTEM; + if (!mbInitializedLangID) + { + if (mbInitializedBcp47) + const_cast<LanguageTag*>(this)->convertBcp47ToLang(); + else + const_cast<LanguageTag*>(this)->convertLocaleToLang(); + } + return mnLangID; +} + + +void LanguageTag::getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const +{ + if (!isIsoLocale()) + { + rLanguage = OUString(); + rCountry = OUString(); + return; + } + // After isIsoLocale() it's safe to call getLanguage() for ISO code. + rLanguage = getLanguage(); + rCountry = getCountry(); +} + + +namespace +{ + +bool isLowerAscii( sal_Unicode c ) +{ + return 'a' <= c && c <= 'z'; +} + +bool isUpperAscii( sal_Unicode c ) +{ + return 'A' <= c && c <= 'Z'; +} + +} + + +// static +bool LanguageTag::isIsoLanguage( const OUString& rLanguage ) +{ + /* TODO: ignore case? For now let's see where rubbish is used. */ + bool b2chars; + if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) && + isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) && + (b2chars || isLowerAscii( rLanguage[2]))) + return true; + SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) && + (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) || + (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag", + "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage); + return false; +} + + +// static +bool LanguageTag::isIsoCountry( const OUString& rRegion ) +{ + /* TODO: ignore case? For now let's see where rubbish is used. */ + if (rRegion.isEmpty() || + (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1]))) + return true; + SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])), + "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion); + return false; +} + + +// static +bool LanguageTag::isIsoScript( const OUString& rScript ) +{ + /* TODO: ignore case? 
For now let's see where rubbish is used. */ + if (rScript.isEmpty() || + (rScript.getLength() == 4 && + isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) && + isLowerAscii( rScript[2]) && isLowerAscii( rScript[3]))) + return true; + SAL_WARN_IF( rScript.getLength() == 4 && + (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) || + isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])), + "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript); + return false; +} + + +OUString LanguageTag::getLanguage() const +{ + if (!mbCachedLanguage) + { + maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag(); + mbCachedLanguage = true; + } + return maCachedLanguage; +} + + +OUString LanguageTag::getScript() const +{ + if (!mbCachedScript) + { + maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag(); + mbCachedScript = true; + } + return maCachedScript; +} + + +OUString LanguageTag::getLanguageAndScript() const +{ + OUString aLanguageScript( getLanguage()); + OUString aScript( getScript()); + if (!aScript.isEmpty()) + { + OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength()); + aBuf.append( aLanguageScript).append( '-').append( aScript); + aLanguageScript = aBuf.makeStringAndClear(); + } + return aLanguageScript; +} + + +OUString LanguageTag::getCountry() const +{ + if (!mbCachedCountry) + { + maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag(); + if (!isIsoCountry( maCachedCountry)) + maCachedCountry = OUString(); + mbCachedCountry = true; + } + return maCachedCountry; +} + + +OUString LanguageTag::getRegion() const +{ + return const_cast<LanguageTag*>(this)->getRegionFromLangtag(); +} + + +OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const +{ + OUString aRet; + if (isIsoLocale()) + { + OUString aCountry( getCountry()); + if (aCountry.isEmpty()) + aRet = getLanguage() + rEncoding; + else + aRet = getLanguage() + "_" + aCountry + 
rEncoding; + } + else + { + /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from + * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it. + * So far no code was prepared for anything else than a simple + * language_country locale so we don't loose anything here right now. + * */ + } + return aRet; +} + + +bool LanguageTag::hasScript() const +{ + if (!mbCachedScript) + getScript(); + return !maCachedScript.isEmpty(); +} + + +bool LanguageTag::cacheSimpleLSC() +{ + OUString aLanguage, aScript, aCountry; + bool bRet = (simpleExtract( maBcp47, aLanguage, aScript, aCountry) == EXTRACTED_LSC); + if (bRet) + { + maCachedLanguage = aLanguage; + maCachedScript = aScript; + maCachedCountry = aCountry; + mbCachedLanguage = mbCachedScript = mbCachedCountry = true; + } + return bRet; +} + + +bool LanguageTag::isIsoLocale() const +{ + if (meIsIsoLocale == DECISION_DONTKNOW) + { + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); + // It must be at most ll-CC or lll-CC + // Do not use getCountry() here, use getRegion() instead. + meIsIsoLocale = ((maBcp47.isEmpty() || + (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ? + DECISION_YES : DECISION_NO); + } + return meIsIsoLocale == DECISION_YES; +} + + +bool LanguageTag::isIsoODF() const +{ + if (meIsIsoODF == DECISION_DONTKNOW) + { + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); + if (!isIsoScript( getScript())) + return ((meIsIsoODF = DECISION_NO) == DECISION_YES); + // The usual case is lll-CC so simply check that first. 
+ if (isIsoLocale()) + return ((meIsIsoODF = DECISION_YES) == DECISION_YES); + // If this is not ISO locale for which script must not exist it can + // still be ISO locale plus ISO script lll-Ssss-CC + meIsIsoODF = ((maBcp47.getLength() <= 11 && + isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ? + DECISION_YES : DECISION_NO); + } + return meIsIsoODF == DECISION_YES; +} + + +bool LanguageTag::isValidBcp47() const +{ + if (meIsValid == DECISION_DONTKNOW) + { + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); + SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag", + "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid"); + } + return meIsValid == DECISION_YES; +} + + +bool LanguageTag::isSystemLocale() const +{ + return mbSystemLocale; +} + + +LanguageTag & LanguageTag::makeFallback() +{ + if (!mbIsFallback) + { + if (mbInitializedLangID) + { + LanguageType nLang1 = getLanguageType(); + LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1); + if (nLang1 != nLang2) + reset( nLang2); + } + else + { + const lang::Locale& rLocale1 = getLocale(); + lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1)); + if ( rLocale1.Language != aLocale2.Language || + rLocale1.Country != aLocale2.Country || + rLocale1.Variant != aLocale2.Variant) + reset( aLocale2); + } + mbIsFallback = true; + } + return *this; +} + + +::std::vector< OUString > LanguageTag::getFallbackStrings() const +{ + ::std::vector< OUString > aVec; + OUString aLanguage( getLanguage()); + OUString aCountry( getCountry()); + if (isIsoLocale()) + { + if (!aCountry.isEmpty()) + aVec.push_back( aLanguage + "-" + aCountry); + aVec.push_back( aLanguage); + return aVec; + } + aVec.push_back( getBcp47()); + OUString aTmp; + if (hasScript()) + { + OUString aScript( getScript()); + if (!aCountry.isEmpty()) + { + aTmp = aLanguage + "-" + aScript + "-" + aCountry; + 
if (aTmp != aVec[0]) + aVec.push_back( aTmp); + } + aTmp = aLanguage + "-" + aScript; + if (aTmp != aVec[0]) + aVec.push_back( aTmp); + } + if (!aCountry.isEmpty()) + { + aTmp = aLanguage + "-" + aCountry; + if (aTmp != aVec[0]) + aVec.push_back( aTmp); + } + aTmp = aLanguage; + if (aTmp != aVec[0]) + aVec.push_back( aTmp); + return aVec; +} + + +bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const +{ + // If SYSTEM is not to be resolved or either both are SYSTEM or none, we + // can use the operator==() optimization. + if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale()) + return operator==( rLanguageTag); + + // Compare full language tag strings. + return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem); +} + + +bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const +{ + if (isSystemLocale() && rLanguageTag.isSystemLocale()) + return true; // both SYSTEM + + // No need to convert to BCP47 if both Lang-IDs are available. + if (mbInitializedLangID && rLanguageTag.mbInitializedLangID) + { + // Equal if same ID and no SYSTEM is involved or both are SYSTEM. + return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale(); + } + + // Compare full language tag strings but SYSTEM unresolved. + return getBcp47( false) == rLanguageTag.getBcp47( false); +} + + +bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const +{ + return !operator==( rLanguageTag); +} + + +// static +LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47, + OUString& rLanguage, OUString& rScript, OUString& rCountry ) +{ + Extraction eRet = EXTRACTED_NONE; + const sal_Int32 nLen = rBcp47.getLength(); + const sal_Int32 nHyph1 = rBcp47.indexOf( '-'); + if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker + { + // It's f*d up but we need to recognize this. + eRet = EXTRACTED_X_JOKER; + } + else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... 
privateuse + { + // x-... privateuse tags MUST be known to us by definition. + eRet = EXTRACTED_X; + } + else if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll + { + rLanguage = rBcp47; + rScript = rCountry = OUString(); + eRet = EXTRACTED_LSC; + } + else if ( (nLen == 5 && nHyph1 == 2) // ll-CC + || (nLen == 6 && nHyph1 == 3)) // lll-CC + { + rLanguage = rBcp47.copy( 0, nHyph1); + rCountry = rBcp47.copy( nHyph1 + 1, 2); + rScript = OUString(); + eRet = EXTRACTED_LSC; + } + else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check + || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check + { + const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1); + if (nHyph2 == nHyph1 + 5) + { + rLanguage = rBcp47.copy( 0, nHyph1); + rScript = rBcp47.copy( nHyph1 + 1, 4); + rCountry = rBcp47.copy( nHyph2 + 1, 2); + eRet = EXTRACTED_LSC; + } + } + if (eRet == EXTRACTED_NONE) + rLanguage = rScript = rCountry = OUString(); + return eRet; +} + + +// static +::std::vector< OUString >::const_iterator LanguageTag::getFallback( + const ::std::vector< OUString > & rList, const OUString & rReference ) +{ + if (rList.empty()) + return rList.end(); + + ::std::vector< OUString >::const_iterator it; + + // Try the simple case first without constructing fallbacks. 
+ for (it = rList.begin(); it != rList.end(); ++it) + { + if (*it == rReference) + return it; // exact match + } + + ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings()); + aFallbacks.erase( aFallbacks.begin()); // first is full BCP47, we already checked that + if (rReference != "en-US") + { + aFallbacks.push_back( "en-US"); + if (rReference != "en") + aFallbacks.push_back( "en"); + } + if (rReference != "x-default") + aFallbacks.push_back( "x-default"); + if (rReference != "x-no-translate") + aFallbacks.push_back( "x-no-translate"); + /* TODO: the original comphelper::Locale::getFallback() code had + * "x-notranslate" instead of "x-no-translate", but all .xcu files use + * "x-no-translate" and "x-notranslate" apparently was never used anywhere. + * Did that ever work? Was it supposed to work at all like this? */ + + for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb) + { + for (it = rList.begin(); it != rList.end(); ++it) + { + if (*it == *fb) + return it; // fallback found + } + } + + // Did not find anything so return something of the list, the first value + // will do as well as any other as none did match any of the possible + // fallbacks. + return rList.begin(); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18nlangtag/source/languagetag/simple-langtag.cxx b/i18nlangtag/source/languagetag/simple-langtag.cxx new file mode 100644 index 000000000000..d96f721dbeef --- /dev/null +++ b/i18nlangtag/source/languagetag/simple-langtag.cxx @@ -0,0 +1,400 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +/** Cheap and cheesy replacement code for liblangtag on systems that do not + allow / want LGPL code or dependencies on glib. + + XXX NOTE: This code does not check language tags for validity or if they + are registered with IANA, does not canonicalize or strip default script + tags if included nor does it do any other fancy stuff that liblangtag is + capable of. It just makes depending code work without. + */ + +#include <cstdlib> +#include <cstring> +#include <cstdio> + +namespace { + +typedef int lt_bool_t; + +struct lt_error_t { + void *something; + lt_error_t() : something(NULL) {} +}; + +static void* g_malloc(size_t s) +{ + return malloc(s); +} + +static void g_free(void* p) +{ + if (p) + free(p); +} + +static void lt_error_unref(lt_error_t *error) +{ + if (error) + { + g_free( error->something); + g_free( error); + } +} + +struct my_ref +{ + sal_uInt32 mnRef; + explicit my_ref() : mnRef(1) {} + virtual ~my_ref() {} + void incRef() { ++mnRef; } + void decRef() { if (--mnRef == 0) delete this; } +}; + +struct my_t_impl : public my_ref +{ + char* mpStr; + explicit my_t_impl() : my_ref(), mpStr(NULL) {} + virtual ~my_t_impl() { g_free( mpStr); } + explicit my_t_impl( const my_t_impl& r ) + : + my_ref(), + mpStr(r.mpStr ? strdup( r.mpStr) : NULL) + { + } + my_t_impl& operator=( const my_t_impl& r ) + { + if (this == &r) + return *this; + g_free( mpStr); + mpStr = (r.mpStr ? strdup( r.mpStr) : NULL); + return *this; + } + void assign( const char* str ) + { + g_free( mpStr); + mpStr = (str ? strdup( str) : NULL); + } + void assign( const char* str, const char* stop ) + { + g_free( mpStr); + if (str && str < stop) + { + mpStr = static_cast<char*>(g_malloc( stop - str + 1)); + memcpy( mpStr, str, stop - str); + mpStr[stop - str] = 0; + } + else + mpStr = NULL; + } + void append( const char* str, const char* stop ) + { + if (str && str < stop) + { + size_t nOld = mpStr ? 
strlen( mpStr) : 0; + size_t nNew = nOld + (stop - str) + 1; + char* p = static_cast<char*>(g_malloc( nNew)); + if (nOld) + memcpy( p, mpStr, nOld); + memcpy( p + nOld, str, stop - str); + p[nNew-1] = 0; + g_free( mpStr); + mpStr = p; + } + } + void zero() + { + g_free( mpStr); + mpStr = NULL; + } +}; + +struct lt_lang_t : public my_t_impl +{ + explicit lt_lang_t() : my_t_impl() {} + virtual ~lt_lang_t() {} +}; + +struct lt_script_t : public my_t_impl +{ + explicit lt_script_t() : my_t_impl() {} + virtual ~lt_script_t() {} +}; + +struct lt_region_t : public my_t_impl +{ + explicit lt_region_t() : my_t_impl() {} + virtual ~lt_region_t() {} +}; + +struct lt_tag_t : public my_t_impl +{ + lt_lang_t maLanguage; + lt_script_t maScript; + lt_region_t maRegion; + explicit lt_tag_t() : my_t_impl(), maLanguage(), maScript(), maRegion() {} + virtual ~lt_tag_t() {} + explicit lt_tag_t( const lt_tag_t& r ) + : + my_t_impl( r), + maLanguage( r.maLanguage), + maScript( r.maScript), + maRegion( r.maRegion) + { + } + lt_tag_t& operator=( const lt_tag_t& r ) + { + if (this == &r) + return *this; + my_t_impl::operator=( r); + maLanguage = r.maLanguage; + maScript = r.maScript; + maRegion = r.maRegion; + return *this; + } + void assign( const char* str ) + { + maLanguage.zero(); + maScript.zero(); + maRegion.zero(); + my_t_impl::assign( str); + } +}; + +static void lt_db_initialize() { } +static void lt_db_finalize() { } +static void lt_db_set_datadir( const char* /* dir */ ) { } + +static lt_tag_t* lt_tag_new(void) +{ + return new lt_tag_t; +} + +static lt_tag_t* lt_tag_copy(lt_tag_t *tag) +{ + return (tag ? new lt_tag_t( *tag) : NULL); +} + +static void lt_tag_unref(lt_tag_t *tag) +{ + if (tag) + tag->decRef(); +} + +/** See http://tools.ietf.org/html/rfc5646 + + We are simply ignorant of grandfathered (irregular and regular) subtags and + may either bail out or accept them, sorry (or not). However, we do accept + any i-* irregular and x-* privateuse. 
Subtags are not checked for validity + (alpha, digit, registered, ...). + */ +static lt_bool_t lt_tag_parse(lt_tag_t *tag, + const char *tag_string, + lt_error_t **error) +{ + (void) error; + if (!tag) + return 0; + tag->assign( tag_string); + if (!tag_string) + return 0; + // In case we supported other subtags this would get more complicated. + my_t_impl* aSubtags[] = { &tag->maLanguage, &tag->maScript, &tag->maRegion, NULL }; + my_t_impl** ppSub = &aSubtags[0]; + const char* pStart = tag_string; + const char* p = pStart; + const char* pEnd = pStart + strlen( pStart); // scanning includes \0 + bool bStartLang = true; + bool bPrivate = false; + for ( ; p <= pEnd && ppSub && *ppSub; ++p) + { + if (p == pEnd || *p == '-') + { + size_t nLen = p - pStart; + if (*ppSub == &tag->maLanguage) + { + if (bStartLang) + { + bStartLang = false; + switch (nLen) + { + case 1: // irregular or privateuse + if (*pStart == 'i' || *pStart == 'x') + { + (*ppSub)->assign( pStart, p); + bPrivate = true; + } + else + return 0; // bad + break; + case 2: // ISO 639 alpha-2 + case 3: // ISO 639 alpha-3 + (*ppSub)->assign( pStart, p); + break; + case 4: // reserved for future use + return 0; // bad + break; + case 5: + case 6: + case 7: + case 8: // registered language subtag + (*ppSub++)->assign( pStart, p); + break; + default: + return 0; // bad + } + } + else + { + if (nLen > 8) + return 0; // bad + if (bPrivate) + { + // Any combination of "x" 1*("-" (2*8alphanum)) + // allowed, store first as language and return ok. + // For i-* simply assume the same. + (*ppSub)->append( pStart-1, p); + return !0; // ok + } + else if (nLen == 3) + { + // extlang subtag, 1 to 3 allowed we don't check that. + // But if it's numeric it's a region UN M.49 code + // instead and no script subtag is present, so advance. 
+ if ('0' <= *pStart && *pStart <= '9') + { + ppSub += 2; // &tag->maRegion XXX watch this when inserting fields + --p; + continue; // for + } + else + (*ppSub)->append( pStart-1, p); + } + else + { + // Not part of language subtag, advance. + ++ppSub; + --p; + continue; // for + } + } + } + else if (*ppSub == &tag->maScript) + { + switch (nLen) + { + case 4: + // script subtag, or a (DIGIT 3alphanum) variant with + // no script and no region in which case we stop + // parsing. + if ('0' <= *pStart && *pStart <= '9') + ppSub = NULL; + else + (*ppSub++)->assign( pStart, p); + break; + case 3: + // This may be a region UN M.49 code if 3DIGIT and no + // script code present. Just check first character and + // advance. + if ('0' <= *pStart && *pStart <= '9') + { + ++ppSub; + --p; + continue; // for + } + else + return 0; // bad + break; + case 2: + // script omitted, region subtag, advance. + ++ppSub; + --p; + continue; // for + break; + case 1: + // script omitted, region omitted, extension subtag + // with singleton, stop parsing + ppSub = NULL; + break; + case 5: + case 6: + case 7: + case 8: + // script omitted, region omitted, variant subtag, stop + // parsing. + ppSub = NULL; + default: + return 0; // bad + } + } + else if (*ppSub == &tag->maRegion) + { + if (nLen == 2 || nLen == 3) + (*ppSub++)->assign( pStart, p); + else + return 0; // bad + } + pStart = p+1; + } + } + return !0; +} + +static char* lt_tag_canonicalize(lt_tag_t *tag, + lt_error_t **error) +{ + (void) error; + return tag && tag->mpStr ? strdup( tag->mpStr) : NULL; +} + +static const lt_lang_t* lt_tag_get_language(const lt_tag_t *tag) +{ + return tag && tag->maLanguage.mpStr ? &tag->maLanguage : NULL; +} + +static const lt_script_t *lt_tag_get_script(const lt_tag_t *tag) +{ + return tag && tag->maScript.mpStr ? &tag->maScript : NULL; +} + +static const lt_region_t *lt_tag_get_region(const lt_tag_t *tag) +{ + return tag && tag->maRegion.mpStr ? 
&tag->maRegion : NULL; +} + +static const char *lt_lang_get_tag(const lt_lang_t *lang) +{ + return lang ? lang->mpStr : NULL; +} + +static const char *lt_script_get_tag(const lt_script_t *script) +{ + return script ? script->mpStr : NULL; +} + +static const char *lt_region_get_tag(const lt_region_t *region) +{ + return region ? region->mpStr : NULL; +} + +#ifdef erDEBUG +static void lt_tag_dump(const lt_tag_t *tag) +{ + fprintf( stderr, "\n"); + fprintf( stderr, "SimpleLangtag langtag: %s\n", tag->mpStr); + fprintf( stderr, "SimpleLangtag language: %s\n", tag->maLanguage.mpStr); + fprintf( stderr, "SimpleLangtag script: %s\n", tag->maScript.mpStr); + fprintf( stderr, "SimpleLangtag region: %s\n", tag->maRegion.mpStr); +} +#endif + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |