1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef INCLUDED_I18NPOOL_LANGUAGETAG_HXX
#define INCLUDED_I18NPOOL_LANGUAGETAG_HXX
#include <sal/config.h>
#include <rtl/ustring.hxx>
#include <com/sun/star/lang/Locale.hpp>
#include <i18npool/i18npooldllapi.h>
#include <i18npool/lang.h>
#include <vector>
typedef struct _rtl_Locale rtl_Locale; // as in rtl/locale.h
/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
conversions in between.
Note that member variables are mutable and may change their values even in
const methods. Getter methods return either the original value or matching
converted values.
*/
class I18NISOLANG_DLLPUBLIC LanguageTag
{
public:
/** Init LanguageTag with existing BCP 47 language tag string.
@param bCanonicalize
If TRUE, canonicalize tag and reparse, the resulting tag string may
be different.
IF FALSE, the tag is simply stored and can be retrieved with
getBcp47().
Note that conversions to ISO codes, locales or LanguageType or
obtaining language or script will canonicalize the tag string anyway,
so specifying bCanonicalize=false is not a guarantee that the tag will
stay identical to what was passed.
*/
explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
/** Init LanguageTag with Locale. */
explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
/** Init LanguageTag with LanguageType MS-LangID. */
explicit LanguageTag( LanguageType nLanguage );
/** Init LanguageTag with language and country strings.
This is a convenience ctor for places that so far use only language and
country to replace the MsLangId::convert...IsoNames...() calls. Avoid
use in new code.
*/
explicit LanguageTag( const OUString& rLanguage, const OUString& rCountry );
/** Init LanguageTag with rtl_Locale.
This is a convenience ctor.
*/
explicit LanguageTag( const rtl_Locale & rLocale );
LanguageTag( const LanguageTag & rLanguageTag );
~LanguageTag();
LanguageTag& operator=( const LanguageTag & rLanguageTag );
/** Obtain BCP 47 language tag.
@param bResolveSystem
If TRUE, resolve an empty language tag denoting the system
locale to the real locale used.
If FALSE, return an empty OUString for such a tag.
*/
const OUString & getBcp47( bool bResolveSystem = true ) const;
/** Obtain language tag as Locale.
As a convention, language tags that can not be expressed as "pure"
com::sun::star::lang::Locale content using Language and Country fields
store "qlt" (ISO 639 reserved for local use) in the Language field and
the entire BCP 47 language tag in the Variant field. The Country field
contains the corresponding ISO 3166 country code _if_ there is one, or
otherwise is empty.
@param bResolveSystem
If TRUE, resolve an empty language tag denoting the system
locale to the real locale used.
If FALSE, return an empty Locale for such a tag.
*/
const com::sun::star::lang::Locale & getLocale( bool bResolveSystem = true ) const;
/** Obtain mapping to MS-LangID.
@param bResolveSystem
If TRUE, resolve an empty language tag denoting the system
locale to the real locale used.
If FALSE, return LANGUAGE_SYSTEM for such a tag.
*/
LanguageType getLanguageType( bool bResolveSystem = true ) const;
/** Obtain ISO strings for language and country.
This is a convenience method for places that so far use only language and
country to replace the MsLangId::convert...IsoNames...() calls. Avoid
use in new code.
ATTENTION! May return empty strings if the language tag is not
expressable in valid ISO codes!
@see isIsoLocale()
Always resolves an empty tag to the system locale.
*/
void getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const;
/** Get ISO 639 language code, or BCP 47 language.
Always resolves an empty tag to the system locale.
*/
OUString getLanguage() const;
/** Get ISO 15924 script code, if not the default script according to
BCP 47. For default script an empty string is returned.
@see hasScript()
Always resolves an empty tag to the system locale.
*/
OUString getScript() const;
/** Get combined language and script code, separated by '-' if
non-default script, if default script only language.
@see hasScript()
Always resolves an empty tag to the system locale.
*/
OUString getLanguageAndScript() const;
/** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
region not expressable as 2 character country code.
Always resolves an empty tag to the system locale.
*/
OUString getCountry() const;
/** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
any other BCP 47 region tag.
Always resolves an empty tag to the system locale.
*/
OUString getRegion() const;
/** Get a GLIBC locale string.
Always resolves an empty tag to the system locale.
@param rEncoding
An encoding to be appended to language_country, for example
".UTF-8" including the dot.
@return The resulting GLIBC locale string if it could be constructed,
if not an empty string is returned.
*/
OUString getGlibcLocaleString( const OUString & rEncoding ) const;
/** If language tag has a non-default script specified.
*/
bool hasScript() const;
/** If language tag is a locale that can be expressed using only ISO 639
language codes and ISO 3166 country codes, thus is convertible to a
conforming Locale struct without using extension mechanisms.
Note that an empty language tag or empty Locale::Language field or
LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
some context, but here is not. If you want that ask for
aTag.isSystemLocale() || aTag.isIsoLocale()
Always resolves an empty tag to the system locale.
*/
bool isIsoLocale() const;
/** If language tag is a locale that can be expressed using only ISO 639
language codes and ISO 15924 script codes and ISO 3166 country codes,
thus can be stored in an ODF document using only fo:language, fo:script
and fo:country attributes. If this is FALSE, the locale must be stored
as a <*:rfc-language-tag> element.
Always resolves an empty tag to the system locale.
*/
bool isIsoODF() const;
/** If this is a valid BCP 47 language tag.
Always resolves an empty tag to the system locale.
*/
bool isValidBcp47() const;
/** If this tag was contructed as an empty tag denoting the system locale.
*/
bool isSystemLocale() const;
/** Reset with existing BCP 47 language tag string. See ctor. */
void reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
/** Reset with Locale. */
void reset( const com::sun::star::lang::Locale & rLocale );
/** Reset with LanguageType MS-LangID. */
void reset( LanguageType nLanguage );
/** Reset with rtl_Locale. */
void reset( const rtl_Locale & rLocale );
/** Fall back to a known locale.
If the current tag does not represent a known (by us) locale, fall back
to the most likely locale possible known.
If the current tag is known, no change occurs.
*/
LanguageTag & makeFallback();
/** Return a vector of fall-back strings.
In order:
full BCP 47 tag, same as getBcp47()
lll-Ssss-CC
lll-Ssss
lll-CC
lll
Only strings that differ from a higher order are included, for example
if there is no script the elements will be bcp47, lll-CC, lll; if the
bcp47 string is identical to lll-CC then only lll-CC, lll.
Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
alpha code. If the region can not be expressed as ISO 3166 then no -CC
tags are included.
*/
::std::vector< OUString > getFallbackStrings() const;
/** @short search for an equal or at least for a similar locale in a list
of possible ones.
@descr First search for a locale that is equal to the reference
locale. (means: same BCP47 string)
If the reference locale could not be located, check for
"similar" locales, in the same order as obtained by
getFallbackStrings().
If no similar locale could be located, we search for a locale
"en-US" inside the given locale list.
If "en-US" could not be located, we search for a locale "en"
inside the given list.
If no "same" nor any "similar" locale could be found, we try
"x-default" and "x-no-translate" explicitly. Sometimes
variables don't use real localization. For example, in case the
localized value is a fix product name.
If no locale matched until then, we use any other locale that
exists inside the set of given ones, namely the first
encountered!
@param rList
the vector of possible locales as BCP47 strings.
@param rReference
the reference locale, BCP47 string.
@return An iterator that points to the found element inside the given
locale list. If no matching locale could be found it points to
the end of the list.
*/
static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList,
const OUString & rReference );
/** Test equality of two LanguageTag, possibly resolving system locale.
@param bResolveSystem
If TRUE, resolve empty language tags denoting the system
locale to the real locale used before comparing.
If FALSE, the behavior is identical to operator==(), system
locales are not resolved first.
*/
bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const;
/** Test equality of two LanguageTag.
Does NOT resolve system, i.e. if the system locale is en-US
LanguageTag("")==LanguageTag("en-US") returns false! Use
equals(...,true) instead if system locales shall be resolved.
*/
bool operator==( const LanguageTag & rLanguageTag ) const;
/** Test inequality of two LanguageTag.
Does NOT resolve system, i.e. if the system locale is en-US
LanguageTag("")!=LanguageTag("en-US") returns true! Use
!equals(,...true) instead if system locales shall be resolved.
*/
bool operator!=( const LanguageTag & rLanguageTag ) const;
private:
enum Decision
{
DECISION_DONTKNOW,
DECISION_NO,
DECISION_YES
};
mutable com::sun::star::lang::Locale maLocale;
mutable OUString maBcp47;
mutable OUString maCachedLanguage; ///< cache getLanguage()
mutable OUString maCachedScript; ///< cache getScript()
mutable OUString maCachedCountry; ///< cache getCountry()
mutable void* mpImplLangtag; ///< actually lt_tag_t pointer, encapsulated
mutable LanguageType mnLangID;
mutable Decision meIsValid;
mutable Decision meIsIsoLocale;
mutable Decision meIsIsoODF;
mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
bool mbSystemLocale : 1;
mutable bool mbInitializedBcp47 : 1;
mutable bool mbInitializedLocale : 1;
mutable bool mbInitializedLangID : 1;
mutable bool mbCachedLanguage : 1;
mutable bool mbCachedScript : 1;
mutable bool mbCachedCountry : 1;
bool mbIsFallback : 1;
void convertLocaleToBcp47();
void convertLocaleToLang();
void convertBcp47ToLocale();
void convertBcp47ToLang();
void convertLangToLocale();
void convertLangToBcp47();
void convertFromRtlLocale();
bool canonicalize();
OUString getLanguageFromLangtag();
OUString getScriptFromLangtag();
OUString getRegionFromLangtag();
void resetVars();
/** Obtain Language, Script and Country via simpleExtract() and assign them
to the cached variables if successful.
@return return of simpleExtract()
*/
bool cacheSimpleLSC();
static bool isIsoLanguage( const OUString& rLanguage );
static bool isIsoScript( const OUString& rScript );
static bool isIsoCountry( const OUString& rRegion );
enum Extraction
{
EXTRACTED_NONE,
EXTRACTED_LSC,
EXTRACTED_X,
EXTRACTED_X_JOKER
};
/** Of a simple language tag of the form lll[-Ssss][-CC] (i.e. one that
would fulfill the isIsoODF() condition) extract the portions.
Does not check case or content!
@return EXTRACTED_LSC if simple tag was detected, EXTRACTED_X if x-...
privateuse tag was detected, EXTRACTED_X_JOKER if "*" joker was
detected, else EXTRACTED_NONE.
*/
static Extraction simpleExtract( const OUString& rBcp47,
OUString& rLanguage,
OUString& rScript,
OUString& rCountry );
};
#endif // INCLUDED_I18NPOOL_LANGUAGETAG_HXX
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|