diff options
Diffstat (limited to 'i18nutil/source/utility/unicode.cxx')
-rw-r--r-- | i18nutil/source/utility/unicode.cxx | 368 |
1 files changed, 272 insertions, 96 deletions
diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index 9206c626382b..73e53a542199 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -2,9 +2,9 @@ * * $RCSfile: unicode.cxx,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: vg $ $Date: 2003-04-24 12:26:03 $ + * last change: $Author: hr $ $Date: 2004-03-08 17:12:56 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -67,112 +67,288 @@ using namespace ::com::sun::star::i18n; static ScriptTypeList defaultTypeList[] = { - { UnicodeScript_kBasicLatin, UnicodeScript_kBasicLatin }, // 0, - { UnicodeScript_kLatin1Supplement, UnicodeScript_kLatin1Supplement }, // 1, - { UnicodeScript_kLatinExtendedA, UnicodeScript_kLatinExtendedA }, // 2, - { UnicodeScript_kLatinExtendedB, UnicodeScript_kLatinExtendedB }, // 3, - { UnicodeScript_kIPAExtension, UnicodeScript_kIPAExtension }, // 4, - { UnicodeScript_kSpacingModifier, UnicodeScript_kSpacingModifier }, // 5, - { UnicodeScript_kCombiningDiacritical, UnicodeScript_kCombiningDiacritical }, // 6, - { UnicodeScript_kGreek, UnicodeScript_kGreek }, // 7, - { UnicodeScript_kCyrillic, UnicodeScript_kCyrillic }, // 8, - { UnicodeScript_kArmenian, UnicodeScript_kArmenian }, // 9, - { UnicodeScript_kHebrew, UnicodeScript_kHebrew }, // 10, - { UnicodeScript_kArabic, UnicodeScript_kArabic }, // 11, - { UnicodeScript_kSyriac, UnicodeScript_kSyriac }, // 12, - { UnicodeScript_kThaana, UnicodeScript_kThaana }, // 13, - { UnicodeScript_kDevanagari, UnicodeScript_kDevanagari }, // 14, - { UnicodeScript_kBengali, UnicodeScript_kBengali }, // 15, - { UnicodeScript_kGurmukhi, UnicodeScript_kGurmukhi }, // 16, - { UnicodeScript_kGujarati, UnicodeScript_kGujarati }, // 17, - { UnicodeScript_kOriya, UnicodeScript_kOriya }, // 18, - { UnicodeScript_kTamil, UnicodeScript_kTamil }, // 19, - { UnicodeScript_kTelugu, UnicodeScript_kTelugu }, // 20, - { UnicodeScript_kKannada, UnicodeScript_kKannada }, // 21, - { UnicodeScript_kMalayalam, UnicodeScript_kMalayalam }, // 22, - { UnicodeScript_kSinhala, UnicodeScript_kSinhala }, // 23, - { UnicodeScript_kThai, UnicodeScript_kThai }, // 24, - { UnicodeScript_kLao, UnicodeScript_kLao }, // 25, - { UnicodeScript_kTibetan, UnicodeScript_kTibetan }, // 26, - { UnicodeScript_kMyanmar, UnicodeScript_kMyanmar }, // 27, - { UnicodeScript_kGeorgian, UnicodeScript_kGeorgian }, // 28, - { UnicodeScript_kHangulJamo, UnicodeScript_kHangulJamo }, // 29, - { UnicodeScript_kEthiopic, UnicodeScript_kEthiopic }, // 30, - { UnicodeScript_kCherokee, UnicodeScript_kCherokee }, // 31, - { UnicodeScript_kUnifiedCanadianAboriginalSyllabics, UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32, - { UnicodeScript_kOgham, UnicodeScript_kOgham }, // 33, - { UnicodeScript_kRunic, UnicodeScript_kRunic }, // 34, - { UnicodeScript_kKhmer, UnicodeScript_kKhmer }, // 35, - { UnicodeScript_kMongolian, UnicodeScript_kMongolian }, // 36, - { UnicodeScript_kLatinExtendedAdditional, UnicodeScript_kLatinExtendedAdditional }, // 37, - { UnicodeScript_kGreekExtended, UnicodeScript_kGreekExtended }, // 38, - { UnicodeScript_kGeneralPunctuation, UnicodeScript_kGeneralPunctuation }, // 39, - { UnicodeScript_kSuperSubScript, UnicodeScript_kSuperSubScript }, // 40, - { UnicodeScript_kCurrencySymbolScript, UnicodeScript_kCurrencySymbolScript }, // 41, - { UnicodeScript_kSymbolCombiningMark, UnicodeScript_kSymbolCombiningMark }, // 42, - { UnicodeScript_kLetterlikeSymbol, UnicodeScript_kLetterlikeSymbol }, // 43, - { UnicodeScript_kNumberForm, UnicodeScript_kNumberForm }, // 44, - { UnicodeScript_kArrow, UnicodeScript_kArrow }, // 45, - { UnicodeScript_kMathOperator, UnicodeScript_kMathOperator }, // 46, - { UnicodeScript_kMiscTechnical, UnicodeScript_kMiscTechnical }, // 47, - { UnicodeScript_kControlPicture, UnicodeScript_kControlPicture }, // 48, - { UnicodeScript_kOpticalCharacter, UnicodeScript_kOpticalCharacter }, // 49, - { UnicodeScript_kEnclosedAlphanumeric, UnicodeScript_kEnclosedAlphanumeric }, // 50, - { UnicodeScript_kBoxDrawing, UnicodeScript_kBoxDrawing }, // 51, - { UnicodeScript_kBlockElement, UnicodeScript_kBlockElement }, // 52, - { UnicodeScript_kGeometricShape, UnicodeScript_kGeometricShape }, // 53, - { UnicodeScript_kMiscSymbol, UnicodeScript_kMiscSymbol }, // 54, - { UnicodeScript_kDingbat, UnicodeScript_kDingbat }, // 55, - { UnicodeScript_kBraillePatterns, UnicodeScript_kBraillePatterns }, // 56, - { UnicodeScript_kCJKRadicalsSupplement, UnicodeScript_kCJKRadicalsSupplement }, // 57, - { UnicodeScript_kKangxiRadicals, UnicodeScript_kKangxiRadicals }, // 58, - { UnicodeScript_kIdeographicDescriptionCharacters, UnicodeScript_kIdeographicDescriptionCharacters }, // 59, - { UnicodeScript_kCJKSymbolPunctuation, UnicodeScript_kCJKSymbolPunctuation }, // 60, - { UnicodeScript_kHiragana, UnicodeScript_kHiragana }, // 61, - { UnicodeScript_kKatakana, UnicodeScript_kKatakana }, // 62, - { UnicodeScript_kBopomofo, UnicodeScript_kBopomofo }, // 63, - { UnicodeScript_kHangulCompatibilityJamo, UnicodeScript_kHangulCompatibilityJamo }, // 64, - { UnicodeScript_kKanbun, UnicodeScript_kKanbun }, // 65, - { UnicodeScript_kBopomofoExtended, UnicodeScript_kBopomofoExtended }, // 66, - { UnicodeScript_kEnclosedCJKLetterMonth, UnicodeScript_kEnclosedCJKLetterMonth }, // 67, - { UnicodeScript_kCJKCompatibility, UnicodeScript_kCJKCompatibility }, // 68, - { UnicodeScript_k_CJKUnifiedIdeographsExtensionA, UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69, - { UnicodeScript_kCJKUnifiedIdeograph, UnicodeScript_kCJKUnifiedIdeograph }, // 70, - { UnicodeScript_kYiSyllables, UnicodeScript_kYiSyllables }, // 71, - { UnicodeScript_kYiRadicals, UnicodeScript_kYiRadicals }, // 72, - { UnicodeScript_kHangulSyllable, UnicodeScript_kHangulSyllable }, // 73, - { UnicodeScript_kHighSurrogate, UnicodeScript_kHighSurrogate }, // 74, - { UnicodeScript_kHighPrivateUseSurrogate, UnicodeScript_kHighPrivateUseSurrogate }, // 75, - { UnicodeScript_kLowSurrogate, UnicodeScript_kLowSurrogate }, // 76, - { UnicodeScript_kPrivateUse, UnicodeScript_kPrivateUse }, // 77, - { UnicodeScript_kCJKCompatibilityIdeograph, UnicodeScript_kCJKCompatibilityIdeograph }, // 78, - { UnicodeScript_kAlphabeticPresentation, UnicodeScript_kAlphabeticPresentation }, // 79, - { UnicodeScript_kArabicPresentationA, UnicodeScript_kArabicPresentationA }, // 80, - { UnicodeScript_kCombiningHalfMark, UnicodeScript_kCombiningHalfMark }, // 81, - { UnicodeScript_kCJKCompatibilityForm, UnicodeScript_kCJKCompatibilityForm }, // 82, - { UnicodeScript_kSmallFormVariant, UnicodeScript_kSmallFormVariant }, // 83, - { UnicodeScript_kArabicPresentationB, UnicodeScript_kArabicPresentationB }, // 84, - { UnicodeScript_kNoScript, UnicodeScript_kNoScript }, // 85, - { UnicodeScript_kHalfwidthFullwidthForm, UnicodeScript_kHalfwidthFullwidthForm }, // 86, - { UnicodeScript_kScriptCount, UnicodeScript_kNoScript } // 87, + { UnicodeScript_kBasicLatin, + UnicodeScript_kBasicLatin, + UnicodeScript_kBasicLatin }, // 0, + { UnicodeScript_kLatin1Supplement, + UnicodeScript_kLatin1Supplement, + UnicodeScript_kLatin1Supplement },// 1, + { UnicodeScript_kLatinExtendedA, + UnicodeScript_kLatinExtendedA, + UnicodeScript_kLatinExtendedA }, // 2, + { UnicodeScript_kLatinExtendedB, + UnicodeScript_kLatinExtendedB, + UnicodeScript_kLatinExtendedB }, // 3, + { UnicodeScript_kIPAExtension, + UnicodeScript_kIPAExtension, + UnicodeScript_kIPAExtension }, // 4, + { UnicodeScript_kSpacingModifier, + UnicodeScript_kSpacingModifier, + UnicodeScript_kSpacingModifier }, // 5, + { UnicodeScript_kCombiningDiacritical, + UnicodeScript_kCombiningDiacritical, + UnicodeScript_kCombiningDiacritical }, // 6, + { UnicodeScript_kGreek, + UnicodeScript_kGreek, + UnicodeScript_kGreek }, // 7, + { UnicodeScript_kCyrillic, + UnicodeScript_kCyrillic, + UnicodeScript_kCyrillic }, // 8, + { UnicodeScript_kArmenian, + UnicodeScript_kArmenian, + UnicodeScript_kArmenian }, // 9, + { UnicodeScript_kHebrew, + UnicodeScript_kHebrew, + UnicodeScript_kHebrew }, // 10, + { UnicodeScript_kArabic, + UnicodeScript_kArabic, + UnicodeScript_kArabic }, // 11, + { UnicodeScript_kSyriac, + UnicodeScript_kSyriac, + UnicodeScript_kSyriac }, // 12, + { UnicodeScript_kThaana, + UnicodeScript_kThaana, + UnicodeScript_kThaana }, // 13, + { UnicodeScript_kDevanagari, + UnicodeScript_kDevanagari, + UnicodeScript_kDevanagari }, // 14, + { UnicodeScript_kBengali, + UnicodeScript_kBengali, + UnicodeScript_kBengali }, // 15, + { UnicodeScript_kGurmukhi, + UnicodeScript_kGurmukhi, + UnicodeScript_kGurmukhi }, // 16, + { UnicodeScript_kGujarati, + UnicodeScript_kGujarati, + UnicodeScript_kGujarati }, // 17, + { UnicodeScript_kOriya, + UnicodeScript_kOriya, + UnicodeScript_kOriya }, // 18, + { UnicodeScript_kTamil, + UnicodeScript_kTamil, + UnicodeScript_kTamil }, // 19, + { UnicodeScript_kTelugu, + UnicodeScript_kTelugu, + UnicodeScript_kTelugu }, // 20, + { UnicodeScript_kKannada, + UnicodeScript_kKannada, + UnicodeScript_kKannada }, // 21, + { UnicodeScript_kMalayalam, + UnicodeScript_kMalayalam, + UnicodeScript_kMalayalam }, // 22, + { UnicodeScript_kSinhala, + UnicodeScript_kSinhala, + UnicodeScript_kSinhala }, // 23, + { UnicodeScript_kThai, + UnicodeScript_kThai, + UnicodeScript_kThai }, // 24, + { UnicodeScript_kLao, + UnicodeScript_kLao, + UnicodeScript_kLao }, // 25, + { UnicodeScript_kTibetan, + UnicodeScript_kTibetan, + UnicodeScript_kTibetan }, // 26, + { UnicodeScript_kMyanmar, + UnicodeScript_kMyanmar, + UnicodeScript_kMyanmar }, // 27, + { UnicodeScript_kGeorgian, + UnicodeScript_kGeorgian, + UnicodeScript_kGeorgian }, // 28, + { UnicodeScript_kHangulJamo, + UnicodeScript_kHangulJamo, + UnicodeScript_kHangulJamo }, // 29, + { UnicodeScript_kEthiopic, + UnicodeScript_kEthiopic, + UnicodeScript_kEthiopic }, // 30, + { UnicodeScript_kCherokee, + UnicodeScript_kCherokee, + UnicodeScript_kCherokee }, // 31, + { UnicodeScript_kUnifiedCanadianAboriginalSyllabics, + UnicodeScript_kUnifiedCanadianAboriginalSyllabics, + UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32, + { UnicodeScript_kOgham, + UnicodeScript_kOgham, + UnicodeScript_kOgham }, // 33, + { UnicodeScript_kRunic, + UnicodeScript_kRunic, + UnicodeScript_kRunic }, // 34, + { UnicodeScript_kKhmer, + UnicodeScript_kKhmer, + UnicodeScript_kKhmer }, // 35, + { UnicodeScript_kMongolian, + UnicodeScript_kMongolian, + UnicodeScript_kMongolian }, // 36, + { UnicodeScript_kLatinExtendedAdditional, + UnicodeScript_kLatinExtendedAdditional, + UnicodeScript_kLatinExtendedAdditional }, // 37, + { UnicodeScript_kGreekExtended, + UnicodeScript_kGreekExtended, + UnicodeScript_kGreekExtended }, // 38, + { UnicodeScript_kGeneralPunctuation, + UnicodeScript_kGeneralPunctuation, + UnicodeScript_kGeneralPunctuation }, // 39, + { UnicodeScript_kSuperSubScript, + UnicodeScript_kSuperSubScript, + UnicodeScript_kSuperSubScript }, // 40, + { UnicodeScript_kCurrencySymbolScript, + UnicodeScript_kCurrencySymbolScript, + UnicodeScript_kCurrencySymbolScript }, // 41, + { UnicodeScript_kSymbolCombiningMark, + UnicodeScript_kSymbolCombiningMark, + UnicodeScript_kSymbolCombiningMark }, // 42, + { UnicodeScript_kLetterlikeSymbol, + UnicodeScript_kLetterlikeSymbol, + UnicodeScript_kLetterlikeSymbol }, // 43, + { UnicodeScript_kNumberForm, + UnicodeScript_kNumberForm, + UnicodeScript_kNumberForm }, // 44, + { UnicodeScript_kArrow, + UnicodeScript_kArrow, + UnicodeScript_kArrow }, // 45, + { UnicodeScript_kMathOperator, + UnicodeScript_kMathOperator, + UnicodeScript_kMathOperator }, // 46, + { UnicodeScript_kMiscTechnical, + UnicodeScript_kMiscTechnical, + UnicodeScript_kMiscTechnical }, // 47, + { UnicodeScript_kControlPicture, + UnicodeScript_kControlPicture, + UnicodeScript_kControlPicture }, // 48, + { UnicodeScript_kOpticalCharacter, + UnicodeScript_kOpticalCharacter, + UnicodeScript_kOpticalCharacter }, // 49, + { UnicodeScript_kEnclosedAlphanumeric, + UnicodeScript_kEnclosedAlphanumeric, + UnicodeScript_kEnclosedAlphanumeric }, // 50, + { UnicodeScript_kBoxDrawing, + UnicodeScript_kBoxDrawing, + UnicodeScript_kBoxDrawing }, // 51, + { UnicodeScript_kBlockElement, + UnicodeScript_kBlockElement, + UnicodeScript_kBlockElement }, // 52, + { UnicodeScript_kGeometricShape, + UnicodeScript_kGeometricShape, + UnicodeScript_kGeometricShape }, // 53, + { UnicodeScript_kMiscSymbol, + UnicodeScript_kMiscSymbol, + UnicodeScript_kMiscSymbol }, // 54, + { UnicodeScript_kDingbat, + UnicodeScript_kDingbat, + UnicodeScript_kDingbat }, // 55, + { UnicodeScript_kBraillePatterns, + UnicodeScript_kBraillePatterns, + UnicodeScript_kBraillePatterns }, // 56, + { UnicodeScript_kCJKRadicalsSupplement, + UnicodeScript_kCJKRadicalsSupplement, + UnicodeScript_kCJKRadicalsSupplement }, // 57, + { UnicodeScript_kKangxiRadicals, + UnicodeScript_kKangxiRadicals, + UnicodeScript_kKangxiRadicals }, // 58, + { UnicodeScript_kIdeographicDescriptionCharacters, + UnicodeScript_kIdeographicDescriptionCharacters, + UnicodeScript_kIdeographicDescriptionCharacters }, // 59, + { UnicodeScript_kCJKSymbolPunctuation, + UnicodeScript_kCJKSymbolPunctuation, + UnicodeScript_kCJKSymbolPunctuation }, // 60, + { UnicodeScript_kHiragana, + UnicodeScript_kHiragana, + UnicodeScript_kHiragana }, // 61, + { UnicodeScript_kKatakana, + UnicodeScript_kKatakana, + UnicodeScript_kKatakana }, // 62, + { UnicodeScript_kBopomofo, + UnicodeScript_kBopomofo, + UnicodeScript_kBopomofo }, // 63, + { UnicodeScript_kHangulCompatibilityJamo, + UnicodeScript_kHangulCompatibilityJamo, + UnicodeScript_kHangulCompatibilityJamo }, // 64, + { UnicodeScript_kKanbun, + UnicodeScript_kKanbun, + UnicodeScript_kKanbun }, // 65, + { UnicodeScript_kBopomofoExtended, + UnicodeScript_kBopomofoExtended, + UnicodeScript_kBopomofoExtended }, // 66, + { UnicodeScript_kEnclosedCJKLetterMonth, + UnicodeScript_kEnclosedCJKLetterMonth, + UnicodeScript_kEnclosedCJKLetterMonth }, // 67, + { UnicodeScript_kCJKCompatibility, + UnicodeScript_kCJKCompatibility, + UnicodeScript_kCJKCompatibility }, // 68, + { UnicodeScript_k_CJKUnifiedIdeographsExtensionA, + UnicodeScript_k_CJKUnifiedIdeographsExtensionA, + UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69, + { UnicodeScript_kCJKUnifiedIdeograph, + UnicodeScript_kCJKUnifiedIdeograph, + UnicodeScript_kCJKUnifiedIdeograph }, // 70, + { UnicodeScript_kYiSyllables, + UnicodeScript_kYiSyllables, + UnicodeScript_kYiSyllables }, // 71, + { UnicodeScript_kYiRadicals, + UnicodeScript_kYiRadicals, + UnicodeScript_kYiRadicals }, // 72, + { UnicodeScript_kHangulSyllable, + UnicodeScript_kHangulSyllable, + UnicodeScript_kHangulSyllable }, // 73, + { UnicodeScript_kHighSurrogate, + UnicodeScript_kHighSurrogate, + UnicodeScript_kHighSurrogate }, // 74, + { UnicodeScript_kHighPrivateUseSurrogate, + UnicodeScript_kHighPrivateUseSurrogate, + UnicodeScript_kHighPrivateUseSurrogate }, // 75, + { UnicodeScript_kLowSurrogate, + UnicodeScript_kLowSurrogate, + UnicodeScript_kLowSurrogate }, // 76, + { UnicodeScript_kPrivateUse, + UnicodeScript_kPrivateUse, + UnicodeScript_kPrivateUse }, // 77, + { UnicodeScript_kCJKCompatibilityIdeograph, + UnicodeScript_kCJKCompatibilityIdeograph, + UnicodeScript_kCJKCompatibilityIdeograph }, // 78, + { UnicodeScript_kAlphabeticPresentation, + UnicodeScript_kAlphabeticPresentation, + UnicodeScript_kAlphabeticPresentation }, // 79, + { UnicodeScript_kArabicPresentationA, + UnicodeScript_kArabicPresentationA, + UnicodeScript_kArabicPresentationA }, // 80, + { UnicodeScript_kCombiningHalfMark, + UnicodeScript_kCombiningHalfMark, + UnicodeScript_kCombiningHalfMark }, // 81, + { UnicodeScript_kCJKCompatibilityForm, + UnicodeScript_kCJKCompatibilityForm, + UnicodeScript_kCJKCompatibilityForm }, // 82, + { UnicodeScript_kSmallFormVariant, + UnicodeScript_kSmallFormVariant, + UnicodeScript_kSmallFormVariant }, // 83, + { UnicodeScript_kArabicPresentationB, + UnicodeScript_kArabicPresentationB, + UnicodeScript_kArabicPresentationB }, // 84, + { UnicodeScript_kNoScript, + UnicodeScript_kNoScript, + UnicodeScript_kNoScript }, // 85, + { UnicodeScript_kHalfwidthFullwidthForm, + UnicodeScript_kHalfwidthFullwidthForm, + UnicodeScript_kHalfwidthFullwidthForm }, // 86, + { UnicodeScript_kScriptCount, + UnicodeScript_kScriptCount, + UnicodeScript_kNoScript } // 87, }; sal_Int16 SAL_CALL unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) { if (!typeList) { - typeList = defaultTypeList; - unknownType = UnicodeScript_kNoScript; + typeList = defaultTypeList; + unknownType = UnicodeScript_kNoScript; } - sal_Int16 i = 0, type = typeList[0].from; + sal_Int16 i = 0, type = typeList[0].to; while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) { - type = typeList[++i].from; + type = typeList[++i].to; } return (type < UnicodeScript_kScriptCount && - ch >= UnicodeScriptType[type][UnicodeScriptTypeFrom]) ? - typeList[i].to : unknownType; + ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ? + typeList[i].value : unknownType; } sal_Bool SAL_CALL |