diff options
author | Bustamam Harun <bustamam@openoffice.org> | 2002-03-26 12:23:20 +0000 |
---|---|---|
committer | Bustamam Harun <bustamam@openoffice.org> | 2002-03-26 12:23:20 +0000 |
commit | 1224ea06d3dae30e854916472ecbe378389f0d72 (patch) | |
tree | 59fa2024220fa2693cd0a4f26b38b0c8858268b0 /i18npool | |
parent | 0c29287f73850063ce65112e520cfe8cb8fe68c1 (diff) |
#97583# add Transliteration
Diffstat (limited to 'i18npool')
35 files changed, 6642 insertions, 0 deletions
diff --git a/i18npool/source/transliteration/fullwidthToHalfwidth.cxx b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx new file mode 100644 index 000000000000..2b506268f970 --- /dev/null +++ b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx @@ -0,0 +1,572 @@ +/************************************************************************* + * + * $RCSfile: fullwidthToHalfwidth.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_fullwidthToHalfwidth +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +/** + * Constructor: sets transliterationName and implementationName for the + * fullwidth-to-halfwidth transliteration. + */ +fullwidthToHalfwidth::fullwidthToHalfwidth() +{ + transliterationName = "fullwidthToHalfwidth"; + implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH"; +} + +/* + Unicode Normalization Forms + http://www.unicode.org/unicode/reports/tr15/index.html + http://www.w3.org/International/charlint/ +*/ + +/** + * One row of decomposition_table: the two code units that a precomposed kana + * character is split into (base character, then combining sound mark). + */ +typedef struct { + sal_Unicode decomposited_character_1; + sal_Unicode decomposited_character_2; +} decomposition_table_entry_t; + +/** + * Decomposition table for the kana range. Rows are listed in ascending code-point + * order starting at U+3040 (the code point is noted at the end of each row), so the + * row for character c is decomposition_table[c - 0x3040]. + * A row of { 0x0000, 0x0000 } means no decomposition is defined for that character. + */ +const decomposition_table_entry_t decomposition_table[] = { + { 0x0000, 0x0000 }, // 0x3040 + { 0x0000, 0x0000 }, // 0x3041 HIRAGANA LETTER SMALL A + { 0x0000, 0x0000 }, // 0x3042 HIRAGANA LETTER A + { 0x0000, 0x0000 }, // 0x3043 HIRAGANA LETTER SMALL I + { 0x0000, 0x0000 }, // 0x3044 HIRAGANA LETTER I + { 0x0000, 0x0000 }, // 0x3045 HIRAGANA LETTER SMALL U + { 0x0000, 0x0000 }, // 0x3046 HIRAGANA LETTER U + { 0x0000, 0x0000 }, // 0x3047 HIRAGANA LETTER SMALL E + { 0x0000, 0x0000 }, // 0x3048 HIRAGANA LETTER E + { 0x0000, 0x0000 }, // 0x3049 HIRAGANA LETTER SMALL O + { 0x0000, 0x0000 }, // 0x304a HIRAGANA LETTER O + { 0x0000, 0x0000 }, // 0x304b HIRAGANA LETTER KA + { 0x304b, 0x3099 }, // 0x304c HIRAGANA LETTER GA --> HIRAGANA LETTER KA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x304d HIRAGANA LETTER KI + { 0x304d, 0x3099 }, // 0x304e HIRAGANA LETTER GI --> HIRAGANA LETTER KI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x304f HIRAGANA LETTER KU + { 
0x304f, 0x3099 }, // 0x3050 HIRAGANA LETTER GU --> HIRAGANA LETTER KU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3051 HIRAGANA LETTER KE + { 0x3051, 0x3099 }, // 0x3052 HIRAGANA LETTER GE --> HIRAGANA LETTER KE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3053 HIRAGANA LETTER KO + { 0x3053, 0x3099 }, // 0x3054 HIRAGANA LETTER GO --> HIRAGANA LETTER KO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3055 HIRAGANA LETTER SA + { 0x3055, 0x3099 }, // 0x3056 HIRAGANA LETTER ZA --> HIRAGANA LETTER SA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3057 HIRAGANA LETTER SI + { 0x3057, 0x3099 }, // 0x3058 HIRAGANA LETTER ZI --> HIRAGANA LETTER SI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3059 HIRAGANA LETTER SU + { 0x3059, 0x3099 }, // 0x305a HIRAGANA LETTER ZU --> HIRAGANA LETTER SU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x305b HIRAGANA LETTER SE + { 0x305b, 0x3099 }, // 0x305c HIRAGANA LETTER ZE --> HIRAGANA LETTER SE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x305d HIRAGANA LETTER SO + { 0x305d, 0x3099 }, // 0x305e HIRAGANA LETTER ZO --> HIRAGANA LETTER SO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x305f HIRAGANA LETTER TA + { 0x305f, 0x3099 }, // 0x3060 HIRAGANA LETTER DA --> HIRAGANA LETTER TA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3061 HIRAGANA LETTER TI + { 0x3061, 0x3099 }, // 0x3062 HIRAGANA LETTER DI --> HIRAGANA LETTER TI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3063 HIRAGANA LETTER SMALL TU + { 0x0000, 0x0000 }, // 0x3064 HIRAGANA LETTER TU + { 0x3064, 0x3099 }, // 0x3065 HIRAGANA LETTER DU --> HIRAGANA LETTER TU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3066 HIRAGANA LETTER TE + { 0x3066, 0x3099 }, // 0x3067 HIRAGANA 
LETTER DE --> HIRAGANA LETTER TE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3068 HIRAGANA LETTER TO + { 0x3068, 0x3099 }, // 0x3069 HIRAGANA LETTER DO --> HIRAGANA LETTER TO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x306a HIRAGANA LETTER NA + { 0x0000, 0x0000 }, // 0x306b HIRAGANA LETTER NI + { 0x0000, 0x0000 }, // 0x306c HIRAGANA LETTER NU + { 0x0000, 0x0000 }, // 0x306d HIRAGANA LETTER NE + { 0x0000, 0x0000 }, // 0x306e HIRAGANA LETTER NO + { 0x0000, 0x0000 }, // 0x306f HIRAGANA LETTER HA + { 0x306f, 0x3099 }, // 0x3070 HIRAGANA LETTER BA --> HIRAGANA LETTER HA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x306f, 0x309a }, // 0x3071 HIRAGANA LETTER PA --> HIRAGANA LETTER HA + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3072 HIRAGANA LETTER HI + { 0x3072, 0x3099 }, // 0x3073 HIRAGANA LETTER BI --> HIRAGANA LETTER HI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x3072, 0x309a }, // 0x3074 HIRAGANA LETTER PI --> HIRAGANA LETTER HI + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3075 HIRAGANA LETTER HU + { 0x3075, 0x3099 }, // 0x3076 HIRAGANA LETTER BU --> HIRAGANA LETTER HU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x3075, 0x309a }, // 0x3077 HIRAGANA LETTER PU --> HIRAGANA LETTER HU + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3078 HIRAGANA LETTER HE + { 0x3078, 0x3099 }, // 0x3079 HIRAGANA LETTER BE --> HIRAGANA LETTER HE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x3078, 0x309a }, // 0x307a HIRAGANA LETTER PE --> HIRAGANA LETTER HE + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x307b HIRAGANA LETTER HO + { 0x307b, 0x3099 }, // 0x307c HIRAGANA LETTER BO --> HIRAGANA LETTER HO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x307b, 0x309a }, // 0x307d HIRAGANA LETTER PO --> HIRAGANA LETTER HO + COMBINING KATAKANA-HIRAGANA SEMI-VOICED 
SOUND MARK + { 0x0000, 0x0000 }, // 0x307e HIRAGANA LETTER MA + { 0x0000, 0x0000 }, // 0x307f HIRAGANA LETTER MI + { 0x0000, 0x0000 }, // 0x3080 HIRAGANA LETTER MU + { 0x0000, 0x0000 }, // 0x3081 HIRAGANA LETTER ME + { 0x0000, 0x0000 }, // 0x3082 HIRAGANA LETTER MO + { 0x0000, 0x0000 }, // 0x3083 HIRAGANA LETTER SMALL YA + { 0x0000, 0x0000 }, // 0x3084 HIRAGANA LETTER YA + { 0x0000, 0x0000 }, // 0x3085 HIRAGANA LETTER SMALL YU + { 0x0000, 0x0000 }, // 0x3086 HIRAGANA LETTER YU + { 0x0000, 0x0000 }, // 0x3087 HIRAGANA LETTER SMALL YO + { 0x0000, 0x0000 }, // 0x3088 HIRAGANA LETTER YO + { 0x0000, 0x0000 }, // 0x3089 HIRAGANA LETTER RA + { 0x0000, 0x0000 }, // 0x308a HIRAGANA LETTER RI + { 0x0000, 0x0000 }, // 0x308b HIRAGANA LETTER RU + { 0x0000, 0x0000 }, // 0x308c HIRAGANA LETTER RE + { 0x0000, 0x0000 }, // 0x308d HIRAGANA LETTER RO + { 0x0000, 0x0000 }, // 0x308e HIRAGANA LETTER SMALL WA + { 0x0000, 0x0000 }, // 0x308f HIRAGANA LETTER WA + { 0x0000, 0x0000 }, // 0x3090 HIRAGANA LETTER WI + { 0x0000, 0x0000 }, // 0x3091 HIRAGANA LETTER WE + { 0x0000, 0x0000 }, // 0x3092 HIRAGANA LETTER WO + { 0x0000, 0x0000 }, // 0x3093 HIRAGANA LETTER N + { 0x3046, 0x3099 }, // 0x3094 HIRAGANA LETTER VU --> HIRAGANA LETTER U + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x3095 + { 0x0000, 0x0000 }, // 0x3096 + { 0x0000, 0x0000 }, // 0x3097 + { 0x0000, 0x0000 }, // 0x3098 + { 0x0000, 0x0000 }, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309d HIRAGANA ITERATION MARK + { 0x0000, 0x0000 }, // 0x309e HIRAGANA VOICED ITERATION MARK + { 0x0000, 0x0000 }, // 0x309f + { 0x0000, 0x0000 }, // 0x30a0 + { 0x0000, 0x0000 }, // 0x30a1 KATAKANA LETTER SMALL A + { 0x0000, 0x0000 }, // 0x30a2 KATAKANA LETTER A + { 
0x0000, 0x0000 }, // 0x30a3 KATAKANA LETTER SMALL I + { 0x0000, 0x0000 }, // 0x30a4 KATAKANA LETTER I + { 0x0000, 0x0000 }, // 0x30a5 KATAKANA LETTER SMALL U + { 0x0000, 0x0000 }, // 0x30a6 KATAKANA LETTER U + { 0x0000, 0x0000 }, // 0x30a7 KATAKANA LETTER SMALL E + { 0x0000, 0x0000 }, // 0x30a8 KATAKANA LETTER E + { 0x0000, 0x0000 }, // 0x30a9 KATAKANA LETTER SMALL O + { 0x0000, 0x0000 }, // 0x30aa KATAKANA LETTER O + { 0x0000, 0x0000 }, // 0x30ab KATAKANA LETTER KA + { 0x30ab, 0x3099 }, // 0x30ac KATAKANA LETTER GA --> KATAKANA LETTER KA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30ad KATAKANA LETTER KI + { 0x30ad, 0x3099 }, // 0x30ae KATAKANA LETTER GI --> KATAKANA LETTER KI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30af KATAKANA LETTER KU + { 0x30af, 0x3099 }, // 0x30b0 KATAKANA LETTER GU --> KATAKANA LETTER KU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30b1 KATAKANA LETTER KE + { 0x30b1, 0x3099 }, // 0x30b2 KATAKANA LETTER GE --> KATAKANA LETTER KE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30b3 KATAKANA LETTER KO + { 0x30b3, 0x3099 }, // 0x30b4 KATAKANA LETTER GO --> KATAKANA LETTER KO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30b5 KATAKANA LETTER SA + { 0x30b5, 0x3099 }, // 0x30b6 KATAKANA LETTER ZA --> KATAKANA LETTER SA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30b7 KATAKANA LETTER SI + { 0x30b7, 0x3099 }, // 0x30b8 KATAKANA LETTER ZI --> KATAKANA LETTER SI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30b9 KATAKANA LETTER SU + { 0x30b9, 0x3099 }, // 0x30ba KATAKANA LETTER ZU --> KATAKANA LETTER SU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30bb KATAKANA LETTER SE + { 0x30bb, 0x3099 }, // 0x30bc KATAKANA LETTER ZE --> KATAKANA LETTER SE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, 
// 0x30bd KATAKANA LETTER SO + { 0x30bd, 0x3099 }, // 0x30be KATAKANA LETTER ZO --> KATAKANA LETTER SO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30bf KATAKANA LETTER TA + { 0x30bf, 0x3099 }, // 0x30c0 KATAKANA LETTER DA --> KATAKANA LETTER TA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30c1 KATAKANA LETTER TI + { 0x30c1, 0x3099 }, // 0x30c2 KATAKANA LETTER DI --> KATAKANA LETTER TI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30c3 KATAKANA LETTER SMALL TU + { 0x0000, 0x0000 }, // 0x30c4 KATAKANA LETTER TU + { 0x30c4, 0x3099 }, // 0x30c5 KATAKANA LETTER DU --> KATAKANA LETTER TU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30c6 KATAKANA LETTER TE + { 0x30c6, 0x3099 }, // 0x30c7 KATAKANA LETTER DE --> KATAKANA LETTER TE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30c8 KATAKANA LETTER TO + { 0x30c8, 0x3099 }, // 0x30c9 KATAKANA LETTER DO --> KATAKANA LETTER TO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30ca KATAKANA LETTER NA + { 0x0000, 0x0000 }, // 0x30cb KATAKANA LETTER NI + { 0x0000, 0x0000 }, // 0x30cc KATAKANA LETTER NU + { 0x0000, 0x0000 }, // 0x30cd KATAKANA LETTER NE + { 0x0000, 0x0000 }, // 0x30ce KATAKANA LETTER NO + { 0x0000, 0x0000 }, // 0x30cf KATAKANA LETTER HA + { 0x30cf, 0x3099 }, // 0x30d0 KATAKANA LETTER BA --> KATAKANA LETTER HA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30cf, 0x309a }, // 0x30d1 KATAKANA LETTER PA --> KATAKANA LETTER HA + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30d2 KATAKANA LETTER HI + { 0x30d2, 0x3099 }, // 0x30d3 KATAKANA LETTER BI --> KATAKANA LETTER HI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30d2, 0x309a }, // 0x30d4 KATAKANA LETTER PI --> KATAKANA LETTER HI + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30d5 KATAKANA LETTER HU + { 0x30d5, 0x3099 
}, // 0x30d6 KATAKANA LETTER BU --> KATAKANA LETTER HU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30d5, 0x309a }, // 0x30d7 KATAKANA LETTER PU --> KATAKANA LETTER HU + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30d8 KATAKANA LETTER HE + { 0x30d8, 0x3099 }, // 0x30d9 KATAKANA LETTER BE --> KATAKANA LETTER HE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30d8, 0x309a }, // 0x30da KATAKANA LETTER PE --> KATAKANA LETTER HE + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30db KATAKANA LETTER HO + { 0x30db, 0x3099 }, // 0x30dc KATAKANA LETTER BO --> KATAKANA LETTER HO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30db, 0x309a }, // 0x30dd KATAKANA LETTER PO --> KATAKANA LETTER HO + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30de KATAKANA LETTER MA + { 0x0000, 0x0000 }, // 0x30df KATAKANA LETTER MI + { 0x0000, 0x0000 }, // 0x30e0 KATAKANA LETTER MU + { 0x0000, 0x0000 }, // 0x30e1 KATAKANA LETTER ME + { 0x0000, 0x0000 }, // 0x30e2 KATAKANA LETTER MO + { 0x0000, 0x0000 }, // 0x30e3 KATAKANA LETTER SMALL YA + { 0x0000, 0x0000 }, // 0x30e4 KATAKANA LETTER YA + { 0x0000, 0x0000 }, // 0x30e5 KATAKANA LETTER SMALL YU + { 0x0000, 0x0000 }, // 0x30e6 KATAKANA LETTER YU + { 0x0000, 0x0000 }, // 0x30e7 KATAKANA LETTER SMALL YO + { 0x0000, 0x0000 }, // 0x30e8 KATAKANA LETTER YO + { 0x0000, 0x0000 }, // 0x30e9 KATAKANA LETTER RA + { 0x0000, 0x0000 }, // 0x30ea KATAKANA LETTER RI + { 0x0000, 0x0000 }, // 0x30eb KATAKANA LETTER RU + { 0x0000, 0x0000 }, // 0x30ec KATAKANA LETTER RE + { 0x0000, 0x0000 }, // 0x30ed KATAKANA LETTER RO + { 0x0000, 0x0000 }, // 0x30ee KATAKANA LETTER SMALL WA + { 0x0000, 0x0000 }, // 0x30ef KATAKANA LETTER WA + { 0x0000, 0x0000 }, // 0x30f0 KATAKANA LETTER WI + { 0x0000, 0x0000 }, // 0x30f1 KATAKANA LETTER WE + { 0x0000, 0x0000 }, // 0x30f2 KATAKANA LETTER WO + { 0x0000, 0x0000 }, // 0x30f3 KATAKANA LETTER N + { 0x30a6, 0x3099 }, // 
0x30f4 KATAKANA LETTER VU --> KATAKANA LETTER U + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30f5 KATAKANA LETTER SMALL KA + { 0x0000, 0x0000 }, // 0x30f6 KATAKANA LETTER SMALL KE + { 0x30ef, 0x3099 }, // 0x30f7 KATAKANA LETTER VA --> KATAKANA LETTER WA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30f0, 0x3099 }, // 0x30f8 KATAKANA LETTER VI --> KATAKANA LETTER WI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30f1, 0x3099 }, // 0x30f9 KATAKANA LETTER VE --> KATAKANA LETTER WE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x30f2, 0x3099 }, // 0x30fa KATAKANA LETTER VO --> KATAKANA LETTER WO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x30fb KATAKANA MIDDLE DOT + { 0x0000, 0x0000 }, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK + { 0x0000, 0x0000 }, // 0x30fd KATAKANA ITERATION MARK + { 0x0000, 0x0000 }, // 0x30fe KATAKANA VOICED ITERATION MARK + { 0x0000, 0x0000 } // 0x30ff +}; + + +/** + * Decompose Japanese specific voiced and semi-voiced sound marks. + * + * Copies nCount UTF-16 code units of inStr beginning at startPos. Every character in + * U+30A0..U+30FF (Katakana) whose decomposition_table row is non-zero is replaced by + * its base character followed by the combining (semi-)voiced sound mark; Hiragana and + * all other characters are copied unchanged. + * + * @param inStr source string + * @param startPos index in inStr of the first code unit to process + * @param nCount number of code units to process; a count that runs past the end of + * inStr is trimmed, and an invalid startPos or a negative nCount is + * treated as an empty range + * @param offset resized to the length of the result; offset[i] is the index in inStr + * of the character that produced result character i + * @return the decomposed string + */ +OUString SAL_CALL +decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) +{ + // Keep the processed range inside inStr so that the buffer sizes below are never + // negative and the copy loop never reads outside the source buffer. + const sal_Int32 nLen = inStr.getLength(); + if (startPos < 0 || startPos > nLen || nCount < 0) { + startPos = 0; + nCount = 0; + } else if (nCount > nLen - startPos) { + nCount = nLen - startPos; + } + + // Create a string buffer which can hold nCount * 2 + 1 characters. + // Its size may become double of nCount. + // NOTE(review): presumably the helper reserves the extra slot for the terminating 0 written after the loop - confirm in x_rtl_ustring.h + rtl_uString * newStr; + x_rtl_uString_new_WithLength( &newStr, nCount * 2 ); // defined in x_rtl_ustring.h The reference count is 0 now. + + // Allocate double of nCount length to offset argument. + offset.realloc( nCount * 2 ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // Prepare pointers of unicode character arrays. 
+ const sal_Unicode* src = inStr.getStr() + startPos; + sal_Unicode* dst = newStr->buffer; + + // Decomposition: GA --> KA + voice-mark + while (nCount -- > 0) { + sal_Unicode c = *src++; + // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) + // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) + // Hiragana is not applied to decomposition. + // Only Katakana is applied to decomposition + if (0x30a0 <= c && c <= 0x30ff) { + // decomposition_table starts at U+3040, hence the bias. + int i = int(c - 0x3040); + sal_Unicode first = decomposition_table[i].decomposited_character_1; + if (first != 0x0000) { + *dst ++ = first; + *dst ++ = decomposition_table[i].decomposited_character_2; // second + // Both output characters map back to the same source index. + *p ++ = position; + *p ++ = position ++; + continue; + } + } + *dst ++ = c; + *p ++ = position ++; + } + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + // Shrink offset from the worst-case size to the number of characters actually written. + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/ustring.hxx. The reference count is increased from 0 to 1. 
+} + + +/* + Halfwidth and Fullwidth Forms (U+FF00..U+FFEF) + ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt + ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html + http://charts.unicode.org/Web/UFF00.html +*/ + +OneToOneMappingTable_t full2half[] = { + MAKE_PAIR( 0x2190, 0xFFE9 ), // LEFTWARDS ARROW --> HALFWIDTH LEFTWARDS ARROW + MAKE_PAIR( 0x2191, 0xFFEA ), // UPWARDS ARROW --> HALFWIDTH UPWARDS ARROW + MAKE_PAIR( 0x2192, 0xFFEB ), // RIGHTWARDS ARROW --> HALFWIDTH RIGHTWARDS ARROW + MAKE_PAIR( 0x2193, 0xFFEC ), // DOWNWARDS ARROW --> HALFWIDTH DOWNWARDS ARROW + + MAKE_PAIR( 0x2212, 0x002D ), // MINUS SIGN --> HYPHEN-MINUS + + MAKE_PAIR( 0x2502, 0xFFE8 ), // BOX DRAWINGS LIGHT VERTICAL --> HALFWIDTH FORMS LIGHT VERTICAL + MAKE_PAIR( 0x25A0, 0xFFED ), // BLACK SQUARE --> HALFWIDTH BLACK SQUARE + MAKE_PAIR( 0x25CB, 0xFFEE ), // WHITE CIRCLE --> HALFWIDTH WHITE CIRCLE + MAKE_PAIR( 0x3000, 0x0020 ), // IDEOGRAPHIC SPACE --> SPACE + MAKE_PAIR( 0x3001, 0xFF64 ), // IDEOGRAPHIC COMMA --> HALFWIDTH IDEOGRAPHIC COMMA + MAKE_PAIR( 0x3002, 0xFF61 ), // IDEOGRAPHIC FULL STOP --> HALFWIDTH IDEOGRAPHIC FULL STOP + MAKE_PAIR( 0x300C, 0xFF62 ), // LEFT CORNER BRACKET --> HALFWIDTH LEFT CORNER BRACKET + MAKE_PAIR( 0x300D, 0xFF63 ), // RIGHT CORNER BRACKET --> HALFWIDTH RIGHT CORNER BRACKET + MAKE_PAIR( 0x3099, 0xFF9E ), // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK --> HALFWIDTH KATAKANA VOICED SOUND MARK + MAKE_PAIR( 0x309A, 0xFF9F ), // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK --> HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + MAKE_PAIR( 0x30A1, 0xFF67 ), // KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER SMALL A + MAKE_PAIR( 0x30A2, 0xFF71 ), // KATAKANA LETTER A --> HALFWIDTH KATAKANA LETTER A + MAKE_PAIR( 0x30A3, 0xFF68 ), // KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER SMALL I + MAKE_PAIR( 0x30A4, 0xFF72 ), // KATAKANA LETTER I --> HALFWIDTH KATAKANA LETTER I + MAKE_PAIR( 0x30A5, 0xFF69 ), // KATAKANA LETTER SMALL U --> 
HALFWIDTH KATAKANA LETTER SMALL U + MAKE_PAIR( 0x30A6, 0xFF73 ), // KATAKANA LETTER U --> HALFWIDTH KATAKANA LETTER U + MAKE_PAIR( 0x30A7, 0xFF6A ), // KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER SMALL E + MAKE_PAIR( 0x30A8, 0xFF74 ), // KATAKANA LETTER E --> HALFWIDTH KATAKANA LETTER E + MAKE_PAIR( 0x30A9, 0xFF6B ), // KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER SMALL O + MAKE_PAIR( 0x30AA, 0xFF75 ), // KATAKANA LETTER O --> HALFWIDTH KATAKANA LETTER O + MAKE_PAIR( 0x30AB, 0xFF76 ), // KATAKANA LETTER KA --> HALFWIDTH KATAKANA LETTER KA + MAKE_PAIR( 0x30AD, 0xFF77 ), // KATAKANA LETTER KI --> HALFWIDTH KATAKANA LETTER KI + MAKE_PAIR( 0x30AF, 0xFF78 ), // KATAKANA LETTER KU --> HALFWIDTH KATAKANA LETTER KU + MAKE_PAIR( 0x30B1, 0xFF79 ), // KATAKANA LETTER KE --> HALFWIDTH KATAKANA LETTER KE + MAKE_PAIR( 0x30B3, 0xFF7A ), // KATAKANA LETTER KO --> HALFWIDTH KATAKANA LETTER KO + MAKE_PAIR( 0x30B5, 0xFF7B ), // KATAKANA LETTER SA --> HALFWIDTH KATAKANA LETTER SA + MAKE_PAIR( 0x30B7, 0xFF7C ), // KATAKANA LETTER SI --> HALFWIDTH KATAKANA LETTER SI + MAKE_PAIR( 0x30B9, 0xFF7D ), // KATAKANA LETTER SU --> HALFWIDTH KATAKANA LETTER SU + MAKE_PAIR( 0x30BB, 0xFF7E ), // KATAKANA LETTER SE --> HALFWIDTH KATAKANA LETTER SE + MAKE_PAIR( 0x30BD, 0xFF7F ), // KATAKANA LETTER SO --> HALFWIDTH KATAKANA LETTER SO + MAKE_PAIR( 0x30BF, 0xFF80 ), // KATAKANA LETTER TA --> HALFWIDTH KATAKANA LETTER TA + MAKE_PAIR( 0x30C1, 0xFF81 ), // KATAKANA LETTER TI --> HALFWIDTH KATAKANA LETTER TI + MAKE_PAIR( 0x30C3, 0xFF6F ), // KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER SMALL TU + MAKE_PAIR( 0x30C4, 0xFF82 ), // KATAKANA LETTER TU --> HALFWIDTH KATAKANA LETTER TU + MAKE_PAIR( 0x30C6, 0xFF83 ), // KATAKANA LETTER TE --> HALFWIDTH KATAKANA LETTER TE + MAKE_PAIR( 0x30C8, 0xFF84 ), // KATAKANA LETTER TO --> HALFWIDTH KATAKANA LETTER TO + MAKE_PAIR( 0x30CA, 0xFF85 ), // KATAKANA LETTER NA --> HALFWIDTH KATAKANA LETTER NA + MAKE_PAIR( 0x30CB, 0xFF86 ), // KATAKANA 
LETTER NI --> HALFWIDTH KATAKANA LETTER NI + MAKE_PAIR( 0x30CC, 0xFF87 ), // KATAKANA LETTER NU --> HALFWIDTH KATAKANA LETTER NU + MAKE_PAIR( 0x30CD, 0xFF88 ), // KATAKANA LETTER NE --> HALFWIDTH KATAKANA LETTER NE + MAKE_PAIR( 0x30CE, 0xFF89 ), // KATAKANA LETTER NO --> HALFWIDTH KATAKANA LETTER NO + MAKE_PAIR( 0x30CF, 0xFF8A ), // KATAKANA LETTER HA --> HALFWIDTH KATAKANA LETTER HA + MAKE_PAIR( 0x30D2, 0xFF8B ), // KATAKANA LETTER HI --> HALFWIDTH KATAKANA LETTER HI + MAKE_PAIR( 0x30D5, 0xFF8C ), // KATAKANA LETTER HU --> HALFWIDTH KATAKANA LETTER HU + MAKE_PAIR( 0x30D8, 0xFF8D ), // KATAKANA LETTER HE --> HALFWIDTH KATAKANA LETTER HE + MAKE_PAIR( 0x30DB, 0xFF8E ), // KATAKANA LETTER HO --> HALFWIDTH KATAKANA LETTER HO + MAKE_PAIR( 0x30DE, 0xFF8F ), // KATAKANA LETTER MA --> HALFWIDTH KATAKANA LETTER MA + MAKE_PAIR( 0x30DF, 0xFF90 ), // KATAKANA LETTER MI --> HALFWIDTH KATAKANA LETTER MI + MAKE_PAIR( 0x30E0, 0xFF91 ), // KATAKANA LETTER MU --> HALFWIDTH KATAKANA LETTER MU + MAKE_PAIR( 0x30E1, 0xFF92 ), // KATAKANA LETTER ME --> HALFWIDTH KATAKANA LETTER ME + MAKE_PAIR( 0x30E2, 0xFF93 ), // KATAKANA LETTER MO --> HALFWIDTH KATAKANA LETTER MO + MAKE_PAIR( 0x30E3, 0xFF6C ), // KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER SMALL YA + MAKE_PAIR( 0x30E4, 0xFF94 ), // KATAKANA LETTER YA --> HALFWIDTH KATAKANA LETTER YA + MAKE_PAIR( 0x30E5, 0xFF6D ), // KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER SMALL YU + MAKE_PAIR( 0x30E6, 0xFF95 ), // KATAKANA LETTER YU --> HALFWIDTH KATAKANA LETTER YU + MAKE_PAIR( 0x30E7, 0xFF6E ), // KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER SMALL YO + MAKE_PAIR( 0x30E8, 0xFF96 ), // KATAKANA LETTER YO --> HALFWIDTH KATAKANA LETTER YO + MAKE_PAIR( 0x30E9, 0xFF97 ), // KATAKANA LETTER RA --> HALFWIDTH KATAKANA LETTER RA + MAKE_PAIR( 0x30EA, 0xFF98 ), // KATAKANA LETTER RI --> HALFWIDTH KATAKANA LETTER RI + MAKE_PAIR( 0x30EB, 0xFF99 ), // KATAKANA LETTER RU --> HALFWIDTH KATAKANA LETTER RU + MAKE_PAIR( 0x30EC, 0xFF9A 
), // KATAKANA LETTER RE --> HALFWIDTH KATAKANA LETTER RE + MAKE_PAIR( 0x30ED, 0xFF9B ), // KATAKANA LETTER RO --> HALFWIDTH KATAKANA LETTER RO + MAKE_PAIR( 0x30EF, 0xFF9C ), // KATAKANA LETTER WA --> HALFWIDTH KATAKANA LETTER WA + MAKE_PAIR( 0x30F2, 0xFF66 ), // KATAKANA LETTER WO --> HALFWIDTH KATAKANA LETTER WO + MAKE_PAIR( 0x30F3, 0xFF9D ), // KATAKANA LETTER N --> HALFWIDTH KATAKANA LETTER N + MAKE_PAIR( 0x30FB, 0xFF65 ), // KATAKANA MIDDLE DOT --> HALFWIDTH KATAKANA MIDDLE DOT + MAKE_PAIR( 0x30FC, 0xFF70 ), // KATAKANA-HIRAGANA PROLONGED SOUND MARK --> HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + MAKE_PAIR( 0x3131, 0xFFA1 ), // HANGUL LETTER KIYEOK --> HALFWIDTH HANGUL LETTER KIYEOK + MAKE_PAIR( 0x3132, 0xFFA2 ), // HANGUL LETTER SSANGKIYEOK --> HALFWIDTH HANGUL LETTER SSANGKIYEOK + MAKE_PAIR( 0x3133, 0xFFA3 ), // HANGUL LETTER KIYEOK-SIOS --> HALFWIDTH HANGUL LETTER KIYEOK-SIOS + MAKE_PAIR( 0x3134, 0xFFA4 ), // HANGUL LETTER NIEUN --> HALFWIDTH HANGUL LETTER NIEUN + MAKE_PAIR( 0x3135, 0xFFA5 ), // HANGUL LETTER NIEUN-CIEUC --> HALFWIDTH HANGUL LETTER NIEUN-CIEUC + MAKE_PAIR( 0x3136, 0xFFA6 ), // HANGUL LETTER NIEUN-HIEUH --> HALFWIDTH HANGUL LETTER NIEUN-HIEUH + MAKE_PAIR( 0x3137, 0xFFA7 ), // HANGUL LETTER TIKEUT --> HALFWIDTH HANGUL LETTER TIKEUT + MAKE_PAIR( 0x3138, 0xFFA8 ), // HANGUL LETTER SSANGTIKEUT --> HALFWIDTH HANGUL LETTER SSANGTIKEUT + MAKE_PAIR( 0x3139, 0xFFA9 ), // HANGUL LETTER RIEUL --> HALFWIDTH HANGUL LETTER RIEUL + MAKE_PAIR( 0x313A, 0xFFAA ), // HANGUL LETTER RIEUL-KIYEOK --> HALFWIDTH HANGUL LETTER RIEUL-KIYEOK + MAKE_PAIR( 0x313B, 0xFFAB ), // HANGUL LETTER RIEUL-MIEUM --> HALFWIDTH HANGUL LETTER RIEUL-MIEUM + MAKE_PAIR( 0x313C, 0xFFAC ), // HANGUL LETTER RIEUL-PIEUP --> HALFWIDTH HANGUL LETTER RIEUL-PIEUP + MAKE_PAIR( 0x313D, 0xFFAD ), // HANGUL LETTER RIEUL-SIOS --> HALFWIDTH HANGUL LETTER RIEUL-SIOS + MAKE_PAIR( 0x313E, 0xFFAE ), // HANGUL LETTER RIEUL-THIEUTH --> HALFWIDTH HANGUL LETTER RIEUL-THIEUTH + MAKE_PAIR( 0x313F, 
0xFFAF ), // HANGUL LETTER RIEUL-PHIEUPH --> HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH + MAKE_PAIR( 0x3140, 0xFFB0 ), // HANGUL LETTER RIEUL-HIEUH --> HALFWIDTH HANGUL LETTER RIEUL-HIEUH + MAKE_PAIR( 0x3141, 0xFFB1 ), // HANGUL LETTER MIEUM --> HALFWIDTH HANGUL LETTER MIEUM + MAKE_PAIR( 0x3142, 0xFFB2 ), // HANGUL LETTER PIEUP --> HALFWIDTH HANGUL LETTER PIEUP + MAKE_PAIR( 0x3143, 0xFFB3 ), // HANGUL LETTER SSANGPIEUP --> HALFWIDTH HANGUL LETTER SSANGPIEUP + MAKE_PAIR( 0x3144, 0xFFB4 ), // HANGUL LETTER PIEUP-SIOS --> HALFWIDTH HANGUL LETTER PIEUP-SIOS + MAKE_PAIR( 0x3145, 0xFFB5 ), // HANGUL LETTER SIOS --> HALFWIDTH HANGUL LETTER SIOS + MAKE_PAIR( 0x3146, 0xFFB6 ), // HANGUL LETTER SSANGSIOS --> HALFWIDTH HANGUL LETTER SSANGSIOS + MAKE_PAIR( 0x3147, 0xFFB7 ), // HANGUL LETTER IEUNG --> HALFWIDTH HANGUL LETTER IEUNG + MAKE_PAIR( 0x3148, 0xFFB8 ), // HANGUL LETTER CIEUC --> HALFWIDTH HANGUL LETTER CIEUC + MAKE_PAIR( 0x3149, 0xFFB9 ), // HANGUL LETTER SSANGCIEUC --> HALFWIDTH HANGUL LETTER SSANGCIEUC + MAKE_PAIR( 0x314A, 0xFFBA ), // HANGUL LETTER CHIEUCH --> HALFWIDTH HANGUL LETTER CHIEUCH + MAKE_PAIR( 0x314B, 0xFFBB ), // HANGUL LETTER KHIEUKH --> HALFWIDTH HANGUL LETTER KHIEUKH + MAKE_PAIR( 0x314C, 0xFFBC ), // HANGUL LETTER THIEUTH --> HALFWIDTH HANGUL LETTER THIEUTH + MAKE_PAIR( 0x314D, 0xFFBD ), // HANGUL LETTER PHIEUPH --> HALFWIDTH HANGUL LETTER PHIEUPH + MAKE_PAIR( 0x314E, 0xFFBE ), // HANGUL LETTER HIEUH --> HALFWIDTH HANGUL LETTER HIEUH + MAKE_PAIR( 0x314F, 0xFFC2 ), // HANGUL LETTER A --> HALFWIDTH HANGUL LETTER A + MAKE_PAIR( 0x3150, 0xFFC3 ), // HANGUL LETTER AE --> HALFWIDTH HANGUL LETTER AE + MAKE_PAIR( 0x3151, 0xFFC4 ), // HANGUL LETTER YA --> HALFWIDTH HANGUL LETTER YA + MAKE_PAIR( 0x3152, 0xFFC5 ), // HANGUL LETTER YAE --> HALFWIDTH HANGUL LETTER YAE + MAKE_PAIR( 0x3153, 0xFFC6 ), // HANGUL LETTER EO --> HALFWIDTH HANGUL LETTER EO + MAKE_PAIR( 0x3154, 0xFFC7 ), // HANGUL LETTER E --> HALFWIDTH HANGUL LETTER E + MAKE_PAIR( 0x3155, 0xFFCA ), // HANGUL 
LETTER YEO --> HALFWIDTH HANGUL LETTER YEO + MAKE_PAIR( 0x3156, 0xFFCB ), // HANGUL LETTER YE --> HALFWIDTH HANGUL LETTER YE + MAKE_PAIR( 0x3157, 0xFFCC ), // HANGUL LETTER O --> HALFWIDTH HANGUL LETTER O + MAKE_PAIR( 0x3158, 0xFFCD ), // HANGUL LETTER WA --> HALFWIDTH HANGUL LETTER WA + MAKE_PAIR( 0x3159, 0xFFCE ), // HANGUL LETTER WAE --> HALFWIDTH HANGUL LETTER WAE + MAKE_PAIR( 0x315A, 0xFFCF ), // HANGUL LETTER OE --> HALFWIDTH HANGUL LETTER OE + MAKE_PAIR( 0x315B, 0xFFD2 ), // HANGUL LETTER YO --> HALFWIDTH HANGUL LETTER YO + MAKE_PAIR( 0x315C, 0xFFD3 ), // HANGUL LETTER U --> HALFWIDTH HANGUL LETTER U + MAKE_PAIR( 0x315D, 0xFFD4 ), // HANGUL LETTER WEO --> HALFWIDTH HANGUL LETTER WEO + MAKE_PAIR( 0x315E, 0xFFD5 ), // HANGUL LETTER WE --> HALFWIDTH HANGUL LETTER WE + MAKE_PAIR( 0x315F, 0xFFD6 ), // HANGUL LETTER WI --> HALFWIDTH HANGUL LETTER WI + MAKE_PAIR( 0x3160, 0xFFD7 ), // HANGUL LETTER YU --> HALFWIDTH HANGUL LETTER YU + MAKE_PAIR( 0x3161, 0xFFDA ), // HANGUL LETTER EU --> HALFWIDTH HANGUL LETTER EU + MAKE_PAIR( 0x3162, 0xFFDB ), // HANGUL LETTER YI --> HALFWIDTH HANGUL LETTER YI + MAKE_PAIR( 0x3163, 0xFFDC ), // HANGUL LETTER I --> HALFWIDTH HANGUL LETTER I + MAKE_PAIR( 0x3164, 0xFFA0 ), // HANGUL FILLER --> HALFWIDTH HANGUL FILLER + MAKE_PAIR( 0xFF01, 0x0021 ), // FULLWIDTH EXCLAMATION MARK --> EXCLAMATION MARK + MAKE_PAIR( 0xFF02, 0x0022 ), // FULLWIDTH QUOTATION MARK --> QUOTATION MARK + MAKE_PAIR( 0xFF03, 0x0023 ), // FULLWIDTH NUMBER SIGN --> NUMBER SIGN + MAKE_PAIR( 0xFF04, 0x0024 ), // FULLWIDTH DOLLAR SIGN --> DOLLAR SIGN + MAKE_PAIR( 0xFF05, 0x0025 ), // FULLWIDTH PERCENT SIGN --> PERCENT SIGN + MAKE_PAIR( 0xFF06, 0x0026 ), // FULLWIDTH AMPERSAND --> AMPERSAND + MAKE_PAIR( 0xFF07, 0x0027 ), // FULLWIDTH APOSTROPHE --> APOSTROPHE + MAKE_PAIR( 0xFF08, 0x0028 ), // FULLWIDTH LEFT PARENTHESIS --> LEFT PARENTHESIS + MAKE_PAIR( 0xFF09, 0x0029 ), // FULLWIDTH RIGHT PARENTHESIS --> RIGHT PARENTHESIS + MAKE_PAIR( 0xFF0A, 0x002A ), // FULLWIDTH ASTERISK 
--> ASTERISK + MAKE_PAIR( 0xFF0B, 0x002B ), // FULLWIDTH PLUS SIGN --> PLUS SIGN + MAKE_PAIR( 0xFF0C, 0x002C ), // FULLWIDTH COMMA --> COMMA + +//MAKE_PAIR( 0xFF0D, 0x002D ), // FULLWIDTH HYPHEN-MINUS --> HYPHEN-MINUS + + MAKE_PAIR( 0xFF0E, 0x002E ), // FULLWIDTH FULL STOP --> FULL STOP + MAKE_PAIR( 0xFF0F, 0x002F ), // FULLWIDTH SOLIDUS --> SOLIDUS + MAKE_PAIR( 0xFF10, 0x0030 ), // FULLWIDTH DIGIT ZERO --> DIGIT ZERO + MAKE_PAIR( 0xFF11, 0x0031 ), // FULLWIDTH DIGIT ONE --> DIGIT ONE + MAKE_PAIR( 0xFF12, 0x0032 ), // FULLWIDTH DIGIT TWO --> DIGIT TWO + MAKE_PAIR( 0xFF13, 0x0033 ), // FULLWIDTH DIGIT THREE --> DIGIT THREE + MAKE_PAIR( 0xFF14, 0x0034 ), // FULLWIDTH DIGIT FOUR --> DIGIT FOUR + MAKE_PAIR( 0xFF15, 0x0035 ), // FULLWIDTH DIGIT FIVE --> DIGIT FIVE + MAKE_PAIR( 0xFF16, 0x0036 ), // FULLWIDTH DIGIT SIX --> DIGIT SIX + MAKE_PAIR( 0xFF17, 0x0037 ), // FULLWIDTH DIGIT SEVEN --> DIGIT SEVEN + MAKE_PAIR( 0xFF18, 0x0038 ), // FULLWIDTH DIGIT EIGHT --> DIGIT EIGHT + MAKE_PAIR( 0xFF19, 0x0039 ), // FULLWIDTH DIGIT NINE --> DIGIT NINE + MAKE_PAIR( 0xFF1A, 0x003A ), // FULLWIDTH COLON --> COLON + MAKE_PAIR( 0xFF1B, 0x003B ), // FULLWIDTH SEMICOLON --> SEMICOLON + MAKE_PAIR( 0xFF1C, 0x003C ), // FULLWIDTH LESS-THAN SIGN --> LESS-THAN SIGN + MAKE_PAIR( 0xFF1D, 0x003D ), // FULLWIDTH EQUALS SIGN --> EQUALS SIGN + MAKE_PAIR( 0xFF1E, 0x003E ), // FULLWIDTH GREATER-THAN SIGN --> GREATER-THAN SIGN + MAKE_PAIR( 0xFF1F, 0x003F ), // FULLWIDTH QUESTION MARK --> QUESTION MARK + MAKE_PAIR( 0xFF20, 0x0040 ), // FULLWIDTH COMMERCIAL AT --> COMMERCIAL AT + MAKE_PAIR( 0xFF21, 0x0041 ), // FULLWIDTH LATIN CAPITAL LETTER A --> LATIN CAPITAL LETTER A + MAKE_PAIR( 0xFF22, 0x0042 ), // FULLWIDTH LATIN CAPITAL LETTER B --> LATIN CAPITAL LETTER B + MAKE_PAIR( 0xFF23, 0x0043 ), // FULLWIDTH LATIN CAPITAL LETTER C --> LATIN CAPITAL LETTER C + MAKE_PAIR( 0xFF24, 0x0044 ), // FULLWIDTH LATIN CAPITAL LETTER D --> LATIN CAPITAL LETTER D + MAKE_PAIR( 0xFF25, 0x0045 ), // FULLWIDTH LATIN CAPITAL 
LETTER E --> LATIN CAPITAL LETTER E + MAKE_PAIR( 0xFF26, 0x0046 ), // FULLWIDTH LATIN CAPITAL LETTER F --> LATIN CAPITAL LETTER F + MAKE_PAIR( 0xFF27, 0x0047 ), // FULLWIDTH LATIN CAPITAL LETTER G --> LATIN CAPITAL LETTER G + MAKE_PAIR( 0xFF28, 0x0048 ), // FULLWIDTH LATIN CAPITAL LETTER H --> LATIN CAPITAL LETTER H + MAKE_PAIR( 0xFF29, 0x0049 ), // FULLWIDTH LATIN CAPITAL LETTER I --> LATIN CAPITAL LETTER I + MAKE_PAIR( 0xFF2A, 0x004A ), // FULLWIDTH LATIN CAPITAL LETTER J --> LATIN CAPITAL LETTER J + MAKE_PAIR( 0xFF2B, 0x004B ), // FULLWIDTH LATIN CAPITAL LETTER K --> LATIN CAPITAL LETTER K + MAKE_PAIR( 0xFF2C, 0x004C ), // FULLWIDTH LATIN CAPITAL LETTER L --> LATIN CAPITAL LETTER L + MAKE_PAIR( 0xFF2D, 0x004D ), // FULLWIDTH LATIN CAPITAL LETTER M --> LATIN CAPITAL LETTER M + MAKE_PAIR( 0xFF2E, 0x004E ), // FULLWIDTH LATIN CAPITAL LETTER N --> LATIN CAPITAL LETTER N + MAKE_PAIR( 0xFF2F, 0x004F ), // FULLWIDTH LATIN CAPITAL LETTER O --> LATIN CAPITAL LETTER O + MAKE_PAIR( 0xFF30, 0x0050 ), // FULLWIDTH LATIN CAPITAL LETTER P --> LATIN CAPITAL LETTER P + MAKE_PAIR( 0xFF31, 0x0051 ), // FULLWIDTH LATIN CAPITAL LETTER Q --> LATIN CAPITAL LETTER Q + MAKE_PAIR( 0xFF32, 0x0052 ), // FULLWIDTH LATIN CAPITAL LETTER R --> LATIN CAPITAL LETTER R + MAKE_PAIR( 0xFF33, 0x0053 ), // FULLWIDTH LATIN CAPITAL LETTER S --> LATIN CAPITAL LETTER S + MAKE_PAIR( 0xFF34, 0x0054 ), // FULLWIDTH LATIN CAPITAL LETTER T --> LATIN CAPITAL LETTER T + MAKE_PAIR( 0xFF35, 0x0055 ), // FULLWIDTH LATIN CAPITAL LETTER U --> LATIN CAPITAL LETTER U + MAKE_PAIR( 0xFF36, 0x0056 ), // FULLWIDTH LATIN CAPITAL LETTER V --> LATIN CAPITAL LETTER V + MAKE_PAIR( 0xFF37, 0x0057 ), // FULLWIDTH LATIN CAPITAL LETTER W --> LATIN CAPITAL LETTER W + MAKE_PAIR( 0xFF38, 0x0058 ), // FULLWIDTH LATIN CAPITAL LETTER X --> LATIN CAPITAL LETTER X + MAKE_PAIR( 0xFF39, 0x0059 ), // FULLWIDTH LATIN CAPITAL LETTER Y --> LATIN CAPITAL LETTER Y + MAKE_PAIR( 0xFF3A, 0x005A ), // FULLWIDTH LATIN CAPITAL LETTER Z --> LATIN 
CAPITAL LETTER Z + MAKE_PAIR( 0xFF3B, 0x005B ), // FULLWIDTH LEFT SQUARE BRACKET --> LEFT SQUARE BRACKET + MAKE_PAIR( 0xFF3C, 0x005C ), // FULLWIDTH REVERSE SOLIDUS --> REVERSE SOLIDUS + MAKE_PAIR( 0xFF3D, 0x005D ), // FULLWIDTH RIGHT SQUARE BRACKET --> RIGHT SQUARE BRACKET + MAKE_PAIR( 0xFF3E, 0x005E ), // FULLWIDTH CIRCUMFLEX ACCENT --> CIRCUMFLEX ACCENT + MAKE_PAIR( 0xFF3F, 0x005F ), // FULLWIDTH LOW LINE --> LOW LINE + MAKE_PAIR( 0xFF40, 0x0060 ), // FULLWIDTH GRAVE ACCENT --> GRAVE ACCENT + MAKE_PAIR( 0xFF41, 0x0061 ), // FULLWIDTH LATIN SMALL LETTER A --> LATIN SMALL LETTER A + MAKE_PAIR( 0xFF42, 0x0062 ), // FULLWIDTH LATIN SMALL LETTER B --> LATIN SMALL LETTER B + MAKE_PAIR( 0xFF43, 0x0063 ), // FULLWIDTH LATIN SMALL LETTER C --> LATIN SMALL LETTER C + MAKE_PAIR( 0xFF44, 0x0064 ), // FULLWIDTH LATIN SMALL LETTER D --> LATIN SMALL LETTER D + MAKE_PAIR( 0xFF45, 0x0065 ), // FULLWIDTH LATIN SMALL LETTER E --> LATIN SMALL LETTER E + MAKE_PAIR( 0xFF46, 0x0066 ), // FULLWIDTH LATIN SMALL LETTER F --> LATIN SMALL LETTER F + MAKE_PAIR( 0xFF47, 0x0067 ), // FULLWIDTH LATIN SMALL LETTER G --> LATIN SMALL LETTER G + MAKE_PAIR( 0xFF48, 0x0068 ), // FULLWIDTH LATIN SMALL LETTER H --> LATIN SMALL LETTER H + MAKE_PAIR( 0xFF49, 0x0069 ), // FULLWIDTH LATIN SMALL LETTER I --> LATIN SMALL LETTER I + MAKE_PAIR( 0xFF4A, 0x006A ), // FULLWIDTH LATIN SMALL LETTER J --> LATIN SMALL LETTER J + MAKE_PAIR( 0xFF4B, 0x006B ), // FULLWIDTH LATIN SMALL LETTER K --> LATIN SMALL LETTER K + MAKE_PAIR( 0xFF4C, 0x006C ), // FULLWIDTH LATIN SMALL LETTER L --> LATIN SMALL LETTER L + MAKE_PAIR( 0xFF4D, 0x006D ), // FULLWIDTH LATIN SMALL LETTER M --> LATIN SMALL LETTER M + MAKE_PAIR( 0xFF4E, 0x006E ), // FULLWIDTH LATIN SMALL LETTER N --> LATIN SMALL LETTER N + MAKE_PAIR( 0xFF4F, 0x006F ), // FULLWIDTH LATIN SMALL LETTER O --> LATIN SMALL LETTER O + MAKE_PAIR( 0xFF50, 0x0070 ), // FULLWIDTH LATIN SMALL LETTER P --> LATIN SMALL LETTER P + MAKE_PAIR( 0xFF51, 0x0071 ), // FULLWIDTH LATIN SMALL 
LETTER Q --> LATIN SMALL LETTER Q + MAKE_PAIR( 0xFF52, 0x0072 ), // FULLWIDTH LATIN SMALL LETTER R --> LATIN SMALL LETTER R + MAKE_PAIR( 0xFF53, 0x0073 ), // FULLWIDTH LATIN SMALL LETTER S --> LATIN SMALL LETTER S + MAKE_PAIR( 0xFF54, 0x0074 ), // FULLWIDTH LATIN SMALL LETTER T --> LATIN SMALL LETTER T + MAKE_PAIR( 0xFF55, 0x0075 ), // FULLWIDTH LATIN SMALL LETTER U --> LATIN SMALL LETTER U + MAKE_PAIR( 0xFF56, 0x0076 ), // FULLWIDTH LATIN SMALL LETTER V --> LATIN SMALL LETTER V + MAKE_PAIR( 0xFF57, 0x0077 ), // FULLWIDTH LATIN SMALL LETTER W --> LATIN SMALL LETTER W + MAKE_PAIR( 0xFF58, 0x0078 ), // FULLWIDTH LATIN SMALL LETTER X --> LATIN SMALL LETTER X + MAKE_PAIR( 0xFF59, 0x0079 ), // FULLWIDTH LATIN SMALL LETTER Y --> LATIN SMALL LETTER Y + MAKE_PAIR( 0xFF5A, 0x007A ), // FULLWIDTH LATIN SMALL LETTER Z --> LATIN SMALL LETTER Z + MAKE_PAIR( 0xFF5B, 0x007B ), // FULLWIDTH LEFT CURLY BRACKET --> LEFT CURLY BRACKET + MAKE_PAIR( 0xFF5C, 0x007C ), // FULLWIDTH VERTICAL LINE --> VERTICAL LINE + MAKE_PAIR( 0xFF5D, 0x007D ), // FULLWIDTH RIGHT CURLY BRACKET --> RIGHT CURLY BRACKET + MAKE_PAIR( 0xFF5E, 0x007E ), // FULLWIDTH TILDE --> TILDE + MAKE_PAIR( 0xFFE0, 0x00A2 ), // FULLWIDTH CENT SIGN --> CENT SIGN + MAKE_PAIR( 0xFFE1, 0x00A3 ), // FULLWIDTH POUND SIGN --> POUND SIGN + MAKE_PAIR( 0xFFE2, 0x00AC ), // FULLWIDTH NOT SIGN --> NOT SIGN + MAKE_PAIR( 0xFFE3, 0x00AF ), // FULLWIDTH MACRON --> MACRON + MAKE_PAIR( 0xFFE4, 0x00A6 ), // FULLWIDTH BROKEN BAR --> BROKEN BAR + MAKE_PAIR( 0xFFE5, 0x00A5 ), // FULLWIDTH YEN SIGN --> YEN SIGN + MAKE_PAIR( 0xFFE6, 0x20A9 ) // FULLWIDTH WON SIGN --> WON SIGN +}; + +/** + * Transliterate fullwidth to halfwidth. + * The output is a reference of OUString. You MUST delete this object when you do not need to use it any more + * The output string contains a transliterated string only, not whole string. 
+ */ +OUString SAL_CALL +fullwidthToHalfwidth::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Decomposition: GA --> KA + voice-mark + OUString newStr = decompose_ja_voiced_sound_marks (inStr, startPos, nCount, offset); + + // One to One mapping + oneToOneMapping table(full2half, sizeof(full2half)); + return transliteration_OneToOne::transliterate( newStr, 0, newStr.getLength(), table ); +} + +} } } } + diff --git a/i18npool/source/transliteration/halfwidthToFullwidth.cxx b/i18npool/source/transliteration/halfwidthToFullwidth.cxx new file mode 100644 index 000000000000..eacacbf2aeec --- /dev/null +++ b/i18npool/source/transliteration/halfwidthToFullwidth.cxx @@ -0,0 +1,599 @@ +/************************************************************************* + * + * $RCSfile: halfwidthToFullwidth.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+ * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_halfwidthToFullwidth +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +halfwidthToFullwidth::halfwidthToFullwidth() +{ + transliterationName = "halfwidthToFullwidth"; + implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTH_FULLWIDTH"; +} + +/* + Unicode Normalization Forms + http://www.unicode.org/unicode/reports/tr15/index.html + http://www.w3.org/International/charlint/ +*/ + +const sal_Unicode composition_table[][2] = { + { 0x0000, 0x0000 }, // 0x3040 + { 0x0000, 0x0000 }, // 0x3041 HIRAGANA LETTER SMALL A + { 0x0000, 0x0000 }, // 0x3042 HIRAGANA LETTER A + { 0x0000, 0x0000 }, // 0x3043 HIRAGANA LETTER SMALL I + { 0x0000, 0x0000 }, // 0x3044 HIRAGANA LETTER I + { 0x0000, 0x0000 }, // 0x3045 HIRAGANA LETTER SMALL U + { 0x3094, 0x0000 }, // 0x3046 HIRAGANA LETTER U --> HIRAGANA LETTER VU + { 0x0000, 0x0000 }, // 0x3047 HIRAGANA LETTER SMALL E + { 0x0000, 0x0000 }, // 0x3048 HIRAGANA LETTER E + { 0x0000, 0x0000 }, // 0x3049 HIRAGANA LETTER SMALL O + { 0x0000, 0x0000 }, // 0x304a HIRAGANA LETTER O + { 0x304c, 0x0000 }, // 0x304b HIRAGANA LETTER KA --> HIRAGANA LETTER GA + { 0x0000, 0x0000 }, // 0x304c HIRAGANA LETTER GA + { 0x304e, 0x0000 }, // 0x304d HIRAGANA LETTER KI --> HIRAGANA LETTER GI + { 0x0000, 0x0000 }, // 0x304e HIRAGANA LETTER GI + { 0x3050, 0x0000 }, // 0x304f HIRAGANA 
LETTER KU --> HIRAGANA LETTER GU + { 0x0000, 0x0000 }, // 0x3050 HIRAGANA LETTER GU + { 0x3052, 0x0000 }, // 0x3051 HIRAGANA LETTER KE --> HIRAGANA LETTER GE + { 0x0000, 0x0000 }, // 0x3052 HIRAGANA LETTER GE + { 0x3054, 0x0000 }, // 0x3053 HIRAGANA LETTER KO --> HIRAGANA LETTER GO + { 0x0000, 0x0000 }, // 0x3054 HIRAGANA LETTER GO + { 0x3056, 0x0000 }, // 0x3055 HIRAGANA LETTER SA --> HIRAGANA LETTER ZA + { 0x0000, 0x0000 }, // 0x3056 HIRAGANA LETTER ZA + { 0x3058, 0x0000 }, // 0x3057 HIRAGANA LETTER SI --> HIRAGANA LETTER ZI + { 0x0000, 0x0000 }, // 0x3058 HIRAGANA LETTER ZI + { 0x305a, 0x0000 }, // 0x3059 HIRAGANA LETTER SU --> HIRAGANA LETTER ZU + { 0x0000, 0x0000 }, // 0x305a HIRAGANA LETTER ZU + { 0x305c, 0x0000 }, // 0x305b HIRAGANA LETTER SE --> HIRAGANA LETTER ZE + { 0x0000, 0x0000 }, // 0x305c HIRAGANA LETTER ZE + { 0x305e, 0x0000 }, // 0x305d HIRAGANA LETTER SO --> HIRAGANA LETTER ZO + { 0x0000, 0x0000 }, // 0x305e HIRAGANA LETTER ZO + { 0x3060, 0x0000 }, // 0x305f HIRAGANA LETTER TA --> HIRAGANA LETTER DA + { 0x0000, 0x0000 }, // 0x3060 HIRAGANA LETTER DA + { 0x3062, 0x0000 }, // 0x3061 HIRAGANA LETTER TI --> HIRAGANA LETTER DI + { 0x0000, 0x0000 }, // 0x3062 HIRAGANA LETTER DI + { 0x0000, 0x0000 }, // 0x3063 HIRAGANA LETTER SMALL TU + { 0x3065, 0x0000 }, // 0x3064 HIRAGANA LETTER TU --> HIRAGANA LETTER DU + { 0x0000, 0x0000 }, // 0x3065 HIRAGANA LETTER DU + { 0x3067, 0x0000 }, // 0x3066 HIRAGANA LETTER TE --> HIRAGANA LETTER DE + { 0x0000, 0x0000 }, // 0x3067 HIRAGANA LETTER DE + { 0x3069, 0x0000 }, // 0x3068 HIRAGANA LETTER TO --> HIRAGANA LETTER DO + { 0x0000, 0x0000 }, // 0x3069 HIRAGANA LETTER DO + { 0x0000, 0x0000 }, // 0x306a HIRAGANA LETTER NA + { 0x0000, 0x0000 }, // 0x306b HIRAGANA LETTER NI + { 0x0000, 0x0000 }, // 0x306c HIRAGANA LETTER NU + { 0x0000, 0x0000 }, // 0x306d HIRAGANA LETTER NE + { 0x0000, 0x0000 }, // 0x306e HIRAGANA LETTER NO + { 0x3070, 0x3071 }, // 0x306f HIRAGANA LETTER HA --> HIRAGANA LETTER BA or HIRAGANA LETTER PA + { 
0x0000, 0x0000 }, // 0x3070 HIRAGANA LETTER BA + { 0x0000, 0x0000 }, // 0x3071 HIRAGANA LETTER PA + { 0x3073, 0x3074 }, // 0x3072 HIRAGANA LETTER HI --> HIRAGANA LETTER BI or HIRAGANA LETTER PI + { 0x0000, 0x0000 }, // 0x3073 HIRAGANA LETTER BI + { 0x0000, 0x0000 }, // 0x3074 HIRAGANA LETTER PI + { 0x3076, 0x3077 }, // 0x3075 HIRAGANA LETTER HU --> HIRAGANA LETTER BU or HIRAGANA LETTER PU + { 0x0000, 0x0000 }, // 0x3076 HIRAGANA LETTER BU + { 0x0000, 0x0000 }, // 0x3077 HIRAGANA LETTER PU + { 0x3079, 0x307a }, // 0x3078 HIRAGANA LETTER HE --> HIRAGANA LETTER BE or HIRAGANA LETTER PE + { 0x0000, 0x0000 }, // 0x3079 HIRAGANA LETTER BE + { 0x0000, 0x0000 }, // 0x307a HIRAGANA LETTER PE + { 0x307c, 0x307d }, // 0x307b HIRAGANA LETTER HO --> HIRAGANA LETTER BO or HIRAGANA LETTER PO + { 0x0000, 0x0000 }, // 0x307c HIRAGANA LETTER BO + { 0x0000, 0x0000 }, // 0x307d HIRAGANA LETTER PO + { 0x0000, 0x0000 }, // 0x307e HIRAGANA LETTER MA + { 0x0000, 0x0000 }, // 0x307f HIRAGANA LETTER MI + { 0x0000, 0x0000 }, // 0x3080 HIRAGANA LETTER MU + { 0x0000, 0x0000 }, // 0x3081 HIRAGANA LETTER ME + { 0x0000, 0x0000 }, // 0x3082 HIRAGANA LETTER MO + { 0x0000, 0x0000 }, // 0x3083 HIRAGANA LETTER SMALL YA + { 0x0000, 0x0000 }, // 0x3084 HIRAGANA LETTER YA + { 0x0000, 0x0000 }, // 0x3085 HIRAGANA LETTER SMALL YU + { 0x0000, 0x0000 }, // 0x3086 HIRAGANA LETTER YU + { 0x0000, 0x0000 }, // 0x3087 HIRAGANA LETTER SMALL YO + { 0x0000, 0x0000 }, // 0x3088 HIRAGANA LETTER YO + { 0x0000, 0x0000 }, // 0x3089 HIRAGANA LETTER RA + { 0x0000, 0x0000 }, // 0x308a HIRAGANA LETTER RI + { 0x0000, 0x0000 }, // 0x308b HIRAGANA LETTER RU + { 0x0000, 0x0000 }, // 0x308c HIRAGANA LETTER RE + { 0x0000, 0x0000 }, // 0x308d HIRAGANA LETTER RO + { 0x0000, 0x0000 }, // 0x308e HIRAGANA LETTER SMALL WA + { 0x0000, 0x0000 }, // 0x308f HIRAGANA LETTER WA + { 0x0000, 0x0000 }, // 0x3090 HIRAGANA LETTER WI + { 0x0000, 0x0000 }, // 0x3091 HIRAGANA LETTER WE + { 0x0000, 0x0000 }, // 0x3092 HIRAGANA LETTER WO + { 0x0000, 
0x0000 }, // 0x3093 HIRAGANA LETTER N + { 0x0000, 0x0000 }, // 0x3094 HIRAGANA LETTER VU + { 0x0000, 0x0000 }, // 0x3095 + { 0x0000, 0x0000 }, // 0x3096 + { 0x0000, 0x0000 }, // 0x3097 + { 0x0000, 0x0000 }, // 0x3098 + { 0x0000, 0x0000 }, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + { 0x0000, 0x0000 }, // 0x309d HIRAGANA ITERATION MARK + { 0x0000, 0x0000 }, // 0x309e HIRAGANA VOICED ITERATION MARK + { 0x0000, 0x0000 }, // 0x309f + { 0x0000, 0x0000 }, // 0x30a0 + { 0x0000, 0x0000 }, // 0x30a1 KATAKANA LETTER SMALL A + { 0x0000, 0x0000 }, // 0x30a2 KATAKANA LETTER A + { 0x0000, 0x0000 }, // 0x30a3 KATAKANA LETTER SMALL I + { 0x0000, 0x0000 }, // 0x30a4 KATAKANA LETTER I + { 0x0000, 0x0000 }, // 0x30a5 KATAKANA LETTER SMALL U + { 0x30f4, 0x0000 }, // 0x30a6 KATAKANA LETTER U --> KATAKANA LETTER VU + { 0x0000, 0x0000 }, // 0x30a7 KATAKANA LETTER SMALL E + { 0x0000, 0x0000 }, // 0x30a8 KATAKANA LETTER E + { 0x0000, 0x0000 }, // 0x30a9 KATAKANA LETTER SMALL O + { 0x0000, 0x0000 }, // 0x30aa KATAKANA LETTER O + { 0x30ac, 0x0000 }, // 0x30ab KATAKANA LETTER KA --> KATAKANA LETTER GA + { 0x0000, 0x0000 }, // 0x30ac KATAKANA LETTER GA + { 0x30ae, 0x0000 }, // 0x30ad KATAKANA LETTER KI --> KATAKANA LETTER GI + { 0x0000, 0x0000 }, // 0x30ae KATAKANA LETTER GI + { 0x30b0, 0x0000 }, // 0x30af KATAKANA LETTER KU --> KATAKANA LETTER GU + { 0x0000, 0x0000 }, // 0x30b0 KATAKANA LETTER GU + { 0x30b2, 0x0000 }, // 0x30b1 KATAKANA LETTER KE --> KATAKANA LETTER GE + { 0x0000, 0x0000 }, // 0x30b2 KATAKANA LETTER GE + { 0x30b4, 0x0000 }, // 0x30b3 KATAKANA LETTER KO --> KATAKANA LETTER GO + { 0x0000, 0x0000 }, // 0x30b4 KATAKANA LETTER GO + { 0x30b6, 0x0000 }, // 0x30b5 KATAKANA LETTER SA --> KATAKANA LETTER ZA + { 0x0000, 0x0000 }, // 0x30b6 KATAKANA 
LETTER ZA + { 0x30b8, 0x0000 }, // 0x30b7 KATAKANA LETTER SI --> KATAKANA LETTER ZI + { 0x0000, 0x0000 }, // 0x30b8 KATAKANA LETTER ZI + { 0x30ba, 0x0000 }, // 0x30b9 KATAKANA LETTER SU --> KATAKANA LETTER ZU + { 0x0000, 0x0000 }, // 0x30ba KATAKANA LETTER ZU + { 0x30bc, 0x0000 }, // 0x30bb KATAKANA LETTER SE --> KATAKANA LETTER ZE + { 0x0000, 0x0000 }, // 0x30bc KATAKANA LETTER ZE + { 0x30be, 0x0000 }, // 0x30bd KATAKANA LETTER SO --> KATAKANA LETTER ZO + { 0x0000, 0x0000 }, // 0x30be KATAKANA LETTER ZO + { 0x30c0, 0x0000 }, // 0x30bf KATAKANA LETTER TA --> KATAKANA LETTER DA + { 0x0000, 0x0000 }, // 0x30c0 KATAKANA LETTER DA + { 0x30c2, 0x0000 }, // 0x30c1 KATAKANA LETTER TI --> KATAKANA LETTER DI + { 0x0000, 0x0000 }, // 0x30c2 KATAKANA LETTER DI + { 0x0000, 0x0000 }, // 0x30c3 KATAKANA LETTER SMALL TU + { 0x30c5, 0x0000 }, // 0x30c4 KATAKANA LETTER TU --> KATAKANA LETTER DU + { 0x0000, 0x0000 }, // 0x30c5 KATAKANA LETTER DU + { 0x30c7, 0x0000 }, // 0x30c6 KATAKANA LETTER TE --> KATAKANA LETTER DE + { 0x0000, 0x0000 }, // 0x30c7 KATAKANA LETTER DE + { 0x30c9, 0x0000 }, // 0x30c8 KATAKANA LETTER TO --> KATAKANA LETTER DO + { 0x0000, 0x0000 }, // 0x30c9 KATAKANA LETTER DO + { 0x0000, 0x0000 }, // 0x30ca KATAKANA LETTER NA + { 0x0000, 0x0000 }, // 0x30cb KATAKANA LETTER NI + { 0x0000, 0x0000 }, // 0x30cc KATAKANA LETTER NU + { 0x0000, 0x0000 }, // 0x30cd KATAKANA LETTER NE + { 0x0000, 0x0000 }, // 0x30ce KATAKANA LETTER NO + { 0x30d0, 0x30d1 }, // 0x30cf KATAKANA LETTER HA --> KATAKANA LETTER BA or KATAKANA LETTER PA + { 0x0000, 0x0000 }, // 0x30d0 KATAKANA LETTER BA + { 0x0000, 0x0000 }, // 0x30d1 KATAKANA LETTER PA + { 0x30d3, 0x30d4 }, // 0x30d2 KATAKANA LETTER HI --> KATAKANA LETTER BI or KATAKANA LETTER PI + { 0x0000, 0x0000 }, // 0x30d3 KATAKANA LETTER BI + { 0x0000, 0x0000 }, // 0x30d4 KATAKANA LETTER PI + { 0x30d6, 0x30d7 }, // 0x30d5 KATAKANA LETTER HU --> KATAKANA LETTER BU or KATAKANA LETTER PU + { 0x0000, 0x0000 }, // 0x30d6 KATAKANA LETTER BU + { 
0x0000, 0x0000 }, // 0x30d7 KATAKANA LETTER PU + { 0x30d9, 0x30da }, // 0x30d8 KATAKANA LETTER HE --> KATAKANA LETTER BE or KATAKANA LETTER PE + { 0x0000, 0x0000 }, // 0x30d9 KATAKANA LETTER BE + { 0x0000, 0x0000 }, // 0x30da KATAKANA LETTER PE + { 0x30dc, 0x30dd }, // 0x30db KATAKANA LETTER HO --> KATAKANA LETTER BO or KATAKANA LETTER PO + { 0x0000, 0x0000 }, // 0x30dc KATAKANA LETTER BO + { 0x0000, 0x0000 }, // 0x30dd KATAKANA LETTER PO + { 0x0000, 0x0000 }, // 0x30de KATAKANA LETTER MA + { 0x0000, 0x0000 }, // 0x30df KATAKANA LETTER MI + { 0x0000, 0x0000 }, // 0x30e0 KATAKANA LETTER MU + { 0x0000, 0x0000 }, // 0x30e1 KATAKANA LETTER ME + { 0x0000, 0x0000 }, // 0x30e2 KATAKANA LETTER MO + { 0x0000, 0x0000 }, // 0x30e3 KATAKANA LETTER SMALL YA + { 0x0000, 0x0000 }, // 0x30e4 KATAKANA LETTER YA + { 0x0000, 0x0000 }, // 0x30e5 KATAKANA LETTER SMALL YU + { 0x0000, 0x0000 }, // 0x30e6 KATAKANA LETTER YU + { 0x0000, 0x0000 }, // 0x30e7 KATAKANA LETTER SMALL YO + { 0x0000, 0x0000 }, // 0x30e8 KATAKANA LETTER YO + { 0x0000, 0x0000 }, // 0x30e9 KATAKANA LETTER RA + { 0x0000, 0x0000 }, // 0x30ea KATAKANA LETTER RI + { 0x0000, 0x0000 }, // 0x30eb KATAKANA LETTER RU + { 0x0000, 0x0000 }, // 0x30ec KATAKANA LETTER RE + { 0x0000, 0x0000 }, // 0x30ed KATAKANA LETTER RO + { 0x0000, 0x0000 }, // 0x30ee KATAKANA LETTER SMALL WA + { 0x30f7, 0x0000 }, // 0x30ef KATAKANA LETTER WA --> KATAKANA LETTER VA + { 0x30f8, 0x0000 }, // 0x30f0 KATAKANA LETTER WI --> KATAKANA LETTER VI + { 0x30f9, 0x0000 }, // 0x30f1 KATAKANA LETTER WE --> KATAKANA LETTER VE + { 0x30fa, 0x0000 }, // 0x30f2 KATAKANA LETTER WO --> KATAKANA LETTER VO + { 0x0000, 0x0000 }, // 0x30f3 KATAKANA LETTER N + { 0x0000, 0x0000 }, // 0x30f4 KATAKANA LETTER VU + { 0x0000, 0x0000 }, // 0x30f5 KATAKANA LETTER SMALL KA + { 0x0000, 0x0000 }, // 0x30f6 KATAKANA LETTER SMALL KE + { 0x0000, 0x0000 }, // 0x30f7 KATAKANA LETTER VA + { 0x0000, 0x0000 }, // 0x30f8 KATAKANA LETTER VI + { 0x0000, 0x0000 }, // 0x30f9 KATAKANA LETTER VE + 
{ 0x0000, 0x0000 }, // 0x30fa KATAKANA LETTER VO + { 0x0000, 0x0000 }, // 0x30fb KATAKANA MIDDLE DOT + { 0x0000, 0x0000 }, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK + { 0x0000, 0x0000 }, // 0x30fd KATAKANA ITERATION MARK + { 0x0000, 0x0000 }, // 0x30fe KATAKANA VOICED ITERATION MARK + { 0x0000, 0x0000 } // 0x30ff +}; + +/** + * Compose Japanese specific voiced and semi-voiced sound marks. + */ +OUString SAL_CALL +compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) +{ + // Create a string buffer which can hold nCount + 1 characters. + // Its size may become equal to nCount or smaller. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + + // Prepare pointers of unicode character arrays. + const sal_Unicode* src = inStr.getStr() + startPos; + sal_Unicode* dst = newStr->buffer; + + // This conversion algorithm requires at least one character. + if (nCount <= 0) { + newStr->length = 0; + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. + } + + // .. .. KA VOICE .. .. + // ^ ^ + // previousChar currentChar + // ^ + // position + // + // will be converted to + // .. .. GA .. .. + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Composition: KA + voice-mark --> GA + while (-- nCount > 0) { + currentChar = *src ++; + // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) + // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) + // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + int j = currentChar - 0x3099; // 0x3099 or 0x309a ? 
+ if (0 <= j && j <= 1) { // 0 addresses a code point regarding 0x3099, 1 is 0x309a + int i = int(previousChar - 0x3040); // i acts as an index of array + if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j]) { + position ++; + *p ++ = position; + position ++; + *dst ++ = composition_table[i][j]; + previousChar = *src ++; + nCount --; + continue; + } + } + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + + + +/* + Halfwidth and Fullwidth Forms (U+FF00..U+FFEF) + ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt + ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html + http://charts.unicode.org/Web/UFF00.html +*/ + +OneToOneMappingTable_t half2full[] = { + MAKE_PAIR( 0x0020, 0x3000 ), // SPACE --> IDEOGRAPHIC SPACE + MAKE_PAIR( 0x0021, 0xFF01 ), // EXCLAMATION MARK --> FULLWIDTH EXCLAMATION MARK + MAKE_PAIR( 0x0022, 0xFF02 ), // QUOTATION MARK --> FULLWIDTH QUOTATION MARK + MAKE_PAIR( 0x0023, 0xFF03 ), // NUMBER SIGN --> FULLWIDTH NUMBER SIGN + MAKE_PAIR( 0x0024, 0xFF04 ), // DOLLAR SIGN --> FULLWIDTH DOLLAR SIGN + MAKE_PAIR( 0x0025, 0xFF05 ), // PERCENT SIGN --> FULLWIDTH PERCENT SIGN + MAKE_PAIR( 0x0026, 0xFF06 ), // AMPERSAND --> FULLWIDTH AMPERSAND + MAKE_PAIR( 0x0027, 0xFF07 ), // APOSTROPHE --> FULLWIDTH APOSTROPHE + MAKE_PAIR( 0x0028, 0xFF08 ), // LEFT PARENTHESIS --> FULLWIDTH LEFT PARENTHESIS + MAKE_PAIR( 0x0029, 0xFF09 ), // RIGHT PARENTHESIS --> FULLWIDTH RIGHT PARENTHESIS + MAKE_PAIR( 0x002A, 0xFF0A ), // ASTERISK --> FULLWIDTH ASTERISK + MAKE_PAIR( 0x002B, 0xFF0B ), // PLUS SIGN --> FULLWIDTH PLUS SIGN + MAKE_PAIR( 0x002C, 0xFF0C ), // COMMA --> FULLWIDTH COMMA + +//MAKE_PAIR( 0x002D, 0xFF0D 
), // HYPHEN-MINUS --> FULLWIDTH HYPHEN-MINUS + MAKE_PAIR( 0x002D, 0x2212 ), // HYPHEN-MINUS --> MINUS SIGN + + MAKE_PAIR( 0x002E, 0xFF0E ), // FULL STOP --> FULLWIDTH FULL STOP + MAKE_PAIR( 0x002F, 0xFF0F ), // SOLIDUS --> FULLWIDTH SOLIDUS + MAKE_PAIR( 0x0030, 0xFF10 ), // DIGIT ZERO --> FULLWIDTH DIGIT ZERO + MAKE_PAIR( 0x0031, 0xFF11 ), // DIGIT ONE --> FULLWIDTH DIGIT ONE + MAKE_PAIR( 0x0032, 0xFF12 ), // DIGIT TWO --> FULLWIDTH DIGIT TWO + MAKE_PAIR( 0x0033, 0xFF13 ), // DIGIT THREE --> FULLWIDTH DIGIT THREE + MAKE_PAIR( 0x0034, 0xFF14 ), // DIGIT FOUR --> FULLWIDTH DIGIT FOUR + MAKE_PAIR( 0x0035, 0xFF15 ), // DIGIT FIVE --> FULLWIDTH DIGIT FIVE + MAKE_PAIR( 0x0036, 0xFF16 ), // DIGIT SIX --> FULLWIDTH DIGIT SIX + MAKE_PAIR( 0x0037, 0xFF17 ), // DIGIT SEVEN --> FULLWIDTH DIGIT SEVEN + MAKE_PAIR( 0x0038, 0xFF18 ), // DIGIT EIGHT --> FULLWIDTH DIGIT EIGHT + MAKE_PAIR( 0x0039, 0xFF19 ), // DIGIT NINE --> FULLWIDTH DIGIT NINE + MAKE_PAIR( 0x003A, 0xFF1A ), // COLON --> FULLWIDTH COLON + MAKE_PAIR( 0x003B, 0xFF1B ), // SEMICOLON --> FULLWIDTH SEMICOLON + MAKE_PAIR( 0x003C, 0xFF1C ), // LESS-THAN SIGN --> FULLWIDTH LESS-THAN SIGN + MAKE_PAIR( 0x003D, 0xFF1D ), // EQUALS SIGN --> FULLWIDTH EQUALS SIGN + MAKE_PAIR( 0x003E, 0xFF1E ), // GREATER-THAN SIGN --> FULLWIDTH GREATER-THAN SIGN + MAKE_PAIR( 0x003F, 0xFF1F ), // QUESTION MARK --> FULLWIDTH QUESTION MARK + MAKE_PAIR( 0x0040, 0xFF20 ), // COMMERCIAL AT --> FULLWIDTH COMMERCIAL AT + MAKE_PAIR( 0x0041, 0xFF21 ), // LATIN CAPITAL LETTER A --> FULLWIDTH LATIN CAPITAL LETTER A + MAKE_PAIR( 0x0042, 0xFF22 ), // LATIN CAPITAL LETTER B --> FULLWIDTH LATIN CAPITAL LETTER B + MAKE_PAIR( 0x0043, 0xFF23 ), // LATIN CAPITAL LETTER C --> FULLWIDTH LATIN CAPITAL LETTER C + MAKE_PAIR( 0x0044, 0xFF24 ), // LATIN CAPITAL LETTER D --> FULLWIDTH LATIN CAPITAL LETTER D + MAKE_PAIR( 0x0045, 0xFF25 ), // LATIN CAPITAL LETTER E --> FULLWIDTH LATIN CAPITAL LETTER E + MAKE_PAIR( 0x0046, 0xFF26 ), // LATIN CAPITAL LETTER F --> FULLWIDTH 
LATIN CAPITAL LETTER F + MAKE_PAIR( 0x0047, 0xFF27 ), // LATIN CAPITAL LETTER G --> FULLWIDTH LATIN CAPITAL LETTER G + MAKE_PAIR( 0x0048, 0xFF28 ), // LATIN CAPITAL LETTER H --> FULLWIDTH LATIN CAPITAL LETTER H + MAKE_PAIR( 0x0049, 0xFF29 ), // LATIN CAPITAL LETTER I --> FULLWIDTH LATIN CAPITAL LETTER I + MAKE_PAIR( 0x004A, 0xFF2A ), // LATIN CAPITAL LETTER J --> FULLWIDTH LATIN CAPITAL LETTER J + MAKE_PAIR( 0x004B, 0xFF2B ), // LATIN CAPITAL LETTER K --> FULLWIDTH LATIN CAPITAL LETTER K + MAKE_PAIR( 0x004C, 0xFF2C ), // LATIN CAPITAL LETTER L --> FULLWIDTH LATIN CAPITAL LETTER L + MAKE_PAIR( 0x004D, 0xFF2D ), // LATIN CAPITAL LETTER M --> FULLWIDTH LATIN CAPITAL LETTER M + MAKE_PAIR( 0x004E, 0xFF2E ), // LATIN CAPITAL LETTER N --> FULLWIDTH LATIN CAPITAL LETTER N + MAKE_PAIR( 0x004F, 0xFF2F ), // LATIN CAPITAL LETTER O --> FULLWIDTH LATIN CAPITAL LETTER O + MAKE_PAIR( 0x0050, 0xFF30 ), // LATIN CAPITAL LETTER P --> FULLWIDTH LATIN CAPITAL LETTER P + MAKE_PAIR( 0x0051, 0xFF31 ), // LATIN CAPITAL LETTER Q --> FULLWIDTH LATIN CAPITAL LETTER Q + MAKE_PAIR( 0x0052, 0xFF32 ), // LATIN CAPITAL LETTER R --> FULLWIDTH LATIN CAPITAL LETTER R + MAKE_PAIR( 0x0053, 0xFF33 ), // LATIN CAPITAL LETTER S --> FULLWIDTH LATIN CAPITAL LETTER S + MAKE_PAIR( 0x0054, 0xFF34 ), // LATIN CAPITAL LETTER T --> FULLWIDTH LATIN CAPITAL LETTER T + MAKE_PAIR( 0x0055, 0xFF35 ), // LATIN CAPITAL LETTER U --> FULLWIDTH LATIN CAPITAL LETTER U + MAKE_PAIR( 0x0056, 0xFF36 ), // LATIN CAPITAL LETTER V --> FULLWIDTH LATIN CAPITAL LETTER V + MAKE_PAIR( 0x0057, 0xFF37 ), // LATIN CAPITAL LETTER W --> FULLWIDTH LATIN CAPITAL LETTER W + MAKE_PAIR( 0x0058, 0xFF38 ), // LATIN CAPITAL LETTER X --> FULLWIDTH LATIN CAPITAL LETTER X + MAKE_PAIR( 0x0059, 0xFF39 ), // LATIN CAPITAL LETTER Y --> FULLWIDTH LATIN CAPITAL LETTER Y + MAKE_PAIR( 0x005A, 0xFF3A ), // LATIN CAPITAL LETTER Z --> FULLWIDTH LATIN CAPITAL LETTER Z + MAKE_PAIR( 0x005B, 0xFF3B ), // LEFT SQUARE BRACKET --> FULLWIDTH LEFT SQUARE BRACKET + 
MAKE_PAIR( 0x005C, 0xFF3C ), // REVERSE SOLIDUS --> FULLWIDTH REVERSE SOLIDUS + MAKE_PAIR( 0x005D, 0xFF3D ), // RIGHT SQUARE BRACKET --> FULLWIDTH RIGHT SQUARE BRACKET + MAKE_PAIR( 0x005E, 0xFF3E ), // CIRCUMFLEX ACCENT --> FULLWIDTH CIRCUMFLEX ACCENT + MAKE_PAIR( 0x005F, 0xFF3F ), // LOW LINE --> FULLWIDTH LOW LINE + MAKE_PAIR( 0x0060, 0xFF40 ), // GRAVE ACCENT --> FULLWIDTH GRAVE ACCENT + MAKE_PAIR( 0x0061, 0xFF41 ), // LATIN SMALL LETTER A --> FULLWIDTH LATIN SMALL LETTER A + MAKE_PAIR( 0x0062, 0xFF42 ), // LATIN SMALL LETTER B --> FULLWIDTH LATIN SMALL LETTER B + MAKE_PAIR( 0x0063, 0xFF43 ), // LATIN SMALL LETTER C --> FULLWIDTH LATIN SMALL LETTER C + MAKE_PAIR( 0x0064, 0xFF44 ), // LATIN SMALL LETTER D --> FULLWIDTH LATIN SMALL LETTER D + MAKE_PAIR( 0x0065, 0xFF45 ), // LATIN SMALL LETTER E --> FULLWIDTH LATIN SMALL LETTER E + MAKE_PAIR( 0x0066, 0xFF46 ), // LATIN SMALL LETTER F --> FULLWIDTH LATIN SMALL LETTER F + MAKE_PAIR( 0x0067, 0xFF47 ), // LATIN SMALL LETTER G --> FULLWIDTH LATIN SMALL LETTER G + MAKE_PAIR( 0x0068, 0xFF48 ), // LATIN SMALL LETTER H --> FULLWIDTH LATIN SMALL LETTER H + MAKE_PAIR( 0x0069, 0xFF49 ), // LATIN SMALL LETTER I --> FULLWIDTH LATIN SMALL LETTER I + MAKE_PAIR( 0x006A, 0xFF4A ), // LATIN SMALL LETTER J --> FULLWIDTH LATIN SMALL LETTER J + MAKE_PAIR( 0x006B, 0xFF4B ), // LATIN SMALL LETTER K --> FULLWIDTH LATIN SMALL LETTER K + MAKE_PAIR( 0x006C, 0xFF4C ), // LATIN SMALL LETTER L --> FULLWIDTH LATIN SMALL LETTER L + MAKE_PAIR( 0x006D, 0xFF4D ), // LATIN SMALL LETTER M --> FULLWIDTH LATIN SMALL LETTER M + MAKE_PAIR( 0x006E, 0xFF4E ), // LATIN SMALL LETTER N --> FULLWIDTH LATIN SMALL LETTER N + MAKE_PAIR( 0x006F, 0xFF4F ), // LATIN SMALL LETTER O --> FULLWIDTH LATIN SMALL LETTER O + MAKE_PAIR( 0x0070, 0xFF50 ), // LATIN SMALL LETTER P --> FULLWIDTH LATIN SMALL LETTER P + MAKE_PAIR( 0x0071, 0xFF51 ), // LATIN SMALL LETTER Q --> FULLWIDTH LATIN SMALL LETTER Q + MAKE_PAIR( 0x0072, 0xFF52 ), // LATIN SMALL LETTER R --> FULLWIDTH LATIN 
SMALL LETTER R + MAKE_PAIR( 0x0073, 0xFF53 ), // LATIN SMALL LETTER S --> FULLWIDTH LATIN SMALL LETTER S + MAKE_PAIR( 0x0074, 0xFF54 ), // LATIN SMALL LETTER T --> FULLWIDTH LATIN SMALL LETTER T + MAKE_PAIR( 0x0075, 0xFF55 ), // LATIN SMALL LETTER U --> FULLWIDTH LATIN SMALL LETTER U + MAKE_PAIR( 0x0076, 0xFF56 ), // LATIN SMALL LETTER V --> FULLWIDTH LATIN SMALL LETTER V + MAKE_PAIR( 0x0077, 0xFF57 ), // LATIN SMALL LETTER W --> FULLWIDTH LATIN SMALL LETTER W + MAKE_PAIR( 0x0078, 0xFF58 ), // LATIN SMALL LETTER X --> FULLWIDTH LATIN SMALL LETTER X + MAKE_PAIR( 0x0079, 0xFF59 ), // LATIN SMALL LETTER Y --> FULLWIDTH LATIN SMALL LETTER Y + MAKE_PAIR( 0x007A, 0xFF5A ), // LATIN SMALL LETTER Z --> FULLWIDTH LATIN SMALL LETTER Z + MAKE_PAIR( 0x007B, 0xFF5B ), // LEFT CURLY BRACKET --> FULLWIDTH LEFT CURLY BRACKET + MAKE_PAIR( 0x007C, 0xFF5C ), // VERTICAL LINE --> FULLWIDTH VERTICAL LINE + MAKE_PAIR( 0x007D, 0xFF5D ), // RIGHT CURLY BRACKET --> FULLWIDTH RIGHT CURLY BRACKET + MAKE_PAIR( 0x007E, 0xFF5E ), // TILDE --> FULLWIDTH TILDE + MAKE_PAIR( 0x00A2, 0xFFE0 ), // CENT SIGN --> FULLWIDTH CENT SIGN + MAKE_PAIR( 0x00A3, 0xFFE1 ), // POUND SIGN --> FULLWIDTH POUND SIGN + MAKE_PAIR( 0x00A5, 0xFFE5 ), // YEN SIGN --> FULLWIDTH YEN SIGN + MAKE_PAIR( 0x00A6, 0xFFE4 ), // BROKEN BAR --> FULLWIDTH BROKEN BAR + MAKE_PAIR( 0x00AC, 0xFFE2 ), // NOT SIGN --> FULLWIDTH NOT SIGN + MAKE_PAIR( 0x00AF, 0xFFE3 ), // MACRON --> FULLWIDTH MACRON + MAKE_PAIR( 0x20A9, 0xFFE6 ), // WON SIGN --> FULLWIDTH WON SIGN + MAKE_PAIR( 0xFF61, 0x3002 ), // HALFWIDTH IDEOGRAPHIC FULL STOP --> IDEOGRAPHIC FULL STOP + MAKE_PAIR( 0xFF62, 0x300C ), // HALFWIDTH LEFT CORNER BRACKET --> LEFT CORNER BRACKET + MAKE_PAIR( 0xFF63, 0x300D ), // HALFWIDTH RIGHT CORNER BRACKET --> RIGHT CORNER BRACKET + MAKE_PAIR( 0xFF64, 0x3001 ), // HALFWIDTH IDEOGRAPHIC COMMA --> IDEOGRAPHIC COMMA + MAKE_PAIR( 0xFF65, 0x30FB ), // HALFWIDTH KATAKANA MIDDLE DOT --> KATAKANA MIDDLE DOT + MAKE_PAIR( 0xFF66, 0x30F2 ), // HALFWIDTH 
KATAKANA LETTER WO --> KATAKANA LETTER WO + MAKE_PAIR( 0xFF67, 0x30A1 ), // HALFWIDTH KATAKANA LETTER SMALL A --> KATAKANA LETTER SMALL A + MAKE_PAIR( 0xFF68, 0x30A3 ), // HALFWIDTH KATAKANA LETTER SMALL I --> KATAKANA LETTER SMALL I + MAKE_PAIR( 0xFF69, 0x30A5 ), // HALFWIDTH KATAKANA LETTER SMALL U --> KATAKANA LETTER SMALL U + MAKE_PAIR( 0xFF6A, 0x30A7 ), // HALFWIDTH KATAKANA LETTER SMALL E --> KATAKANA LETTER SMALL E + MAKE_PAIR( 0xFF6B, 0x30A9 ), // HALFWIDTH KATAKANA LETTER SMALL O --> KATAKANA LETTER SMALL O + MAKE_PAIR( 0xFF6C, 0x30E3 ), // HALFWIDTH KATAKANA LETTER SMALL YA --> KATAKANA LETTER SMALL YA + MAKE_PAIR( 0xFF6D, 0x30E5 ), // HALFWIDTH KATAKANA LETTER SMALL YU --> KATAKANA LETTER SMALL YU + MAKE_PAIR( 0xFF6E, 0x30E7 ), // HALFWIDTH KATAKANA LETTER SMALL YO --> KATAKANA LETTER SMALL YO + MAKE_PAIR( 0xFF6F, 0x30C3 ), // HALFWIDTH KATAKANA LETTER SMALL TU --> KATAKANA LETTER SMALL TU + MAKE_PAIR( 0xFF70, 0x30FC ), // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK --> KATAKANA-HIRAGANA PROLONGED SOUND MARK + MAKE_PAIR( 0xFF71, 0x30A2 ), // HALFWIDTH KATAKANA LETTER A --> KATAKANA LETTER A + MAKE_PAIR( 0xFF72, 0x30A4 ), // HALFWIDTH KATAKANA LETTER I --> KATAKANA LETTER I + MAKE_PAIR( 0xFF73, 0x30A6 ), // HALFWIDTH KATAKANA LETTER U --> KATAKANA LETTER U + MAKE_PAIR( 0xFF74, 0x30A8 ), // HALFWIDTH KATAKANA LETTER E --> KATAKANA LETTER E + MAKE_PAIR( 0xFF75, 0x30AA ), // HALFWIDTH KATAKANA LETTER O --> KATAKANA LETTER O + MAKE_PAIR( 0xFF76, 0x30AB ), // HALFWIDTH KATAKANA LETTER KA --> KATAKANA LETTER KA + MAKE_PAIR( 0xFF77, 0x30AD ), // HALFWIDTH KATAKANA LETTER KI --> KATAKANA LETTER KI + MAKE_PAIR( 0xFF78, 0x30AF ), // HALFWIDTH KATAKANA LETTER KU --> KATAKANA LETTER KU + MAKE_PAIR( 0xFF79, 0x30B1 ), // HALFWIDTH KATAKANA LETTER KE --> KATAKANA LETTER KE + MAKE_PAIR( 0xFF7A, 0x30B3 ), // HALFWIDTH KATAKANA LETTER KO --> KATAKANA LETTER KO + MAKE_PAIR( 0xFF7B, 0x30B5 ), // HALFWIDTH KATAKANA LETTER SA --> KATAKANA LETTER SA + MAKE_PAIR( 0xFF7C, 
0x30B7 ), // HALFWIDTH KATAKANA LETTER SI --> KATAKANA LETTER SI + MAKE_PAIR( 0xFF7D, 0x30B9 ), // HALFWIDTH KATAKANA LETTER SU --> KATAKANA LETTER SU + MAKE_PAIR( 0xFF7E, 0x30BB ), // HALFWIDTH KATAKANA LETTER SE --> KATAKANA LETTER SE + MAKE_PAIR( 0xFF7F, 0x30BD ), // HALFWIDTH KATAKANA LETTER SO --> KATAKANA LETTER SO + MAKE_PAIR( 0xFF80, 0x30BF ), // HALFWIDTH KATAKANA LETTER TA --> KATAKANA LETTER TA + MAKE_PAIR( 0xFF81, 0x30C1 ), // HALFWIDTH KATAKANA LETTER TI --> KATAKANA LETTER TI + MAKE_PAIR( 0xFF82, 0x30C4 ), // HALFWIDTH KATAKANA LETTER TU --> KATAKANA LETTER TU + MAKE_PAIR( 0xFF83, 0x30C6 ), // HALFWIDTH KATAKANA LETTER TE --> KATAKANA LETTER TE + MAKE_PAIR( 0xFF84, 0x30C8 ), // HALFWIDTH KATAKANA LETTER TO --> KATAKANA LETTER TO + MAKE_PAIR( 0xFF85, 0x30CA ), // HALFWIDTH KATAKANA LETTER NA --> KATAKANA LETTER NA + MAKE_PAIR( 0xFF86, 0x30CB ), // HALFWIDTH KATAKANA LETTER NI --> KATAKANA LETTER NI + MAKE_PAIR( 0xFF87, 0x30CC ), // HALFWIDTH KATAKANA LETTER NU --> KATAKANA LETTER NU + MAKE_PAIR( 0xFF88, 0x30CD ), // HALFWIDTH KATAKANA LETTER NE --> KATAKANA LETTER NE + MAKE_PAIR( 0xFF89, 0x30CE ), // HALFWIDTH KATAKANA LETTER NO --> KATAKANA LETTER NO + MAKE_PAIR( 0xFF8A, 0x30CF ), // HALFWIDTH KATAKANA LETTER HA --> KATAKANA LETTER HA + MAKE_PAIR( 0xFF8B, 0x30D2 ), // HALFWIDTH KATAKANA LETTER HI --> KATAKANA LETTER HI + MAKE_PAIR( 0xFF8C, 0x30D5 ), // HALFWIDTH KATAKANA LETTER HU --> KATAKANA LETTER HU + MAKE_PAIR( 0xFF8D, 0x30D8 ), // HALFWIDTH KATAKANA LETTER HE --> KATAKANA LETTER HE + MAKE_PAIR( 0xFF8E, 0x30DB ), // HALFWIDTH KATAKANA LETTER HO --> KATAKANA LETTER HO + MAKE_PAIR( 0xFF8F, 0x30DE ), // HALFWIDTH KATAKANA LETTER MA --> KATAKANA LETTER MA + MAKE_PAIR( 0xFF90, 0x30DF ), // HALFWIDTH KATAKANA LETTER MI --> KATAKANA LETTER MI + MAKE_PAIR( 0xFF91, 0x30E0 ), // HALFWIDTH KATAKANA LETTER MU --> KATAKANA LETTER MU + MAKE_PAIR( 0xFF92, 0x30E1 ), // HALFWIDTH KATAKANA LETTER ME --> KATAKANA LETTER ME + MAKE_PAIR( 0xFF93, 0x30E2 ), // 
HALFWIDTH KATAKANA LETTER MO --> KATAKANA LETTER MO + MAKE_PAIR( 0xFF94, 0x30E4 ), // HALFWIDTH KATAKANA LETTER YA --> KATAKANA LETTER YA + MAKE_PAIR( 0xFF95, 0x30E6 ), // HALFWIDTH KATAKANA LETTER YU --> KATAKANA LETTER YU + MAKE_PAIR( 0xFF96, 0x30E8 ), // HALFWIDTH KATAKANA LETTER YO --> KATAKANA LETTER YO + MAKE_PAIR( 0xFF97, 0x30E9 ), // HALFWIDTH KATAKANA LETTER RA --> KATAKANA LETTER RA + MAKE_PAIR( 0xFF98, 0x30EA ), // HALFWIDTH KATAKANA LETTER RI --> KATAKANA LETTER RI + MAKE_PAIR( 0xFF99, 0x30EB ), // HALFWIDTH KATAKANA LETTER RU --> KATAKANA LETTER RU + MAKE_PAIR( 0xFF9A, 0x30EC ), // HALFWIDTH KATAKANA LETTER RE --> KATAKANA LETTER RE + MAKE_PAIR( 0xFF9B, 0x30ED ), // HALFWIDTH KATAKANA LETTER RO --> KATAKANA LETTER RO + MAKE_PAIR( 0xFF9C, 0x30EF ), // HALFWIDTH KATAKANA LETTER WA --> KATAKANA LETTER WA + MAKE_PAIR( 0xFF9D, 0x30F3 ), // HALFWIDTH KATAKANA LETTER N --> KATAKANA LETTER N + MAKE_PAIR( 0xFF9E, 0x3099 ), // HALFWIDTH KATAKANA VOICED SOUND MARK --> COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + MAKE_PAIR( 0xFF9F, 0x309A ), // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK --> COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + MAKE_PAIR( 0xFFA0, 0x3164 ), // HALFWIDTH HANGUL FILLER --> HANGUL FILLER + MAKE_PAIR( 0xFFA1, 0x3131 ), // HALFWIDTH HANGUL LETTER KIYEOK --> HANGUL LETTER KIYEOK + MAKE_PAIR( 0xFFA2, 0x3132 ), // HALFWIDTH HANGUL LETTER SSANGKIYEOK --> HANGUL LETTER SSANGKIYEOK + MAKE_PAIR( 0xFFA3, 0x3133 ), // HALFWIDTH HANGUL LETTER KIYEOK-SIOS --> HANGUL LETTER KIYEOK-SIOS + MAKE_PAIR( 0xFFA4, 0x3134 ), // HALFWIDTH HANGUL LETTER NIEUN --> HANGUL LETTER NIEUN + MAKE_PAIR( 0xFFA5, 0x3135 ), // HALFWIDTH HANGUL LETTER NIEUN-CIEUC --> HANGUL LETTER NIEUN-CIEUC + MAKE_PAIR( 0xFFA6, 0x3136 ), // HALFWIDTH HANGUL LETTER NIEUN-HIEUH --> HANGUL LETTER NIEUN-HIEUH + MAKE_PAIR( 0xFFA7, 0x3137 ), // HALFWIDTH HANGUL LETTER TIKEUT --> HANGUL LETTER TIKEUT + MAKE_PAIR( 0xFFA8, 0x3138 ), // HALFWIDTH HANGUL LETTER SSANGTIKEUT --> HANGUL LETTER 
SSANGTIKEUT + MAKE_PAIR( 0xFFA9, 0x3139 ), // HALFWIDTH HANGUL LETTER RIEUL --> HANGUL LETTER RIEUL + MAKE_PAIR( 0xFFAA, 0x313A ), // HALFWIDTH HANGUL LETTER RIEUL-KIYEOK --> HANGUL LETTER RIEUL-KIYEOK + MAKE_PAIR( 0xFFAB, 0x313B ), // HALFWIDTH HANGUL LETTER RIEUL-MIEUM --> HANGUL LETTER RIEUL-MIEUM + MAKE_PAIR( 0xFFAC, 0x313C ), // HALFWIDTH HANGUL LETTER RIEUL-PIEUP --> HANGUL LETTER RIEUL-PIEUP + MAKE_PAIR( 0xFFAD, 0x313D ), // HALFWIDTH HANGUL LETTER RIEUL-SIOS --> HANGUL LETTER RIEUL-SIOS + MAKE_PAIR( 0xFFAE, 0x313E ), // HALFWIDTH HANGUL LETTER RIEUL-THIEUTH --> HANGUL LETTER RIEUL-THIEUTH + MAKE_PAIR( 0xFFAF, 0x313F ), // HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH --> HANGUL LETTER RIEUL-PHIEUPH + MAKE_PAIR( 0xFFB0, 0x3140 ), // HALFWIDTH HANGUL LETTER RIEUL-HIEUH --> HANGUL LETTER RIEUL-HIEUH + MAKE_PAIR( 0xFFB1, 0x3141 ), // HALFWIDTH HANGUL LETTER MIEUM --> HANGUL LETTER MIEUM + MAKE_PAIR( 0xFFB2, 0x3142 ), // HALFWIDTH HANGUL LETTER PIEUP --> HANGUL LETTER PIEUP + MAKE_PAIR( 0xFFB3, 0x3143 ), // HALFWIDTH HANGUL LETTER SSANGPIEUP --> HANGUL LETTER SSANGPIEUP + MAKE_PAIR( 0xFFB4, 0x3144 ), // HALFWIDTH HANGUL LETTER PIEUP-SIOS --> HANGUL LETTER PIEUP-SIOS + MAKE_PAIR( 0xFFB5, 0x3145 ), // HALFWIDTH HANGUL LETTER SIOS --> HANGUL LETTER SIOS + MAKE_PAIR( 0xFFB6, 0x3146 ), // HALFWIDTH HANGUL LETTER SSANGSIOS --> HANGUL LETTER SSANGSIOS + MAKE_PAIR( 0xFFB7, 0x3147 ), // HALFWIDTH HANGUL LETTER IEUNG --> HANGUL LETTER IEUNG + MAKE_PAIR( 0xFFB8, 0x3148 ), // HALFWIDTH HANGUL LETTER CIEUC --> HANGUL LETTER CIEUC + MAKE_PAIR( 0xFFB9, 0x3149 ), // HALFWIDTH HANGUL LETTER SSANGCIEUC --> HANGUL LETTER SSANGCIEUC + MAKE_PAIR( 0xFFBA, 0x314A ), // HALFWIDTH HANGUL LETTER CHIEUCH --> HANGUL LETTER CHIEUCH + MAKE_PAIR( 0xFFBB, 0x314B ), // HALFWIDTH HANGUL LETTER KHIEUKH --> HANGUL LETTER KHIEUKH + MAKE_PAIR( 0xFFBC, 0x314C ), // HALFWIDTH HANGUL LETTER THIEUTH --> HANGUL LETTER THIEUTH + MAKE_PAIR( 0xFFBD, 0x314D ), // HALFWIDTH HANGUL LETTER PHIEUPH --> HANGUL LETTER 
PHIEUPH + MAKE_PAIR( 0xFFBE, 0x314E ), // HALFWIDTH HANGUL LETTER HIEUH --> HANGUL LETTER HIEUH + MAKE_PAIR( 0xFFC2, 0x314F ), // HALFWIDTH HANGUL LETTER A --> HANGUL LETTER A + MAKE_PAIR( 0xFFC3, 0x3150 ), // HALFWIDTH HANGUL LETTER AE --> HANGUL LETTER AE + MAKE_PAIR( 0xFFC4, 0x3151 ), // HALFWIDTH HANGUL LETTER YA --> HANGUL LETTER YA + MAKE_PAIR( 0xFFC5, 0x3152 ), // HALFWIDTH HANGUL LETTER YAE --> HANGUL LETTER YAE + MAKE_PAIR( 0xFFC6, 0x3153 ), // HALFWIDTH HANGUL LETTER EO --> HANGUL LETTER EO + MAKE_PAIR( 0xFFC7, 0x3154 ), // HALFWIDTH HANGUL LETTER E --> HANGUL LETTER E + MAKE_PAIR( 0xFFCA, 0x3155 ), // HALFWIDTH HANGUL LETTER YEO --> HANGUL LETTER YEO + MAKE_PAIR( 0xFFCB, 0x3156 ), // HALFWIDTH HANGUL LETTER YE --> HANGUL LETTER YE + MAKE_PAIR( 0xFFCC, 0x3157 ), // HALFWIDTH HANGUL LETTER O --> HANGUL LETTER O + MAKE_PAIR( 0xFFCD, 0x3158 ), // HALFWIDTH HANGUL LETTER WA --> HANGUL LETTER WA + MAKE_PAIR( 0xFFCE, 0x3159 ), // HALFWIDTH HANGUL LETTER WAE --> HANGUL LETTER WAE + MAKE_PAIR( 0xFFCF, 0x315A ), // HALFWIDTH HANGUL LETTER OE --> HANGUL LETTER OE + MAKE_PAIR( 0xFFD2, 0x315B ), // HALFWIDTH HANGUL LETTER YO --> HANGUL LETTER YO + MAKE_PAIR( 0xFFD3, 0x315C ), // HALFWIDTH HANGUL LETTER U --> HANGUL LETTER U + MAKE_PAIR( 0xFFD4, 0x315D ), // HALFWIDTH HANGUL LETTER WEO --> HANGUL LETTER WEO + MAKE_PAIR( 0xFFD5, 0x315E ), // HALFWIDTH HANGUL LETTER WE --> HANGUL LETTER WE + MAKE_PAIR( 0xFFD6, 0x315F ), // HALFWIDTH HANGUL LETTER WI --> HANGUL LETTER WI + MAKE_PAIR( 0xFFD7, 0x3160 ), // HALFWIDTH HANGUL LETTER YU --> HANGUL LETTER YU + MAKE_PAIR( 0xFFDA, 0x3161 ), // HALFWIDTH HANGUL LETTER EU --> HANGUL LETTER EU + MAKE_PAIR( 0xFFDB, 0x3162 ), // HALFWIDTH HANGUL LETTER YI --> HANGUL LETTER YI + MAKE_PAIR( 0xFFDC, 0x3163 ), // HALFWIDTH HANGUL LETTER I --> HANGUL LETTER I + MAKE_PAIR( 0xFFE8, 0x2502 ), // HALFWIDTH FORMS LIGHT VERTICAL --> BOX DRAWINGS LIGHT VERTICAL + MAKE_PAIR( 0xFFE9, 0x2190 ), // HALFWIDTH LEFTWARDS ARROW --> LEFTWARDS ARROW + 
MAKE_PAIR( 0xFFEA, 0x2191 ), // HALFWIDTH UPWARDS ARROW --> UPWARDS ARROW + MAKE_PAIR( 0xFFEB, 0x2192 ), // HALFWIDTH RIGHTWARDS ARROW --> RIGHTWARDS ARROW + MAKE_PAIR( 0xFFEC, 0x2193 ), // HALFWIDTH DOWNWARDS ARROW --> DOWNWARDS ARROW + MAKE_PAIR( 0xFFED, 0x25A0 ), // HALFWIDTH BLACK SQUARE --> BLACK SQUARE + MAKE_PAIR( 0xFFEE, 0x25CB ) // HALFWIDTH WHITE CIRCLE --> WHITE CIRCLE +}; + +oneToOneMapping& gethalf2fullTable(void) +{ + static oneToOneMapping table(half2full, sizeof(half2full)); + table.makeIndex(); + return table; +} + +OUString SAL_CALL +halfwidthToFullwidth::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // One to One mapping + OUString newStr = transliteration_OneToOne::transliterate( inStr, startPos, nCount, gethalf2fullTable()); + + // Composition: KA + voice-mark --> GA + return compose_ja_voiced_sound_marks ( newStr, 0, newStr.getLength(), offset ); +} + +sal_Unicode getCompositionChar(sal_Unicode c1, sal_Unicode c2) +{ + return composition_table[c1 - 0x3040][c2 - 0x3099]; +} + +} } } } diff --git a/i18npool/source/transliteration/hiraganaToKatakana.cxx b/i18npool/source/transliteration/hiraganaToKatakana.cxx new file mode 100644 index 000000000000..68c4cd4d73ec --- /dev/null +++ b/i18npool/source/transliteration/hiraganaToKatakana.cxx @@ -0,0 +1,75 @@ +/************************************************************************* + * + * $RCSfile: hiraganaToKatakana.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry 
Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_hiraganaToKatakana +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +hiraganaToKatakana::hiraganaToKatakana() +{ + transliterationName = "hiraganaToKatakana"; + implementationName = "com.sun.star.i18n.Transliteration.HIRAGANA_KATAKANA"; +} + +// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +static sal_Unicode toKatakana (const sal_Unicode c) { + if (0x3040 <= c && c <= 0x3094 || 0x309d <= c && c <= 0x309f) { // 3040 - 309F HIRAGANA LETTER + // shift code point by 0x0060 + return c + (0x30a0 - 0x3040); + } + return c; +} + +OUString SAL_CALL +hiraganaToKatakana::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + 
return transliteration_OneToOne::transliterate( inStr, startPos, nCount, offset, (TransFunc) toKatakana ); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx new file mode 100644 index 000000000000..06891f47cf2c --- /dev/null +++ b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx @@ -0,0 +1,136 @@ +/************************************************************************* + * + * $RCSfile: ignoreBaFa_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_BaFa_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreBaFa_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + if (previousChar == 0x30F4 && // KATAKANA LETTER VU + currentChar == 0x30A1 ) { // KATAKANA LETTER SMALL A + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30D0; // KATAKANA LETTER BA + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x3094 && // HIRAGANA LETTER VU + currentChar == 0x3041 ) { // HIRAGANA LETTER SMALL A + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x3070; // HIRAGANA LETTER BA + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x30D5 && // KATAKANA LETTER HU + currentChar == 0x30A1 ) { // KATAKANA LETTER SMALL A + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30CF; // KATAKANA LETTER HA + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x3075 && // HIRAGANA 
LETTER HU + currentChar == 0x3041 ) { // HIRAGANA LETTER SMALL A + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x306F; // HIRAGANA LETTER HA + previousChar = *src ++; + nCount --; + continue; + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx new file mode 100644 index 000000000000..ceb4b7e7232e --- /dev/null +++ b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx @@ -0,0 +1,145 @@ +/************************************************************************* + * + * $RCSfile: ignoreHyuByu_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+ * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_HyuByu_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreHyuByu_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. 
+ offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + if (previousChar == 0x30D5 && // KATAKANA LETTER HU + currentChar == 0x30E5 ) { // KATAKANA LETTER SMALL YU + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30D2; // KATAKANA LETTER HI + *dst ++ = currentChar; // KATAKANA LETTER SMALL YU + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x3075 && // HIRAGANA LETTER HU + currentChar == 0x3085 ) { // HIRAGANA LETTER SMALL YU + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x3072; // HIRAGANA LETTER HI + *dst ++ = currentChar; // HIRAGANA LETTER SMALL YU + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x30F4 && // KATAKANA LETTER VU + currentChar == 0x30E5 ) { // KATAKANA LETTER SMALL YU + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30D3; // KATAKANA LETTER BI + *dst ++ = currentChar; // KATAKANA LETTER SMALL YU + previousChar = *src ++; + nCount --; + continue; + } + + if (previousChar == 0x3094 && // HIRAGANA LETTER VU + currentChar == 0x3085 ) { // HIRAGANA LETTER SMALL YU + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x3073; // HIRAGANA LETTER BI + *dst ++ = currentChar; // HIRAGANA LETTER SMALL YU + previousChar = *src ++; + nCount --; + continue; + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +} } } } diff --git a/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx new file mode 100644 index 000000000000..c46e4172789d --- /dev/null +++ b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx @@ -0,0 +1,152 @@ +/************************************************************************* + * + * $RCSfile: ignoreIandEfollowedByYa_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_IandEfollowedByYa_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OneToOneMappingTable_t IandE[] = { + MAKE_PAIR( 0x30A3, 0x0000 ), // KATAKANA LETTER SMALL I + MAKE_PAIR( 0x30A4, 0x0000 ), // KATAKANA LETTER I + MAKE_PAIR( 0x30A7, 0x0000 ), // KATAKANA LETTER SMALL E + MAKE_PAIR( 0x30A8, 0x0000 ), // KATAKANA LETTER E + MAKE_PAIR( 0x30AD, 0x0000 ), // KATAKANA LETTER KI + MAKE_PAIR( 0x30AE, 0x0000 ), // KATAKANA LETTER GI + MAKE_PAIR( 0x30B1, 0x0000 ), // KATAKANA LETTER KE + MAKE_PAIR( 0x30B2, 0x0000 ), // KATAKANA LETTER GE + MAKE_PAIR( 0x30B7, 0x0000 ), // KATAKANA LETTER SI + MAKE_PAIR( 0x30B8, 0x0000 ), // KATAKANA LETTER ZI + MAKE_PAIR( 0x30BB, 0x0000 ), // KATAKANA LETTER SE + MAKE_PAIR( 0x30BC, 0x0000 ), // KATAKANA LETTER ZE + MAKE_PAIR( 0x30C1, 0x0000 ), // KATAKANA LETTER TI + MAKE_PAIR( 0x30C2, 0x0000 ), // KATAKANA LETTER DI + MAKE_PAIR( 0x30C6, 0x0000 ), // KATAKANA LETTER TE + MAKE_PAIR( 0x30C7, 0x0000 ), // KATAKANA LETTER DE + MAKE_PAIR( 0x30CB, 0x0000 ), // KATAKANA LETTER NI + MAKE_PAIR( 0x30CD, 0x0000 ), // KATAKANA LETTER NE + MAKE_PAIR( 0x30D2, 0x0000 ), // KATAKANA LETTER HI + MAKE_PAIR( 0x30D3, 0x0000 ), // KATAKANA LETTER BI + MAKE_PAIR( 0x30D4, 0x0000 ), // KATAKANA LETTER PI + MAKE_PAIR( 0x30D8, 0x0000 ), // KATAKANA LETTER HE + MAKE_PAIR( 0x30D9, 0x0000 ), // KATAKANA LETTER BE + MAKE_PAIR( 0x30DA, 0x0000 ), // KATAKANA LETTER PE + MAKE_PAIR( 0x30DF, 0x0000 ), // KATAKANA LETTER MI + MAKE_PAIR( 0x30E1, 0x0000 ), // KATAKANA LETTER ME + MAKE_PAIR( 0x30EA, 0x0000 ), // KATAKANA LETTER RI + 
MAKE_PAIR( 0x30EC, 0x0000 ), // KATAKANA LETTER RE + MAKE_PAIR( 0x30F0, 0x0000 ), // KATAKANA LETTER WI + MAKE_PAIR( 0x30F1, 0x0000 ), // KATAKANA LETTER WE + MAKE_PAIR( 0x30F6, 0x0000 ), // KATAKANA LETTER SMALL KE + MAKE_PAIR( 0x30F8, 0x0000 ), // KATAKANA LETTER VI + MAKE_PAIR( 0x30F9, 0x0000 ) // KATAKANA LETTER VE +}; + + + + +OUString SAL_CALL +ignoreIandEfollowedByYa_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // One to one mapping + oneToOneMapping table(IandE, sizeof(IandE)); + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // the character listed in above table + YA --> the character + A + if (currentChar == 0x30E3 || // KATAKANA LETTER SMALL YA + currentChar == 0x30E4) { // KATAKANA LETTER YA + if (table[ previousChar ] != previousChar) { + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = previousChar; + *dst ++ = 0x30A2; // KATAKANA LETTER A + previousChar = *src ++; + nCount --; + continue; + } + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +} } } } diff --git a/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx new file mode 100644 index 000000000000..1696e6ab98ee --- /dev/null +++ b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx @@ -0,0 +1,159 @@ +/************************************************************************* + * + * $RCSfile: ignoreIterationMark_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_IterationMark_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OneToOneMappingTable_t ignoreIterationMark_ja_JP_mappingTable[] = { + MAKE_PAIR( 0x3046, 0x3094 ), // HIRAGANA LETTER U --> HIRAGANA LETTER VU + MAKE_PAIR( 0x304B, 0x304C ), // HIRAGANA LETTER KA --> HIRAGANA LETTER GA + MAKE_PAIR( 0x304D, 0x304E ), // HIRAGANA LETTER KI --> HIRAGANA LETTER GI + MAKE_PAIR( 0x304F, 0x3050 ), // HIRAGANA LETTER KU --> HIRAGANA LETTER GU + MAKE_PAIR( 0x3051, 0x3052 ), // HIRAGANA LETTER KE --> HIRAGANA LETTER GE + MAKE_PAIR( 0x3053, 0x3054 ), // HIRAGANA LETTER KO --> HIRAGANA LETTER GO + MAKE_PAIR( 0x3055, 0x3056 ), // HIRAGANA LETTER SA --> HIRAGANA LETTER ZA + MAKE_PAIR( 0x3057, 0x3058 ), // HIRAGANA LETTER SI --> HIRAGANA LETTER ZI + MAKE_PAIR( 0x3059, 0x305A ), // HIRAGANA LETTER SU --> HIRAGANA LETTER ZU + MAKE_PAIR( 0x305B, 0x305C ), // HIRAGANA LETTER SE --> HIRAGANA LETTER ZE + MAKE_PAIR( 0x305D, 0x305E ), // HIRAGANA LETTER SO --> HIRAGANA LETTER ZO + MAKE_PAIR( 0x305F, 0x3060 ), // HIRAGANA LETTER TA --> HIRAGANA LETTER DA + MAKE_PAIR( 0x3061, 0x3062 ), // HIRAGANA LETTER TI --> HIRAGANA LETTER DI + MAKE_PAIR( 0x3064, 0x3065 ), // HIRAGANA LETTER TU --> HIRAGANA LETTER DU + MAKE_PAIR( 0x3066, 0x3067 ), // HIRAGANA LETTER TE --> HIRAGANA LETTER DE + MAKE_PAIR( 0x3068, 0x3069 ), // HIRAGANA LETTER TO --> HIRAGANA LETTER DO + MAKE_PAIR( 0x306F, 0x3070 ), // HIRAGANA LETTER HA --> HIRAGANA LETTER BA + MAKE_PAIR( 0x3072, 0x3073 ), // HIRAGANA LETTER HI --> HIRAGANA LETTER BI + MAKE_PAIR( 0x3075, 0x3076 ), // HIRAGANA LETTER 
HU --> HIRAGANA LETTER BU + MAKE_PAIR( 0x3078, 0x3079 ), // HIRAGANA LETTER HE --> HIRAGANA LETTER BE + MAKE_PAIR( 0x307B, 0x307C ), // HIRAGANA LETTER HO --> HIRAGANA LETTER BO + MAKE_PAIR( 0x309D, 0x309E ), // HIRAGANA ITERATION MARK --> HIRAGANA VOICED ITERATION MARK + MAKE_PAIR( 0x30A6, 0x30F4 ), // KATAKANA LETTER U --> KATAKANA LETTER VU + MAKE_PAIR( 0x30AB, 0x30AC ), // KATAKANA LETTER KA --> KATAKANA LETTER GA + MAKE_PAIR( 0x30AD, 0x30AE ), // KATAKANA LETTER KI --> KATAKANA LETTER GI + MAKE_PAIR( 0x30AF, 0x30B0 ), // KATAKANA LETTER KU --> KATAKANA LETTER GU + MAKE_PAIR( 0x30B1, 0x30B2 ), // KATAKANA LETTER KE --> KATAKANA LETTER GE + MAKE_PAIR( 0x30B3, 0x30B4 ), // KATAKANA LETTER KO --> KATAKANA LETTER GO + MAKE_PAIR( 0x30B5, 0x30B6 ), // KATAKANA LETTER SA --> KATAKANA LETTER ZA + MAKE_PAIR( 0x30B7, 0x30B8 ), // KATAKANA LETTER SI --> KATAKANA LETTER ZI + MAKE_PAIR( 0x30B9, 0x30BA ), // KATAKANA LETTER SU --> KATAKANA LETTER ZU + MAKE_PAIR( 0x30BB, 0x30BC ), // KATAKANA LETTER SE --> KATAKANA LETTER ZE + MAKE_PAIR( 0x30BD, 0x30BE ), // KATAKANA LETTER SO --> KATAKANA LETTER ZO + MAKE_PAIR( 0x30BF, 0x30C0 ), // KATAKANA LETTER TA --> KATAKANA LETTER DA + MAKE_PAIR( 0x30C1, 0x30C2 ), // KATAKANA LETTER TI --> KATAKANA LETTER DI + MAKE_PAIR( 0x30C4, 0x30C5 ), // KATAKANA LETTER TU --> KATAKANA LETTER DU + MAKE_PAIR( 0x30C6, 0x30C7 ), // KATAKANA LETTER TE --> KATAKANA LETTER DE + MAKE_PAIR( 0x30C8, 0x30C9 ), // KATAKANA LETTER TO --> KATAKANA LETTER DO + MAKE_PAIR( 0x30CF, 0x30D0 ), // KATAKANA LETTER HA --> KATAKANA LETTER BA + MAKE_PAIR( 0x30D2, 0x30D3 ), // KATAKANA LETTER HI --> KATAKANA LETTER BI + MAKE_PAIR( 0x30D5, 0x30D6 ), // KATAKANA LETTER HU --> KATAKANA LETTER BU + MAKE_PAIR( 0x30D8, 0x30D9 ), // KATAKANA LETTER HE --> KATAKANA LETTER BE + MAKE_PAIR( 0x30DB, 0x30DC ), // KATAKANA LETTER HO --> KATAKANA LETTER BO + MAKE_PAIR( 0x30EF, 0x30F7 ), // KATAKANA LETTER WA --> KATAKANA LETTER VA + MAKE_PAIR( 0x30F0, 0x30F8 ), // KATAKANA LETTER WI --> 
KATAKANA LETTER VI + MAKE_PAIR( 0x30F1, 0x30F9 ), // KATAKANA LETTER WE --> KATAKANA LETTER VE + MAKE_PAIR( 0x30F2, 0x30FA ), // KATAKANA LETTER WO --> KATAKANA LETTER VO + MAKE_PAIR( 0x30FD, 0x30FE ) // KATAKANA ITERATION MARK --> KATAKANA VOICED ITERATION MARK +}; + + +OUString SAL_CALL +ignoreIterationMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + oneToOneMapping table(ignoreIterationMark_ja_JP_mappingTable, sizeof(ignoreIterationMark_ja_JP_mappingTable)); + + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Conversion + while (-- nCount > 0) { + currentChar = *src ++; + + switch ( currentChar ) { + case 0x30fd: // KATAKANA ITERATION MARK + case 0x309d: // HIRAGANA ITERATION MARK + case 0x3005: // IDEOGRAPHIC ITERATION MARK + currentChar = previousChar; + break; + case 0x30fe: // KATAKANA VOICED ITERATION MARK + case 0x309e: // HIRAGANA VOICED ITERATION MARK + currentChar = table[ previousChar ]; + break; + } + *p ++ = position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+ +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreKana.cxx b/i18npool/source/transliteration/ignoreKana.cxx new file mode 100644 index 000000000000..14b9befeb130 --- /dev/null +++ b/i18npool/source/transliteration/ignoreKana.cxx @@ -0,0 +1,73 @@ +/************************************************************************* + * + * $RCSfile: ignoreKana.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_Kana +#include <transliteration_Ignore.hxx> +#define TRANSLITERATION_hiraganaToKatakana +#define TRANSLITERATION_katakanaToHiragana +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreKana::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + hiraganaToKatakana t1; + return t1.transliterate(inStr, startPos, nCount, offset); +} + +Sequence< OUString > SAL_CALL +ignoreKana::transliterateRange( const OUString& str1, const OUString& str2 ) + throw(RuntimeException) +{ + hiraganaToKatakana t1; + katakanaToHiragana t2; + + return transliteration_Ignore::transliterateRange(str1, str2, t1, t2); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx new file mode 100644 index 000000000000..780b56dee387 --- /dev/null +++ b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx @@ -0,0 +1,109 @@ +/************************************************************************* + * + * $RCSfile: ignoreKiKuFollowedBySa_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents 
of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_KiKuFollowedBySa_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreKiKuFollowedBySa_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. 
+ offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // KU + Sa-So --> KI + Sa-So + if (previousChar == 0x30AF ) { // KATAKANA LETTER KU + if (0x30B5 <= currentChar && // KATAKANA LETTER SA + currentChar <= 0x30BE) { // KATAKANA LETTER ZO + *p ++ = position; + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30AD; // KATAKANA LETTER KI + *dst ++ = currentChar; + previousChar = *src ++; + nCount --; + continue; + } + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +} } } } diff --git a/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx new file mode 100644 index 000000000000..d22ab1b07053 --- /dev/null +++ b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx @@ -0,0 +1,72 @@ +/************************************************************************* + * + * $RCSfile: ignoreMiddleDot_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_MiddleDot_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Unicode +ignoreMiddleDot_ja_JP_translator (const sal_Unicode c) +{ + switch (c) { + case 0x30FB: // KATAKANA MIDDLE DOT + case 0xFF65: // HALFWIDTH KATAKANA MIDDLE DOT + // no break; + return 0xffff; // Skip this character + } + return c; +} + + +OUString SAL_CALL +ignoreMiddleDot_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, ignoreMiddleDot_ja_JP_translator ); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx new file mode 100644 index 000000000000..d1a02f2f52ed --- /dev/null +++ b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx @@ -0,0 +1,71 @@ +/************************************************************************* + * + * $RCSfile: ignoreMinusSign_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use 
this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <transliteration_OneToOne.hxx> +#define TRANSLITERATION_MinusSign_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Unicode +ignoreMinusSign_ja_JP_translator (const sal_Unicode c) +{ + switch (c) { + case 0x2212: // MINUS SIGN + case 0x002d: // HYPHEN-MINUS + return 0x30fc; // KATAKANA-HIRAGANA PROLONGED SOUND MARK + } + return c; +} + +OUString SAL_CALL +ignoreMinusSign_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, ignoreMinusSign_ja_JP_translator ); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx new file mode 100644 index 000000000000..e5e9429b83a9 --- /dev/null +++ 
b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx @@ -0,0 +1,366 @@ +/************************************************************************* + * + * $RCSfile: ignoreProlongedSoundMark_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_ProlongedSoundMark_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +static sal_Unicode table_normalwidth[] = { + // 0x0000, // 0x3040 + 0x3041, // 0x3041 HIRAGANA LETTER SMALL A + 0x3042, // 0x3042 HIRAGANA LETTER A + 0x3043, // 0x3043 HIRAGANA LETTER SMALL I + 0x3044, // 0x3044 HIRAGANA LETTER I + 0x3045, // 0x3045 HIRAGANA LETTER SMALL U + 0x3046, // 0x3046 HIRAGANA LETTER U + 0x3047, // 0x3047 HIRAGANA LETTER SMALL E + 0x3048, // 0x3048 HIRAGANA LETTER E + 0x3049, // 0x3049 HIRAGANA LETTER SMALL O + 0x304a, // 0x304a HIRAGANA LETTER O + 0x3042, // 0x304b HIRAGANA LETTER KA + 0x3042, // 0x304c HIRAGANA LETTER GA + 0x3044, // 0x304d HIRAGANA LETTER KI + 0x3044, // 0x304e HIRAGANA LETTER GI + 0x3046, // 0x304f HIRAGANA LETTER KU + 0x3046, // 0x3050 HIRAGANA LETTER GU + 0x3048, // 0x3051 HIRAGANA LETTER KE + 0x3048, // 0x3052 HIRAGANA LETTER GE + 0x304a, // 0x3053 HIRAGANA LETTER KO + 0x304a, // 0x3054 HIRAGANA LETTER GO + 0x3042, // 0x3055 HIRAGANA LETTER SA + 0x3042, // 0x3056 HIRAGANA LETTER ZA + 0x3044, // 0x3057 HIRAGANA LETTER SI + 0x3044, // 0x3058 HIRAGANA LETTER ZI + 0x3046, // 0x3059 HIRAGANA LETTER SU + 0x3046, // 0x305a HIRAGANA LETTER ZU + 0x3048, // 0x305b HIRAGANA LETTER SE + 0x3048, // 0x305c HIRAGANA LETTER ZE + 0x304a, // 0x305d HIRAGANA LETTER SO + 0x304a, // 0x305e HIRAGANA LETTER ZO + 0x3042, // 0x305f HIRAGANA LETTER TA + 0x3042, // 0x3060 HIRAGANA LETTER DA + 0x3044, // 0x3061 HIRAGANA LETTER TI + 0x3044, // 0x3062 HIRAGANA LETTER DI + 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU + 0x3046, // 0x3064 HIRAGANA LETTER TU + 0x3046, // 
0x3065 HIRAGANA LETTER DU + 0x3048, // 0x3066 HIRAGANA LETTER TE + 0x3048, // 0x3067 HIRAGANA LETTER DE + 0x304a, // 0x3068 HIRAGANA LETTER TO + 0x304a, // 0x3069 HIRAGANA LETTER DO + 0x3042, // 0x306a HIRAGANA LETTER NA + 0x3044, // 0x306b HIRAGANA LETTER NI + 0x3046, // 0x306c HIRAGANA LETTER NU + 0x3048, // 0x306d HIRAGANA LETTER NE + 0x304a, // 0x306e HIRAGANA LETTER NO + 0x3042, // 0x306f HIRAGANA LETTER HA + 0x3042, // 0x3070 HIRAGANA LETTER BA + 0x3042, // 0x3071 HIRAGANA LETTER PA + 0x3044, // 0x3072 HIRAGANA LETTER HI + 0x3044, // 0x3073 HIRAGANA LETTER BI + 0x3044, // 0x3074 HIRAGANA LETTER PI + 0x3046, // 0x3075 HIRAGANA LETTER HU + 0x3046, // 0x3076 HIRAGANA LETTER BU + 0x3046, // 0x3077 HIRAGANA LETTER PU + 0x3048, // 0x3078 HIRAGANA LETTER HE + 0x3048, // 0x3079 HIRAGANA LETTER BE + 0x3048, // 0x307a HIRAGANA LETTER PE + 0x304a, // 0x307b HIRAGANA LETTER HO + 0x304a, // 0x307c HIRAGANA LETTER BO + 0x304a, // 0x307d HIRAGANA LETTER PO + 0x3042, // 0x307e HIRAGANA LETTER MA + 0x3044, // 0x307f HIRAGANA LETTER MI + 0x3046, // 0x3080 HIRAGANA LETTER MU + 0x3048, // 0x3081 HIRAGANA LETTER ME + 0x304a, // 0x3082 HIRAGANA LETTER MO + 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA + 0x3042, // 0x3084 HIRAGANA LETTER YA + 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU + 0x3046, // 0x3086 HIRAGANA LETTER YU + 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO + 0x304a, // 0x3088 HIRAGANA LETTER YO + 0x3042, // 0x3089 HIRAGANA LETTER RA + 0x3044, // 0x308a HIRAGANA LETTER RI + 0x3046, // 0x308b HIRAGANA LETTER RU + 0x3048, // 0x308c HIRAGANA LETTER RE + 0x304a, // 0x308d HIRAGANA LETTER RO + 0x3041, // 0x308e HIRAGANA LETTER SMALL WA + 0x3042, // 0x308f HIRAGANA LETTER WA + 0x3044, // 0x3090 HIRAGANA LETTER WI + 0x3048, // 0x3091 HIRAGANA LETTER WE + 0x304a, // 0x3092 HIRAGANA LETTER WO + 0x0000, // 0x3093 HIRAGANA LETTER N + 0x3046, // 0x3094 HIRAGANA LETTER VU + 0x0000, // 0x3095 + 0x0000, // 0x3096 + 0x0000, // 0x3097 + 0x0000, // 0x3098 + 0x0000, // 0x3099 COMBINING 
KATAKANA-HIRAGANA VOICED SOUND MARK + 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK + 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + 0x0000, // 0x309d HIRAGANA ITERATION MARK + 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK + 0x0000, // 0x309f + 0x0000, // 0x30a0 + 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A + 0x30a2, // 0x30a2 KATAKANA LETTER A + 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I + 0x30a4, // 0x30a4 KATAKANA LETTER I + 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U + 0x30a6, // 0x30a6 KATAKANA LETTER U + 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E + 0x30a8, // 0x30a8 KATAKANA LETTER E + 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O + 0x30aa, // 0x30aa KATAKANA LETTER O + 0x30a2, // 0x30ab KATAKANA LETTER KA + 0x30a2, // 0x30ac KATAKANA LETTER GA + 0x30a4, // 0x30ad KATAKANA LETTER KI + 0x30a4, // 0x30ae KATAKANA LETTER GI + 0x30a6, // 0x30af KATAKANA LETTER KU + 0x30a6, // 0x30b0 KATAKANA LETTER GU + 0x30a8, // 0x30b1 KATAKANA LETTER KE + 0x30a8, // 0x30b2 KATAKANA LETTER GE + 0x30aa, // 0x30b3 KATAKANA LETTER KO + 0x30aa, // 0x30b4 KATAKANA LETTER GO + 0x30a2, // 0x30b5 KATAKANA LETTER SA + 0x30a2, // 0x30b6 KATAKANA LETTER ZA + 0x30a4, // 0x30b7 KATAKANA LETTER SI + 0x30a4, // 0x30b8 KATAKANA LETTER ZI + 0x30a6, // 0x30b9 KATAKANA LETTER SU + 0x30a6, // 0x30ba KATAKANA LETTER ZU + 0x30a8, // 0x30bb KATAKANA LETTER SE + 0x30a8, // 0x30bc KATAKANA LETTER ZE + 0x30aa, // 0x30bd KATAKANA LETTER SO + 0x30aa, // 0x30be KATAKANA LETTER ZO + 0x30a2, // 0x30bf KATAKANA LETTER TA + 0x30a2, // 0x30c0 KATAKANA LETTER DA + 0x30a4, // 0x30c1 KATAKANA LETTER TI + 0x30a4, // 0x30c2 KATAKANA LETTER DI + 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU + 0x30a6, // 0x30c4 KATAKANA LETTER TU + 0x30a6, // 0x30c5 KATAKANA LETTER DU + 0x30a8, // 0x30c6 KATAKANA LETTER TE + 0x30a8, // 0x30c7 KATAKANA LETTER DE + 0x30aa, // 0x30c8 KATAKANA LETTER TO + 0x30aa, // 0x30c9 KATAKANA LETTER DO + 0x30a2, // 
0x30ca KATAKANA LETTER NA + 0x30a4, // 0x30cb KATAKANA LETTER NI + 0x30a6, // 0x30cc KATAKANA LETTER NU + 0x30a8, // 0x30cd KATAKANA LETTER NE + 0x30aa, // 0x30ce KATAKANA LETTER NO + 0x30a2, // 0x30cf KATAKANA LETTER HA + 0x30a2, // 0x30d0 KATAKANA LETTER BA + 0x30a2, // 0x30d1 KATAKANA LETTER PA + 0x30a4, // 0x30d2 KATAKANA LETTER HI + 0x30a4, // 0x30d3 KATAKANA LETTER BI + 0x30a4, // 0x30d4 KATAKANA LETTER PI + 0x30a6, // 0x30d5 KATAKANA LETTER HU + 0x30a6, // 0x30d6 KATAKANA LETTER BU + 0x30a6, // 0x30d7 KATAKANA LETTER PU + 0x30a8, // 0x30d8 KATAKANA LETTER HE + 0x30a8, // 0x30d9 KATAKANA LETTER BE + 0x30a8, // 0x30da KATAKANA LETTER PE + 0x30aa, // 0x30db KATAKANA LETTER HO + 0x30aa, // 0x30dc KATAKANA LETTER BO + 0x30aa, // 0x30dd KATAKANA LETTER PO + 0x30a2, // 0x30de KATAKANA LETTER MA + 0x30a4, // 0x30df KATAKANA LETTER MI + 0x30a6, // 0x30e0 KATAKANA LETTER MU + 0x30a8, // 0x30e1 KATAKANA LETTER ME + 0x30aa, // 0x30e2 KATAKANA LETTER MO + 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA + 0x30a2, // 0x30e4 KATAKANA LETTER YA + 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU + 0x30a6, // 0x30e6 KATAKANA LETTER YU + 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO + 0x30aa, // 0x30e8 KATAKANA LETTER YO + 0x30a2, // 0x30e9 KATAKANA LETTER RA + 0x30a4, // 0x30ea KATAKANA LETTER RI + 0x30a6, // 0x30eb KATAKANA LETTER RU + 0x30a8, // 0x30ec KATAKANA LETTER RE + 0x30aa, // 0x30ed KATAKANA LETTER RO + 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA + 0x30a2, // 0x30ef KATAKANA LETTER WA + 0x30a4, // 0x30f0 KATAKANA LETTER WI + 0x30a8, // 0x30f1 KATAKANA LETTER WE + 0x30aa, // 0x30f2 KATAKANA LETTER WO + 0x0000, // 0x30f3 KATAKANA LETTER N + 0x30a6, // 0x30f4 KATAKANA LETTER VU + 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA + 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE + 0x30a2, // 0x30f7 KATAKANA LETTER VA + 0x30a4, // 0x30f8 KATAKANA LETTER VI + 0x30a8, // 0x30f9 KATAKANA LETTER VE + 0x30aa // 0x30fa KATAKANA LETTER VO + // 0x0000, // 0x30fb KATAKANA MIDDLE DOT + // 0x0000, // 0x30fc 
KATAKANA-HIRAGANA PROLONGED SOUND MARK + // 0x0000, // 0x30fd KATAKANA ITERATION MARK + // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK + // 0x0000 // 0x30ff +}; + +static sal_Unicode table_halfwidth[] = { + // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP + // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET + // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET + // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA + // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT + 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO + 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A + 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I + 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U + 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E + 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O + 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA + 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU + 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO + 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU + 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A + 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I + 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U + 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E + 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O + 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA + 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI + 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU + 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE + 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO + 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA + 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI + 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU + 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE + 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO + 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA + 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI + 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU + 0xff74, // 0xff83 HALFWIDTH KATAKANA 
LETTER TE + 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO + 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA + 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI + 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU + 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE + 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO + 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA + 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI + 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU + 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE + 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO + 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA + 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI + 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU + 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME + 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO + 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA + 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU + 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO + 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA + 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI + 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU + 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE + 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO + 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA + // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N + // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK + // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +}; + + +OUString SAL_CALL +ignoreProlongedSoundMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. 
+ offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Conversion + while (-- nCount > 0) { + currentChar = *src ++; + + if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK + currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + + if (0x3041 <= previousChar && previousChar <= 0x30fa) { + currentChar = table_normalwidth[ previousChar - 0x3041 ]; + } + else if (0xff66 <= previousChar && previousChar <= 0xff9c) { + currentChar = table_halfwidth[ previousChar - 0xff66 ]; + } + } + + *p ++ = position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+ +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx new file mode 100644 index 000000000000..537899442f9b --- /dev/null +++ b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx @@ -0,0 +1,142 @@ +/************************************************************************* + * + * $RCSfile: ignoreSeZe_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_SeZe_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreSeZe_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // SI + E --> SE + if (previousChar == 0x30B7 && // KATAKANA LETTER SI + currentChar == 0x30A7 ) { // KATAKANA LETTER SMALL E + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30BB; // KATAKANA LETTER SE + previousChar = *src ++; + nCount --; + continue; + } + + // SI + E --> SE + if (previousChar == 0x3057 && // HIRAGANA LETTER SI + currentChar == 0x3047 ) { // HIRAGANA LETTER SMALL E + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x305B; // HIRAGANA LETTER SE + previousChar = *src ++; + nCount --; + continue; + } + + // ZI + E --> ZE + if (previousChar == 0x30B8 && // KATAKANA LETTER ZI + currentChar == 0x30A7 ) { // KATAKANA LETTER SMALL E + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30BC; // KATAKANA LETTER ZE + 
previousChar = *src ++; + nCount --; + continue; + } + + // ZI + E --> ZE + if (previousChar == 0x3058 && // HIRAGANA LETTER ZI + currentChar == 0x3047 ) { // HIRAGANA LETTER SMALL E + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x305C; // HIRAGANA LETTER ZE + previousChar = *src ++; + nCount --; + continue; + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + + +} } } } diff --git a/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx new file mode 100644 index 000000000000..3518488ff2d3 --- /dev/null +++ b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx @@ -0,0 +1,111 @@ +/************************************************************************* + * + * $RCSfile: ignoreSeparator_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. 
+ * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_Separator_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OneToOneMappingTable_t ignoreSeparatorTable[] = { + MAKE_PAIR( 0x0021, 0x002C ), // EXCLAMATION MARK + MAKE_PAIR( 0x0023, 0x002C ), // NUMBER SIGN + MAKE_PAIR( 0x0024, 0x002C ), // DOLLAR SIGN + MAKE_PAIR( 0x0025, 0x002C ), // PERCENT SIGN + MAKE_PAIR( 0x0026, 0x002C ), // AMPERSAND + MAKE_PAIR( 0x002A, 0x002C ), // ASTERISK + MAKE_PAIR( 0x002B, 0x002C ), // PLUS SIGN + MAKE_PAIR( 0x002C, 0x002C ), // COMMA + MAKE_PAIR( 0x002D, 0x002C ), // HYPHEN-MINUS + MAKE_PAIR( 0x002E, 0x002C ), // FULL STOP + MAKE_PAIR( 0x002F, 0x002C ), // SOLIDUS + MAKE_PAIR( 0x003A, 0x002C ), // COLON + MAKE_PAIR( 0x003B, 0x002C ), // SEMICOLON + MAKE_PAIR( 0x003C, 0x002C ), // LESS-THAN SIGN + MAKE_PAIR( 0x003D, 0x002C ), // EQUALS SIGN + MAKE_PAIR( 0x003E, 0x002C ), // GREATER-THAN SIGN + MAKE_PAIR( 0x005C, 0x002C ), // REVERSE SOLIDUS + MAKE_PAIR( 0x005F, 0x002C ), // LOW LINE + MAKE_PAIR( 0x007B, 0x002C ), // LEFT CURLY BRACKET + 
MAKE_PAIR( 0x007C, 0x002C ), // VERTICAL LINE + MAKE_PAIR( 0x007D, 0x002C ), // RIGHT CURLY BRACKET + MAKE_PAIR( 0x007E, 0x002C ), // TILDE + MAKE_PAIR( 0x00A5, 0x002C ), // YEN SIGN + MAKE_PAIR( 0xFF01, 0x002C ), // FULLWIDTH EXCLAMATION MARK + MAKE_PAIR( 0xFF03, 0x002C ), // FULLWIDTH NUMBER SIGN + MAKE_PAIR( 0xFF04, 0x002C ), // FULLWIDTH DOLLAR SIGN + MAKE_PAIR( 0xFF05, 0x002C ), // FULLWIDTH PERCENT SIGN + MAKE_PAIR( 0xFF06, 0x002C ), // FULLWIDTH AMPERSAND + MAKE_PAIR( 0xFF0A, 0x002C ), // FULLWIDTH ASTERISK + MAKE_PAIR( 0xFF0B, 0x002C ), // FULLWIDTH PLUS SIGN + MAKE_PAIR( 0xFF0C, 0x002C ), // FULLWIDTH COMMA + MAKE_PAIR( 0xFF0D, 0x002C ), // FULLWIDTH HYPHEN-MINUS + MAKE_PAIR( 0xFF0E, 0x002C ), // FULLWIDTH FULL STOP + MAKE_PAIR( 0xFF0F, 0x002C ), // FULLWIDTH SOLIDUS + MAKE_PAIR( 0xFF1A, 0x002C ), // FULLWIDTH COLON + MAKE_PAIR( 0xFF1B, 0x002C ), // FULLWIDTH SEMICOLON + MAKE_PAIR( 0xFF1C, 0x002C ), // FULLWIDTH LESS-THAN SIGN + MAKE_PAIR( 0xFF1D, 0x002C ), // FULLWIDTH EQUALS SIGN + MAKE_PAIR( 0xFF1E, 0x002C ), // FULLWIDTH GREATER-THAN SIGN + MAKE_PAIR( 0xFF3C, 0x002C ), // FULLWIDTH REVERSE SOLIDUS + MAKE_PAIR( 0xFF3F, 0x002C ), // FULLWIDTH LOW LINE + MAKE_PAIR( 0xFF5B, 0x002C ), // FULLWIDTH LEFT CURLY BRACKET + MAKE_PAIR( 0xFF5C, 0x002C ), // FULLWIDTH VERTICAL LINE + MAKE_PAIR( 0xFF5D, 0x002C ), // FULLWIDTH RIGHT CURLY BRACKET + MAKE_PAIR( 0xFF5E, 0x002C ), // FULLWIDTH TILDE + MAKE_PAIR( 0xFFE5, 0x002C ), // FULLWIDTH YEN SIGN +}; + +OUString SAL_CALL +ignoreSeparator_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + oneToOneMapping table(ignoreSeparatorTable, sizeof(ignoreSeparatorTable)); + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, table ); +} + +} } } } + diff --git a/i18npool/source/transliteration/ignoreSize_ja_JP.cxx b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx new file mode 100644 index 
000000000000..7c4d2ce4279c --- /dev/null +++ b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx @@ -0,0 +1,74 @@ +/************************************************************************* + * + * $RCSfile: ignoreSize_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_Size_ja_JP +#include <transliteration_Ignore.hxx> +#define TRANSLITERATION_smallToLarge_ja_JP +#define TRANSLITERATION_largeToSmall_ja_JP +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreSize_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + smallToLarge_ja_JP t1; + return t1.transliterate(inStr, startPos, nCount, offset); +} + + +Sequence< OUString > SAL_CALL +ignoreSize_ja_JP::transliterateRange( const OUString& str1, const OUString& str2 ) + throw(RuntimeException) +{ + smallToLarge_ja_JP t1; + largeToSmall_ja_JP t2; + + return transliteration_Ignore::transliterateRange(str1, str2, t1, t2); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx new file mode 100644 index 000000000000..83fa4e5c530f --- /dev/null +++ b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx @@ -0,0 +1,80 @@ +/************************************************************************* + * + * $RCSfile: ignoreSpace_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject 
to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_Space_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OneToOneMappingTable_t ignoreSpace_ja_JP_mappingTable[] = { + MAKE_PAIR( 0x0020, 0xffff ), // SPACE + MAKE_PAIR( 0x00A0, 0xffff ), // NO-BREAK SPACE + MAKE_PAIR( 0x2002, 0xffff ), // EN SPACE + MAKE_PAIR( 0x2003, 0xffff ), // EM SPACE + MAKE_PAIR( 0x2004, 0xffff ), // THREE-PER-EM SPACE + MAKE_PAIR( 0x2005, 0xffff ), // FOUR-PER-EM SPACE + MAKE_PAIR( 0x2006, 0xffff ), // SIX-PER-EM SPACE + MAKE_PAIR( 0x2007, 0xffff ), // FIGURE SPACE + MAKE_PAIR( 0x2008, 0xffff ), // PUNCTUATION SPACE + MAKE_PAIR( 0x2009, 0xffff ), // THIN SPACE + MAKE_PAIR( 0x200A, 0xffff ), // HAIR SPACE + MAKE_PAIR( 0x200B, 0xffff ), // ZERO WIDTH SPACE + MAKE_PAIR( 0x202F, 0xffff ), // NARROW NO-BREAK SPACE + MAKE_PAIR( 
0x3000, 0xffff ), // IDEOGRAPHIC SPACE + MAKE_PAIR( 0x303F, 0xffff ) // IDEOGRAPHIC HALF FILL SPACE +}; + + +OUString SAL_CALL +ignoreSpace_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + oneToOneMapping table(ignoreSpace_ja_JP_mappingTable, sizeof(ignoreSpace_ja_JP_mappingTable)); + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, table ); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx new file mode 100644 index 000000000000..4146bbfd7cde --- /dev/null +++ b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx @@ -0,0 +1,145 @@ +/************************************************************************* + * + * $RCSfile: ignoreTiJi_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. 
+ * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_TiJi_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreTiJi_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. 
+ offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // TU + I --> TI + // TE + I --> TI + if ( (previousChar == 0x30C4 || // KATAKANA LETTER TU + previousChar == 0x30C6 ) && // KATAKANA LETTER TE + currentChar == 0x30A3 ) { // KATAKANA LETTER SMALL I + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30C1; // KATAKANA LETTER TI + previousChar = *src ++; + nCount --; + continue; + } + + // TU + I --> TI + // TE + I --> TI + if ( (previousChar == 0x3064 || // HIRAGANA LETTER TU + previousChar == 0x3066 ) && // HIRAGANA LETTER TE + currentChar == 0x3043 ) { // HIRAGANA LETTER SMALL I + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x3061; // HIRAGANA LETTER TI + previousChar = *src ++; + nCount --; + continue; + } + + // DE + I --> ZI + if (previousChar == 0x30C7 && // KATAKANA LETTER DE + currentChar == 0x30A3 ) { // KATAKANA LETTER SMALL I + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x30B8; // KATAKANA LETTER ZI + previousChar = *src ++; + nCount --; + continue; + } + + // DE + I --> ZI + if (previousChar == 0x3067 && // HIRAGANA LETTER DE + currentChar == 0x3043 ) { // HIRAGANA LETTER SMALL I + position ++; + *p ++ = position; + position ++; + *dst ++ = 0x3058; // HIRAGANA LETTER ZI + previousChar = *src ++; + nCount --; + continue; + } + + *p ++ = position; + position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *p = position; + *dst ++ = previousChar; + } + + *dst = (sal_Unicode) 0; + + newStr->length = sal_Int32(dst - newStr->buffer); + offset.realloc(newStr->length); + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +} } } } diff --git a/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx new file mode 100644 index 000000000000..8817126824b2 --- /dev/null +++ b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx @@ -0,0 +1,79 @@ +/************************************************************************* + * + * $RCSfile: ignoreTraditionalKana_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_TraditionalKana_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Unicode +ignoreTraditionalKana_ja_JP_translator (const sal_Unicode c) +{ + + switch (c) { + case 0x3090: // HIRAGANA LETTER WI + return 0x3044; // HIRAGANA LETTER I + + case 0x3091: // HIRAGANA LETTER WE + return 0x3048; // HIRAGANA LETTER E + + case 0x30F0: // KATAKANA LETTER WI + return 0x30A4; // KATAKANA LETTER I + + case 0x30F1: // KATAKANA LETTER WE + return 0x30A8; // KATAKANA LETTER E + } + return c; +} + +OUString SAL_CALL +ignoreTraditionalKana_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, ignoreTraditionalKana_ja_JP_translator ); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx new file mode 100644 index 000000000000..8f79ebf28f5f --- /dev/null +++ b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx @@ -0,0 +1,757 @@ +/************************************************************************* + * + * $RCSfile: ignoreTraditionalKanji_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards 
Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_TraditionalKanji_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +// traditional Kanji characters --> modern Kanji characters +OneToOneMappingTable_t traditionalKanji2updateKanji[] = { + MAKE_PAIR( 0x4E17, 0x4E16 ), + MAKE_PAIR( 0x4E55, 0x864E ), + MAKE_PAIR( 0x4E58, 0x4E57 ), + MAKE_PAIR( 0x4E82, 0x4E71 ), + MAKE_PAIR( 0x4E8A, 0x4E8B ), + MAKE_PAIR( 0x4E98, 0x4E99 ), + MAKE_PAIR( 0x4E9F, 0x6975 ), + MAKE_PAIR( 0x4EB0, 0x4EAC ), + MAKE_PAIR( 0x4EC2, 0x50CD ), + MAKE_PAIR( 0x4ECE, 0x5F93 ), + MAKE_PAIR( 0x4EED, 0x4EDE ), + MAKE_PAIR( 0x4F1C, 0x5005 ), + MAKE_PAIR( 0x4F5B, 0x4ECF ), + MAKE_PAIR( 0x4F86, 0x6765 ), + MAKE_PAIR( 
0x4FAB, 0x4F5E ), + MAKE_PAIR( 0x5047, 0x4EEE ), + MAKE_PAIR( 0x509A, 0x52B9 ), + MAKE_PAIR( 0x50B3, 0x4F1D ), + MAKE_PAIR( 0x50DE, 0x507D ), + MAKE_PAIR( 0x50E3, 0x50ED ), + MAKE_PAIR( 0x50F9, 0x4FA1 ), + MAKE_PAIR( 0x5109, 0x5039 ), + MAKE_PAIR( 0x5118, 0x4FAD ), + MAKE_PAIR( 0x5152, 0x5150 ), + MAKE_PAIR( 0x5154, 0x514E ), + MAKE_PAIR( 0x5169, 0x4E21 ), + MAKE_PAIR( 0x518C, 0x518A ), + MAKE_PAIR( 0x5190, 0x5192 ), + MAKE_PAIR( 0x51A8, 0x5BCC ), + MAKE_PAIR( 0x51A9, 0x5199 ), + MAKE_PAIR( 0x51B0, 0x6C37 ), + MAKE_PAIR( 0x51B1, 0x51B4 ), + MAKE_PAIR( 0x51B2, 0x6C96 ), + MAKE_PAIR( 0x51B3, 0x6C7A ), + MAKE_PAIR( 0x51B5, 0x6CC1 ), + MAKE_PAIR( 0x51C9, 0x6DBC ), + MAKE_PAIR( 0x51D6, 0x6E96 ), + MAKE_PAIR( 0x51DC, 0x51DB ), + MAKE_PAIR( 0x51FE, 0x51FD ), + MAKE_PAIR( 0x5204, 0x5203 ), + MAKE_PAIR( 0x5227, 0x52AB ), + MAKE_PAIR( 0x5269, 0x5270 ), + MAKE_PAIR( 0x5271, 0x5263 ), + MAKE_PAIR( 0x5283, 0x753B ), + MAKE_PAIR( 0x528D, 0x5263 ), + MAKE_PAIR( 0x5291, 0x5264 ), + MAKE_PAIR( 0x5292, 0x5263 ), + MAKE_PAIR( 0x5294, 0x5263 ), + MAKE_PAIR( 0x52B5, 0x5238 ), + MAKE_PAIR( 0x52DE, 0x52B4 ), + MAKE_PAIR( 0x52F3, 0x52F2 ), + MAKE_PAIR( 0x52F5, 0x52B1 ), + MAKE_PAIR( 0x52F8, 0x52E7 ), + MAKE_PAIR( 0x5340, 0x533A ), + MAKE_PAIR( 0x5346, 0x5352 ), + MAKE_PAIR( 0x5377, 0x5DFB ), + MAKE_PAIR( 0x537B, 0x5374 ), + MAKE_PAIR( 0x53B0, 0x5EE0 ), + MAKE_PAIR( 0x53C3, 0x53C2 ), + MAKE_PAIR( 0x548F, 0x8A60 ), + MAKE_PAIR( 0x5492, 0x546A ), + MAKE_PAIR( 0x54AF, 0x5580 ), + MAKE_PAIR( 0x54E5, 0x6B4C ), + MAKE_PAIR( 0x5557, 0x5556 ), + MAKE_PAIR( 0x55AE, 0x5358 ), + MAKE_PAIR( 0x5650, 0x5668 ), + MAKE_PAIR( 0x5680, 0x5BE7 ), + MAKE_PAIR( 0x568F, 0x5694 ), + MAKE_PAIR( 0x56B4, 0x53B3 ), + MAKE_PAIR( 0x56D1, 0x5631 ), + MAKE_PAIR( 0x56D8, 0x56DE ), + MAKE_PAIR( 0x5708, 0x570F ), + MAKE_PAIR( 0x570B, 0x56FD ), + MAKE_PAIR( 0x570D, 0x56F2 ), + MAKE_PAIR( 0x5713, 0x5186 ), + MAKE_PAIR( 0x5716, 0x56F3 ), + MAKE_PAIR( 0x5718, 0x56E3 ), + MAKE_PAIR( 0x57C0, 0x5782 ), + MAKE_PAIR( 0x57D3, 0x57D2 
), + MAKE_PAIR( 0x57FC, 0x5D0E ), + MAKE_PAIR( 0x582F, 0x5C2D ), + MAKE_PAIR( 0x5872, 0x5834 ), + MAKE_PAIR( 0x58AE, 0x5815 ), + MAKE_PAIR( 0x58D3, 0x5727 ), + MAKE_PAIR( 0x58D8, 0x5841 ), + MAKE_PAIR( 0x58DE, 0x58CA ), + MAKE_PAIR( 0x58E4, 0x58CC ), + MAKE_PAIR( 0x58EF, 0x58EE ), + MAKE_PAIR( 0x58F1, 0x4E00 ), + MAKE_PAIR( 0x58F9, 0x4E00 ), + MAKE_PAIR( 0x58FA, 0x58F7 ), + MAKE_PAIR( 0x58FB, 0x5A7F ), + MAKE_PAIR( 0x58FD, 0x5BFF ), + MAKE_PAIR( 0x5918, 0x536F ), + MAKE_PAIR( 0x591B, 0x591A ), + MAKE_PAIR( 0x5932, 0x672C ), + MAKE_PAIR( 0x5967, 0x5965 ), + MAKE_PAIR( 0x5969, 0x5333 ), + MAKE_PAIR( 0x596C, 0x5968 ), + MAKE_PAIR( 0x59D9, 0x598A ), + MAKE_PAIR( 0x5A6C, 0x6DEB ), + MAKE_PAIR( 0x5AD0, 0x5B32 ), + MAKE_PAIR( 0x5AFB, 0x5AFA ), + MAKE_PAIR( 0x5B43, 0x5B22 ), + MAKE_PAIR( 0x5B78, 0x5B66 ), + MAKE_PAIR( 0x5B9D, 0x73CD ), + MAKE_PAIR( 0x5BC3, 0x51A4 ), + MAKE_PAIR( 0x5BC7, 0x51A6 ), + MAKE_PAIR( 0x5BC9, 0x9DB4 ), + MAKE_PAIR( 0x5BE2, 0x5BDD ), + MAKE_PAIR( 0x5BE6, 0x5B9F ), + MAKE_PAIR( 0x5BEB, 0x5199 ), + MAKE_PAIR( 0x5BF3, 0x73CD ), + MAKE_PAIR( 0x5BF6, 0x73CD ), + MAKE_PAIR( 0x5C05, 0x524B ), + MAKE_PAIR( 0x5C07, 0x5C06 ), + MAKE_PAIR( 0x5C08, 0x5C02 ), + MAKE_PAIR( 0x5C0D, 0x5BFE ), + MAKE_PAIR( 0x5C13, 0x723E ), + MAKE_PAIR( 0x5C46, 0x5C4A ), + MAKE_PAIR( 0x5C6C, 0x5C5E ), + MAKE_PAIR( 0x5CE9, 0x5CE8 ), + MAKE_PAIR( 0x5CEF, 0x5CF0 ), + MAKE_PAIR( 0x5CFD, 0x5CE1 ), + MAKE_PAIR( 0x5D15, 0x5D16 ), + MAKE_PAIR( 0x5D17, 0x5CA1 ), + MAKE_PAIR( 0x5D18, 0x5D19 ), + MAKE_PAIR( 0x5D5C, 0x5D0E ), + MAKE_PAIR( 0x5D73, 0x5D6F ), + MAKE_PAIR( 0x5D8B, 0x5CF6 ), + MAKE_PAIR( 0x5D8C, 0x5CF6 ), + MAKE_PAIR( 0x5DBD, 0x5CB3 ), + MAKE_PAIR( 0x5DD6, 0x5DCC ), + MAKE_PAIR( 0x5DDB, 0x5DDD ), + MAKE_PAIR( 0x5DF5, 0x536E ), + MAKE_PAIR( 0x5E0B, 0x7D19 ), + MAKE_PAIR( 0x5E36, 0x5E2F ), + MAKE_PAIR( 0x5E64, 0x5E63 ), + MAKE_PAIR( 0x5EC1, 0x53A0 ), + MAKE_PAIR( 0x5EC8, 0x53A6 ), + MAKE_PAIR( 0x5ECF, 0x53A9 ), + MAKE_PAIR( 0x5ED0, 0x53A9 ), + MAKE_PAIR( 0x5EDA, 0x53A8 ), + MAKE_PAIR( 
0x5EDD, 0x53AE ), + MAKE_PAIR( 0x5EE2, 0x5EC3 ), + MAKE_PAIR( 0x5EE3, 0x5E83 ), + MAKE_PAIR( 0x5EF0, 0x5E81 ), + MAKE_PAIR( 0x5EF3, 0x5E81 ), + MAKE_PAIR( 0x5F03, 0x68C4 ), + MAKE_PAIR( 0x5F09, 0x5958 ), + MAKE_PAIR( 0x5F0C, 0x4E00 ), + MAKE_PAIR( 0x5F0D, 0x4E8C ), + MAKE_PAIR( 0x5F10, 0x4E8C ), + MAKE_PAIR( 0x5F2F, 0x6E7E ), + MAKE_PAIR( 0x5F48, 0x5F3E ), + MAKE_PAIR( 0x5F4C, 0x5F25 ), + MAKE_PAIR( 0x5F4E, 0x6E7E ), + MAKE_PAIR( 0x5F5C, 0x5F5D ), + MAKE_PAIR( 0x5F83, 0x5F80 ), + MAKE_PAIR( 0x5F91, 0x5F84 ), + MAKE_PAIR( 0x5F9E, 0x5F93 ), + MAKE_PAIR( 0x5FA0, 0x6765 ), + MAKE_PAIR( 0x5FF0, 0x60B4 ), + MAKE_PAIR( 0x5FFB, 0x6B23 ), + MAKE_PAIR( 0x6046, 0x6052 ), + MAKE_PAIR( 0x604A, 0x5354 ), + MAKE_PAIR( 0x6060, 0x602A ), + MAKE_PAIR( 0x60B3, 0x5FB3 ), + MAKE_PAIR( 0x60E0, 0x6075 ), + MAKE_PAIR( 0x60E1, 0x60AA ), + MAKE_PAIR( 0x60F1, 0x60A9 ), + MAKE_PAIR( 0x613C, 0x614E ), + MAKE_PAIR( 0x613D, 0x535A ), + MAKE_PAIR( 0x6158, 0x60E8 ), + MAKE_PAIR( 0x615A, 0x6159 ), + MAKE_PAIR( 0x6187, 0x61A9 ), + MAKE_PAIR( 0x61C9, 0x5FDC ), + MAKE_PAIR( 0x61F4, 0x61FA ), + MAKE_PAIR( 0x61F7, 0x61D0 ), + MAKE_PAIR( 0x6200, 0x604B ), + MAKE_PAIR( 0x621D, 0x8CA1 ), + MAKE_PAIR( 0x621E, 0x621B ), + MAKE_PAIR( 0x6230, 0x6226 ), + MAKE_PAIR( 0x6232, 0x622F ), + MAKE_PAIR( 0x6268, 0x6260 ), + MAKE_PAIR( 0x62AC, 0x64E1 ), + MAKE_PAIR( 0x62C2, 0x6255 ), + MAKE_PAIR( 0x62D4, 0x629C ), + MAKE_PAIR( 0x62DC, 0x62DD ), + MAKE_PAIR( 0x633E, 0x631F ), + MAKE_PAIR( 0x63D2, 0x633F ), + MAKE_PAIR( 0x6416, 0x63FA ), + MAKE_PAIR( 0x641C, 0x635C ), + MAKE_PAIR( 0x64C7, 0x629E ), + MAKE_PAIR( 0x64D4, 0x62C5 ), + MAKE_PAIR( 0x64DA, 0x62E0 ), + MAKE_PAIR( 0x64E7, 0x6319 ), + MAKE_PAIR( 0x64F4, 0x62E1 ), + MAKE_PAIR( 0x651C, 0x643A ), + MAKE_PAIR( 0x651D, 0x6442 ), + MAKE_PAIR( 0x652A, 0x64B9 ), + MAKE_PAIR( 0x6536, 0x53CE ), + MAKE_PAIR( 0x6548, 0x52B9 ), + MAKE_PAIR( 0x654D, 0x53D9 ), + MAKE_PAIR( 0x6555, 0x52C5 ), + MAKE_PAIR( 0x6558, 0x53D9 ), + MAKE_PAIR( 0x6578, 0x6570 ), + MAKE_PAIR( 0x6588, 0x5B66 
), + MAKE_PAIR( 0x6589, 0x658E ), + MAKE_PAIR( 0x65B7, 0x65AD ), + MAKE_PAIR( 0x65D9, 0x65DB ), + MAKE_PAIR( 0x663F, 0x66E0 ), + MAKE_PAIR( 0x6644, 0x6643 ), + MAKE_PAIR( 0x6649, 0x664B ), + MAKE_PAIR( 0x665D, 0x663C ), + MAKE_PAIR( 0x668E, 0x6620 ), + MAKE_PAIR( 0x66B8, 0x77AD ), + MAKE_PAIR( 0x66C9, 0x6681 ), + MAKE_PAIR( 0x66F0, 0x4E91 ), + MAKE_PAIR( 0x66F5, 0x66F3 ), + MAKE_PAIR( 0x66FD, 0x66FE ), + MAKE_PAIR( 0x6703, 0x4F1A ), + MAKE_PAIR( 0x6716, 0x6717 ), + MAKE_PAIR( 0x671E, 0x671F ), + MAKE_PAIR( 0x6764, 0x6803 ), + MAKE_PAIR( 0x67A1, 0x685D ), + MAKE_PAIR( 0x67A6, 0x6AE8 ), + MAKE_PAIR( 0x67A9, 0x677E ), + MAKE_PAIR( 0x67CF, 0x6822 ), + MAKE_PAIR( 0x6816, 0x68F2 ), + MAKE_PAIR( 0x6840, 0x685D ), + MAKE_PAIR( 0x689D, 0x6761 ), + MAKE_PAIR( 0x68A6, 0x5922 ), + MAKE_PAIR( 0x68CA, 0x68CB ), + MAKE_PAIR( 0x68E7, 0x685F ), + MAKE_PAIR( 0x6936, 0x68D5 ), + MAKE_PAIR( 0x695D, 0x68DF ), + MAKE_PAIR( 0x6973, 0x6885 ), + MAKE_PAIR( 0x69AE, 0x6804 ), + MAKE_PAIR( 0x69C7, 0x69D9 ), + MAKE_PAIR( 0x6A02, 0x697D ), + MAKE_PAIR( 0x6A13, 0x697C ), + MAKE_PAIR( 0x6A1E, 0x67A2 ), + MAKE_PAIR( 0x6A23, 0x69D8 ), + MAKE_PAIR( 0x6A62, 0x6955 ), + MAKE_PAIR( 0x6A9C, 0x6867 ), + MAKE_PAIR( 0x6AA2, 0x691C ), + MAKE_PAIR( 0x6AAA, 0x6ADF ), + MAKE_PAIR( 0x6AAE, 0x68BC ), + MAKE_PAIR( 0x6AB3, 0x68B9 ), + MAKE_PAIR( 0x6AC1, 0x6A12 ), + MAKE_PAIR( 0x6AFB, 0x685C ), + MAKE_PAIR( 0x6B0A, 0x6A29 ), + MAKE_PAIR( 0x6B50, 0x6B27 ), + MAKE_PAIR( 0x6B61, 0x6B53 ), + MAKE_PAIR( 0x6B78, 0x5E30 ), + MAKE_PAIR( 0x6B7F, 0x6CA1 ), + MAKE_PAIR( 0x6B98, 0x6B8B ), + MAKE_PAIR( 0x6BB1, 0x6BB2 ), + MAKE_PAIR( 0x6BBC, 0x6BBB ), + MAKE_PAIR( 0x6BC6, 0x6BB4 ), + MAKE_PAIR( 0x6BCB, 0x6BCD ), + MAKE_PAIR( 0x6BD3, 0x80B2 ), + MAKE_PAIR( 0x6C23, 0x6C17 ), + MAKE_PAIR( 0x6C8D, 0x51B4 ), + MAKE_PAIR( 0x6C92, 0x6CA1 ), + MAKE_PAIR( 0x6CEA, 0x6D99 ), + MAKE_PAIR( 0x6D0C, 0x51BD ), + MAKE_PAIR( 0x6D35, 0x5F87 ), + MAKE_PAIR( 0x6D79, 0x5CE1 ), + MAKE_PAIR( 0x6D8C, 0x6E67 ), + MAKE_PAIR( 0x6DB8, 0x51C5 ), + MAKE_PAIR( 
0x6DD2, 0x51C4 ), + MAKE_PAIR( 0x6DE8, 0x6D44 ), + MAKE_PAIR( 0x6DFA, 0x6D45 ), + MAKE_PAIR( 0x6E0A, 0x6DF5 ), + MAKE_PAIR( 0x6E15, 0x6DF5 ), + MAKE_PAIR( 0x6EAA, 0x6E13 ), + MAKE_PAIR( 0x6EAF, 0x9061 ), + MAKE_PAIR( 0x6EEF, 0x6EDE ), + MAKE_PAIR( 0x6EFF, 0x6E80 ), + MAKE_PAIR( 0x6F5B, 0x6F5C ), + MAKE_PAIR( 0x6F74, 0x7026 ), + MAKE_PAIR( 0x6F80, 0x6E0B ), + MAKE_PAIR( 0x6F81, 0x6E0B ), + MAKE_PAIR( 0x6F82, 0x6F84 ), + MAKE_PAIR( 0x6F91, 0x6E9C ), + MAKE_PAIR( 0x6FA4, 0x6CA2 ), + MAKE_PAIR( 0x6FD5, 0x6E7F ), + MAKE_PAIR( 0x6FDF, 0x6E08 ), + MAKE_PAIR( 0x6FE4, 0x6D9B ), + MAKE_PAIR( 0x6FF1, 0x6D5C ), + MAKE_PAIR( 0x6FF3, 0x6F5C ), + MAKE_PAIR( 0x6FF6, 0x95CA ), + MAKE_PAIR( 0x7027, 0x6EDD ), + MAKE_PAIR( 0x7030, 0x6FD4 ), + MAKE_PAIR( 0x704C, 0x6F45 ), + MAKE_PAIR( 0x7063, 0x6E7E ), + MAKE_PAIR( 0x70CB, 0x6077 ), + MAKE_PAIR( 0x70DF, 0x7159 ), + MAKE_PAIR( 0x70F1, 0x70AF ), + MAKE_PAIR( 0x7188, 0x7155 ), + MAKE_PAIR( 0x7199, 0x7155 ), + MAKE_PAIR( 0x71C8, 0x706F ), + MAKE_PAIR( 0x71D2, 0x713C ), + MAKE_PAIR( 0x71DF, 0x55B6 ), + MAKE_PAIR( 0x71FB, 0x718F ), + MAKE_PAIR( 0x71FF, 0x8000 ), + MAKE_PAIR( 0x7210, 0x7089 ), + MAKE_PAIR( 0x722D, 0x4E89 ), + MAKE_PAIR( 0x7232, 0x70BA ), + MAKE_PAIR( 0x723C, 0x4FCE ), + MAKE_PAIR( 0x7246, 0x58BB ), + MAKE_PAIR( 0x7281, 0x7282 ), + MAKE_PAIR( 0x72A7, 0x72A0 ), + MAKE_PAIR( 0x72E2, 0x72F8 ), + MAKE_PAIR( 0x72F9, 0x72ED ), + MAKE_PAIR( 0x734E, 0x5968 ), + MAKE_PAIR( 0x7368, 0x72EC ), + MAKE_PAIR( 0x7375, 0x731F ), + MAKE_PAIR( 0x7378, 0x7363 ), + MAKE_PAIR( 0x737B, 0x732E ), + MAKE_PAIR( 0x73CE, 0x73CD ), + MAKE_PAIR( 0x73F1, 0x74D4 ), + MAKE_PAIR( 0x7464, 0x7476 ), + MAKE_PAIR( 0x746F, 0x7405 ), + MAKE_PAIR( 0x74A2, 0x7460 ), + MAKE_PAIR( 0x74E3, 0x5F01 ), + MAKE_PAIR( 0x751E, 0x5617 ), + MAKE_PAIR( 0x753C, 0x753A ), + MAKE_PAIR( 0x7544, 0x7559 ), + MAKE_PAIR( 0x7546, 0x755D ), + MAKE_PAIR( 0x754A, 0x8015 ), + MAKE_PAIR( 0x754D, 0x754C ), + MAKE_PAIR( 0x7567, 0x7565 ), + MAKE_PAIR( 0x756B, 0x753B ), + MAKE_PAIR( 0x7574, 0x7587 
), + MAKE_PAIR( 0x7576, 0x5F53 ), + MAKE_PAIR( 0x7582, 0x7573 ), + MAKE_PAIR( 0x7589, 0x7573 ), + MAKE_PAIR( 0x758A, 0x7573 ), + MAKE_PAIR( 0x7609, 0x7652 ), + MAKE_PAIR( 0x7661, 0x75F4 ), + MAKE_PAIR( 0x767C, 0x767A ), + MAKE_PAIR( 0x7683, 0x8C8C ), + MAKE_PAIR( 0x7688, 0x5E30 ), + MAKE_PAIR( 0x768B, 0x7690 ), + MAKE_PAIR( 0x7693, 0x6667 ), + MAKE_PAIR( 0x76B7, 0x9F13 ), + MAKE_PAIR( 0x76B9, 0x76B8 ), + MAKE_PAIR( 0x76C3, 0x676F ), + MAKE_PAIR( 0x76D6, 0x84CB ), + MAKE_PAIR( 0x76DC, 0x76D7 ), + MAKE_PAIR( 0x76E1, 0x5C3D ), + MAKE_PAIR( 0x771E, 0x771F ), + MAKE_PAIR( 0x7726, 0x7725 ), + MAKE_PAIR( 0x777F, 0x53E1 ), + MAKE_PAIR( 0x783F, 0x9271 ), + MAKE_PAIR( 0x7845, 0x73EA ), + MAKE_PAIR( 0x788E, 0x7815 ), + MAKE_PAIR( 0x7895, 0x5D0E ), + MAKE_PAIR( 0x78AF, 0x7459 ), + MAKE_PAIR( 0x7919, 0x788D ), + MAKE_PAIR( 0x7926, 0x9271 ), + MAKE_PAIR( 0x792A, 0x783A ), + MAKE_PAIR( 0x7955, 0x79D8 ), + MAKE_PAIR( 0x7962, 0x79B0 ), + MAKE_PAIR( 0x797F, 0x7984 ), + MAKE_PAIR( 0x7980, 0x7A1F ), + MAKE_PAIR( 0x79AA, 0x7985 ), + MAKE_PAIR( 0x79AE, 0x793C ), + MAKE_PAIR( 0x79CB, 0x7A50 ), + MAKE_PAIR( 0x7A31, 0x79F0 ), + MAKE_PAIR( 0x7A3B, 0x7A32 ), + MAKE_PAIR( 0x7A3E, 0x7A3F ), + MAKE_PAIR( 0x7A49, 0x7A1A ), + MAKE_PAIR( 0x7A57, 0x7A42 ), + MAKE_PAIR( 0x7A69, 0x7A4F ), + MAKE_PAIR( 0x7A70, 0x7A63 ), + MAKE_PAIR( 0x7A97, 0x7A93 ), + MAKE_PAIR( 0x7AB0, 0x7AAF ), + MAKE_PAIR( 0x7AC8, 0x7AC3 ), + MAKE_PAIR( 0x7ACA, 0x7A83 ), + MAKE_PAIR( 0x7AD2, 0x5947 ), + MAKE_PAIR( 0x7ADD, 0x4E26 ), + MAKE_PAIR( 0x7B0B, 0x7B4D ), + MAKE_PAIR( 0x7B11, 0x54B2 ), + MAKE_PAIR( 0x7B5D, 0x7B8F ), + MAKE_PAIR( 0x7B71, 0x7BE0 ), + MAKE_PAIR( 0x7B7A, 0x7B50 ), + MAKE_PAIR( 0x7B8B, 0x724B ), + MAKE_PAIR( 0x7B9A, 0x5273 ), + MAKE_PAIR( 0x7B9F, 0x7B98 ), + MAKE_PAIR( 0x7BCF, 0x5D4C ), + MAKE_PAIR( 0x7BE6, 0x7B86 ), + MAKE_PAIR( 0x7C11, 0x84D1 ), + MAKE_PAIR( 0x7C14, 0x84D1 ), + MAKE_PAIR( 0x7C54, 0x85AE ), + MAKE_PAIR( 0x7C56, 0x7C64 ), + MAKE_PAIR( 0x7C58, 0x7C50 ), + MAKE_PAIR( 0x7C60, 0x7BED ), + MAKE_PAIR( 
0x7CAE, 0x7CE7 ), + MAKE_PAIR( 0x7CB9, 0x7C8B ), + MAKE_PAIR( 0x7CE2, 0x6A21 ), + MAKE_PAIR( 0x7CFA, 0x7CFE ), + MAKE_PAIR( 0x7D4F, 0x7D32 ), + MAKE_PAIR( 0x7D72, 0x7CF8 ), + MAKE_PAIR( 0x7D89, 0x7E4D ), + MAKE_PAIR( 0x7D93, 0x7D4C ), + MAKE_PAIR( 0x7DAB, 0x7DDA ), + MAKE_PAIR( 0x7DD5, 0x7E83 ), + MAKE_PAIR( 0x7DDC, 0x7DBF ), + MAKE_PAIR( 0x7DE8, 0x7BC7 ), + MAKE_PAIR( 0x7E23, 0x770C ), + MAKE_PAIR( 0x7E31, 0x7E26 ), + MAKE_PAIR( 0x7E3D, 0x7DCF ), + MAKE_PAIR( 0x7E69, 0x7E04 ), + MAKE_PAIR( 0x7E6A, 0x7D75 ), + MAKE_PAIR( 0x7E7C, 0x7D99 ), + MAKE_PAIR( 0x7E8C, 0x7D9A ), + MAKE_PAIR( 0x7E8E, 0x7E4A ), + MAKE_PAIR( 0x7E92, 0x7E8F ), + MAKE_PAIR( 0x7E96, 0x7E4A ), + MAKE_PAIR( 0x7F3A, 0x6B20 ), + MAKE_PAIR( 0x7F4E, 0x58DC ), + MAKE_PAIR( 0x7F50, 0x7F36 ), + MAKE_PAIR( 0x7F78, 0x7F70 ), + MAKE_PAIR( 0x7F83, 0x51AA ), + MAKE_PAIR( 0x7F87, 0x7F88 ), + MAKE_PAIR( 0x7FA3, 0x7FA4 ), + MAKE_PAIR( 0x7FAE, 0x7FB9 ), + MAKE_PAIR( 0x7FC6, 0x7FE0 ), + MAKE_PAIR( 0x803B, 0x6065 ), + MAKE_PAIR( 0x805F, 0x5A7F ), + MAKE_PAIR( 0x8068, 0x806F ), + MAKE_PAIR( 0x8070, 0x8061 ), + MAKE_PAIR( 0x8072, 0x58F0 ), + MAKE_PAIR( 0x807D, 0x8074 ), + MAKE_PAIR( 0x8085, 0x7C9B ), + MAKE_PAIR( 0x8089, 0x5B8D ), + MAKE_PAIR( 0x80AC, 0x75A3 ), + MAKE_PAIR( 0x80CC, 0x810A ), + MAKE_PAIR( 0x80EF, 0x8DE8 ), + MAKE_PAIR( 0x8109, 0x8108 ), + MAKE_PAIR( 0x8123, 0x5507 ), + MAKE_PAIR( 0x815F, 0x81A3 ), + MAKE_PAIR( 0x8166, 0x8133 ), + MAKE_PAIR( 0x8193, 0x8178 ), + MAKE_PAIR( 0x81B8, 0x9AC4 ), + MAKE_PAIR( 0x81BD, 0x80C6 ), + MAKE_PAIR( 0x81C8, 0x81D8 ), + MAKE_PAIR( 0x81DF, 0x81D3 ), + MAKE_PAIR( 0x81FA, 0x53F0 ), + MAKE_PAIR( 0x8207, 0x4E0E ), + MAKE_PAIR( 0x8209, 0x6319 ), + MAKE_PAIR( 0x820A, 0x65E7 ), + MAKE_PAIR( 0x820D, 0x820E ), + MAKE_PAIR( 0x8216, 0x8217 ), + MAKE_PAIR( 0x8218, 0x9928 ), + MAKE_PAIR( 0x8229, 0x8239 ), + MAKE_PAIR( 0x822E, 0x826B ), + MAKE_PAIR( 0x8235, 0x67C1 ), + MAKE_PAIR( 0x8277, 0x8276 ), + MAKE_PAIR( 0x82C5, 0x5208 ), + MAKE_PAIR( 0x8358, 0x5E84 ), + MAKE_PAIR( 0x8373, 0x8C46 
), + MAKE_PAIR( 0x838A, 0x5E84 ), + MAKE_PAIR( 0x8393, 0x82FA ), + MAKE_PAIR( 0x8396, 0x830E ), + MAKE_PAIR( 0x83B5, 0x514E ), + MAKE_PAIR( 0x83DF, 0x514E ), + MAKE_PAIR( 0x83F4, 0x5EB5 ), + MAKE_PAIR( 0x8420, 0x840C ), + MAKE_PAIR( 0x842C, 0x4E07 ), + MAKE_PAIR( 0x8462, 0x84CB ), + MAKE_PAIR( 0x84AD, 0x82BB ), + MAKE_PAIR( 0x854B, 0x854A ), + MAKE_PAIR( 0x855A, 0x843C ), + MAKE_PAIR( 0x8597, 0x5712 ), + MAKE_PAIR( 0x85C1, 0x7A3F ), + MAKE_PAIR( 0x85CF, 0x8535 ), + MAKE_PAIR( 0x85DD, 0x82B8 ), + MAKE_PAIR( 0x85E5, 0x85AC ), + MAKE_PAIR( 0x85EA, 0x85AE ), + MAKE_PAIR( 0x8602, 0x854A ), + MAKE_PAIR( 0x8606, 0x82A6 ), + MAKE_PAIR( 0x8613, 0x8607 ), + MAKE_PAIR( 0x8617, 0x6A97 ), + MAKE_PAIR( 0x862F, 0x76EA ), + MAKE_PAIR( 0x8655, 0x51E6 ), + MAKE_PAIR( 0x865F, 0x53F7 ), + MAKE_PAIR( 0x8768, 0x8671 ), + MAKE_PAIR( 0x87A2, 0x86CD ), + MAKE_PAIR( 0x87C6, 0x87C7 ), + MAKE_PAIR( 0x87D2, 0x880E ), + MAKE_PAIR( 0x87F2, 0x866B ), + MAKE_PAIR( 0x8805, 0x877F ), + MAKE_PAIR( 0x880F, 0x87F9 ), + MAKE_PAIR( 0x8823, 0x86CE ), + MAKE_PAIR( 0x8827, 0x8839 ), + MAKE_PAIR( 0x8836, 0x8695 ), + MAKE_PAIR( 0x883B, 0x86EE ), + MAKE_PAIR( 0x8842, 0x8844 ), + MAKE_PAIR( 0x885E, 0x885B ), + MAKE_PAIR( 0x88B5, 0x887D ), + MAKE_PAIR( 0x88DD, 0x88C5 ), + MAKE_PAIR( 0x88E1, 0x88CF ), + MAKE_PAIR( 0x891D, 0x894C ), + MAKE_PAIR( 0x8943, 0x8912 ), + MAKE_PAIR( 0x894D, 0x96D1 ), + MAKE_PAIR( 0x898A, 0x7F88 ), + MAKE_PAIR( 0x89BA, 0x899A ), + MAKE_PAIR( 0x89BD, 0x89A7 ), + MAKE_PAIR( 0x89C0, 0x89B3 ), + MAKE_PAIR( 0x89E7, 0x89E3 ), + MAKE_PAIR( 0x89F8, 0x89E6 ), + MAKE_PAIR( 0x8A00, 0x4E91 ), + MAKE_PAIR( 0x8A3B, 0x6CE8 ), + MAKE_PAIR( 0x8AE1, 0x8B1A ), + MAKE_PAIR( 0x8AEB, 0x8ACC ), + MAKE_PAIR( 0x8B0C, 0x6B4C ), + MAKE_PAIR( 0x8B20, 0x8B21 ), + MAKE_PAIR( 0x8B41, 0x5629 ), + MAKE_PAIR( 0x8B49, 0x8A3C ), + MAKE_PAIR( 0x8B5B, 0x8B56 ), + MAKE_PAIR( 0x8B5F, 0x566A ), + MAKE_PAIR( 0x8B6F, 0x8A33 ), + MAKE_PAIR( 0x8B71, 0x5584 ), + MAKE_PAIR( 0x8B7D, 0x8A89 ), + MAKE_PAIR( 0x8B80, 0x8AAD ), + MAKE_PAIR( 
0x8B8A, 0x5909 ), + MAKE_PAIR( 0x8B8E, 0x8B90 ), + MAKE_PAIR( 0x8B93, 0x8B72 ), + MAKE_PAIR( 0x8B9A, 0x8B83 ), + MAKE_PAIR( 0x8C3F, 0x6E13 ), + MAKE_PAIR( 0x8C4E, 0x7AEA ), + MAKE_PAIR( 0x8C50, 0x8C4A ), + MAKE_PAIR( 0x8C6B, 0x4E88 ), + MAKE_PAIR( 0x8C6C, 0x732A ), + MAKE_PAIR( 0x8C7A, 0x72B2 ), + MAKE_PAIR( 0x8C7C, 0x8C94 ), + MAKE_PAIR( 0x8C89, 0x72F8 ), + MAKE_PAIR( 0x8C8D, 0x72F8 ), + MAKE_PAIR( 0x8C8E, 0x730A ), + MAKE_PAIR( 0x8C98, 0x734F ), + MAKE_PAIR( 0x8CAD, 0x8CEA ), + MAKE_PAIR( 0x8CAE, 0x4E8C ), + MAKE_PAIR( 0x8CB3, 0x4E8C ), + MAKE_PAIR( 0x8CCD, 0x81D3 ), + MAKE_PAIR( 0x8CE3, 0x58F2 ), + MAKE_PAIR( 0x8CE4, 0x8CCE ), + MAKE_PAIR( 0x8D0A, 0x8CDB ), + MAKE_PAIR( 0x8D13, 0x81D3 ), + MAKE_PAIR( 0x8D71, 0x8D70 ), + MAKE_PAIR( 0x8E08, 0x758E ), + MAKE_PAIR( 0x8E10, 0x8DF5 ), + MAKE_PAIR( 0x8E34, 0x8E0A ), + MAKE_PAIR( 0x8E48, 0x8E0F ), + MAKE_PAIR( 0x8E87, 0x58B8 ), + MAKE_PAIR( 0x8EAA, 0x8E99 ), + MAKE_PAIR( 0x8EB0, 0x4F53 ), + MAKE_PAIR( 0x8EC6, 0x4F53 ), + MAKE_PAIR( 0x8EE3, 0x8F5F ), + MAKE_PAIR( 0x8F0C, 0x4E21 ), + MAKE_PAIR( 0x8F15, 0x8EFD ), + MAKE_PAIR( 0x8F19, 0x8F12 ), + MAKE_PAIR( 0x8F1B, 0x4E21 ), + MAKE_PAIR( 0x8F49, 0x8EE2 ), + MAKE_PAIR( 0x8FA7, 0x5F01 ), + MAKE_PAIR( 0x8FA8, 0x5F01 ), + MAKE_PAIR( 0x8FAD, 0x8F9E ), + MAKE_PAIR( 0x8FAF, 0x5F01 ), + MAKE_PAIR( 0x8FEA, 0x5EF8 ), + MAKE_PAIR( 0x8FEF, 0x9003 ), + MAKE_PAIR( 0x8FF4, 0x5EFB ), + MAKE_PAIR( 0x8FFA, 0x5EFC ), + MAKE_PAIR( 0x900E, 0x9052 ), + MAKE_PAIR( 0x9015, 0x5F84 ), + MAKE_PAIR( 0x9023, 0x806F ), + MAKE_PAIR( 0x9039, 0x9054 ), + MAKE_PAIR( 0x9059, 0x9065 ), + MAKE_PAIR( 0x905E, 0x9013 ), + MAKE_PAIR( 0x9072, 0x9045 ), + MAKE_PAIR( 0x9087, 0x8FE9 ), + MAKE_PAIR( 0x9089, 0x8FBA ), + MAKE_PAIR( 0x908A, 0x8FBA ), + MAKE_PAIR( 0x90B1, 0x4E18 ), + MAKE_PAIR( 0x9130, 0x96A3 ), + MAKE_PAIR( 0x9189, 0x9154 ), + MAKE_PAIR( 0x918B, 0x9162 ), + MAKE_PAIR( 0x91AB, 0x533B ), + MAKE_PAIR( 0x91BA, 0x718F ), + MAKE_PAIR( 0x91C0, 0x91B8 ), + MAKE_PAIR( 0x91CB, 0x91C8 ), + MAKE_PAIR( 0x91E1, 0x91DC 
), + MAKE_PAIR( 0x91FC, 0x5263 ), + MAKE_PAIR( 0x9214, 0x6284 ), + MAKE_PAIR( 0x9229, 0x946A ), + MAKE_PAIR( 0x922C, 0x9438 ), + MAKE_PAIR( 0x9248, 0x91F6 ), + MAKE_PAIR( 0x9264, 0x920E ), + MAKE_PAIR( 0x9295, 0x9244 ), + MAKE_PAIR( 0x92EA, 0x8217 ), + MAKE_PAIR( 0x9322, 0x92AD ), + MAKE_PAIR( 0x9394, 0x7194 ), + MAKE_PAIR( 0x93AD, 0x93AE ), + MAKE_PAIR( 0x941A, 0x930F ), + MAKE_PAIR( 0x9421, 0x9244 ), + MAKE_PAIR( 0x9435, 0x9244 ), + MAKE_PAIR( 0x9444, 0x92F3 ), + MAKE_PAIR( 0x9452, 0x9451 ), + MAKE_PAIR( 0x945A, 0x947D ), + MAKE_PAIR( 0x945B, 0x9271 ), + MAKE_PAIR( 0x9587, 0x9589 ), + MAKE_PAIR( 0x95A0, 0x958F ), + MAKE_PAIR( 0x95DC, 0x95A2 ), + MAKE_PAIR( 0x962A, 0x5742 ), + MAKE_PAIR( 0x962F, 0x5740 ), + MAKE_PAIR( 0x9644, 0x4ED8 ), + MAKE_PAIR( 0x9670, 0x852D ), + MAKE_PAIR( 0x9677, 0x9665 ), + MAKE_PAIR( 0x96A8, 0x968F ), + MAKE_PAIR( 0x96AA, 0x967A ), + MAKE_PAIR( 0x96B1, 0x852D ), + MAKE_PAIR( 0x96B8, 0x96B7 ), + MAKE_PAIR( 0x96C6, 0x8F2F ), + MAKE_PAIR( 0x96D9, 0x53CC ), + MAKE_PAIR( 0x96DC, 0x96D1 ), + MAKE_PAIR( 0x9738, 0x8987 ), + MAKE_PAIR( 0x9748, 0x970A ), + MAKE_PAIR( 0x975C, 0x9759 ), + MAKE_PAIR( 0x976B, 0x976D ), + MAKE_PAIR( 0x9771, 0x976D ), + MAKE_PAIR( 0x97ED, 0x97EE ), + MAKE_PAIR( 0x97F2, 0x9F4F ), + MAKE_PAIR( 0x97F5, 0x97FB ), + MAKE_PAIR( 0x9834, 0x7A4E ), + MAKE_PAIR( 0x9838, 0x981A ), + MAKE_PAIR( 0x984B, 0x816E ), + MAKE_PAIR( 0x984F, 0x9854 ), + MAKE_PAIR( 0x986F, 0x9855 ), + MAKE_PAIR( 0x98B1, 0x53F0 ), + MAKE_PAIR( 0x98C3, 0x98C4 ), + MAKE_PAIR( 0x98DC, 0x7FFB ), + MAKE_PAIR( 0x98EE, 0x98F2 ), + MAKE_PAIR( 0x9918, 0x4F59 ), + MAKE_PAIR( 0x991D, 0x98FE ), + MAKE_PAIR( 0x9920, 0x9905 ), + MAKE_PAIR( 0x99C8, 0x99C6 ), + MAKE_PAIR( 0x99F2, 0x99B4 ), + MAKE_PAIR( 0x9A37, 0x9A12 ), + MAKE_PAIR( 0x9A45, 0x99C6 ), + MAKE_PAIR( 0x9A57, 0x9A13 ), + MAKE_PAIR( 0x9A5B, 0x99C5 ), + MAKE_PAIR( 0x9AD3, 0x9AC4 ), + MAKE_PAIR( 0x9AD4, 0x4F53 ), + MAKE_PAIR( 0x9AEE, 0x9AEA ), + MAKE_PAIR( 0x9AF4, 0x5F7F ), + MAKE_PAIR( 0x9B27, 0x9599 ), + MAKE_PAIR( 
0x9B28, 0x95A7 ), + MAKE_PAIR( 0x9B2A, 0x95D8 ), + MAKE_PAIR( 0x9B31, 0x6B1D ), + MAKE_PAIR( 0x9B3B, 0x7CA5 ), + MAKE_PAIR( 0x9C1B, 0x9C2E ), + MAKE_PAIR( 0x9C3A, 0x9BF5 ), + MAKE_PAIR( 0x9CEB, 0x96C1 ), + MAKE_PAIR( 0x9CEC, 0x9CE7 ), + MAKE_PAIR( 0x9D08, 0x96C1 ), + MAKE_PAIR( 0x9D44, 0x9D1F ), + MAKE_PAIR( 0x9D5E, 0x9D5D ), + MAKE_PAIR( 0x9DAB, 0x9D87 ), + MAKE_PAIR( 0x9DAF, 0x9D2C ), + MAKE_PAIR( 0x9DC4, 0x9D8F ), + MAKE_PAIR( 0x9DCF, 0x9DC6 ), + MAKE_PAIR( 0x9E7D, 0x5869 ), + MAKE_PAIR( 0x9EA5, 0x9EA6 ), + MAKE_PAIR( 0x9EAA, 0x9EBA ), + MAKE_PAIR( 0x9EB8, 0x9EA9 ), + MAKE_PAIR( 0x9ED8, 0x9ED9 ), + MAKE_PAIR( 0x9EDE, 0x70B9 ), + MAKE_PAIR( 0x9EE8, 0x515A ), + MAKE_PAIR( 0x9F07, 0x9C32 ), + MAKE_PAIR( 0x9F21, 0x9F20 ), + MAKE_PAIR( 0x9F4A, 0x658E ), + MAKE_PAIR( 0x9F4B, 0x658E ), + MAKE_PAIR( 0x9F52, 0x6B6F ), + MAKE_PAIR( 0x9F61, 0x9F62 ), + MAKE_PAIR( 0x9F67, 0x56D3 ), + MAKE_PAIR( 0x9F8D, 0x7ADC ), + MAKE_PAIR( 0x9F9C, 0x4E80 ), + MAKE_PAIR( 0x9F9D, 0x7A50 ), + MAKE_PAIR( 0xFEFF, 0x4E9C ) +}; + +OUString SAL_CALL +ignoreTraditionalKanji_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + oneToOneMapping table(traditionalKanji2updateKanji, sizeof(traditionalKanji2updateKanji)); + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, table); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreWidth.cxx b/i18npool/source/transliteration/ignoreWidth.cxx new file mode 100644 index 000000000000..96a8830d9e58 --- /dev/null +++ b/i18npool/source/transliteration/ignoreWidth.cxx @@ -0,0 +1,74 @@ +/************************************************************************* + * + * $RCSfile: ignoreWidth.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards 
Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_Width +#include <transliteration_Ignore.hxx> +#define TRANSLITERATION_fullwidthToHalfwidth +#define TRANSLITERATION_halfwidthToFullwidth +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL +ignoreWidth::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + fullwidthToHalfwidth t1; + return t1.transliterate(inStr, startPos, nCount, offset); +} + + +Sequence< OUString > SAL_CALL +ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 ) + throw(RuntimeException) +{ + halfwidthToFullwidth t1; + fullwidthToHalfwidth t2; + + return transliteration_Ignore::transliterateRange(str1, str2, t1, t2); +} + +} } } } diff --git a/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx new file mode 100644 index 000000000000..8762259c329c --- /dev/null +++ b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx @@ -0,0 +1,79 @@ +/************************************************************************* + * + * $RCSfile: ignoreZiZu_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the 
Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_ZiZu_ja_JP +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Unicode +ignoreZiZu_ja_JP_translator (const sal_Unicode c) +{ + + switch (c) { + case 0x30C2: // KATAKANA LETTER DI + return 0x30B8; // KATAKANA LETTER ZI + + case 0x3062: // HIRAGANA LETTER DI + return 0x3058; // HIRAGANA LETTER ZI + + case 0x30C5: // KATAKANA LETTER DU + return 0x30BA; // KATAKANA LETTER ZU + + case 0x3065: // HIRAGANA LETTER DU + return 0x305A; // HIRAGANA LETTER ZU + } + return c; +} + +OUString SAL_CALL +ignoreZiZu_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + return transliteration_Ignore::transliterate( inStr, startPos, nCount, offset, ignoreZiZu_ja_JP_translator ); +} + +} } } } diff --git 
a/i18npool/source/transliteration/katakanaToHiragana.cxx b/i18npool/source/transliteration/katakanaToHiragana.cxx new file mode 100644 index 000000000000..636e9fde3c95 --- /dev/null +++ b/i18npool/source/transliteration/katakanaToHiragana.cxx @@ -0,0 +1,77 @@ +/************************************************************************* + * + * $RCSfile: katakanaToHiragana.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_katakanaToHiragana +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +katakanaToHiragana::katakanaToHiragana() +{ + transliterationName = "katakanaToHiragana"; + implementationName = "com.sun.star.i18n.Transliteration.KATAKANA_HIRAGANA"; +} + +// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +static sal_Unicode toHiragana (const sal_Unicode c) +{ + if (0x30a0 <= c && c <= 0x30f4 || 0x30fd <= c && c <= 0x30ff) { // 30A0 - 30FF KATAKANA LETTER + // shift code point by 0x0060 + return c - (0x30a0 - 0x3040); + } + return c; +} + + +OUString SAL_CALL +katakanaToHiragana::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + return transliteration_OneToOne::transliterate( inStr, startPos, nCount, offset, (TransFunc) toHiragana ); +} + +} } } } diff --git a/i18npool/source/transliteration/largeToSmall_ja_JP.cxx b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx new file mode 100644 index 000000000000..9c21846eff5b --- /dev/null +++ b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx @@ -0,0 +1,109 @@ +/************************************************************************* + * + * $RCSfile: largeToSmall_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License 
Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_largeToSmall_ja_JP +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +largeToSmall_ja_JP::largeToSmall_ja_JP() +{ + transliterationName = "largeToSmall_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.largeToSmall_ja_JP"; +} + +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html +// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +// http://charts.unicode.org/Web/UFF00.html + +OneToOneMappingTable_t large2small[] = { + MAKE_PAIR( 0x3041, 0x3042 ), // HIRAGANA LETTER SMALL A --> HIRAGANA LETTER A + MAKE_PAIR( 0x3043, 0x3044 ), // HIRAGANA LETTER SMALL I --> HIRAGANA LETTER I + MAKE_PAIR( 0x3045, 0x3046 ), // HIRAGANA LETTER SMALL U --> HIRAGANA LETTER U + MAKE_PAIR( 0x3047, 0x3048 ), // HIRAGANA LETTER SMALL E --> HIRAGANA LETTER E + MAKE_PAIR( 0x3049, 0x304A ), // HIRAGANA LETTER SMALL O --> HIRAGANA LETTER O + MAKE_PAIR( 0x3063, 0x3064 ), // HIRAGANA LETTER SMALL TU --> HIRAGANA LETTER TU + MAKE_PAIR( 0x3083, 0x3084 ), // HIRAGANA LETTER SMALL YA --> HIRAGANA LETTER YA + MAKE_PAIR( 0x3085, 0x3086 ), // HIRAGANA LETTER SMALL YU --> HIRAGANA LETTER YU + MAKE_PAIR( 0x3087, 0x3088 ), // HIRAGANA LETTER SMALL YO --> HIRAGANA LETTER YO + MAKE_PAIR( 0x308E, 0x308F ), // HIRAGANA LETTER SMALL WA --> HIRAGANA LETTER WA + MAKE_PAIR( 0x30A1, 0x30A2 ), // KATAKANA LETTER SMALL A --> KATAKANA LETTER A + MAKE_PAIR( 0x30A3, 0x30A4 ), // KATAKANA LETTER SMALL I 
--> KATAKANA LETTER I + MAKE_PAIR( 0x30A5, 0x30A6 ), // KATAKANA LETTER SMALL U --> KATAKANA LETTER U + MAKE_PAIR( 0x30A7, 0x30A8 ), // KATAKANA LETTER SMALL E --> KATAKANA LETTER E + MAKE_PAIR( 0x30A9, 0x30AA ), // KATAKANA LETTER SMALL O --> KATAKANA LETTER O + MAKE_PAIR( 0x30C3, 0x30C4 ), // KATAKANA LETTER SMALL TU --> KATAKANA LETTER TU + MAKE_PAIR( 0x30E3, 0x30E4 ), // KATAKANA LETTER SMALL YA --> KATAKANA LETTER YA + MAKE_PAIR( 0x30E5, 0x30E6 ), // KATAKANA LETTER SMALL YU --> KATAKANA LETTER YU + MAKE_PAIR( 0x30E7, 0x30E8 ), // KATAKANA LETTER SMALL YO --> KATAKANA LETTER YO + MAKE_PAIR( 0x30EE, 0x30EF ), // KATAKANA LETTER SMALL WA --> KATAKANA LETTER WA + MAKE_PAIR( 0x30F5, 0x30AB ), // KATAKANA LETTER SMALL KA --> KATAKANA LETTER KA + MAKE_PAIR( 0x30F6, 0x30B1 ), // KATAKANA LETTER SMALL KE --> KATAKANA LETTER KE + MAKE_PAIR( 0xFF67, 0xFF71 ), // HALFWIDTH KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER A + MAKE_PAIR( 0xFF68, 0xFF72 ), // HALFWIDTH KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER I + MAKE_PAIR( 0xFF69, 0xFF73 ), // HALFWIDTH KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER U + MAKE_PAIR( 0xFF6A, 0xFF74 ), // HALFWIDTH KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER E + MAKE_PAIR( 0xFF6B, 0xFF75 ), // HALFWIDTH KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER O + MAKE_PAIR( 0xFF6C, 0xFF94 ), // HALFWIDTH KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER YA + MAKE_PAIR( 0xFF6D, 0xFF95 ), // HALFWIDTH KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER YU + MAKE_PAIR( 0xFF6E, 0xFF96 ), // HALFWIDTH KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER YO + MAKE_PAIR( 0xFF6F, 0xFF82 ) // HALFWIDTH KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER TU +}; + + +OUString SAL_CALL +largeToSmall_ja_JP::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // One to One mapping + oneToOneMapping table(large2small, 
sizeof(large2small)); + return transliteration_OneToOne::transliterate( inStr, startPos, nCount, offset, table ); +} + +} } } } diff --git a/i18npool/source/transliteration/makefile.mk b/i18npool/source/transliteration/makefile.mk new file mode 100644 index 000000000000..598f561dbd7c --- /dev/null +++ b/i18npool/source/transliteration/makefile.mk @@ -0,0 +1,102 @@ +#************************************************************************* +#* +#* $RCSfile: makefile.mk,v $ +#* +#* $Revision: 1.1 $ +#* +#* last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ +#* +#* The Contents of this file are made available subject to the terms of +#* either of the following licenses +#* +#* - Sun Industry Standards Source License Version 1.1 +#* +#* Sun Microsystems Inc., October, 2000 +#* +#* Sun Industry Standards Source License Version 1.1 +#* ================================================= +#* The contents of this file are subject to the Sun Industry Standards +#* Source License Version 1.1 (the "License"); You may not use this file +#* except in compliance with the License. You may obtain a copy of the +#* License at http://www.openoffice.org/license.html. +#* +#* Software provided under this License is provided on an "AS IS" basis, +#* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +#* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +#* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +#* See the License for the specific provisions governing your rights and +#* obligations concerning the Software. +#* +#* The Initial Developer of the Original Code is: Sun Microsystems, Inc. +#* +#* Copyright: 2000 by Sun Microsystems, Inc. +#* +#* All Rights Reserved. +#* +#* Contributor(s): _______________________________________ +#* +#* +#************************************************************************/ +PRJ=..$/.. 
+ +PRJNAME=i18npool +TARGET=transliterationImpl + +ENABLE_EXCEPTIONS=TRUE + +# --- Settings ----------------------------------------------------- + +.INCLUDE : svpre.mk +.INCLUDE : settings.mk +.INCLUDE : sv.mk + +# --- Files -------------------------------------------------------- + +SLOFILES= \ + $(SLO)$/transliteration_body.obj \ + $(SLO)$/transliteration_caseignore.obj \ + $(SLO)$/transliterationImpl.obj \ + $(SLO)$/transliteration_commonclass.obj \ + $(SLO)$/transliteration_OneToOne.obj \ + $(SLO)$/transliteration_Ignore.obj \ + $(SLO)$/transliteration_Numeric.obj \ + $(SLO)$/oneToOneMapping.obj \ + $(SLO)$/hiraganaToKatakana.obj \ + $(SLO)$/katakanaToHiragana.obj \ + $(SLO)$/ignoreKana.obj \ + $(SLO)$/halfwidthToFullwidth.obj \ + $(SLO)$/fullwidthToHalfwidth.obj \ + $(SLO)$/ignoreWidth.obj \ + $(SLO)$/smallToLarge_ja_JP.obj \ + $(SLO)$/largeToSmall_ja_JP.obj \ + $(SLO)$/ignoreSize_ja_JP.obj \ + $(SLO)$/ignoreMinusSign_ja_JP.obj \ + $(SLO)$/ignoreIterationMark_ja_JP.obj \ + $(SLO)$/ignoreTraditionalKana_ja_JP.obj \ + $(SLO)$/ignoreTraditionalKanji_ja_JP.obj \ + $(SLO)$/ignoreProlongedSoundMark_ja_JP.obj \ + $(SLO)$/ignoreZiZu_ja_JP.obj \ + $(SLO)$/ignoreBaFa_ja_JP.obj \ + $(SLO)$/ignoreTiJi_ja_JP.obj \ + $(SLO)$/ignoreHyuByu_ja_JP.obj \ + $(SLO)$/ignoreSeZe_ja_JP.obj \ + $(SLO)$/ignoreIandEfollowedByYa_ja_JP.obj \ + $(SLO)$/ignoreKiKuFollowedBySa_ja_JP.obj \ + $(SLO)$/ignoreSeparator_ja_JP.obj \ + $(SLO)$/ignoreSpace_ja_JP.obj \ + $(SLO)$/ignoreMiddleDot_ja_JP.obj \ + $(SLO)$/textToPronounce_zh.obj \ + $(SLO)$/numtochar.obj \ + $(SLO)$/numtotext_cjk.obj + +# MACOSX: manually initialize some static data members +.IF "$(OS)"=="MACOSX" +SLOFILES+=$(SLO)$/staticmbtransliteration.obj +.ENDIF + +# --- Targets ------------------------------------------------------ + +.INCLUDE : target.mk + + + diff --git a/i18npool/source/transliteration/numtochar.cxx b/i18npool/source/transliteration/numtochar.cxx new file mode 100644 index 000000000000..0c44be9b8280 ---
/dev/null +++ b/i18npool/source/transliteration/numtochar.cxx @@ -0,0 +1,100 @@ +/************************************************************************* + * + * $RCSfile: numtochar.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#define TRANSLITERATION_ALL +#include <numtochar.hxx> +#include <data/numberchar.h> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL NumToChar::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) throw(RuntimeException) { + + // Create a string buffer which can hold nCount + 1 characters. + rtl_uString *newStr; + x_rtl_uString_new_WithLength( &newStr, nCount ); // defined in x_rtl_ustring.h The reference count is 0 now. + + // Prepare pointers of unicode character arrays. + const sal_Unicode *src = inStr.getStr() + startPos; + sal_Unicode *dst = newStr->buffer; + + // Allocate the same length as inStr to offset argument. + offset.realloc(inStr.getLength()); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + for (sal_Int32 index = 0; index < nCount; index++) { + sal_Unicode ch = src[index]; + dst[index] = (0x0030 <= ch && ch <= 0x0039) ? num2char[ ch - 0x0030 ] : ch; + *p++ = position++; + } + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +#define TRANSLITERATION_NUMTOCHAR( number, name ) \ +NumToChar##name::NumToChar##name() \ +{ \ + num2char = NumberChar[number]; \ + transliterationName = "NumToChar"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToChar"#name; \ +} + +TRANSLITERATION_NUMTOCHAR( NumberChar_HalfWidth, ) +TRANSLITERATION_NUMTOCHAR( NumberChar_FullWidth, Fullwidth) +TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_CN) +TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_TW) +TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh, Upper_zh_CN) +TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh_TW, Upper_zh_TW) +TRANSLITERATION_NUMTOCHAR( NumberChar_Modern_ja, KanjiShort_ja_JP) +TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_ko, Lower_ko) +TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_ko, Upper_ko) +TRANSLITERATION_NUMTOCHAR( NumberChar_Hangul_ko, Hangul_ko) +TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_ar, Indic_ar) +TRANSLITERATION_NUMTOCHAR( NumberChar_EastIndic_ar, EastIndic_ar) +TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_hi, Indic_hi) +TRANSLITERATION_NUMTOCHAR( NumberChar_th, _th) +#undef TRANSLITERATION_NUMTOCHAR + +} } } } diff --git a/i18npool/source/transliteration/numtotext_cjk.cxx b/i18npool/source/transliteration/numtotext_cjk.cxx new file mode 100644 index 000000000000..f8efdc63cf36 --- /dev/null +++ b/i18npool/source/transliteration/numtotext_cjk.cxx @@ -0,0 +1,315 @@ +/************************************************************************* + * + * $RCSfile: numtotext_cjk.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the 
Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#define TRANSLITERATION_ALL +#include <numtotext_cjk.hxx> +#include <data/numberchar.h> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +#define NUMBER_OMIT_ZERO (1 << 0) +#define NUMBER_OMIT_ONE (1 << 1) + +#define NUMBER_ZERO 0x0030 +#define NUMBER_ONE 0x0031 + +#define isNum(n) ( 0x0030 <= n && n <= 0x0039 ) + +NumToText_CJK::NumToText_CJK() { + numberChar = NULL; +} + +sal_Bool SAL_CALL NumToText_CJK::numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len, + sal_Unicode *dst, sal_Int32& count, sal_Unicode multiplierChar, sal_Int32** offset) +{ + if ( len == 1 ) { + **offset++ = count; + if (str[begin] != NUMBER_ZERO) { + if (!(numberFlag & NUMBER_OMIT_ONE) || str[begin] != NUMBER_ONE) + dst[count++] = numberChar[(sal_Int16)(str[begin] - NUMBER_ZERO)]; + if (multiplierChar > 0) + dst[count++] = multiplierChar; + } else if (!(numberFlag & NUMBER_OMIT_ZERO) && dst[count-1] != numberChar[0]) + dst[count++] = numberChar[0]; + return str[begin] != 
NUMBER_ZERO; + } else { + sal_Bool printPower = sal_False; + sal_Int16 last = 0; + for (sal_Int16 i = 1; numberMultiplier[i].power >= 0; i++) { + sal_Int32 tmp = len - numberMultiplier[i].power; + if (tmp > 0) { + printPower |= numberMaker(str, begin, tmp, dst, count, + numberMultiplier[i].multiplierChar, offset); + begin += tmp; + len -= tmp; + } + } + if (printPower) { + if (dst[count-1] == numberChar[0]) + count--; + if (multiplierChar > 0) + dst[count++] = multiplierChar; + } + return printPower; + } +} + +OUString SAL_CALL NumToText_CJK::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + if (! numberChar) throw RuntimeException(); + + sal_Int32 strLen = inStr.getLength() - startPos; + + if (nCount > strLen) + nCount = strLen; + + if (nCount > 0) { + const sal_Unicode *str = inStr.getStr() + startPos; + rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount * 2); + sal_Int32 i, len = 0, count = 0, begin, end; + + offset.realloc( nCount * 2 ); + sal_Int32 *p = offset.getArray(); + + for (i = 0; i <= nCount; i++) { + if (i < nCount && isNum(str[i])) { + if (len == 0) + begin = i; + len++; + } else { + if (len > 0) { + for (end = begin + (len % numberMultiplier[0].power); + end <= i; begin = end, end += numberMultiplier[0].power) + numberMaker(str, begin, end - begin, newStr->buffer, count, + end == i ? 
0 : numberMultiplier[0].multiplierChar, &p); + len = 0; + } + if (i < nCount) { + *p++ = count; + newStr->buffer[count++] = str[i]; + } + } + } + + offset.realloc(count); + for (i = 0; i < count; i++) + offset[i] += startPos; + return OUString(newStr->buffer, count); + } + return OUString(); +} + +static NumberMultiplier multiplier_Lower_zh[] = { + { 12, 0x5146 }, // fourth four digits group, ten billion + { 8, 0x4EBF }, // third four digits group, hundred million + { 4, 0x4E07 }, // second four digits group, ten thousand + { 3, 0x5343 }, // Unicode Chinese Lower Thousand + { 2, 0x767E }, // Unicode Chinese Lower Hundred + { 1, 0x5341 }, // Unicode Chinese Lower Ten + { 0, 0x0000 } +}; + +NumToTextLower_zh_CN::NumToTextLower_zh_CN() { + numberChar = NumberChar[NumberChar_Lower_zh]; + numberMultiplier = multiplier_Lower_zh; + numberFlag = 0; + transliterationName = "NumToTextLower_zh_CN"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_CN"; +} + + +static NumberMultiplier multiplier_Upper_zh[] = { + { 12, 0x5146 }, // fourth four digits group, ten billion + { 8, 0x4EBF }, // third four digits group, hundred million + { 4, 0x4E07 }, // second four digits group, ten thousand + { 3, 0x4EDF }, // Unicode Chinese Lower Thousand + { 2, 0x4F70 }, // Unicode Chinese Lower Hundred + { 1, 0x62FE }, // Unicode Chinese Lower Ten + { 0, 0x0000 } +}; + +NumToTextUpper_zh_CN::NumToTextUpper_zh_CN() { + numberChar = NumberChar[NumberChar_Upper_zh]; + numberMultiplier = multiplier_Upper_zh; + numberFlag = 0; + transliterationName = "NumToTextUpper_zh_CN"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_CN"; +} + +static NumberMultiplier multiplier_Lower_zh_TW[] = { + { 12, 0x5146 }, // fourth four digits group, ten billion + { 8, 0x5104 }, // third four digits group, hundred million + { 4, 0x842C }, // second four digits group, ten thousand + { 3, 0x5343 }, // Unicode Chinese Lower Thousand + { 2, 0x767E }, // Unicode Chinese 
Lower Hundred + { 1, 0x5341 }, // Unicode Chinese Lower Ten + { 0, 0x0000 } +}; + +NumToTextLower_zh_TW::NumToTextLower_zh_TW() { + numberChar = NumberChar[NumberChar_Lower_zh]; + numberMultiplier = multiplier_Lower_zh_TW; + numberFlag = 0; + transliterationName = "NumToTextLower_zh_TW"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_TW"; +} + +static NumberMultiplier multiplier_Upper_zh_TW[] = { + { 12, 0x5146 }, // fourth four digits group, ten billion + { 8, 0x5104 }, // third four digits group, hundred million + { 4, 0x842C }, // second four digits group, ten thousand + { 3, 0x4EDF }, // Unicode Chinese Lower Thousand + { 2, 0x4F70 }, // Unicode Chinese Lower Hundred + { 1, 0x62FE }, // Unicode Chinese Lower Ten + { 0, 0x0000 } +}; + +NumToTextUpper_zh_TW::NumToTextUpper_zh_TW() { + numberChar = NumberChar[NumberChar_Upper_zh_TW]; + numberMultiplier = multiplier_Upper_zh_TW; + numberFlag = 0; + transliterationName = "NumToTextUpper_zh_TW"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_TW"; +} + +NumToTextFormalLower_ko::NumToTextFormalLower_ko() { + numberChar = NumberChar[NumberChar_Lower_ko]; + numberMultiplier = multiplier_Lower_zh_TW; + numberFlag = NUMBER_OMIT_ZERO; + transliterationName = "NumToTextFormalLower_ko"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalLower_ko"; +} + +NumToTextFormalUpper_ko::NumToTextFormalUpper_ko() { + numberChar = NumberChar[NumberChar_Upper_ko]; + numberMultiplier = multiplier_Lower_zh_TW; + numberFlag = NUMBER_OMIT_ZERO; + transliterationName = "NumToTextFormalUpper_ko"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalUpper_ko"; +} + +NumToTextInformalLower_ko::NumToTextInformalLower_ko() { + numberChar = NumberChar[NumberChar_Lower_ko]; + numberMultiplier = multiplier_Lower_zh_TW; + numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; + transliterationName = "NumToTextInformalLower_ko"; + implementationName = 
"com.sun.star.i18n.Transliteration.NumToTextInformalLower_ko"; +} + +NumToTextInformalUpper_ko::NumToTextInformalUpper_ko() { + numberChar = NumberChar[NumberChar_Upper_ko]; + numberMultiplier = multiplier_Lower_zh_TW; + numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; + transliterationName = "NumToTextInformalUpper_ko"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalUpper_ko"; +} + +static NumberMultiplier multiplier_Hangul_ko[] = { + { 12, 0xC870 }, // fourth four digits group, ten billion + { 8, 0xC5B5 }, // third four digits group, hundred million + { 4, 0xB9CC }, // second four digits group, ten thousand + { 3, 0xCC9C }, // Unicode Chinese Lower Thousand + { 2, 0xBC31 }, // Unicode Chinese Lower Hundred + { 1, 0xC2ED }, // Unicode Chinese Lower Ten + { 0, 0x0000 } +}; + +NumToTextFormalHangul_ko::NumToTextFormalHangul_ko() { + numberChar = NumberChar[NumberChar_Hangul_ko]; + numberMultiplier = multiplier_Hangul_ko; + numberFlag = NUMBER_OMIT_ZERO; + transliterationName = "NumToTextFormalHangul_ko"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalHangul_ko"; +} + +NumToTextInformalHangul_ko::NumToTextInformalHangul_ko() { + numberChar = NumberChar[NumberChar_Hangul_ko]; + numberMultiplier = multiplier_Hangul_ko; + numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; + transliterationName = "NumToTextInformalHangul_ko"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalHangul_ko"; +} + +static NumberMultiplier multiplier_Traditional_ja[] = { + { 9, 0x62FE }, // billion // 10 * 100000000 + { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one" + { 7, 0x9621 }, // ten million // 1000 * 10000 + { 6, 0x767E }, // million // 100 * 10000 + { 5, 0x62FE }, // hundred thousand // 10 * 10000 + { 4, 0x842C }, // ten thousand // 1 * 10000 // needs a preceding "one" + { 3, 0x9621 }, // thousand // 1000 + { 2, 0x767E }, // hundred // 100 + { 1, 0x62FE }, // ten // 10 + { 0, 0x0000 
} // one // 1 // needs a "one" +}; + +NumToTextKanjiLongTraditional_ja_JP::NumToTextKanjiLongTraditional_ja_JP() { + numberChar = NumberChar[NumberChar_Traditional_ja]; + numberMultiplier = multiplier_Traditional_ja; + numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; + transliterationName = "NumToTextKanjiLongTraditional_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongTraditional_ja_JP"; +} + +static NumberMultiplier multiplier_Modern_ja[] = { + { 9, 0x5341 }, // billion // 10 * 100000000 + { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one" + { 7, 0x5343 }, // ten million // 1000 * 10000 + { 6, 0x767E }, // million // 100 * 10000 + { 5, 0x5341 }, // hundred thousand // 10 * 10000 + { 4, 0x4E07 }, // ten thousand // 1 * 10000 // needs a preceding "one" + { 3, 0x5343 }, // thousand // 1000 + { 2, 0x767E }, // hundred // 100 + { 1, 0x5341 }, // ten // 10 + { 0, 0x0000 } // one // 1 // needs a "one" +}; + +NumToTextKanjiLongModern_ja_JP::NumToTextKanjiLongModern_ja_JP() { + numberChar = NumberChar[NumberChar_Modern_ja]; + numberMultiplier = multiplier_Modern_ja; + numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; + transliterationName = "NumToTextKanjiLongModern_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongModern_ja_JP"; +} + +} } } } diff --git a/i18npool/source/transliteration/smallToLarge_ja_JP.cxx b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx new file mode 100644 index 000000000000..49e054df486f --- /dev/null +++ b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx @@ -0,0 +1,108 @@ +/************************************************************************* + * + * $RCSfile: smallToLarge_ja_JP.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source 
License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <oneToOneMapping.hxx> +#define TRANSLITERATION_smallToLarge_ja_JP +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +smallToLarge_ja_JP::smallToLarge_ja_JP() +{ + transliterationName = "smallToLarge_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.smallToLarge_ja_JP"; +} + +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html +// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +// http://charts.unicode.org/Web/UFF00.html + +OneToOneMappingTable_t small2large[] = { + MAKE_PAIR( 0x3041, 0x3042 ), // HIRAGANA LETTER SMALL A --> HIRAGANA LETTER A + MAKE_PAIR( 0x3043, 0x3044 ), // HIRAGANA LETTER SMALL I --> HIRAGANA LETTER I + MAKE_PAIR( 0x3045, 0x3046 ), // HIRAGANA LETTER SMALL U --> HIRAGANA LETTER U + MAKE_PAIR( 0x3047, 0x3048 ), // HIRAGANA LETTER SMALL E --> HIRAGANA LETTER E + MAKE_PAIR( 0x3049, 0x304A ), // HIRAGANA LETTER SMALL O --> HIRAGANA LETTER O + MAKE_PAIR( 0x3063, 0x3064 ), // HIRAGANA LETTER SMALL TU --> HIRAGANA LETTER TU + MAKE_PAIR( 0x3083, 0x3084 ), // HIRAGANA LETTER SMALL YA --> HIRAGANA LETTER YA + MAKE_PAIR( 0x3085, 0x3086 ), // HIRAGANA LETTER SMALL YU --> HIRAGANA LETTER YU + MAKE_PAIR( 0x3087, 0x3088 ), // HIRAGANA LETTER SMALL YO --> HIRAGANA LETTER YO + MAKE_PAIR( 0x308E, 0x308F ), // HIRAGANA LETTER SMALL WA --> HIRAGANA LETTER WA + MAKE_PAIR( 0x30A1, 0x30A2 ), // KATAKANA LETTER SMALL A --> KATAKANA LETTER A + MAKE_PAIR( 0x30A3, 0x30A4 ), // KATAKANA LETTER SMALL I 
--> KATAKANA LETTER I + MAKE_PAIR( 0x30A5, 0x30A6 ), // KATAKANA LETTER SMALL U --> KATAKANA LETTER U + MAKE_PAIR( 0x30A7, 0x30A8 ), // KATAKANA LETTER SMALL E --> KATAKANA LETTER E + MAKE_PAIR( 0x30A9, 0x30AA ), // KATAKANA LETTER SMALL O --> KATAKANA LETTER O + MAKE_PAIR( 0x30C3, 0x30C4 ), // KATAKANA LETTER SMALL TU --> KATAKANA LETTER TU + MAKE_PAIR( 0x30E3, 0x30E4 ), // KATAKANA LETTER SMALL YA --> KATAKANA LETTER YA + MAKE_PAIR( 0x30E5, 0x30E6 ), // KATAKANA LETTER SMALL YU --> KATAKANA LETTER YU + MAKE_PAIR( 0x30E7, 0x30E8 ), // KATAKANA LETTER SMALL YO --> KATAKANA LETTER YO + MAKE_PAIR( 0x30EE, 0x30EF ), // KATAKANA LETTER SMALL WA --> KATAKANA LETTER WA + MAKE_PAIR( 0x30F5, 0x30AB ), // KATAKANA LETTER SMALL KA --> KATAKANA LETTER KA + MAKE_PAIR( 0x30F6, 0x30B1 ), // KATAKANA LETTER SMALL KE --> KATAKANA LETTER KE + MAKE_PAIR( 0xFF67, 0xFF71 ), // HALFWIDTH KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER A + MAKE_PAIR( 0xFF68, 0xFF72 ), // HALFWIDTH KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER I + MAKE_PAIR( 0xFF69, 0xFF73 ), // HALFWIDTH KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER U + MAKE_PAIR( 0xFF6A, 0xFF74 ), // HALFWIDTH KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER E + MAKE_PAIR( 0xFF6B, 0xFF75 ), // HALFWIDTH KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER O + MAKE_PAIR( 0xFF6C, 0xFF94 ), // HALFWIDTH KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER YA + MAKE_PAIR( 0xFF6D, 0xFF95 ), // HALFWIDTH KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER YU + MAKE_PAIR( 0xFF6E, 0xFF96 ), // HALFWIDTH KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER YO + MAKE_PAIR( 0xFF6F, 0xFF82 ) // HALFWIDTH KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER TU +}; + +OUString SAL_CALL +smallToLarge_ja_JP::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // One to One mapping + oneToOneMapping table(small2large, 
sizeof(small2large)); + return transliteration_OneToOne::transliterate( inStr, startPos, nCount, offset, table ); +} + +} } } } diff --git a/i18npool/source/transliteration/textToPronounce_zh.cxx b/i18npool/source/transliteration/textToPronounce_zh.cxx new file mode 100644 index 000000000000..803b1e21978f --- /dev/null +++ b/i18npool/source/transliteration/textToPronounce_zh.cxx @@ -0,0 +1,150 @@ +/************************************************************************* + * + * $RCSfile: textToPronounce_zh.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <rtl/string.hxx> +#include <rtl/ustrbuf.hxx> +#define TRANSLITERATION_ALL +#include <textToPronounce_zh.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Int16 SAL_CALL TextToPronounce_zh::getType() throw (RuntimeException) +{ + return TransliterationType::ONE_TO_ONE| TransliterationType::IGNORE; +} + +OUString SAL_CALL +TextToPronounce_zh::folding(const OUString & inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 > & offset) throw (RuntimeException) +{ + return transliterate(inStr, startPos, nCount, offset); +} + +OUString SAL_CALL +TextToPronounce_zh::transliterate(const OUString & inStr, sal_Int32 startPos, + sal_Int32 nCount, Sequence< sal_Int32 > & offset) throw (RuntimeException) +{ + sal_Unicode u; + OUStringBuffer sb; + const sal_Unicode * chArr = inStr.getStr() + startPos; + + sal_Int32 j; + if (startPos < 0) + throw RuntimeException(); + + if (startPos + nCount > inStr.getLength()) + nCount = inStr.getLength() - startPos; + + offset[0] = 0; + for (sal_Int32 i = 0; i < nCount; i++) { + u = chArr[i]; + j = pronTab[u]; + if (j == -1) { + offset[i + 1] = offset[i]; + continue; + } + + if (j == -1) + break; + sb.append(&pronList[pronIdx[j]], pronIdx[j + 1] - pronIdx[j]); + + offset[i + 1] = offset[i] + pronIdx[j + 1] - pronIdx[j]; + } + return OUString(sb.getStr()); +} + +sal_Bool SAL_CALL +TextToPronounce_zh::equals( const OUString & str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32 & nMatch1, + const OUString & str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32 & nMatch2) + throw (RuntimeException) +{ + sal_Int32 realCount; + int i; // loop variable + const sal_Unicode * s1, * s2; + sal_Unicode u1, u2; + + if (nCount1 + 
pos1 > str1.getLength()) + nCount1 = str1.getLength() - pos1; + + if (nCount2 + pos2 > str2.getLength()) + nCount2 = str2.getLength() - pos2; + + realCount = ((nCount1 > nCount2) ? nCount2 : nCount1); + + s1 = str1.getStr() + pos1; + s2 = str2.getStr() + pos2; + for (i = 0; i < realCount; i++) { + u1 = * s1++; + u2 = * s2 ++; + if (pronTab[u1] != pronTab[u2]) { + nMatch1 = nMatch2 = i; + return sal_False; + } + } + nMatch1 = nMatch2 = realCount; + return sal_True && (nCount1 == nCount2); +} + +#include <data/pron_zh_cn.h> + +TextToPinyin_zh_CN::TextToPinyin_zh_CN() { + pronList = pronList_zh_cn; + pronIdx = pronIdx_zh_cn; + pronTab = pronTab_zh_cn; + transliterationName = "ChineseCharacterToPinyin"; + implementationName = "com.sun.star.i18n.Transliteration.TextToPinyin_zh_CN"; +} + +#include <data/pron_zh_tw.h> + +TextToChuyin_zh_TW::TextToChuyin_zh_TW() { + pronList = pronList_zh_tw; + pronIdx = pronIdx_zh_tw; + pronTab = pronTab_zh_tw; + transliterationName = "ChineseCharacterToChuyin"; + implementationName = "com.sun.star.i18n.Transliteration.TextToChuyin_zh_TW"; +} + +} } } } diff --git a/i18npool/source/transliteration/transliterationImpl.cxx b/i18npool/source/transliteration/transliterationImpl.cxx new file mode 100644 index 000000000000..48040a49cadb --- /dev/null +++ b/i18npool/source/transliteration/transliterationImpl.cxx @@ -0,0 +1,586 @@ +/************************************************************************* + * + * $RCSfile: transliterationImpl.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * 
Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include "transliterationImpl.hxx" +#include "servicename.hxx" + +#include <com/sun/star/i18n/XTransliteration.hpp> +#include <com/sun/star/i18n/TransliterationType.hpp> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/container/XContentEnumerationAccess.hpp> +#include <com/sun/star/container/XEnumeration.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> + +#include <comphelper/processfactory.hxx> +#include <rtl/string.h> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> + +#include <tools/string.hxx> + +#ifdef DEBUG +#include <stdio.h> +#endif + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; +using namespace com::sun::star::container; + +namespace com { namespace sun { namespace star { namespace i18n { + +#define ERROR RuntimeException() + +#define TmItem1( name ) \ + {TransliterationModules_##name, TransliterationModulesNew_##name, #name} + +#define TmItem2( name ) \ + {(TransliterationModules)0, TransliterationModulesNew_##name, #name} 
+ +// Ignore Module list +static struct TMlist { + TransliterationModules tm; + TransliterationModulesNew tmn; + sal_Char *implName; +} TMlist[] = { // Modules ModulesNew + TmItem1 (IGNORE_CASE), // (1<<8 256) (7) + TmItem1 (IGNORE_WIDTH), // (1<<9 512) (8) + TmItem1 (IGNORE_KANA), // (1<<10 1024) (9) +// No enum define for this trans. application has to use impl name to load it +// TmItem1 (IGNORE_CASE_SIMPLE), // (1<<11 1024) (66) + + TmItem1 (ignoreTraditionalKanji_ja_JP), // (1<<12 4096) (10) + TmItem1 (ignoreTraditionalKana_ja_JP), // (1<<13 8192) (11) + TmItem1 (ignoreMinusSign_ja_JP), // (1<<13 16384) (12) + TmItem1 (ignoreIterationMark_ja_JP), // (1<<14 32768) (13) + TmItem1 (ignoreSeparator_ja_JP), // (1<<15 65536) (14) + TmItem1 (ignoreZiZu_ja_JP), // (1<<16 131072) (15) + TmItem1 (ignoreBaFa_ja_JP), // (1<<17 262144) (16) + TmItem1 (ignoreTiJi_ja_JP), // (1<<18 524288) (17) + TmItem1 (ignoreHyuByu_ja_JP), // (1<<19 1048576) (18) + TmItem1 (ignoreSeZe_ja_JP), // (1<<20 2097152) (19) + TmItem1 (ignoreIandEfollowedByYa_ja_JP), // (1<<21 4194304) (20) + TmItem1 (ignoreKiKuFollowedBySa_ja_JP), // (1<<22 8388608) (21) + TmItem1 (ignoreSize_ja_JP), // (1<<23 16777216) (22) + TmItem1 (ignoreProlongedSoundMark_ja_JP), // (1<<24 33554432) (23) + TmItem1 (ignoreMiddleDot_ja_JP), // (1<<25 67108864) (24) + TmItem1 (ignoreSpace_ja_JP), // (1<<26 134217728) (25) + TmItem1 (UPPERCASE_LOWERCASE), // (1) (1) + TmItem1 (LOWERCASE_UPPERCASE), // (2) (2) + TmItem1 (HALFWIDTH_FULLWIDTH), // (3) (3) + TmItem1 (FULLWIDTH_HALFWIDTH), // (4) (4) + TmItem1 (KATAKANA_HIRAGANA), // (5) (5) + TmItem1 (HIRAGANA_KATAKANA), // (6) (6) + + TmItem1 (smallToLarge_ja_JP), // (1<<27 268435456) (26) + TmItem1 (largeToSmall_ja_JP), // (1<<28 536870912) (27) + TmItem2 (NumToTextLower_zh_CN), // () (28) + TmItem2 (NumToTextUpper_zh_CN), // () (29) + TmItem2 (NumToTextLower_zh_TW), // () (30) + TmItem2 (NumToTextUpper_zh_TW), // () (31) + TmItem2 (NumToTextFormalHangul_ko), // () (32) + TmItem2 
(NumToTextFormalLower_ko), // () (33) + TmItem2 (NumToTextFormalUpper_ko), // () (34) + TmItem2 (NumToTextInformalHangul_ko), // () (35) + TmItem2 (NumToTextInformalLower_ko), // () (36) + TmItem2 (NumToTextInformalUpper_ko), // () (37) + TmItem2 (NumToCharLower_zh_CN), // () (38) + TmItem2 (NumToCharUpper_zh_CN), // () (39) + TmItem2 (NumToCharLower_zh_TW), // () (40) + TmItem2 (NumToCharUpper_zh_TW), // () (41) + TmItem2 (NumToCharHangul_ko), // () (42) + TmItem2 (NumToCharLower_ko), // () (43) + TmItem2 (NumToCharUpper_ko), // () (44) + TmItem2 (NumToCharFullwidth), // () (45) + TmItem2 (NumToCharKanjiShort_ja_JP), // () (46) + TmItem2 (TextToNumLower_zh_CN), // () (47) + TmItem2 (TextToNumUpper_zh_CN), // () (48) + TmItem2 (TextToNumLower_zh_TW), // () (49) + TmItem2 (TextToNumUpper_zh_TW), // () (50) + TmItem2 (TextToNumFormalHangul_ko), // () (51) + TmItem2 (TextToNumFormalLower_ko), // () (52) + TmItem2 (TextToNumFormalUpper_ko), // () (53) + TmItem2 (TextToNumInformalHangul_ko), // () (54) + TmItem2 (TextToNumInformalLower_ko), // () (55) + TmItem2 (TextToNumInformalUpper_ko), // () (56) + + TmItem2 (CharToNumLower_zh_CN), // () (59) + TmItem2 (CharToNumUpper_zh_CN), // () (60) + TmItem2 (CharToNumLower_zh_TW), // () (61) + TmItem2 (CharToNumUpper_zh_TW), // () (62) + TmItem2 (CharToNumHangul_ko), // () (63) + TmItem2 (CharToNumLower_ko), // () (64) + TmItem2 (CharToNumUpper_ko), // () (65) + +// no enum defined for these trans. 
application has to use impl name to load them +// TmItem2 (NumToCharArabic_Indic), // () (67) +// TmItem2 (NumToCharEstern_Arabic_Indic),// () (68) +// TmItem2 (NumToCharIndic), // () (69) +// TmItem2 (NumToCharThai), // () (70) + {(TransliterationModules)0, (TransliterationModulesNew)0, NULL} +}; + + +// Constructor/Destructor +TransliterationImpl::TransliterationImpl(const Reference <XMultiServiceFactory>& xMSF) : xSMgr(xMSF) +{ + numCascade = 0; + caseignoreOnly = sal_True; + + if ( xMSF.is() ) + { + Reference < XInterface > xI= + xMSF->createInstance(OUString::createFromAscii("com.sun.star.i18n.LocaleData")); + if ( xI.is() ) { + Any x = xI->queryInterface( ::getCppuType( (const uno::Reference< i18n::XLocaleData >*)0) ); + x >>= localedata; + } + } +} + +TransliterationImpl::~TransliterationImpl() +{ + localedata.clear(); + clear(); +} + + +// Methods +OUString SAL_CALL +TransliterationImpl::getName() throw(RuntimeException) +{ + if (numCascade == 1 && bodyCascade[0].is()) + return bodyCascade[0]->getName(); + if (numCascade < 1) + return ( OUString::createFromAscii("Not Loaded")); + throw ERROR; +} + +sal_Int16 SAL_CALL +TransliterationImpl::getType() throw(RuntimeException) +{ + if (numCascade > 1) + return (TransliterationType::CASCADE|TransliterationType::IGNORE); + if (bodyCascade[0].is()) + return(bodyCascade[0]->getType()); + throw ERROR; +} + +void SAL_CALL +TransliterationImpl::loadModule( TransliterationModules modType, const Locale& rLocale ) + throw(RuntimeException) +{ + clear(); + if (modType&TransliterationModules_IGNORE_MASK && modType&TransliterationModules_NON_IGNORE_MASK) { + throw ERROR; + } else if (modType&TransliterationModules_IGNORE_MASK) { +#define TransliterationModules_IGNORE_CASE_MASK (TransliterationModules_IGNORE_CASE | \ + TransliterationModules_IGNORE_WIDTH | \ + TransliterationModules_IGNORE_KANA) + sal_Int32 mask = ((modType&TransliterationModules_IGNORE_CASE_MASK) == modType) ? 
+ TransliterationModules_IGNORE_CASE_MASK : TransliterationModules_IGNORE_MASK; + for (sal_Int16 i = 0; TMlist[i].tm & mask; i++) { + if (modType & TMlist[i].tm) + if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), + bodyCascade[numCascade], rLocale)) + numCascade++; + } + } else if (modType&TransliterationModules_NON_IGNORE_MASK) { + for (sal_Int16 i = 0; TMlist[i].tm; i++) { + if (TMlist[i].tm == modType) { + if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), bodyCascade[numCascade], rLocale)) + numCascade++; + break; + } + } + } +} + +void SAL_CALL +TransliterationImpl::loadModuleNew( const Sequence < TransliterationModulesNew > & modType, const Locale& rLocale ) + throw(RuntimeException) +{ + clear(); + for (sal_Int16 i = 0; i < modType.getLength(); i++) { + for (sal_Int16 j = 0; TMlist[j].tmn; j++) { + if (TMlist[j].tmn == modType[i]) { + if (loadModuleByName(OUString::createFromAscii(TMlist[j].implName), bodyCascade[numCascade], rLocale)) + numCascade++; + break; + } + } + } +} + +void SAL_CALL +TransliterationImpl::loadModuleByImplName(const OUString& implName, const Locale& rLocale) + throw(RuntimeException) +{ + clear(); + if (loadModuleByName(implName, bodyCascade[numCascade], rLocale)) + numCascade++; +} + + +void SAL_CALL +TransliterationImpl::loadModulesByImplNames(const Sequence< OUString >& implNameList, const Locale& rLocale ) throw(RuntimeException) +{ + if (implNameList.getLength() > maxCascade || implNameList.getLength() <= 0) + throw ERROR; + + clear(); + for (sal_Int32 i = 0; i < implNameList.getLength(); i++) + if (loadModuleByName(implNameList[i], bodyCascade[numCascade], rLocale)) + numCascade++; +} + + +Sequence<OUString> SAL_CALL +TransliterationImpl::getAvailableModules( const Locale& rLocale, sal_Int16 sType ) throw(RuntimeException) +{ + Sequence<OUString> &translist = localedata->getTransliterations(rLocale); + Sequence<OUString> &r = *new Sequence< OUString > (translist.getLength()); + 
Reference<XTransliteration> body; + sal_Int32 n = 0; + for (sal_Int32 i = 0; i < translist.getLength(); i++) + { + if (loadModuleByName(translist[i], body, rLocale)) { + if (body->getType() & sType) + r[n++] = translist[i]; + body.clear(); + } + } + r.realloc(n); + return (r); +} + + +OUString SAL_CALL +TransliterationImpl::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + if (numCascade == 1) + return bodyCascade[0]->transliterate(inStr, startPos, nCount, offset); + + OUString tmpStr(inStr.getStr()+startPos, nCount); + offset.realloc(nCount); + for (sal_Int32 j = 0; j < nCount; j++) + offset[j] = startPos + j; + + sal_Int16 from = 0, to = 1, tmp; + Sequence<sal_Int32> off[2]; + + off[to] = offset; + for (sal_Int32 i = 0; i < numCascade; i++) { + tmpStr = bodyCascade[i]->transliterate(tmpStr, 0, nCount, off[from]); + + nCount = tmpStr.getLength(); + + tmp = from; to = from; from = tmp; + for (sal_Int32 j = 0; j < nCount; j++) + off[to][j] = off[from][off[to][j]]; + } + offset = off[to]; + return tmpStr; +} + + +// +OUString SAL_CALL +TransliterationImpl::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + if (numCascade == 1) + return bodyCascade[0]->folding(inStr, startPos, nCount, offset); + + OUString tmpStr(inStr.getStr()+startPos, nCount); + offset.realloc(nCount); + for (sal_Int32 j = 0; j < nCount; j++) + offset[j] = startPos + j; + + sal_Int16 from = 0, to = 1, tmp; + Sequence<sal_Int32> off[2]; + + off[to] = offset; + for (sal_Int32 i = 0; i < numCascade; i++) { + tmpStr = bodyCascade[i]->folding(tmpStr, 0, nCount, off[from]); + + nCount = tmpStr.getLength(); + + tmp = from; to = from; from = tmp; + for (sal_Int32 j = 0; j < nCount; j++) + off[to][j] = off[from][off[to][j]]; + } + offset = off[to]; + return tmpStr; +} + + +sal_Bool SAL_CALL +TransliterationImpl::equals( + const OUString& 
str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) + throw(RuntimeException) +{ + // since this is an API function make it user fail safe + if ( nCount1 < 0 ) { + pos1 += nCount1; + nCount1 = -nCount1; + } + if ( nCount2 < 0 ) { + pos2 += nCount2; + nCount2 = -nCount2; + } + if ( !nCount1 || !nCount2 || + pos1 >= str1.getLength() || pos2 >= str2.getLength() || + pos1 < 0 || pos2 < 0 ) { + nMatch1 = nMatch2 = 0; + // two empty strings return true, else false + return !nCount1 && !nCount2 && pos1 == str1.getLength() && pos2 == str2.getLength(); + } + if ( pos1 + nCount1 > str1.getLength() ) + nCount1 = str1.getLength() - pos1; + if ( pos2 + nCount2 > str2.getLength() ) + nCount2 = str2.getLength() - pos2; + + if (caseignoreOnly && caseignore.is()) + return caseignore->equals(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2); + + Sequence<long> offset1, offset2; + + OUString tmpStr1 = folding(str1, pos1, nCount1, offset1); + OUString tmpStr2 = folding(str2, pos2, nCount2, offset2); + + const sal_Unicode *p1 = tmpStr1.getStr(); + const sal_Unicode *p2 = tmpStr2.getStr(); + sal_Int32 i, nLen = (tmpStr1.getLength() < tmpStr1.getLength() ? + tmpStr1.getLength() : tmpStr2.getLength()); + for (i = 0; i < nLen; ++i, ++p1, ++p2 ) { + if (*p1 != *p2) { + // return number of matched code points so far + nMatch1 = offset1[i]; + nMatch2 = offset2[i]; + return sal_False; + } + } + // i==nLen + if ( tmpStr1.getLength() != tmpStr2.getLength() ) { + // return number of matched code points so far + nMatch1 = offset1[i-1] + 1; + nMatch2 = offset2[i-1] + 1; + return sal_False; + } else { + nMatch1 = nCount1; + nMatch2 = nCount2; + return sal_True; + } +} + +#define MaxOutput 2 + +Sequence< OUString > SAL_CALL +TransliterationImpl::getRange(const Sequence< OUString > &inStrs, + const sal_Int32 length, sal_Int16 _numCascade) throw(RuntimeException) +{ + if (_numCascade >= numCascade || ! 
bodyCascade[_numCascade].is()) + return inStrs; + + sal_Int32 j_tmp = 0; + Sequence< OUString > ostr(MaxOutput*length); + for (sal_Int32 j = 0; j < length; j+=2) { + Sequence< OUString >& temp = bodyCascade[_numCascade]->transliterateRange(inStrs[j], inStrs[j+1]); + + for ( sal_Int32 k = 0; k < temp.getLength(); k++) { + if ( j_tmp >= MaxOutput*length ) throw ERROR; + ostr[j_tmp++] = temp[k]; + } + } + ostr.realloc(j_tmp); + + return this->getRange(ostr, j_tmp, ++_numCascade); +} + + +Sequence< OUString > SAL_CALL +TransliterationImpl::transliterateRange( const OUString& str1, const OUString& str2 ) +throw(RuntimeException) +{ + if (numCascade == 1) + return bodyCascade[0]->transliterateRange(str1, str2); + + Sequence< OUString > ostr(2); + ostr[0] = str1; + ostr[1] = str2; + + return this->getRange(ostr, 2, 0); +} + + +sal_Int32 SAL_CALL +TransliterationImpl::compareSubstring( + const OUString& str1, sal_Int32 off1, sal_Int32 len1, + const OUString& str2, sal_Int32 off2, sal_Int32 len2) + throw(RuntimeException) +{ + if (caseignoreOnly && caseignore.is()) + return caseignore->compareSubstring(str1, off1, len1, str2, off2, len2); + + Sequence <long> offset; + + OUString in_str1 = this->transliterate(str1, off1, len1, offset); + OUString in_str2 = this->transliterate(str2, off2, len2, offset); + const sal_Unicode* unistr1 = in_str1.getStr(); + const sal_Unicode* unistr2 = in_str2.getStr(); + sal_Int32 strlen1 = in_str1.getLength(); + sal_Int32 strlen2 = in_str2.getLength(); + + while (strlen1 && strlen2) { + if (*unistr1 != *unistr2) + return *unistr1 > *unistr2 ? 1 : -1; + + unistr1++; unistr2++; strlen1--; strlen2--; + } + return strlen1 = strlen2 ? 0 : (strlen1 > strlen2 ? 
1 : -1); +} + + +sal_Int32 SAL_CALL +TransliterationImpl::compareString(const OUString& str1, const OUString& str2 ) throw (RuntimeException) +{ + if (caseignoreOnly && caseignore.is()) + return caseignore->compareString(str1, str2); + else + return this->compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength()); +} + + +void +TransliterationImpl::clear() +{ + for (sal_Int32 i = 0; i < numCascade; i++) + if (bodyCascade[i].is()) + bodyCascade[i].clear(); + numCascade = 0; + caseignore.clear(); + caseignoreOnly = sal_True; +} + +static void SAL_CALL loadBody( Reference<XMultiServiceFactory>& xSMgr, OUString &implName, + Reference<XTransliteration>& body) throw(RuntimeException) +{ + Reference< XContentEnumerationAccess > xEnumAccess( xSMgr, UNO_QUERY ); + Reference< XEnumeration > xEnum(xEnumAccess->createContentEnumeration( + OUString::createFromAscii(TRLT_SERVICELNAME_L10N))); + if (xEnum.is()) { + while (xEnum->hasMoreElements()) { + Any a = xEnum->nextElement(); + Reference< XServiceInfo > xsInfo; + if (a >>= xsInfo) { + if (implName.equals(xsInfo->getImplementationName())) { + Reference< XSingleServiceFactory > xFactory; + if (a >>= xFactory) { + Reference< XInterface > xI = xFactory->createInstance(); + if (xI.is()) { + a = xI->queryInterface(::getCppuType(( + const Reference<XTransliteration>*)0)); + a >>= body; + return; + } + } + } + } + } + } + throw ERROR; +} + +sal_Bool SAL_CALL +TransliterationImpl::loadModuleByName( const OUString& implName, + Reference<XTransliteration>& body, const Locale& rLocale) throw(RuntimeException) +{ + OUString bname = OUString::createFromAscii(TRLT_IMPLNAME_PREFIX) + implName; + loadBody(xSMgr, bname, body); + if (body.is()) { + body->loadModule((TransliterationModules)0, rLocale); // toUpper/toLoad need rLocale + + // if the module is ignore case/kana/width, load caseignore for equals/compareString mothed + for (sal_Int16 i = 0; i < 3; i++) { + if (implName.compareToAscii(TMlist[i].implName) == 0) { + if (i == 
0) // current module is caseignore + body->loadModule(TMlist[0].tm, rLocale); // caseingore need to setup module name + if (! caseignore.is()) { + OUString bname = OUString::createFromAscii(TRLT_IMPLNAME_PREFIX) + + OUString::createFromAscii(TMlist[0].implName); + loadBody(xSMgr, bname, caseignore); + } + if (caseignore.is()) + caseignore->loadModule(TMlist[i].tm, rLocale); + return sal_True; + } + } + caseignoreOnly = sal_False; // has other module than just ignore case/kana/width + } + return body.is(); +} + +const sal_Char cTrans[] = "com.sun.star.i18n.Transliteration"; + +OUString SAL_CALL +TransliterationImpl::getImplementationName() throw( RuntimeException ) +{ + return OUString::createFromAscii(cTrans); +} + + +sal_Bool SAL_CALL +TransliterationImpl::supportsService(const OUString& rServiceName) throw( RuntimeException ) +{ + return !rServiceName.compareToAscii(cTrans); +} + +Sequence< OUString > SAL_CALL +TransliterationImpl::getSupportedServiceNames(void) throw( RuntimeException ) +{ + Sequence< OUString > aRet(1); + aRet[0] = OUString::createFromAscii(cTrans); + return aRet; +} + +} } } } diff --git a/i18npool/source/transliteration/transliteration_Ignore.cxx b/i18npool/source/transliteration/transliteration_Ignore.cxx new file mode 100644 index 000000000000..497d11fce8aa --- /dev/null +++ b/i18npool/source/transliteration/transliteration_Ignore.cxx @@ -0,0 +1,202 @@ +/************************************************************************* + * + * $RCSfile: transliteration_Ignore.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the 
Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +inline sal_Int32 Min( sal_Int32 a, sal_Int32 b ) { return a > b ? b : a; } + +sal_Bool SAL_CALL +transliteration_Ignore::equals(const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2 ) throw(RuntimeException) +{ + Sequence< sal_Int32 > offset1; + Sequence< sal_Int32 > offset2; + + // The method folding is defined in a sub class. 
+ OUString s1 = this->folding( str1, pos1, nCount1, offset1); + OUString s2 = this->folding( str2, pos2, nCount2, offset2); + + const sal_Unicode * p1 = s1.getStr(); + const sal_Unicode * p2 = s2.getStr(); + sal_Int32 length = Min(s1.getLength(), s2.getLength()); + + for (sal_Int32 nmatch = 0; nmatch < length; nmatch++) + if (*p1++ != *p2++) + break; + + if (nmatch > 0) { + nMatch1 = offset1[ nmatch - 1 ] + 1; // Subtract 1 from nmatch because the index starts from zero. + nMatch2 = offset2[ nmatch - 1 ] + 1; // And then, add 1 to position because it means the number of character matched. + } + else { + nMatch1 = 0; // No character was matched. + nMatch2 = 0; + } + + return (nmatch == s1.getLength()) && (nmatch == s2.getLength()); +} + + +Sequence< OUString > SAL_CALL +transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2 ) throw(RuntimeException) +{ + if (str1.getLength() < 1 || str2.getLength() < 1) + throw RuntimeException(); + + Sequence< OUString > r(2); + r[0] = str1.copy(0, 1); + r[1] = str2.copy(0, 1); + return r; +} + + +sal_Int16 SAL_CALL +transliteration_Ignore::getType() throw(RuntimeException) +{ + // The type is also defined in com/sun/star/util/TransliterationType.hdl + return TransliterationType::IGNORE; +} + + +OUString SAL_CALL +transliteration_Ignore::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + // The method folding is defined in a sub class. 
+ return this->folding( inStr, startPos, nCount, offset); +} + +Sequence< OUString > SAL_CALL +transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2, + XTransliteration& t1, XTransliteration& t2 ) throw(RuntimeException) +{ + if (str1.getLength() < 1 || str2.getLength() < 1) + throw RuntimeException(); + + Sequence< sal_Int32 > offset; + OUString s11 = t1.transliterate( str1, 0, 1, offset ); + OUString s12 = t1.transliterate( str2, 0, 1, offset ); + OUString s21 = t2.transliterate( str1, 0, 1, offset ); + OUString s22 = t2.transliterate( str2, 0, 1, offset ); + + if ( (s11 == s21) && (s12 == s22) ) { + Sequence< OUString > r(2); + r[0] = s11; + r[1] = s12; + return r; + } + + Sequence< OUString > r(4); + r[0] = s11; + r[1] = s12; + r[2] = s21; + r[3] = s22; + return r; +} + +OUString SAL_CALL +transliteration_Ignore::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, oneToOneMapping& table ) throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // Translation + while (nCount -- > 0) { + sal_Unicode c = table[ *src++ ]; + // if the "func" returns 0xffff, skip the character. + if (c != 0xffff) { + *dst ++ = c; + *p ++ = position; + } + position ++; + } + *dst = (sal_Unicode) 0; + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + + +OUString SAL_CALL +transliteration_Ignore::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, sal_Unicode (*func)(const sal_Unicode) ) throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // Translation + while (nCount -- > 0) { + sal_Unicode c = func( *src++ ); + // if the "func" returns 0xffff, skip the character. + if (c != 0xffff) { + *dst ++ = c; + *p ++ = position; + } + position ++; + } + *dst = (sal_Unicode) 0; + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 
+} + +} } } } diff --git a/i18npool/source/transliteration/transliteration_Numeric.cxx b/i18npool/source/transliteration/transliteration_Numeric.cxx new file mode 100644 index 000000000000..f34b42eae5ab --- /dev/null +++ b/i18npool/source/transliteration/transliteration_Numeric.cxx @@ -0,0 +1,73 @@ +/************************************************************************* + * + * $RCSfile: transliteration_Numeric.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <transliteration_Numeric.hxx> +using namespace com::sun::star::uno; + +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Int16 SAL_CALL transliteration_Numeric::getType() throw(RuntimeException) +{ + return TransliterationType::NUMERIC; +} + +OUString SAL_CALL +transliteration_Numeric::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + throw (new RuntimeException()); +} + +sal_Bool SAL_CALL +transliteration_Numeric::equals( const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2 ) + throw(RuntimeException) +{ + throw (new RuntimeException()); +} + +Sequence< OUString > SAL_CALL +transliteration_Numeric::transliterateRange( const OUString& str1, const OUString& str2 ) + throw(RuntimeException) +{ + throw (new RuntimeException()); +} + +} } } } diff --git a/i18npool/source/transliteration/transliteration_OneToOne.cxx b/i18npool/source/transliteration/transliteration_OneToOne.cxx new file mode 100644 index 000000000000..ebf3e37f05da --- /dev/null +++ b/i18npool/source/transliteration/transliteration_OneToOne.cxx @@ -0,0 +1,152 @@ +/************************************************************************* + * + * $RCSfile: transliteration_OneToOne.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of 
this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +sal_Int16 SAL_CALL transliteration_OneToOne::getType() throw(RuntimeException) +{ + // This type is also defined in com/sun/star/util/TransliterationType.hdl + return TransliterationType::ONE_TO_ONE; +} + +OUString SAL_CALL +transliteration_OneToOne::folding( const OUString& inStr, sal_Int32 startPos, + sal_Int32 nCount, Sequence< sal_Int32 >& offset) throw(RuntimeException) +{ + throw RuntimeException(); +} + +sal_Bool SAL_CALL +transliteration_OneToOne::equals( const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, + sal_Int32& nMatch1, const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2 ) + throw(RuntimeException) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL +transliteration_OneToOne::transliterateRange( const OUString& 
str1, const OUString& str2 ) + throw(RuntimeException) +{ + throw RuntimeException(); +} + + +OUString SAL_CALL +transliteration_OneToOne::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, oneToOneMapping& table ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Translation + while (nCount -- > 0) { + sal_Unicode c = *src++; + *dst ++ = table[ c ]; + } + *dst = (sal_Unicode) 0; + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + +OUString SAL_CALL +transliteration_OneToOne::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, oneToOneMapping& table ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. + rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // Translation + while (nCount -- > 0) { + sal_Unicode c = *src++; + *dst ++ = table[ c ]; + *p ++ = position ++; + } + *dst = (sal_Unicode) 0; + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + +OUString SAL_CALL +transliteration_OneToOne::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, TransFunc func ) + throw(RuntimeException) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 0 now. 
+ rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + sal_Int32 *p = offset.getArray(); + sal_Int32 position = startPos; + + // Translation + while (nCount -- > 0) { + sal_Unicode c = *src++; + *dst ++ = func( c ); + *p ++ = position ++; + } + *dst = (sal_Unicode) 0; + + return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. +} + +} } } } + diff --git a/i18npool/source/transliteration/transliteration_body.cxx b/i18npool/source/transliteration/transliteration_body.cxx new file mode 100644 index 000000000000..90eb4e6bd3b4 --- /dev/null +++ b/i18npool/source/transliteration/transliteration_body.cxx @@ -0,0 +1,220 @@ +/************************************************************************* + * + * $RCSfile: transliteration_body.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+ * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#define TRANSLITERATION_ALL +#include "transliteration_body.hxx" +#include "data/transliteration_casemapping.h" + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +Transliteration_body::Transliteration_body() +{ + aMappingType = 0; + transliterationName = "Transliteration_body"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body"; +} + +sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException) +{ + return TransliterationType::ONE_TO_ONE; +} + +sal_Bool SAL_CALL Transliteration_body::equals( + const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) + throw(RuntimeException) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL +Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 ) + throw( RuntimeException) +{ + Sequence< OUString > ostr(2); + ostr[0] = str1; + ostr[1] = str2; + return ostr; +} + +OUString SAL_CALL +Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset) throw(RuntimeException) +{ + // Allocate the max possible buffer. Try to use stack instead of heap which + // would have to be reallocated most times anyway. 
+ const sal_Int32 nLocalBuf = 512 * NMAPPINGMAX; + sal_Unicode aLocalBuf[nLocalBuf], *out = aLocalBuf, *aHeapBuf = NULL; + sal_Unicode *in = (sal_Unicode*) inStr.getStr() + startPos; + + if (nCount > 512) + out = aHeapBuf = (sal_Unicode*) malloc((nCount * NMAPPINGMAX) * sizeof(sal_Unicode)); + + offset.realloc(nCount * NMAPPINGMAX); + sal_Int32 j = 0; + for (sal_Int32 i = 0; i < nCount; i++) { + Mapping &map = getValue(in, i, nCount); + for (sal_Int32 k = 0; k < map.nmap; k++) { + out[j] = map.map[k]; + offset[j++] = i + startPos; + } + } + offset.realloc(j); + + OUString r(out, j); + + if (aHeapBuf) + free(aHeapBuf); + + return r; +} + +OUString SAL_CALL +Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset) throw(RuntimeException) +{ + return this->transliterate(inStr, startPos, nCount, offset); +} + +static Mapping mapping_03a3[] = {{0, 1, 0x03c2, 0, 0 },{0, 1, 0x03c3, 0, 0}}; +static Mapping mapping_0307[] = {{0, 0, 0, 0, 0 },{0, 1, 0x0307, 0, 0}}; +static Mapping mapping_0049[] = {{0, 2, 0x0069, 0x0307, 0},{0, 1, 0x0131, 0, 0},{0, 1, 0x0069, 0, 0}}; +static Mapping mapping_004a[] = {{0, 2, 0x006a, 0x0307, 0},{0, 1, 0x006a, 0, 0}}; +static Mapping mapping_012e[] = {{0, 2, 0x012f, 0x0307, 0},{0, 1, 0x012f, 0, 0}}; +static Mapping mapping_00cc[] = {{0, 3, 0x0069, 0x0307, 0x0300},{0, 1, 0x00ec, 0, 0}}; +static Mapping mapping_00cd[] = {{0, 3, 0x0069, 0x0307, 0x0301},{0, 1, 0x00ed, 0, 0}}; +static Mapping mapping_0128[] = {{0, 3, 0x0069, 0x0307, 0x0303},{0, 1, 0x0129, 0, 0}}; +static Mapping mapping_0069[] = {{0, 1, 0x0130, 0, 0},{0, 1, 0x0049, 0, 0}}; + +#define langIs(lang) (aLocale.Language.compareToAscii(lang) == 0) + +// only check simple case, there is more complicated case need to be checked. 
+#define type_i(ch) (ch == 0x0069 || ch == 0x006a) + +#define cased_letter(ch) (CaseMappingIndex[ch>>8] >= 0 && (CaseMappingValue[(CaseMappingIndex[ch>>8] << 8) + (ch&0xff)].type & CasedLetter)) + +Mapping& Transliteration_body::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len) throw(RuntimeException) +{ + switch(str[pos]) { + case 0x03a3: + // final_sigma (not followed by cased and preceded by cased character) + // DOES NOT check ignorable sequence yet (more complicated implementation). + return !(pos < len && cased_letter(str[pos+1])) && (pos > 0 && cased_letter(str[pos-1])) ? + mapping_03a3[0] : mapping_03a3[1]; + case 0x0307: + return ((aMappingType == MappingTypeLowerToUpper && langIs("lt") || + aMappingType == MappingTypeUpperToLower && (langIs("tr") || langIs("az"))) && + (pos > 0 && type_i(str[pos-1]))) ? // after_i + mapping_0307[0] : mapping_0307[1]; + case 0x0069: + return (langIs("tr") || langIs("az")) ? mapping_0069[0] : mapping_0069[1]; + case 0x0049: return langIs("lt") ? mapping_0049[0] : + (langIs("tr") || langIs("az")) ? mapping_0049[1] : mapping_0049[2]; + case 0x004a: return langIs("lt") ? mapping_004a[0] : mapping_004a[1]; + case 0x012e: return langIs("lt") ? mapping_012e[0] : mapping_012e[1]; + case 0x00cc: return langIs("lt") ? mapping_00cc[0] : mapping_00cc[1]; + case 0x00cd: return langIs("lt") ? mapping_00cd[0] : mapping_00cd[1]; + case 0x0128: return langIs("lt") ? 
mapping_0128[0] : mapping_0128[1]; + } +} + +Mapping& Transliteration_body::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len) throw(RuntimeException) +{ + static Mapping dummy = { 0, 1, 0, 0, 0 }; + sal_Int16 address = CaseMappingIndex[str[pos] >> 8] << 8; + + dummy.map[0] = str[pos]; + + if (address >= 0 && (CaseMappingValue[address += (str[pos] & 0xFF)].type & aMappingType)) { + sal_uInt8 type = CaseMappingValue[address].type; + if (type & ValueTypeNotValue) { + if (CaseMappingValue[address].value == 0) + return getConditionalValue(str, pos, len); + else { + for (int map = CaseMappingValue[address].value; + map < CaseMappingValue[address].value + MaxCaseMappingExtras; map++) { + if (CaseMappingExtra[map].type & aMappingType) { + if (CaseMappingExtra[map].type & ValueTypeNotValue) + return getConditionalValue(str, pos, len); + else + return CaseMappingExtra[map]; + } + } + // Should not come here + throw RuntimeException(); + } + } else + dummy.map[0] = CaseMappingValue[address].value; + } + return dummy; +} + +Transliteration_casemapping::Transliteration_casemapping() +{ + aMappingType = 0; + transliterationName = "casemapping(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping"; +} + +void SAL_CALL +Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale ) +{ + aMappingType = rMappingType; + aLocale = rLocale; +} + +Transliteration_u2l::Transliteration_u2l() +{ + aMappingType = MappingTypeUpperToLower; + transliterationName = "upper_to_lower(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l"; +} + +Transliteration_l2u::Transliteration_l2u() +{ + aMappingType = MappingTypeLowerToUpper; + transliterationName = "lower_to_upper(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u"; +} + +} } } } diff --git a/i18npool/source/transliteration/transliteration_caseignore.cxx 
b/i18npool/source/transliteration/transliteration_caseignore.cxx new file mode 100644 index 000000000000..21eb37315f2d --- /dev/null +++ b/i18npool/source/transliteration/transliteration_caseignore.cxx @@ -0,0 +1,221 @@ +/************************************************************************* + * + * $RCSfile: transliteration_caseignore.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +// prevent internal compiler error with MSVC6SP3 +#include <stl/utility> + +#include "oneToOneMapping.hxx" +#include "transliteration_caseignore.hxx" + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +oneToOneMapping& gethalf2fullTable(void); +sal_Unicode getCompositionChar(sal_Unicode c1, sal_Unicode c2); + +Transliteration_caseignore::Transliteration_caseignore() +{ + aMappingType = MappingTypeFullFolding; + moduleLoaded = (TransliterationModules)0; + transliterationName = "case ignore (generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_caseignore"; +} + +Transliteration_simplecaseignore::Transliteration_simplecaseignore() +{ + aMappingType = MappingTypeSimpleFolding; + moduleLoaded = (TransliterationModules)0; + transliterationName = "simple case ignore (generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_simplecaseignore"; +} + +void SAL_CALL +Transliteration_caseignore::loadModule( TransliterationModules modName, const Locale& rLocale ) + throw(RuntimeException) +{ + moduleLoaded = (TransliterationModules) (moduleLoaded|modName); + aLocale = rLocale; +} + +sal_Int16 SAL_CALL Transliteration_caseignore::getType() throw(RuntimeException) +{ + // It's NOT TransliterationType::ONE_TO_ONE because it's using casefolding + return TransliterationType::IGNORE; +} + + +Sequence< OUString > SAL_CALL +Transliteration_caseignore::transliterateRange( const OUString& str1, const OUString& str2 ) + throw( RuntimeException) +{ + if (str1.getLength() != 1 || str2.getLength() != 1) + throw RuntimeException(); + + static Transliteration_u2l u2l; + static Transliteration_l2u l2u; + + u2l.loadModule((TransliterationModules)0, aLocale); + 
l2u.loadModule((TransliterationModules)0, aLocale); + + Sequence< sal_Int32 > offset; + + OUString l1 = u2l.transliterate(str1, 0, str1.getLength(), offset); + OUString u1 = l2u.transliterate(str1, 0, str1.getLength(), offset); + OUString l2 = u2l.transliterate(str2, 0, str2.getLength(), offset); + OUString u2 = l2u.transliterate(str2, 0, str2.getLength(), offset); + + if ((l1 == u1) && (l2 == u2)) { + Sequence< OUString > &r = *new Sequence< OUString > (2); + r[0] = l1; + r[1] = l2; + return r; + } else { + Sequence< OUString > &r = *new Sequence< OUString > (4); + r[0] = l1; + r[1] = l2; + r[2] = u1; + r[3] = u2; + return r; + } +} + +sal_Bool SAL_CALL +Transliteration_caseignore::equals( + const ::rtl::OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const ::rtl::OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) + throw(::com::sun::star::uno::RuntimeException) +{ + return (compare(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2) == 0); +} + +sal_Int32 SAL_CALL +Transliteration_caseignore::compareSubstring( + const ::rtl::OUString& str1, sal_Int32 off1, sal_Int32 len1, + const ::rtl::OUString& str2, sal_Int32 off2, sal_Int32 len2) + throw(RuntimeException) +{ + sal_Int32 nMatch1, nMatch2; + return compare(str1, off1, len1, nMatch1, str2, off2, len2, nMatch2); +} + + +sal_Int32 SAL_CALL +Transliteration_caseignore::compareString( + const ::rtl::OUString& str1, + const ::rtl::OUString& str2) + throw(RuntimeException) +{ + sal_Int32 nMatch1, nMatch2; + return compare(str1, 0, str1.getLength(), nMatch1, str2, 0, str2.getLength(), nMatch2); +} + +inline sal_Bool SAL_CALL +is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next) +{ + sal_Unicode c = 0; + + if ((next == 0x3099 || next == 0x309a) && (c = getCompositionChar(current, next))) + current = c; + return c != 0; +} + + +sal_Unicode SAL_CALL +Transliteration_caseignore::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& 
e) + throw(RuntimeException) +{ + sal_Unicode c; + if (moduleLoaded & TransliterationModules_IGNORE_CASE) { + if (e.current >= e.element.nmap && idx < len ) { + e.element = getValue(str, idx++, len); + e.current = 0; + } + c = e.element.map[e.current++]; + } else { + c = *(str + idx++); + } + if (moduleLoaded & TransliterationModules_IGNORE_KANA) { + if (0x3040 <= c && c <= 0x3094 || 0x309d <= c && c <= 0x309f) + c += 0x60; + } + + // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail + if (moduleLoaded & TransliterationModules_IGNORE_WIDTH) { + static oneToOneMapping& half2fullTable = gethalf2fullTable(); + c = half2fullTable[c]; + if (0x3040 <= c && c <= 0x30ff && idx < len && + is_ja_voice_sound_mark(c, half2fullTable[*(str + idx)])) + idx++; + } + return c; +} + + +sal_Int32 SAL_CALL +Transliteration_caseignore::compare( + const ::rtl::OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const ::rtl::OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) + throw(RuntimeException) +{ + const sal_Unicode *unistr1 = (sal_Unicode*) str1.getStr() + pos1; + const sal_Unicode *unistr2 = (sal_Unicode*) str2.getStr() + pos2; + sal_Unicode c1, c2; + MappingElement e1, e2; + nMatch1 = nMatch2 = 0; + +#define NOT_END_OF_STR1 (nMatch1 < nCount1 || e1.current < e1.element.nmap) +#define NOT_END_OF_STR2 (nMatch2 < nCount2 || e2.current < e2.element.nmap) + + while (NOT_END_OF_STR1 && NOT_END_OF_STR2) { + c1 = getNextChar(unistr1, nMatch1, nCount1, e1); + c2 = getNextChar(unistr2, nMatch2, nCount2, e2); + if (c1 != c2) { + nMatch1--; nMatch2--; + return c1 > c2 ? 1 : -1; + } + } + + return (!NOT_END_OF_STR1 && !NOT_END_OF_STR2) ? 0 + : (NOT_END_OF_STR1 ? 
1 : -1); +} + +} } } } diff --git a/i18npool/source/transliteration/transliteration_commonclass.cxx b/i18npool/source/transliteration/transliteration_commonclass.cxx new file mode 100644 index 000000000000..f0902bc2f25d --- /dev/null +++ b/i18npool/source/transliteration/transliteration_commonclass.cxx @@ -0,0 +1,157 @@ +/************************************************************************* + * + * $RCSfile: transliteration_commonclass.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 13:23:20 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <transliteration_commonclass.hxx> +#include <com/sun/star/i18n/CollatorOptions.hpp> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +transliteration_commonclass::transliteration_commonclass() +{ + transliterationName = ""; + implementationName = ""; +} + +OUString SAL_CALL transliteration_commonclass::getName() throw(RuntimeException) +{ + return OUString::createFromAscii(transliterationName); +} + +void SAL_CALL transliteration_commonclass::loadModule( TransliterationModules modName, const Locale& rLocale ) + throw(RuntimeException) +{ + aLocale = rLocale; +} + + +void SAL_CALL +transliteration_commonclass::loadModuleNew( const Sequence < TransliterationModulesNew >& modName, const Locale& rLocale ) + throw(RuntimeException) +{ + throw RuntimeException(); +} + + +void SAL_CALL +transliteration_commonclass::loadModuleByImplName( const OUString& implName, const Locale& rLocale ) + throw(RuntimeException) +{ + throw RuntimeException(); +} + +void SAL_CALL +transliteration_commonclass::loadModulesByImplNames(const Sequence< OUString >& modNamelist, const Locale& rLocale) + throw(RuntimeException) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL +transliteration_commonclass::getAvailableModules( const Locale& rLocale, sal_Int16 sType ) + throw(RuntimeException) +{ + throw RuntimeException(); +} + +sal_Int32 SAL_CALL +transliteration_commonclass::compareSubstring( + const OUString& str1, sal_Int32 off1, sal_Int32 len1, + const OUString& str2, sal_Int32 off2, sal_Int32 len2) + throw(RuntimeException) +{ + const sal_Unicode* unistr1 = NULL; + const sal_Unicode* unistr2 = NULL; + sal_uInt32 strlen1; + sal_uInt32 strlen2; + + Sequence <long> offset1(2*len1); + Sequence 
<long> offset2(2*len2); + + OUString in_str1 = this->transliterate(str1, off1, len1, offset1); + OUString in_str2 = this->transliterate(str2, off2, len2, offset2); + strlen1 = in_str1.getLength(); + strlen2 = in_str2.getLength(); + unistr1 = in_str1.getStr(); + unistr2 = in_str2.getStr(); + + while (strlen1 && strlen2) + { + sal_uInt32 ret = *unistr1 - *unistr2; + if (ret) + return ret; + + unistr1++; + unistr2++; + strlen1--; + strlen2--; + } + return strlen1 - strlen2; +} + +sal_Int32 SAL_CALL +transliteration_commonclass::compareString( const OUString& str1, const OUString& str2 ) throw ( RuntimeException) +{ + return( this->compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength())); +} + + +OUString SAL_CALL transliteration_commonclass::getImplementationName() throw( RuntimeException ) +{ + return OUString::createFromAscii(implementationName); +} + +const sal_Char cTrans[] = "com.sun.star.i18n.Transliteration.l10n"; + +sal_Bool SAL_CALL transliteration_commonclass::supportsService(const OUString& rServiceName) throw( RuntimeException ) +{ + return rServiceName.equalsAscii(cTrans); +} + +Sequence< OUString > SAL_CALL transliteration_commonclass::getSupportedServiceNames() throw( RuntimeException ) +{ + Sequence< OUString > aRet(1); + aRet[0] = OUString::createFromAscii(cTrans); + return aRet; +} + +} } } } |