/************************************************************************* * * $RCSfile: fullwidthToHalfwidth.cxx,v $ * * $Revision: 1.2 $ * * last change: $Author: er $ $Date: 2002-03-26 17:13:19 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses * * - GNU Lesser General Public License Version 2.1 * - Sun Industry Standards Source License Version 1.1 * * Sun Microsystems Inc., October, 2000 * * GNU Lesser General Public License Version 2.1 * ============================================= * Copyright 2000 by Sun Microsystems, Inc. * 901 San Antonio Road, Palo Alto, CA 94303, USA * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * * * Sun Industry Standards Source License Version 1.1 * ================================================= * The contents of this file are subject to the Sun Industry Standards * Source License Version 1.1 (the "License"); You may not use this file * except in compliance with the License. You may obtain a copy of the * License at http://www.openoffice.org/license.html. * * Software provided under this License is provided on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
* See the License for the specific provisions governing your rights and
* obligations concerning the Software.
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/

// prevent internal compiler error with MSVC6SP3
#include // NOTE(review): the header name is missing from this copy of the file; restore it from version control.

#include // NOTE(review): header name missing here as well; restore it from version control.
#define TRANSLITERATION_fullwidthToHalfwidth
#include // NOTE(review): header name missing here as well; restore it from version control.

using namespace com::sun::star::uno;
using namespace com::sun::star::lang;
using namespace rtl;

namespace com { namespace sun { namespace star { namespace i18n {

/**
 * Constructor: stores the name of this transliteration and the name of its
 * implementation in transliterationName / implementationName (both are
 * declared outside the visible part of this file).
 */
fullwidthToHalfwidth::fullwidthToHalfwidth()
{
    transliterationName = "fullwidthToHalfwidth";
    implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH";
}

/*
 Unicode Normalization Forms
 http://www.unicode.org/unicode/reports/tr15/index.html
 http://www.w3.org/International/charlint/
*/

/**
 * One row of the decomposition table: the two characters a composed
 * character is replaced by.  A row whose first character is 0x0000 means
 * "no decomposition for this code point".
 */
typedef struct {
    sal_Unicode decomposited_character_1;   // base letter, or 0x0000 if the code point is not decomposed
    sal_Unicode decomposited_character_2;   // combining (semi-)voiced sound mark that follows the base letter
} decomposition_table_entry_t;

/**
 * Decomposition table for the Hiragana and Katakana blocks.
 *
 * The table is positional: the row for code point c is found at index
 * (c - 0x3040), so there must be exactly one row per code point from
 * 0x3040 to 0x30ff, in ascending order (decompose_ja_voiced_sound_marks()
 * in this file indexes it that way).  The trailing comment on each row
 * names the code point the row belongs to.
 */
const decomposition_table_entry_t decomposition_table[] = {
    { 0x0000, 0x0000 }, // 0x3040
    { 0x0000, 0x0000 }, // 0x3041 HIRAGANA LETTER SMALL A
    { 0x0000, 0x0000 }, // 0x3042 HIRAGANA LETTER A
    { 0x0000, 0x0000 }, // 0x3043 HIRAGANA LETTER SMALL I
    { 0x0000, 0x0000 }, // 0x3044 HIRAGANA LETTER I
    { 0x0000, 0x0000 }, // 0x3045 HIRAGANA LETTER SMALL U
    { 0x0000, 0x0000 }, // 0x3046 HIRAGANA LETTER U
    { 0x0000, 0x0000 }, // 0x3047 HIRAGANA LETTER SMALL E
    { 0x0000, 0x0000 }, // 0x3048 HIRAGANA LETTER E
    { 0x0000, 0x0000 }, // 0x3049 HIRAGANA LETTER SMALL O
    { 0x0000, 0x0000 }, // 0x304a HIRAGANA LETTER O
    { 0x0000, 0x0000 }, // 0x304b HIRAGANA LETTER KA
    { 0x304b, 0x3099 }, // 0x304c HIRAGANA LETTER GA --> HIRAGANA LETTER KA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x304d HIRAGANA LETTER KI
    { 0x304d, 0x3099 }, // 0x304e HIRAGANA LETTER GI --> HIRAGANA LETTER KI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x304f HIRAGANA LETTER KU
    { 0x304f, 0x3099 }, // 0x3050 HIRAGANA LETTER GU --> HIRAGANA LETTER KU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3051 HIRAGANA LETTER KE
    { 0x3051, 0x3099 }, // 0x3052 HIRAGANA LETTER GE --> HIRAGANA LETTER KE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3053 HIRAGANA LETTER KO
    { 0x3053, 0x3099 }, // 0x3054 HIRAGANA LETTER GO --> HIRAGANA LETTER KO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3055 HIRAGANA LETTER SA
    { 0x3055, 0x3099 }, // 0x3056 HIRAGANA LETTER ZA --> HIRAGANA LETTER SA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3057 HIRAGANA LETTER SI
    { 0x3057, 0x3099 }, // 0x3058 HIRAGANA LETTER ZI --> HIRAGANA LETTER SI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3059 HIRAGANA LETTER SU
    { 0x3059, 0x3099 }, // 0x305a HIRAGANA LETTER ZU --> HIRAGANA LETTER SU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x305b HIRAGANA LETTER SE
    { 0x305b, 0x3099 }, // 0x305c HIRAGANA LETTER ZE --> HIRAGANA LETTER SE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x305d HIRAGANA LETTER SO
    { 0x305d, 0x3099 }, // 0x305e HIRAGANA LETTER ZO --> HIRAGANA LETTER SO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x305f HIRAGANA LETTER TA
    { 0x305f, 0x3099 }, // 0x3060 HIRAGANA LETTER DA --> HIRAGANA LETTER TA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3061 HIRAGANA LETTER TI
    { 0x3061, 0x3099 }, // 0x3062 HIRAGANA LETTER DI --> HIRAGANA LETTER TI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3063 HIRAGANA LETTER SMALL TU
    { 0x0000, 0x0000 }, // 0x3064 HIRAGANA LETTER TU
    { 0x3064, 0x3099 }, // 0x3065 HIRAGANA LETTER DU --> HIRAGANA LETTER TU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3066 HIRAGANA LETTER TE
    { 0x3066, 0x3099 }, // 0x3067 HIRAGANA LETTER DE --> HIRAGANA LETTER TE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3068 HIRAGANA LETTER TO
    { 0x3068, 0x3099 }, // 0x3069 HIRAGANA LETTER DO --> HIRAGANA LETTER TO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x306a HIRAGANA LETTER NA
    { 0x0000, 0x0000 }, // 0x306b HIRAGANA LETTER NI
    { 0x0000, 0x0000 }, // 0x306c HIRAGANA LETTER NU
    { 0x0000, 0x0000 }, // 0x306d HIRAGANA LETTER NE
    { 0x0000, 0x0000 }, // 0x306e HIRAGANA LETTER NO
    { 0x0000, 0x0000 }, // 0x306f HIRAGANA LETTER HA
    { 0x306f, 0x3099 }, // 0x3070 HIRAGANA LETTER BA --> HIRAGANA LETTER HA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x306f, 0x309a }, // 0x3071 HIRAGANA LETTER PA --> HIRAGANA LETTER HA + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3072 HIRAGANA LETTER HI
    { 0x3072, 0x3099 }, // 0x3073 HIRAGANA LETTER BI --> HIRAGANA LETTER HI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x3072, 0x309a }, // 0x3074 HIRAGANA LETTER PI --> HIRAGANA LETTER HI + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3075 HIRAGANA LETTER HU
    { 0x3075, 0x3099 }, // 0x3076 HIRAGANA LETTER BU --> HIRAGANA LETTER HU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x3075, 0x309a }, // 0x3077 HIRAGANA LETTER PU --> HIRAGANA LETTER HU + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3078 HIRAGANA LETTER HE
    { 0x3078, 0x3099 }, // 0x3079 HIRAGANA LETTER BE --> HIRAGANA LETTER HE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x3078, 0x309a }, // 0x307a HIRAGANA LETTER PE --> HIRAGANA LETTER HE + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x307b HIRAGANA LETTER HO
    { 0x307b, 0x3099 }, // 0x307c HIRAGANA LETTER BO --> HIRAGANA LETTER HO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x307b, 0x309a }, // 0x307d HIRAGANA LETTER PO --> HIRAGANA LETTER HO + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x307e HIRAGANA LETTER MA
    { 0x0000, 0x0000 }, // 0x307f HIRAGANA LETTER MI
    { 0x0000, 0x0000 }, // 0x3080 HIRAGANA LETTER MU
    { 0x0000, 0x0000 }, // 0x3081 HIRAGANA LETTER ME
    { 0x0000, 0x0000 }, // 0x3082 HIRAGANA LETTER MO
    { 0x0000, 0x0000 }, // 0x3083 HIRAGANA LETTER SMALL YA
    { 0x0000, 0x0000 }, // 0x3084 HIRAGANA LETTER YA
    { 0x0000, 0x0000 }, // 0x3085 HIRAGANA LETTER SMALL YU
    { 0x0000, 0x0000 }, // 0x3086 HIRAGANA LETTER YU
    { 0x0000, 0x0000 }, // 0x3087 HIRAGANA LETTER SMALL YO
    { 0x0000, 0x0000 }, // 0x3088 HIRAGANA LETTER YO
    { 0x0000, 0x0000 }, // 0x3089 HIRAGANA LETTER RA
    { 0x0000, 0x0000 }, // 0x308a HIRAGANA LETTER RI
    { 0x0000, 0x0000 }, // 0x308b HIRAGANA LETTER RU
    { 0x0000, 0x0000 }, // 0x308c HIRAGANA LETTER RE
    { 0x0000, 0x0000 }, // 0x308d HIRAGANA LETTER RO
    { 0x0000, 0x0000 }, // 0x308e HIRAGANA LETTER SMALL WA
    { 0x0000, 0x0000 }, // 0x308f HIRAGANA LETTER WA
    { 0x0000, 0x0000 }, // 0x3090 HIRAGANA LETTER WI
    { 0x0000, 0x0000 }, // 0x3091 HIRAGANA LETTER WE
    { 0x0000, 0x0000 }, // 0x3092 HIRAGANA LETTER WO
    { 0x0000, 0x0000 }, // 0x3093 HIRAGANA LETTER N
    { 0x3046, 0x3099 }, // 0x3094 HIRAGANA LETTER VU --> HIRAGANA LETTER U + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x3095
    { 0x0000, 0x0000 }, // 0x3096
    { 0x0000, 0x0000 }, // 0x3097
    { 0x0000, 0x0000 }, // 0x3098
    { 0x0000, 0x0000 }, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x309d HIRAGANA ITERATION MARK
    { 0x0000, 0x0000 }, // 0x309e HIRAGANA VOICED ITERATION MARK
    { 0x0000, 0x0000 }, // 0x309f
    { 0x0000, 0x0000 }, // 0x30a0
    { 0x0000, 0x0000 }, // 0x30a1 KATAKANA LETTER SMALL A
    { 0x0000, 0x0000 }, // 0x30a2 KATAKANA LETTER A
    { 0x0000, 0x0000 }, // 0x30a3 KATAKANA LETTER SMALL I
    { 0x0000, 0x0000 }, // 0x30a4 KATAKANA LETTER I
    { 0x0000, 0x0000 }, // 0x30a5 KATAKANA LETTER SMALL U
    { 0x0000, 0x0000 }, // 0x30a6 KATAKANA LETTER U
    { 0x0000, 0x0000 }, // 0x30a7 KATAKANA LETTER SMALL E
    { 0x0000, 0x0000 }, // 0x30a8 KATAKANA LETTER E
    { 0x0000, 0x0000 }, // 0x30a9 KATAKANA LETTER SMALL O
    { 0x0000, 0x0000 }, // 0x30aa KATAKANA LETTER O
    { 0x0000, 0x0000 }, // 0x30ab KATAKANA LETTER KA
    { 0x30ab, 0x3099 }, // 0x30ac KATAKANA LETTER GA --> KATAKANA LETTER KA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30ad KATAKANA LETTER KI
    { 0x30ad, 0x3099 }, // 0x30ae KATAKANA LETTER GI --> KATAKANA LETTER KI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30af KATAKANA LETTER KU
    { 0x30af, 0x3099 }, // 0x30b0 KATAKANA LETTER GU --> KATAKANA LETTER KU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30b1 KATAKANA LETTER KE
    { 0x30b1, 0x3099 }, // 0x30b2 KATAKANA LETTER GE --> KATAKANA LETTER KE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30b3 KATAKANA LETTER KO
    { 0x30b3, 0x3099 }, // 0x30b4 KATAKANA LETTER GO --> KATAKANA LETTER KO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30b5 KATAKANA LETTER SA
    { 0x30b5, 0x3099 }, // 0x30b6 KATAKANA LETTER ZA --> KATAKANA LETTER SA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30b7 KATAKANA LETTER SI
    { 0x30b7, 0x3099 }, // 0x30b8 KATAKANA LETTER ZI --> KATAKANA LETTER SI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30b9 KATAKANA LETTER SU
    { 0x30b9, 0x3099 }, // 0x30ba KATAKANA LETTER ZU --> KATAKANA LETTER SU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30bb KATAKANA LETTER SE
    { 0x30bb, 0x3099 }, // 0x30bc KATAKANA LETTER ZE --> KATAKANA LETTER SE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30bd KATAKANA LETTER SO
    { 0x30bd, 0x3099 }, // 0x30be KATAKANA LETTER ZO --> KATAKANA LETTER SO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30bf KATAKANA LETTER TA
    { 0x30bf, 0x3099 }, // 0x30c0 KATAKANA LETTER DA --> KATAKANA LETTER TA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30c1 KATAKANA LETTER TI
    { 0x30c1, 0x3099 }, // 0x30c2 KATAKANA LETTER DI --> KATAKANA LETTER TI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30c3 KATAKANA LETTER SMALL TU
    { 0x0000, 0x0000 }, // 0x30c4 KATAKANA LETTER TU
    { 0x30c4, 0x3099 }, // 0x30c5 KATAKANA LETTER DU --> KATAKANA LETTER TU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30c6 KATAKANA LETTER TE
    { 0x30c6, 0x3099 }, // 0x30c7 KATAKANA LETTER DE --> KATAKANA LETTER TE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30c8 KATAKANA LETTER TO
    { 0x30c8, 0x3099 }, // 0x30c9 KATAKANA LETTER DO --> KATAKANA LETTER TO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30ca KATAKANA LETTER NA
    { 0x0000, 0x0000 }, // 0x30cb KATAKANA LETTER NI
    { 0x0000, 0x0000 }, // 0x30cc KATAKANA LETTER NU
    { 0x0000, 0x0000 }, // 0x30cd KATAKANA LETTER NE
    { 0x0000, 0x0000 }, // 0x30ce KATAKANA LETTER NO
    { 0x0000, 0x0000 }, // 0x30cf KATAKANA LETTER HA
    { 0x30cf, 0x3099 }, // 0x30d0 KATAKANA LETTER BA --> KATAKANA LETTER HA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x30cf, 0x309a }, // 0x30d1 KATAKANA LETTER PA --> KATAKANA LETTER HA + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30d2 KATAKANA LETTER HI
    { 0x30d2, 0x3099 }, // 0x30d3 KATAKANA LETTER BI --> KATAKANA LETTER HI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x30d2, 0x309a }, // 0x30d4 KATAKANA LETTER PI --> KATAKANA LETTER HI + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30d5 KATAKANA LETTER HU
    { 0x30d5, 0x3099 }, // 0x30d6 KATAKANA LETTER BU --> KATAKANA LETTER HU + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x30d5, 0x309a }, // 0x30d7 KATAKANA LETTER PU --> KATAKANA LETTER HU + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30d8 KATAKANA LETTER HE
    { 0x30d8, 0x3099 }, // 0x30d9 KATAKANA LETTER BE --> KATAKANA LETTER HE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x30d8, 0x309a }, // 0x30da KATAKANA LETTER PE --> KATAKANA LETTER HE + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30db KATAKANA LETTER HO
    { 0x30db, 0x3099 }, // 0x30dc KATAKANA LETTER BO --> KATAKANA LETTER HO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x30db, 0x309a }, // 0x30dd KATAKANA LETTER PO --> KATAKANA LETTER HO + COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30de KATAKANA LETTER MA
    { 0x0000, 0x0000 }, // 0x30df KATAKANA LETTER MI
    { 0x0000, 0x0000 }, // 0x30e0 KATAKANA LETTER MU
    { 0x0000, 0x0000 }, // 0x30e1 KATAKANA LETTER ME
    { 0x0000, 0x0000 }, // 0x30e2 KATAKANA LETTER MO
    { 0x0000, 0x0000 }, // 0x30e3 KATAKANA LETTER SMALL YA
    { 0x0000, 0x0000 }, // 0x30e4 KATAKANA LETTER YA
    { 0x0000, 0x0000 }, // 0x30e5 KATAKANA LETTER SMALL YU
    { 0x0000, 0x0000 }, // 0x30e6 KATAKANA LETTER YU
    { 0x0000, 0x0000 }, // 0x30e7 KATAKANA LETTER SMALL YO
    { 0x0000, 0x0000 }, // 0x30e8 KATAKANA LETTER YO
    { 0x0000, 0x0000 }, // 0x30e9 KATAKANA LETTER RA
    { 0x0000, 0x0000 }, // 0x30ea KATAKANA LETTER RI
    { 0x0000, 0x0000 }, // 0x30eb KATAKANA LETTER RU
    { 0x0000, 0x0000 }, // 0x30ec KATAKANA LETTER RE
    { 0x0000, 0x0000 }, // 0x30ed KATAKANA LETTER RO
    { 0x0000, 0x0000 }, // 0x30ee KATAKANA LETTER SMALL WA
    { 0x0000, 0x0000 }, // 0x30ef KATAKANA LETTER WA
    { 0x0000, 0x0000 }, // 0x30f0 KATAKANA LETTER WI
    { 0x0000, 0x0000 }, // 0x30f1 KATAKANA LETTER WE
    { 0x0000, 0x0000 }, // 0x30f2 KATAKANA LETTER WO
    { 0x0000, 0x0000 }, // 0x30f3 KATAKANA LETTER N
    { 0x30a6, 0x3099 }, // 0x30f4 KATAKANA LETTER VU --> KATAKANA LETTER U + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    { 0x0000, 0x0000 }, // 0x30f5 KATAKANA LETTER SMALL KA
    { 0x0000, 0x0000 }, // 0x30f6 KATAKANA LETTER SMALL KE
    { 0x30ef, 0x3099
}, // 0x30f7 KATAKANA LETTER VA --> KATAKANA LETTER WA + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK { 0x30f0, 0x3099 }, // 0x30f8 KATAKANA LETTER VI --> KATAKANA LETTER WI + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK { 0x30f1, 0x3099 }, // 0x30f9 KATAKANA LETTER VE --> KATAKANA LETTER WE + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK { 0x30f2, 0x3099 }, // 0x30fa KATAKANA LETTER VO --> KATAKANA LETTER WO + COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK { 0x0000, 0x0000 }, // 0x30fb KATAKANA MIDDLE DOT { 0x0000, 0x0000 }, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK { 0x0000, 0x0000 }, // 0x30fd KATAKANA ITERATION MARK { 0x0000, 0x0000 }, // 0x30fe KATAKANA VOICED ITERATION MARK { 0x0000, 0x0000 } // 0x30ff }; /** * Decompose Japanese specific voiced and semi-voiced sound marks. */ OUString SAL_CALL decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) { // Create a string buffer which can hold nCount * 2 + 1 characters. // Its size may become double of nCount. rtl_uString * newStr; x_rtl_uString_new_WithLength( &newStr, nCount * 2 ); // defined in x_rtl_ustring.h The reference count is 0 now. // Allocate double of nCount length to offset argument. offset.realloc( nCount * 2 ); sal_Int32 *p = offset.getArray(); sal_Int32 position = startPos; // Prepare pointers of unicode character arrays. const sal_Unicode* src = inStr.getStr() + startPos; sal_Unicode* dst = newStr->buffer; // Decomposition: GA --> KA + voice-mark while (nCount -- > 0) { sal_Unicode c = *src++; // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) // Hiragana is not applied to decomposition. 
// Only Katakana is applied to decomposition if (0x30a0 <= c && c <= 0x30ff) { int i = int(c - 0x3040); sal_Unicode first = decomposition_table[i].decomposited_character_1; if (first != 0x0000) { *dst ++ = first; *dst ++ = decomposition_table[i].decomposited_character_2; // second *p ++ = position; *p ++ = position ++; continue; } } *dst ++ = c; *p ++ = position ++; } *dst = (sal_Unicode) 0; newStr->length = sal_Int32(dst - newStr->buffer); offset.realloc(newStr->length); return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. } /* Halfwidth and Fullwidth Forms (U+FF00..U+FFEF) ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html http://charts.unicode.org/Web/UFF00.html */ OneToOneMappingTable_t full2half[] = { MAKE_PAIR( 0x2190, 0xFFE9 ), // LEFTWARDS ARROW --> HALFWIDTH LEFTWARDS ARROW MAKE_PAIR( 0x2191, 0xFFEA ), // UPWARDS ARROW --> HALFWIDTH UPWARDS ARROW MAKE_PAIR( 0x2192, 0xFFEB ), // RIGHTWARDS ARROW --> HALFWIDTH RIGHTWARDS ARROW MAKE_PAIR( 0x2193, 0xFFEC ), // DOWNWARDS ARROW --> HALFWIDTH DOWNWARDS ARROW MAKE_PAIR( 0x2212, 0x002D ), // MINUS SIGN --> HYPHEN-MINUS MAKE_PAIR( 0x2502, 0xFFE8 ), // BOX DRAWINGS LIGHT VERTICAL --> HALFWIDTH FORMS LIGHT VERTICAL MAKE_PAIR( 0x25A0, 0xFFED ), // BLACK SQUARE --> HALFWIDTH BLACK SQUARE MAKE_PAIR( 0x25CB, 0xFFEE ), // WHITE CIRCLE --> HALFWIDTH WHITE CIRCLE MAKE_PAIR( 0x3000, 0x0020 ), // IDEOGRAPHIC SPACE --> SPACE MAKE_PAIR( 0x3001, 0xFF64 ), // IDEOGRAPHIC COMMA --> HALFWIDTH IDEOGRAPHIC COMMA MAKE_PAIR( 0x3002, 0xFF61 ), // IDEOGRAPHIC FULL STOP --> HALFWIDTH IDEOGRAPHIC FULL STOP MAKE_PAIR( 0x300C, 0xFF62 ), // LEFT CORNER BRACKET --> HALFWIDTH LEFT CORNER BRACKET MAKE_PAIR( 0x300D, 0xFF63 ), // RIGHT CORNER BRACKET --> HALFWIDTH RIGHT CORNER BRACKET MAKE_PAIR( 0x3099, 0xFF9E ), // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK --> HALFWIDTH KATAKANA VOICED SOUND MARK MAKE_PAIR( 0x309A, 0xFF9F ), 
// COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK --> HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK MAKE_PAIR( 0x30A1, 0xFF67 ), // KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER SMALL A MAKE_PAIR( 0x30A2, 0xFF71 ), // KATAKANA LETTER A --> HALFWIDTH KATAKANA LETTER A MAKE_PAIR( 0x30A3, 0xFF68 ), // KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER SMALL I MAKE_PAIR( 0x30A4, 0xFF72 ), // KATAKANA LETTER I --> HALFWIDTH KATAKANA LETTER I MAKE_PAIR( 0x30A5, 0xFF69 ), // KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER SMALL U MAKE_PAIR( 0x30A6, 0xFF73 ), // KATAKANA LETTER U --> HALFWIDTH KATAKANA LETTER U MAKE_PAIR( 0x30A7, 0xFF6A ), // KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER SMALL E MAKE_PAIR( 0x30A8, 0xFF74 ), // KATAKANA LETTER E --> HALFWIDTH KATAKANA LETTER E MAKE_PAIR( 0x30A9, 0xFF6B ), // KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER SMALL O MAKE_PAIR( 0x30AA, 0xFF75 ), // KATAKANA LETTER O --> HALFWIDTH KATAKANA LETTER O MAKE_PAIR( 0x30AB, 0xFF76 ), // KATAKANA LETTER KA --> HALFWIDTH KATAKANA LETTER KA MAKE_PAIR( 0x30AD, 0xFF77 ), // KATAKANA LETTER KI --> HALFWIDTH KATAKANA LETTER KI MAKE_PAIR( 0x30AF, 0xFF78 ), // KATAKANA LETTER KU --> HALFWIDTH KATAKANA LETTER KU MAKE_PAIR( 0x30B1, 0xFF79 ), // KATAKANA LETTER KE --> HALFWIDTH KATAKANA LETTER KE MAKE_PAIR( 0x30B3, 0xFF7A ), // KATAKANA LETTER KO --> HALFWIDTH KATAKANA LETTER KO MAKE_PAIR( 0x30B5, 0xFF7B ), // KATAKANA LETTER SA --> HALFWIDTH KATAKANA LETTER SA MAKE_PAIR( 0x30B7, 0xFF7C ), // KATAKANA LETTER SI --> HALFWIDTH KATAKANA LETTER SI MAKE_PAIR( 0x30B9, 0xFF7D ), // KATAKANA LETTER SU --> HALFWIDTH KATAKANA LETTER SU MAKE_PAIR( 0x30BB, 0xFF7E ), // KATAKANA LETTER SE --> HALFWIDTH KATAKANA LETTER SE MAKE_PAIR( 0x30BD, 0xFF7F ), // KATAKANA LETTER SO --> HALFWIDTH KATAKANA LETTER SO MAKE_PAIR( 0x30BF, 0xFF80 ), // KATAKANA LETTER TA --> HALFWIDTH KATAKANA LETTER TA MAKE_PAIR( 0x30C1, 0xFF81 ), // KATAKANA LETTER TI --> HALFWIDTH KATAKANA LETTER TI MAKE_PAIR( 
0x30C3, 0xFF6F ), // KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER SMALL TU MAKE_PAIR( 0x30C4, 0xFF82 ), // KATAKANA LETTER TU --> HALFWIDTH KATAKANA LETTER TU MAKE_PAIR( 0x30C6, 0xFF83 ), // KATAKANA LETTER TE --> HALFWIDTH KATAKANA LETTER TE MAKE_PAIR( 0x30C8, 0xFF84 ), // KATAKANA LETTER TO --> HALFWIDTH KATAKANA LETTER TO MAKE_PAIR( 0x30CA, 0xFF85 ), // KATAKANA LETTER NA --> HALFWIDTH KATAKANA LETTER NA MAKE_PAIR( 0x30CB, 0xFF86 ), // KATAKANA LETTER NI --> HALFWIDTH KATAKANA LETTER NI MAKE_PAIR( 0x30CC, 0xFF87 ), // KATAKANA LETTER NU --> HALFWIDTH KATAKANA LETTER NU MAKE_PAIR( 0x30CD, 0xFF88 ), // KATAKANA LETTER NE --> HALFWIDTH KATAKANA LETTER NE MAKE_PAIR( 0x30CE, 0xFF89 ), // KATAKANA LETTER NO --> HALFWIDTH KATAKANA LETTER NO MAKE_PAIR( 0x30CF, 0xFF8A ), // KATAKANA LETTER HA --> HALFWIDTH KATAKANA LETTER HA MAKE_PAIR( 0x30D2, 0xFF8B ), // KATAKANA LETTER HI --> HALFWIDTH KATAKANA LETTER HI MAKE_PAIR( 0x30D5, 0xFF8C ), // KATAKANA LETTER HU --> HALFWIDTH KATAKANA LETTER HU MAKE_PAIR( 0x30D8, 0xFF8D ), // KATAKANA LETTER HE --> HALFWIDTH KATAKANA LETTER HE MAKE_PAIR( 0x30DB, 0xFF8E ), // KATAKANA LETTER HO --> HALFWIDTH KATAKANA LETTER HO MAKE_PAIR( 0x30DE, 0xFF8F ), // KATAKANA LETTER MA --> HALFWIDTH KATAKANA LETTER MA MAKE_PAIR( 0x30DF, 0xFF90 ), // KATAKANA LETTER MI --> HALFWIDTH KATAKANA LETTER MI MAKE_PAIR( 0x30E0, 0xFF91 ), // KATAKANA LETTER MU --> HALFWIDTH KATAKANA LETTER MU MAKE_PAIR( 0x30E1, 0xFF92 ), // KATAKANA LETTER ME --> HALFWIDTH KATAKANA LETTER ME MAKE_PAIR( 0x30E2, 0xFF93 ), // KATAKANA LETTER MO --> HALFWIDTH KATAKANA LETTER MO MAKE_PAIR( 0x30E3, 0xFF6C ), // KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER SMALL YA MAKE_PAIR( 0x30E4, 0xFF94 ), // KATAKANA LETTER YA --> HALFWIDTH KATAKANA LETTER YA MAKE_PAIR( 0x30E5, 0xFF6D ), // KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER SMALL YU MAKE_PAIR( 0x30E6, 0xFF95 ), // KATAKANA LETTER YU --> HALFWIDTH KATAKANA LETTER YU MAKE_PAIR( 0x30E7, 0xFF6E ), // KATAKANA 
LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER SMALL YO MAKE_PAIR( 0x30E8, 0xFF96 ), // KATAKANA LETTER YO --> HALFWIDTH KATAKANA LETTER YO MAKE_PAIR( 0x30E9, 0xFF97 ), // KATAKANA LETTER RA --> HALFWIDTH KATAKANA LETTER RA MAKE_PAIR( 0x30EA, 0xFF98 ), // KATAKANA LETTER RI --> HALFWIDTH KATAKANA LETTER RI MAKE_PAIR( 0x30EB, 0xFF99 ), // KATAKANA LETTER RU --> HALFWIDTH KATAKANA LETTER RU MAKE_PAIR( 0x30EC, 0xFF9A ), // KATAKANA LETTER RE --> HALFWIDTH KATAKANA LETTER RE MAKE_PAIR( 0x30ED, 0xFF9B ), // KATAKANA LETTER RO --> HALFWIDTH KATAKANA LETTER RO MAKE_PAIR( 0x30EF, 0xFF9C ), // KATAKANA LETTER WA --> HALFWIDTH KATAKANA LETTER WA MAKE_PAIR( 0x30F2, 0xFF66 ), // KATAKANA LETTER WO --> HALFWIDTH KATAKANA LETTER WO MAKE_PAIR( 0x30F3, 0xFF9D ), // KATAKANA LETTER N --> HALFWIDTH KATAKANA LETTER N MAKE_PAIR( 0x30FB, 0xFF65 ), // KATAKANA MIDDLE DOT --> HALFWIDTH KATAKANA MIDDLE DOT MAKE_PAIR( 0x30FC, 0xFF70 ), // KATAKANA-HIRAGANA PROLONGED SOUND MARK --> HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK MAKE_PAIR( 0x3131, 0xFFA1 ), // HANGUL LETTER KIYEOK --> HALFWIDTH HANGUL LETTER KIYEOK MAKE_PAIR( 0x3132, 0xFFA2 ), // HANGUL LETTER SSANGKIYEOK --> HALFWIDTH HANGUL LETTER SSANGKIYEOK MAKE_PAIR( 0x3133, 0xFFA3 ), // HANGUL LETTER KIYEOK-SIOS --> HALFWIDTH HANGUL LETTER KIYEOK-SIOS MAKE_PAIR( 0x3134, 0xFFA4 ), // HANGUL LETTER NIEUN --> HALFWIDTH HANGUL LETTER NIEUN MAKE_PAIR( 0x3135, 0xFFA5 ), // HANGUL LETTER NIEUN-CIEUC --> HALFWIDTH HANGUL LETTER NIEUN-CIEUC MAKE_PAIR( 0x3136, 0xFFA6 ), // HANGUL LETTER NIEUN-HIEUH --> HALFWIDTH HANGUL LETTER NIEUN-HIEUH MAKE_PAIR( 0x3137, 0xFFA7 ), // HANGUL LETTER TIKEUT --> HALFWIDTH HANGUL LETTER TIKEUT MAKE_PAIR( 0x3138, 0xFFA8 ), // HANGUL LETTER SSANGTIKEUT --> HALFWIDTH HANGUL LETTER SSANGTIKEUT MAKE_PAIR( 0x3139, 0xFFA9 ), // HANGUL LETTER RIEUL --> HALFWIDTH HANGUL LETTER RIEUL MAKE_PAIR( 0x313A, 0xFFAA ), // HANGUL LETTER RIEUL-KIYEOK --> HALFWIDTH HANGUL LETTER RIEUL-KIYEOK MAKE_PAIR( 0x313B, 0xFFAB ), // HANGUL 
LETTER RIEUL-MIEUM --> HALFWIDTH HANGUL LETTER RIEUL-MIEUM MAKE_PAIR( 0x313C, 0xFFAC ), // HANGUL LETTER RIEUL-PIEUP --> HALFWIDTH HANGUL LETTER RIEUL-PIEUP MAKE_PAIR( 0x313D, 0xFFAD ), // HANGUL LETTER RIEUL-SIOS --> HALFWIDTH HANGUL LETTER RIEUL-SIOS MAKE_PAIR( 0x313E, 0xFFAE ), // HANGUL LETTER RIEUL-THIEUTH --> HALFWIDTH HANGUL LETTER RIEUL-THIEUTH MAKE_PAIR( 0x313F, 0xFFAF ), // HANGUL LETTER RIEUL-PHIEUPH --> HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH MAKE_PAIR( 0x3140, 0xFFB0 ), // HANGUL LETTER RIEUL-HIEUH --> HALFWIDTH HANGUL LETTER RIEUL-HIEUH MAKE_PAIR( 0x3141, 0xFFB1 ), // HANGUL LETTER MIEUM --> HALFWIDTH HANGUL LETTER MIEUM MAKE_PAIR( 0x3142, 0xFFB2 ), // HANGUL LETTER PIEUP --> HALFWIDTH HANGUL LETTER PIEUP MAKE_PAIR( 0x3143, 0xFFB3 ), // HANGUL LETTER SSANGPIEUP --> HALFWIDTH HANGUL LETTER SSANGPIEUP MAKE_PAIR( 0x3144, 0xFFB4 ), // HANGUL LETTER PIEUP-SIOS --> HALFWIDTH HANGUL LETTER PIEUP-SIOS MAKE_PAIR( 0x3145, 0xFFB5 ), // HANGUL LETTER SIOS --> HALFWIDTH HANGUL LETTER SIOS MAKE_PAIR( 0x3146, 0xFFB6 ), // HANGUL LETTER SSANGSIOS --> HALFWIDTH HANGUL LETTER SSANGSIOS MAKE_PAIR( 0x3147, 0xFFB7 ), // HANGUL LETTER IEUNG --> HALFWIDTH HANGUL LETTER IEUNG MAKE_PAIR( 0x3148, 0xFFB8 ), // HANGUL LETTER CIEUC --> HALFWIDTH HANGUL LETTER CIEUC MAKE_PAIR( 0x3149, 0xFFB9 ), // HANGUL LETTER SSANGCIEUC --> HALFWIDTH HANGUL LETTER SSANGCIEUC MAKE_PAIR( 0x314A, 0xFFBA ), // HANGUL LETTER CHIEUCH --> HALFWIDTH HANGUL LETTER CHIEUCH MAKE_PAIR( 0x314B, 0xFFBB ), // HANGUL LETTER KHIEUKH --> HALFWIDTH HANGUL LETTER KHIEUKH MAKE_PAIR( 0x314C, 0xFFBC ), // HANGUL LETTER THIEUTH --> HALFWIDTH HANGUL LETTER THIEUTH MAKE_PAIR( 0x314D, 0xFFBD ), // HANGUL LETTER PHIEUPH --> HALFWIDTH HANGUL LETTER PHIEUPH MAKE_PAIR( 0x314E, 0xFFBE ), // HANGUL LETTER HIEUH --> HALFWIDTH HANGUL LETTER HIEUH MAKE_PAIR( 0x314F, 0xFFC2 ), // HANGUL LETTER A --> HALFWIDTH HANGUL LETTER A MAKE_PAIR( 0x3150, 0xFFC3 ), // HANGUL LETTER AE --> HALFWIDTH HANGUL LETTER AE MAKE_PAIR( 0x3151, 0xFFC4 ), 
// HANGUL LETTER YA --> HALFWIDTH HANGUL LETTER YA MAKE_PAIR( 0x3152, 0xFFC5 ), // HANGUL LETTER YAE --> HALFWIDTH HANGUL LETTER YAE MAKE_PAIR( 0x3153, 0xFFC6 ), // HANGUL LETTER EO --> HALFWIDTH HANGUL LETTER EO MAKE_PAIR( 0x3154, 0xFFC7 ), // HANGUL LETTER E --> HALFWIDTH HANGUL LETTER E MAKE_PAIR( 0x3155, 0xFFCA ), // HANGUL LETTER YEO --> HALFWIDTH HANGUL LETTER YEO MAKE_PAIR( 0x3156, 0xFFCB ), // HANGUL LETTER YE --> HALFWIDTH HANGUL LETTER YE MAKE_PAIR( 0x3157, 0xFFCC ), // HANGUL LETTER O --> HALFWIDTH HANGUL LETTER O MAKE_PAIR( 0x3158, 0xFFCD ), // HANGUL LETTER WA --> HALFWIDTH HANGUL LETTER WA MAKE_PAIR( 0x3159, 0xFFCE ), // HANGUL LETTER WAE --> HALFWIDTH HANGUL LETTER WAE MAKE_PAIR( 0x315A, 0xFFCF ), // HANGUL LETTER OE --> HALFWIDTH HANGUL LETTER OE MAKE_PAIR( 0x315B, 0xFFD2 ), // HANGUL LETTER YO --> HALFWIDTH HANGUL LETTER YO MAKE_PAIR( 0x315C, 0xFFD3 ), // HANGUL LETTER U --> HALFWIDTH HANGUL LETTER U MAKE_PAIR( 0x315D, 0xFFD4 ), // HANGUL LETTER WEO --> HALFWIDTH HANGUL LETTER WEO MAKE_PAIR( 0x315E, 0xFFD5 ), // HANGUL LETTER WE --> HALFWIDTH HANGUL LETTER WE MAKE_PAIR( 0x315F, 0xFFD6 ), // HANGUL LETTER WI --> HALFWIDTH HANGUL LETTER WI MAKE_PAIR( 0x3160, 0xFFD7 ), // HANGUL LETTER YU --> HALFWIDTH HANGUL LETTER YU MAKE_PAIR( 0x3161, 0xFFDA ), // HANGUL LETTER EU --> HALFWIDTH HANGUL LETTER EU MAKE_PAIR( 0x3162, 0xFFDB ), // HANGUL LETTER YI --> HALFWIDTH HANGUL LETTER YI MAKE_PAIR( 0x3163, 0xFFDC ), // HANGUL LETTER I --> HALFWIDTH HANGUL LETTER I MAKE_PAIR( 0x3164, 0xFFA0 ), // HANGUL FILLER --> HALFWIDTH HANGUL FILLER MAKE_PAIR( 0xFF01, 0x0021 ), // FULLWIDTH EXCLAMATION MARK --> EXCLAMATION MARK MAKE_PAIR( 0xFF02, 0x0022 ), // FULLWIDTH QUOTATION MARK --> QUOTATION MARK MAKE_PAIR( 0xFF03, 0x0023 ), // FULLWIDTH NUMBER SIGN --> NUMBER SIGN MAKE_PAIR( 0xFF04, 0x0024 ), // FULLWIDTH DOLLAR SIGN --> DOLLAR SIGN MAKE_PAIR( 0xFF05, 0x0025 ), // FULLWIDTH PERCENT SIGN --> PERCENT SIGN MAKE_PAIR( 0xFF06, 0x0026 ), // FULLWIDTH AMPERSAND --> AMPERSAND 
MAKE_PAIR( 0xFF07, 0x0027 ), // FULLWIDTH APOSTROPHE --> APOSTROPHE MAKE_PAIR( 0xFF08, 0x0028 ), // FULLWIDTH LEFT PARENTHESIS --> LEFT PARENTHESIS MAKE_PAIR( 0xFF09, 0x0029 ), // FULLWIDTH RIGHT PARENTHESIS --> RIGHT PARENTHESIS MAKE_PAIR( 0xFF0A, 0x002A ), // FULLWIDTH ASTERISK --> ASTERISK MAKE_PAIR( 0xFF0B, 0x002B ), // FULLWIDTH PLUS SIGN --> PLUS SIGN MAKE_PAIR( 0xFF0C, 0x002C ), // FULLWIDTH COMMA --> COMMA //MAKE_PAIR( 0xFF0D, 0x002D ), // FULLWIDTH HYPHEN-MINUS --> HYPHEN-MINUS MAKE_PAIR( 0xFF0E, 0x002E ), // FULLWIDTH FULL STOP --> FULL STOP MAKE_PAIR( 0xFF0F, 0x002F ), // FULLWIDTH SOLIDUS --> SOLIDUS MAKE_PAIR( 0xFF10, 0x0030 ), // FULLWIDTH DIGIT ZERO --> DIGIT ZERO MAKE_PAIR( 0xFF11, 0x0031 ), // FULLWIDTH DIGIT ONE --> DIGIT ONE MAKE_PAIR( 0xFF12, 0x0032 ), // FULLWIDTH DIGIT TWO --> DIGIT TWO MAKE_PAIR( 0xFF13, 0x0033 ), // FULLWIDTH DIGIT THREE --> DIGIT THREE MAKE_PAIR( 0xFF14, 0x0034 ), // FULLWIDTH DIGIT FOUR --> DIGIT FOUR MAKE_PAIR( 0xFF15, 0x0035 ), // FULLWIDTH DIGIT FIVE --> DIGIT FIVE MAKE_PAIR( 0xFF16, 0x0036 ), // FULLWIDTH DIGIT SIX --> DIGIT SIX MAKE_PAIR( 0xFF17, 0x0037 ), // FULLWIDTH DIGIT SEVEN --> DIGIT SEVEN MAKE_PAIR( 0xFF18, 0x0038 ), // FULLWIDTH DIGIT EIGHT --> DIGIT EIGHT MAKE_PAIR( 0xFF19, 0x0039 ), // FULLWIDTH DIGIT NINE --> DIGIT NINE MAKE_PAIR( 0xFF1A, 0x003A ), // FULLWIDTH COLON --> COLON MAKE_PAIR( 0xFF1B, 0x003B ), // FULLWIDTH SEMICOLON --> SEMICOLON MAKE_PAIR( 0xFF1C, 0x003C ), // FULLWIDTH LESS-THAN SIGN --> LESS-THAN SIGN MAKE_PAIR( 0xFF1D, 0x003D ), // FULLWIDTH EQUALS SIGN --> EQUALS SIGN MAKE_PAIR( 0xFF1E, 0x003E ), // FULLWIDTH GREATER-THAN SIGN --> GREATER-THAN SIGN MAKE_PAIR( 0xFF1F, 0x003F ), // FULLWIDTH QUESTION MARK --> QUESTION MARK MAKE_PAIR( 0xFF20, 0x0040 ), // FULLWIDTH COMMERCIAL AT --> COMMERCIAL AT MAKE_PAIR( 0xFF21, 0x0041 ), // FULLWIDTH LATIN CAPITAL LETTER A --> LATIN CAPITAL LETTER A MAKE_PAIR( 0xFF22, 0x0042 ), // FULLWIDTH LATIN CAPITAL LETTER B --> LATIN CAPITAL LETTER B MAKE_PAIR( 
0xFF23, 0x0043 ), // FULLWIDTH LATIN CAPITAL LETTER C --> LATIN CAPITAL LETTER C MAKE_PAIR( 0xFF24, 0x0044 ), // FULLWIDTH LATIN CAPITAL LETTER D --> LATIN CAPITAL LETTER D MAKE_PAIR( 0xFF25, 0x0045 ), // FULLWIDTH LATIN CAPITAL LETTER E --> LATIN CAPITAL LETTER E MAKE_PAIR( 0xFF26, 0x0046 ), // FULLWIDTH LATIN CAPITAL LETTER F --> LATIN CAPITAL LETTER F MAKE_PAIR( 0xFF27, 0x0047 ), // FULLWIDTH LATIN CAPITAL LETTER G --> LATIN CAPITAL LETTER G MAKE_PAIR( 0xFF28, 0x0048 ), // FULLWIDTH LATIN CAPITAL LETTER H --> LATIN CAPITAL LETTER H MAKE_PAIR( 0xFF29, 0x0049 ), // FULLWIDTH LATIN CAPITAL LETTER I --> LATIN CAPITAL LETTER I MAKE_PAIR( 0xFF2A, 0x004A ), // FULLWIDTH LATIN CAPITAL LETTER J --> LATIN CAPITAL LETTER J MAKE_PAIR( 0xFF2B, 0x004B ), // FULLWIDTH LATIN CAPITAL LETTER K --> LATIN CAPITAL LETTER K MAKE_PAIR( 0xFF2C, 0x004C ), // FULLWIDTH LATIN CAPITAL LETTER L --> LATIN CAPITAL LETTER L MAKE_PAIR( 0xFF2D, 0x004D ), // FULLWIDTH LATIN CAPITAL LETTER M --> LATIN CAPITAL LETTER M MAKE_PAIR( 0xFF2E, 0x004E ), // FULLWIDTH LATIN CAPITAL LETTER N --> LATIN CAPITAL LETTER N MAKE_PAIR( 0xFF2F, 0x004F ), // FULLWIDTH LATIN CAPITAL LETTER O --> LATIN CAPITAL LETTER O MAKE_PAIR( 0xFF30, 0x0050 ), // FULLWIDTH LATIN CAPITAL LETTER P --> LATIN CAPITAL LETTER P MAKE_PAIR( 0xFF31, 0x0051 ), // FULLWIDTH LATIN CAPITAL LETTER Q --> LATIN CAPITAL LETTER Q MAKE_PAIR( 0xFF32, 0x0052 ), // FULLWIDTH LATIN CAPITAL LETTER R --> LATIN CAPITAL LETTER R MAKE_PAIR( 0xFF33, 0x0053 ), // FULLWIDTH LATIN CAPITAL LETTER S --> LATIN CAPITAL LETTER S MAKE_PAIR( 0xFF34, 0x0054 ), // FULLWIDTH LATIN CAPITAL LETTER T --> LATIN CAPITAL LETTER T MAKE_PAIR( 0xFF35, 0x0055 ), // FULLWIDTH LATIN CAPITAL LETTER U --> LATIN CAPITAL LETTER U MAKE_PAIR( 0xFF36, 0x0056 ), // FULLWIDTH LATIN CAPITAL LETTER V --> LATIN CAPITAL LETTER V MAKE_PAIR( 0xFF37, 0x0057 ), // FULLWIDTH LATIN CAPITAL LETTER W --> LATIN CAPITAL LETTER W MAKE_PAIR( 0xFF38, 0x0058 ), // FULLWIDTH LATIN CAPITAL LETTER X --> LATIN 
CAPITAL LETTER X MAKE_PAIR( 0xFF39, 0x0059 ), // FULLWIDTH LATIN CAPITAL LETTER Y --> LATIN CAPITAL LETTER Y MAKE_PAIR( 0xFF3A, 0x005A ), // FULLWIDTH LATIN CAPITAL LETTER Z --> LATIN CAPITAL LETTER Z MAKE_PAIR( 0xFF3B, 0x005B ), // FULLWIDTH LEFT SQUARE BRACKET --> LEFT SQUARE BRACKET MAKE_PAIR( 0xFF3C, 0x005C ), // FULLWIDTH REVERSE SOLIDUS --> REVERSE SOLIDUS MAKE_PAIR( 0xFF3D, 0x005D ), // FULLWIDTH RIGHT SQUARE BRACKET --> RIGHT SQUARE BRACKET MAKE_PAIR( 0xFF3E, 0x005E ), // FULLWIDTH CIRCUMFLEX ACCENT --> CIRCUMFLEX ACCENT MAKE_PAIR( 0xFF3F, 0x005F ), // FULLWIDTH LOW LINE --> LOW LINE MAKE_PAIR( 0xFF40, 0x0060 ), // FULLWIDTH GRAVE ACCENT --> GRAVE ACCENT MAKE_PAIR( 0xFF41, 0x0061 ), // FULLWIDTH LATIN SMALL LETTER A --> LATIN SMALL LETTER A MAKE_PAIR( 0xFF42, 0x0062 ), // FULLWIDTH LATIN SMALL LETTER B --> LATIN SMALL LETTER B MAKE_PAIR( 0xFF43, 0x0063 ), // FULLWIDTH LATIN SMALL LETTER C --> LATIN SMALL LETTER C MAKE_PAIR( 0xFF44, 0x0064 ), // FULLWIDTH LATIN SMALL LETTER D --> LATIN SMALL LETTER D MAKE_PAIR( 0xFF45, 0x0065 ), // FULLWIDTH LATIN SMALL LETTER E --> LATIN SMALL LETTER E MAKE_PAIR( 0xFF46, 0x0066 ), // FULLWIDTH LATIN SMALL LETTER F --> LATIN SMALL LETTER F MAKE_PAIR( 0xFF47, 0x0067 ), // FULLWIDTH LATIN SMALL LETTER G --> LATIN SMALL LETTER G MAKE_PAIR( 0xFF48, 0x0068 ), // FULLWIDTH LATIN SMALL LETTER H --> LATIN SMALL LETTER H MAKE_PAIR( 0xFF49, 0x0069 ), // FULLWIDTH LATIN SMALL LETTER I --> LATIN SMALL LETTER I MAKE_PAIR( 0xFF4A, 0x006A ), // FULLWIDTH LATIN SMALL LETTER J --> LATIN SMALL LETTER J MAKE_PAIR( 0xFF4B, 0x006B ), // FULLWIDTH LATIN SMALL LETTER K --> LATIN SMALL LETTER K MAKE_PAIR( 0xFF4C, 0x006C ), // FULLWIDTH LATIN SMALL LETTER L --> LATIN SMALL LETTER L MAKE_PAIR( 0xFF4D, 0x006D ), // FULLWIDTH LATIN SMALL LETTER M --> LATIN SMALL LETTER M MAKE_PAIR( 0xFF4E, 0x006E ), // FULLWIDTH LATIN SMALL LETTER N --> LATIN SMALL LETTER N MAKE_PAIR( 0xFF4F, 0x006F ), // FULLWIDTH LATIN SMALL LETTER O --> LATIN SMALL LETTER O 
MAKE_PAIR( 0xFF50, 0x0070 ), // FULLWIDTH LATIN SMALL LETTER P --> LATIN SMALL LETTER P MAKE_PAIR( 0xFF51, 0x0071 ), // FULLWIDTH LATIN SMALL LETTER Q --> LATIN SMALL LETTER Q MAKE_PAIR( 0xFF52, 0x0072 ), // FULLWIDTH LATIN SMALL LETTER R --> LATIN SMALL LETTER R MAKE_PAIR( 0xFF53, 0x0073 ), // FULLWIDTH LATIN SMALL LETTER S --> LATIN SMALL LETTER S MAKE_PAIR( 0xFF54, 0x0074 ), // FULLWIDTH LATIN SMALL LETTER T --> LATIN SMALL LETTER T MAKE_PAIR( 0xFF55, 0x0075 ), // FULLWIDTH LATIN SMALL LETTER U --> LATIN SMALL LETTER U MAKE_PAIR( 0xFF56, 0x0076 ), // FULLWIDTH LATIN SMALL LETTER V --> LATIN SMALL LETTER V MAKE_PAIR( 0xFF57, 0x0077 ), // FULLWIDTH LATIN SMALL LETTER W --> LATIN SMALL LETTER W MAKE_PAIR( 0xFF58, 0x0078 ), // FULLWIDTH LATIN SMALL LETTER X --> LATIN SMALL LETTER X MAKE_PAIR( 0xFF59, 0x0079 ), // FULLWIDTH LATIN SMALL LETTER Y --> LATIN SMALL LETTER Y MAKE_PAIR( 0xFF5A, 0x007A ), // FULLWIDTH LATIN SMALL LETTER Z --> LATIN SMALL LETTER Z MAKE_PAIR( 0xFF5B, 0x007B ), // FULLWIDTH LEFT CURLY BRACKET --> LEFT CURLY BRACKET MAKE_PAIR( 0xFF5C, 0x007C ), // FULLWIDTH VERTICAL LINE --> VERTICAL LINE MAKE_PAIR( 0xFF5D, 0x007D ), // FULLWIDTH RIGHT CURLY BRACKET --> RIGHT CURLY BRACKET MAKE_PAIR( 0xFF5E, 0x007E ), // FULLWIDTH TILDE --> TILDE MAKE_PAIR( 0xFFE0, 0x00A2 ), // FULLWIDTH CENT SIGN --> CENT SIGN MAKE_PAIR( 0xFFE1, 0x00A3 ), // FULLWIDTH POUND SIGN --> POUND SIGN MAKE_PAIR( 0xFFE2, 0x00AC ), // FULLWIDTH NOT SIGN --> NOT SIGN MAKE_PAIR( 0xFFE3, 0x00AF ), // FULLWIDTH MACRON --> MACRON MAKE_PAIR( 0xFFE4, 0x00A6 ), // FULLWIDTH BROKEN BAR --> BROKEN BAR MAKE_PAIR( 0xFFE5, 0x00A5 ), // FULLWIDTH YEN SIGN --> YEN SIGN MAKE_PAIR( 0xFFE6, 0x20A9 ) // FULLWIDTH WON SIGN --> WON SIGN }; /** * Transliterate fullwidth to halfwidth. * The output is a reference of OUString. You MUST delete this object when you do not need to use it any more * The output string contains a transliterated string only, not whole string. 
*/ OUString SAL_CALL fullwidthToHalfwidth::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) throw(RuntimeException) { // Decomposition: GA --> KA + voice-mark OUString newStr = decompose_ja_voiced_sound_marks (inStr, startPos, nCount, offset); // One to One mapping oneToOneMapping table(full2half, sizeof(full2half)); return transliteration_OneToOne::transliterate( newStr, 0, newStr.getLength(), table ); } } } } }