diff options
author | Karl Hong <khong@openoffice.org> | 2002-03-30 08:25:16 +0000 |
---|---|---|
committer | Karl Hong <khong@openoffice.org> | 2002-03-30 08:25:16 +0000 |
commit | 1c1bd3a6f1ba7c1efe8b52670a6f7f7477a9d55a (patch) | |
tree | 0ef6785714f7f7820b60b6988c9db201c3134c82 /i18npool/source | |
parent | 1e92b7f501133182090aee8ba1893e3fe84f330e (diff) |
#98442#TextToNum, CharToNum transliteration
Diffstat (limited to 'i18npool/source')
-rw-r--r-- | i18npool/source/registerservices/registerservices.cxx | 131 | ||||
-rw-r--r-- | i18npool/source/transliteration/chartonum.cxx | 125 | ||||
-rw-r--r-- | i18npool/source/transliteration/data/numberchar.h | 107 | ||||
-rw-r--r-- | i18npool/source/transliteration/makefile.mk | 8 | ||||
-rw-r--r-- | i18npool/source/transliteration/numtochar.cxx | 66 | ||||
-rw-r--r-- | i18npool/source/transliteration/numtotext_cjk.cxx | 356 | ||||
-rw-r--r-- | i18npool/source/transliteration/texttonum.cxx | 204 |
7 files changed, 686 insertions, 311 deletions
diff --git a/i18npool/source/registerservices/registerservices.cxx b/i18npool/source/registerservices/registerservices.cxx index 6c2df856a63e..82fded86eb03 100644 --- a/i18npool/source/registerservices/registerservices.cxx +++ b/i18npool/source/registerservices/registerservices.cxx @@ -2,9 +2,9 @@ * * $RCSfile: registerservices.cxx,v $ * - * $Revision: 1.7 $ + * $Revision: 1.8 $ * - * last change: $Author: er $ $Date: 2002-03-28 00:35:49 $ + * last change: $Author: khong $ $Date: 2002-03-30 09:25:16 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -92,6 +92,8 @@ #include <textToPronounce_zh.hxx> #include <numtotext_cjk.hxx> #include <numtochar.hxx> +#include <texttonum.hxx> +#include <chartonum.hxx> #include <calendarImpl.hxx> #include <calendar_gregorian.hxx> @@ -248,35 +250,68 @@ IMPL_CREATEINSTANCE( ignoreSpace_ja_JP) IMPL_CREATEINSTANCE( TextToChuyin_zh_TW ) IMPL_CREATEINSTANCE( TextToPinyin_zh_CN ) -IMPL_CREATEINSTANCE( NumToTextLower_zh_CN ) -IMPL_CREATEINSTANCE( NumToTextUpper_zh_CN ) IMPL_CREATEINSTANCE( NumToCharLower_zh_CN ) IMPL_CREATEINSTANCE( NumToCharUpper_zh_CN ) - IMPL_CREATEINSTANCE( NumToCharLower_zh_TW ) IMPL_CREATEINSTANCE( NumToCharUpper_zh_TW ) -IMPL_CREATEINSTANCE( NumToTextLower_zh_TW ) -IMPL_CREATEINSTANCE( NumToTextUpper_zh_TW ) - +IMPL_CREATEINSTANCE( NumToCharFullwidth ) +IMPL_CREATEINSTANCE( NumToCharKanjiShort_ja_JP ) IMPL_CREATEINSTANCE( NumToCharHangul_ko ) IMPL_CREATEINSTANCE( NumToCharLower_ko ) IMPL_CREATEINSTANCE( NumToCharUpper_ko ) -IMPL_CREATEINSTANCE( NumToCharFullwidth ) -IMPL_CREATEINSTANCE( NumToCharKanjiShort_ja_JP ) -IMPL_CREATEINSTANCE( NumToTextFormalHangul_ko ) -IMPL_CREATEINSTANCE( NumToTextFormalLower_ko ) -IMPL_CREATEINSTANCE( NumToTextFormalUpper_ko ) -IMPL_CREATEINSTANCE( NumToTextInformalHangul_ko ) -IMPL_CREATEINSTANCE( NumToTextInformalUpper_ko ) -IMPL_CREATEINSTANCE( NumToTextInformalLower_ko ) - IMPL_CREATEINSTANCE( NumToCharIndic_ar ) IMPL_CREATEINSTANCE( NumToCharEastIndic_ar ) IMPL_CREATEINSTANCE( NumToCharIndic_hi ) IMPL_CREATEINSTANCE( NumToChar_th ) +IMPL_CREATEINSTANCE( CharToNum ) +IMPL_CREATEINSTANCE( CharToNumLower_zh_CN ) +IMPL_CREATEINSTANCE( CharToNumUpper_zh_CN ) +IMPL_CREATEINSTANCE( CharToNumLower_zh_TW ) +IMPL_CREATEINSTANCE( CharToNumUpper_zh_TW ) +IMPL_CREATEINSTANCE( CharToNumFullwidth ) +IMPL_CREATEINSTANCE( CharToNumKanjiShort_ja_JP ) +IMPL_CREATEINSTANCE( CharToNumHangul_ko ) +IMPL_CREATEINSTANCE( CharToNumLower_ko ) +IMPL_CREATEINSTANCE( CharToNumUpper_ko ) +IMPL_CREATEINSTANCE( CharToNumIndic_ar ) +IMPL_CREATEINSTANCE( CharToNumEastIndic_ar ) +IMPL_CREATEINSTANCE( CharToNumIndic_hi ) +IMPL_CREATEINSTANCE( CharToNum_th ) + +IMPL_CREATEINSTANCE( NumToTextLower_zh_CN ) +IMPL_CREATEINSTANCE( NumToTextUpper_zh_CN ) +IMPL_CREATEINSTANCE( NumToTextLower_zh_TW ) +IMPL_CREATEINSTANCE( NumToTextUpper_zh_TW ) IMPL_CREATEINSTANCE( NumToTextKanjiLongModern_ja_JP ) IMPL_CREATEINSTANCE( NumToTextKanjiLongTraditional_ja_JP ) +IMPL_CREATEINSTANCE( NumToTextFormalHangul_ko ) +IMPL_CREATEINSTANCE( NumToTextFormalLower_ko ) +IMPL_CREATEINSTANCE( NumToTextFormalUpper_ko ) +IMPL_CREATEINSTANCE( NumToTextInformalHangul_ko ) +IMPL_CREATEINSTANCE( NumToTextInformalUpper_ko ) +IMPL_CREATEINSTANCE( NumToTextInformalLower_ko ) + +IMPL_CREATEINSTANCE( TextToNum ) +IMPL_CREATEINSTANCE( TextToNumLower_zh_CN ) +IMPL_CREATEINSTANCE( TextToNumUpper_zh_CN ) +IMPL_CREATEINSTANCE( TextToNumLower_zh_TW ) +IMPL_CREATEINSTANCE( TextToNumUpper_zh_TW ) +IMPL_CREATEINSTANCE( TextToNumKanjiLongModern_ja_JP ) +IMPL_CREATEINSTANCE( TextToNumKanjiLongTraditional_ja_JP ) +IMPL_CREATEINSTANCE( TextToNumFormalHangul_ko ) +IMPL_CREATEINSTANCE( TextToNumFormalLower_ko ) +IMPL_CREATEINSTANCE( TextToNumFormalUpper_ko ) +IMPL_CREATEINSTANCE( TextToNumInformalHangul_ko ) +IMPL_CREATEINSTANCE( TextToNumInformalUpper_ko ) +IMPL_CREATEINSTANCE( TextToNumInformalLower_ko ) + +IMPL_CREATEINSTANCE( NumToTextDate_zh ) +IMPL_CREATEINSTANCE( NumToTextAIUFullWidth_ja_JP ) +IMPL_CREATEINSTANCE( NumToTextAIUHalfWidth_ja_JP ) +IMPL_CREATEINSTANCE( NumToTextIROHAFullWidth_ja_JP ) +IMPL_CREATEINSTANCE( NumToTextIROHAHalfWidth_ja_JP ) +IMPL_CREATEINSTANCE( NumToTextCircledNumber ) static const struct InstancesArray { const sal_Char* pServiceNm; @@ -484,31 +519,69 @@ static const struct InstancesArray { IMPL_TRANSLITERATION_ITEM (ignoreSpace_ja_JP), IMPL_TRANSLITERATION_ITEM (TextToPinyin_zh_CN), IMPL_TRANSLITERATION_ITEM (TextToChuyin_zh_TW), - IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_CN), - IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_CN), + IMPL_TRANSLITERATION_ITEM (NumToCharUpper_zh_CN), IMPL_TRANSLITERATION_ITEM (NumToCharLower_zh_CN), - IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_TW), - IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_TW), IMPL_TRANSLITERATION_ITEM (NumToCharUpper_zh_TW), IMPL_TRANSLITERATION_ITEM (NumToCharLower_zh_TW), + IMPL_TRANSLITERATION_ITEM (NumToCharFullwidth), + IMPL_TRANSLITERATION_ITEM (NumToCharKanjiShort_ja_JP), IMPL_TRANSLITERATION_ITEM (NumToCharLower_ko), IMPL_TRANSLITERATION_ITEM (NumToCharUpper_ko), IMPL_TRANSLITERATION_ITEM (NumToCharHangul_ko), - IMPL_TRANSLITERATION_ITEM (NumToCharFullwidth), - IMPL_TRANSLITERATION_ITEM (NumToCharKanjiShort_ja_JP), - IMPL_TRANSLITERATION_ITEM (NumToTextInformalHangul_ko), - IMPL_TRANSLITERATION_ITEM (NumToTextInformalLower_ko), - IMPL_TRANSLITERATION_ITEM (NumToTextInformalUpper_ko), - IMPL_TRANSLITERATION_ITEM (NumToTextFormalHangul_ko), - IMPL_TRANSLITERATION_ITEM (NumToTextFormalLower_ko), - IMPL_TRANSLITERATION_ITEM (NumToTextFormalUpper_ko), IMPL_TRANSLITERATION_ITEM (NumToCharIndic_ar), IMPL_TRANSLITERATION_ITEM (NumToCharEastIndic_ar), IMPL_TRANSLITERATION_ITEM (NumToCharIndic_hi), IMPL_TRANSLITERATION_ITEM (NumToChar_th), + + IMPL_TRANSLITERATION_ITEM (CharToNum), + IMPL_TRANSLITERATION_ITEM (CharToNumUpper_zh_CN), + IMPL_TRANSLITERATION_ITEM (CharToNumLower_zh_CN), + IMPL_TRANSLITERATION_ITEM (CharToNumUpper_zh_TW), + IMPL_TRANSLITERATION_ITEM (CharToNumLower_zh_TW), + IMPL_TRANSLITERATION_ITEM (CharToNumFullwidth), + IMPL_TRANSLITERATION_ITEM (CharToNumKanjiShort_ja_JP), + IMPL_TRANSLITERATION_ITEM (CharToNumLower_ko), + IMPL_TRANSLITERATION_ITEM (CharToNumUpper_ko), + IMPL_TRANSLITERATION_ITEM (CharToNumHangul_ko), + IMPL_TRANSLITERATION_ITEM (CharToNumIndic_ar), + IMPL_TRANSLITERATION_ITEM (CharToNumEastIndic_ar), + IMPL_TRANSLITERATION_ITEM (CharToNumIndic_hi), + IMPL_TRANSLITERATION_ITEM (CharToNum_th), + + IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_CN), + IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_CN), + IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_TW), + IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_TW), IMPL_TRANSLITERATION_ITEM (NumToTextKanjiLongModern_ja_JP), IMPL_TRANSLITERATION_ITEM (NumToTextKanjiLongTraditional_ja_JP), + IMPL_TRANSLITERATION_ITEM (NumToTextInformalHangul_ko), + IMPL_TRANSLITERATION_ITEM (NumToTextInformalLower_ko), + IMPL_TRANSLITERATION_ITEM (NumToTextInformalUpper_ko), + IMPL_TRANSLITERATION_ITEM (NumToTextFormalHangul_ko), + IMPL_TRANSLITERATION_ITEM (NumToTextFormalLower_ko), + IMPL_TRANSLITERATION_ITEM (NumToTextFormalUpper_ko), + + IMPL_TRANSLITERATION_ITEM (TextToNum), + IMPL_TRANSLITERATION_ITEM (TextToNumUpper_zh_CN), + IMPL_TRANSLITERATION_ITEM (TextToNumLower_zh_CN), + IMPL_TRANSLITERATION_ITEM (TextToNumUpper_zh_TW), + IMPL_TRANSLITERATION_ITEM (TextToNumLower_zh_TW), + IMPL_TRANSLITERATION_ITEM (TextToNumKanjiLongModern_ja_JP), + IMPL_TRANSLITERATION_ITEM (TextToNumKanjiLongTraditional_ja_JP), + IMPL_TRANSLITERATION_ITEM (TextToNumInformalHangul_ko), + IMPL_TRANSLITERATION_ITEM (TextToNumInformalLower_ko), + IMPL_TRANSLITERATION_ITEM (TextToNumInformalUpper_ko), + IMPL_TRANSLITERATION_ITEM (TextToNumFormalHangul_ko), + IMPL_TRANSLITERATION_ITEM (TextToNumFormalLower_ko), + IMPL_TRANSLITERATION_ITEM (TextToNumFormalUpper_ko), + + IMPL_TRANSLITERATION_ITEM (NumToTextDate_zh), + IMPL_TRANSLITERATION_ITEM (NumToTextAIUFullWidth_ja_JP), + IMPL_TRANSLITERATION_ITEM (NumToTextAIUHalfWidth_ja_JP), + IMPL_TRANSLITERATION_ITEM (NumToTextIROHAFullWidth_ja_JP), + IMPL_TRANSLITERATION_ITEM (NumToTextIROHAHalfWidth_ja_JP), + IMPL_TRANSLITERATION_ITEM (NumToTextCircledNumber), // add here new services !! { 0, 0, 0 } diff --git a/i18npool/source/transliteration/chartonum.cxx b/i18npool/source/transliteration/chartonum.cxx new file mode 100644 index 000000000000..ea96c1423018 --- /dev/null +++ b/i18npool/source/transliteration/chartonum.cxx @@ -0,0 +1,125 @@ +/************************************************************************* + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2002 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2002 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#define TRANSLITERATION_ALL +#include <chartonum.hxx> +#include <data/numberchar.h> +#include <rtl/ustrbuf.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL CharToNum::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + const sal_Unicode *str = inStr.getStr() + startPos; + rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount + 1); + offset.realloc(nCount); + sal_Int16 index; + + OUString numberChar, decimalChar, minusChar; + if (number == 0) { + OUStringBuffer aBuf(NumberChar_Count * 10 + 1); + for (sal_Int32 i = 0; i < NumberChar_Count; i++) + aBuf.append(NumberChar[i], 10); + numberChar = aBuf.makeStringAndClear(); + decimalChar = OUString(DecimalChar); + minusChar = OUString(MinusChar); + } else { + numberChar = OUString(NumberChar[number], 10); + decimalChar = OUString::valueOf(DecimalChar[number]); + minusChar = OUString::valueOf(MinusChar[number]); + } + + for (sal_Int32 i = 0; i < nCount; i++) { + if ((index = numberChar.indexOf(str[i])) >= 0) + newStr->buffer[i] = (NUMBER_ZERO + (index % 10)); + else if ((index = decimalChar.indexOf(str[i]) >= 0) && + i < nCount-1 && numberChar.indexOf(str[i+1]) >= 0) + newStr->buffer[i] = NUMBER_DECIMAL; + else if ((index = minusChar.indexOf(str[i]) >= 0) && + i < nCount-1 && numberChar.indexOf(str[i+1]) >= 0) + newStr->buffer[i] = NUMBER_MINUS; + else + newStr->buffer[i] = str[i]; + offset[i] = startPos + i; + } + return OUString(newStr->buffer, nCount); +} + +CharToNum::CharToNum() +{ + number = 0; + transliterationName = "CharToNum"; + implementationName = "com.sun.star.i18n.Transliteration.CharToNum"; +} + +#define TRANSLITERATION_CHARTONUM( name, _number ) \ +CharToNum##name::CharToNum##name() \ +{ \ + number = NumberChar_##_number; \ + transliterationName = "CharToNum"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.CharToNum"#name; \ +} +TRANSLITERATION_CHARTONUM( Fullwidth, FullWidth) +TRANSLITERATION_CHARTONUM( Lower_zh_CN, Lower_zh) +TRANSLITERATION_CHARTONUM( Lower_zh_TW, Lower_zh) +TRANSLITERATION_CHARTONUM( Upper_zh_CN, Upper_zh) +TRANSLITERATION_CHARTONUM( Upper_zh_TW, Upper_zh_TW) +TRANSLITERATION_CHARTONUM( KanjiShort_ja_JP, Modern_ja) +TRANSLITERATION_CHARTONUM( Lower_ko, Lower_ko) +TRANSLITERATION_CHARTONUM( Upper_ko, Upper_ko) +TRANSLITERATION_CHARTONUM( Hangul_ko, Hangul_ko) +TRANSLITERATION_CHARTONUM( Indic_ar, Indic_ar) +TRANSLITERATION_CHARTONUM( EastIndic_ar, EastIndic_ar) +TRANSLITERATION_CHARTONUM( Indic_hi, Indic_hi) +TRANSLITERATION_CHARTONUM( _th, th) +#undef TRANSLITERATION_CHARTONUM + +} } } } diff --git a/i18npool/source/transliteration/data/numberchar.h b/i18npool/source/transliteration/data/numberchar.h index 9778dadd38a7..e17f8640d770 100644 --- a/i18npool/source/transliteration/data/numberchar.h +++ b/i18npool/source/transliteration/data/numberchar.h @@ -2,9 +2,9 @@ * * $RCSfile: numberchar.h,v $ * - * $Revision: 1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: bustamam $ $Date: 2002-03-26 13:21:45 $ + * last change: $Author: khong $ $Date: 2002-03-30 09:24:47 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -42,16 +42,33 @@ namespace com { namespace sun { namespace star { namespace i18n { +static const sal_Int16 NumberChar_HalfWidth = 0; +static const sal_Int16 NumberChar_FullWidth = 1; +static const sal_Int16 NumberChar_Lower_zh = 2; +static const sal_Int16 NumberChar_Upper_zh = 3; +static const sal_Int16 NumberChar_Upper_zh_TW = 4; +static const sal_Int16 NumberChar_Modern_ja = 5; +static const sal_Int16 NumberChar_Traditional_ja= 6; +static const sal_Int16 NumberChar_Lower_ko = 7; +static const sal_Int16 NumberChar_Upper_ko = 8; +static const sal_Int16 NumberChar_Hangul_ko = 9; +static const sal_Int16 NumberChar_Indic_ar = 10; +static const sal_Int16 NumberChar_EastIndic_ar = 11; +static const sal_Int16 NumberChar_Indic_hi = 12; +static const sal_Int16 NumberChar_th = 13; +static const sal_Int16 NumberChar_Count = 14; + static const sal_Unicode NumberChar[][10] = { - { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Ascii +// 0 1 2 3 4 5 6 7 8 9 + { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Half Width (Ascii) { 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19 }, // Full Width { 0x3007, 0x4E00, 0x4E8c, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Chinese Lower { 0x96F6, 0x58F9, 0x8D30, 0x53C1, 0x8086, 0x4F0D, 0x9646, 0x67D2, 0x634C, 0x7396 }, // S. Chinese Upper { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x8086, 0x4F0D, 0x9678, 0x67D2, 0x634C, 0x7396 }, // T. Chinese Upper { 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Modern - { 0x3007, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad. + { 0x96F6, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad. { 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Lower - { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper + { 0xF9B2, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper { 0xC601, 0xC77C, 0xC774, 0xC0BC, 0xC0AC, 0xC624, 0xC721, 0xCE60, 0xD314, 0xAD6C }, // Korean Hangul { 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669 }, // Arabic Indic { 0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4, 0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9 }, // Est. Arabic Indic @@ -59,20 +76,72 @@ static const sal_Unicode NumberChar[][10] = { { 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59 } // Thai }; -static const sal_Int16 NumberChar_HalfWidth = 0; -static const sal_Int16 NumberChar_FullWidth = 1; -static const sal_Int16 NumberChar_Lower_zh = 2; -static const sal_Int16 NumberChar_Upper_zh = 3; -static const sal_Int16 NumberChar_Upper_zh_TW = 4; -static const sal_Int16 NumberChar_Modern_ja = 5; -static const sal_Int16 NumberChar_Traditional_ja= 6; -static const sal_Int16 NumberChar_Lower_ko = 7; -static const sal_Int16 NumberChar_Upper_ko = 8; -static const sal_Int16 NumberChar_Hangul_ko = 9; -static const sal_Int16 NumberChar_Indic_ar = 11; -static const sal_Int16 NumberChar_EastIndic_ar = 12; -static const sal_Int16 NumberChar_Indic_hi = 13; -static const sal_Int16 NumberChar_th = 14; +static sal_Unicode DecimalChar[] = { + 0x002E, // Half Width (Ascii) + 0xFF0E, // Full Width + 0xFF0E, // Chinese Lower + 0x70B9, // S. Chinese Upper + 0x9EDE, // T. Chinese Upper + 0xFF0E, // Japanese Modern + 0xFF0E, // Japanese Trad. + 0xFF0E, // Korean Lower + 0x9EDE, // Korean Upper + 0xC810, // Korean Hangul + 0x002E, // Arabic Indic + 0x002E, // Est. Arabic Indic + 0x002E, // Indic + 0x002E // Thai +}; + +static sal_Unicode MinusChar[] = { + 0x002D, // Half Width (Ascii) + 0xFF0D, // Full Width + 0xFF0D, // Chinese Lower + 0x8D1F, // S. Chinese Upper + 0x5069, // T. Chinese Upper + 0xFF0D, // Japanese Modern + 0xFF0D, // Japanese Trad. + 0xFF0D, // Korean Lower + 0x5069, // Korean Upper + 0xFF0D, // Korean Hangul ??? + 0x002D, // Arabic Indic + 0x002D, // Est. Arabic Indic + 0x002D, // Indic + 0x002D, // Thai +}; + +#define NUMBER_ZERO NumberChar[NumberChar_HalfWidth][0] // 0x0030 +#define NUMBER_ONE NumberChar[NumberChar_HalfWidth][1] // 0x0031 +#define NUMBER_NINE NumberChar[NumberChar_HalfWidth][9] // 0x0039 +#define NUMBER_DECIMAL DecimalChar[0] +#define NUMBER_MINUS MinusChar[0] +#define isNumber(n) ( NUMBER_ZERO <= n && n <= NUMBER_NINE ) +#define isDecimal(n) ( n == NUMBER_DECIMAL ) +#define isMinus(n) ( n == NUMBER_MINUS ) + +const sal_Int16 ExponentCount_CJK = 6; + +const sal_Int16 Multiplier_Lower_zh = 0; +const sal_Int16 Multiplier_Upper_zh = 1; +const sal_Int16 Multiplier_Lower_zh_TW = 2; +const sal_Int16 Multiplier_Upper_zh_TW = 3; +const sal_Int16 Multiplier_Hangul_ko = 4; +const sal_Int16 Multiplier_Modern_ja = 5; +const sal_Int16 Multiplier_Traditional_ja = 6; +const sal_Int16 Multiplier_Count = 7; + +static sal_Int16 MultiplierExponent_CJK[ExponentCount_CJK] = { + 12, 8, 4, 3, 2, 1 +}; +static sal_Unicode MultiplierChar_CJK[][ExponentCount_CJK] = { + 0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341, // S. Chinese Lower + 0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE, // S. Chinese Upper + 0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341, // T. Chinese & Korean Lower + 0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE, // T. Chinese & Korean Upper + 0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED, // Korean Hangul + 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341, // Japanese Modern + 0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE, // Japanese Traditional +}; } } } } diff --git a/i18npool/source/transliteration/makefile.mk b/i18npool/source/transliteration/makefile.mk index 0c48e5035f11..c9002ec0ac5b 100644 --- a/i18npool/source/transliteration/makefile.mk +++ b/i18npool/source/transliteration/makefile.mk @@ -2,9 +2,9 @@ #* #* $RCSfile: makefile.mk,v $ #* -#* $Revision: 1.2 $ +#* $Revision: 1.3 $ #* -#* last change: $Author: er $ $Date: 2002-03-26 17:13:19 $ +#* last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $ #* #* The Contents of this file are made available subject to the terms of #* either of the following licenses @@ -109,7 +109,9 @@ SLOFILES= \ $(SLO)$/ignoreMiddleDot_ja_JP.obj \ $(SLO)$/textToPronounce_zh.obj \ $(SLO)$/numtochar.obj \ - $(SLO)$/numtotext_cjk.obj + $(SLO)$/numtotext_cjk.obj \ + $(SLO)$/chartonum.obj \ + $(SLO)$/texttonum.obj # MACOSX: manually initialization some static data members .IF "$(OS)"=="MACOSX" diff --git a/i18npool/source/transliteration/numtochar.cxx b/i18npool/source/transliteration/numtochar.cxx index 0ef28b154ef7..7a2ef006d3bd 100644 --- a/i18npool/source/transliteration/numtochar.cxx +++ b/i18npool/source/transliteration/numtochar.cxx @@ -2,9 +2,9 @@ * * $RCSfile: numtochar.cxx,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: er $ $Date: 2002-03-26 17:13:19 $ + * last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -71,52 +71,44 @@ using namespace rtl; namespace com { namespace sun { namespace star { namespace i18n { -OUString SAL_CALL NumToChar::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) throw(RuntimeException) { - - // Create a string buffer which can hold nCount + 1 characters. - rtl_uString *newStr; - x_rtl_uString_new_WithLength( &newStr, nCount ); // defined in x_rtl_ustring.h The reference count is 0 now. - - // Prepare pointers of unicode character arrays. +OUString SAL_CALL NumToChar::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ const sal_Unicode *src = inStr.getStr() + startPos; - sal_Unicode *dst = newStr->buffer; + rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount); + offset.realloc(nCount); - // Allocate the same length as inStr to offset argument. - offset.realloc(inStr.getLength()); - sal_Int32 *p = offset.getArray(); - sal_Int32 position = startPos; - - for (sal_Int32 index = 0; index < nCount; index++) { - sal_Unicode ch = src[index]; - dst[index] = (0x0030 <= ch && ch <= 0x0039) ? num2char[ ch - 0x0030 ] : ch; - *p++ = position++; + for (sal_Int32 i = 0; i < nCount; i++) { + sal_Unicode ch = src[i]; + newStr->buffer[i] = (isNumber(ch) ? NumberChar[number][ ch - NUMBER_ZERO ] : + (isDecimal(ch) ? DecimalChar[number] : (isMinus(ch) ? MinusChar[number] : ch))); + offset[i] = startPos + i; } - - return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. + return OUString(newStr->buffer, nCount); } -#define TRANSLITERATION_NUMTOCHAR( number, name ) \ +#define TRANSLITERATION_NUMTOCHAR( name, _number ) \ NumToChar##name::NumToChar##name() \ { \ - num2char = NumberChar[number]; \ + number = NumberChar_##_number; \ transliterationName = "NumToChar"#name; \ implementationName = "com.sun.star.i18n.Transliteration.NumToChar"#name; \ } -TRANSLITERATION_NUMTOCHAR( NumberChar_HalfWidth, ) -TRANSLITERATION_NUMTOCHAR( NumberChar_FullWidth, Fullwidth) -TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_CN) -TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_TW) -TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh, Upper_zh_CN) -TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh_TW, Upper_zh_TW) -TRANSLITERATION_NUMTOCHAR( NumberChar_Modern_ja, KanjiShort_ja_JP) -TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_ko, Lower_ko) -TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_ko, Upper_ko) -TRANSLITERATION_NUMTOCHAR( NumberChar_Hangul_ko, Hangul_ko) -TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_ar, Indic_ar) -TRANSLITERATION_NUMTOCHAR( NumberChar_EastIndic_ar, EastIndic_ar) -TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_hi, Indic_hi) -TRANSLITERATION_NUMTOCHAR( NumberChar_th, _th) +TRANSLITERATION_NUMTOCHAR( , HalfWidth ) +TRANSLITERATION_NUMTOCHAR( Fullwidth, FullWidth ) +TRANSLITERATION_NUMTOCHAR( Lower_zh_CN, Lower_zh ) +TRANSLITERATION_NUMTOCHAR( Lower_zh_TW, Lower_zh ) +TRANSLITERATION_NUMTOCHAR( Upper_zh_CN, Upper_zh ) +TRANSLITERATION_NUMTOCHAR( Upper_zh_TW, Upper_zh_TW ) +TRANSLITERATION_NUMTOCHAR( KanjiShort_ja_JP, Modern_ja ) +TRANSLITERATION_NUMTOCHAR( Lower_ko, Lower_ko ) +TRANSLITERATION_NUMTOCHAR( Upper_ko, Upper_ko ) +TRANSLITERATION_NUMTOCHAR( Hangul_ko, Hangul_ko ) +TRANSLITERATION_NUMTOCHAR( Indic_ar, Indic_ar ) +TRANSLITERATION_NUMTOCHAR( EastIndic_ar, EastIndic_ar ) +TRANSLITERATION_NUMTOCHAR( Indic_hi, Indic_hi ) +TRANSLITERATION_NUMTOCHAR( _th, th ) #undef TRANSLITERATION_NUMTOCHAR } } } } diff --git a/i18npool/source/transliteration/numtotext_cjk.cxx b/i18npool/source/transliteration/numtotext_cjk.cxx index 302024ecb16b..9a79bd2aea72 100644 --- a/i18npool/source/transliteration/numtotext_cjk.cxx +++ b/i18npool/source/transliteration/numtotext_cjk.cxx @@ -2,9 +2,9 @@ * * $RCSfile: numtotext_cjk.cxx,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: er $ $Date: 2002-03-26 17:13:19 $ + * last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -62,6 +62,7 @@ #define TRANSLITERATION_ALL #include <numtotext_cjk.hxx> #include <data/numberchar.h> +#include <data/bullet.h> using namespace com::sun::star::uno; using namespace rtl; @@ -70,49 +71,17 @@ namespace com { namespace sun { namespace star { namespace i18n { #define NUMBER_OMIT_ZERO (1 << 0) #define NUMBER_OMIT_ONE (1 << 1) +#define NUMBER_OMIT_ONLY_ZERO (1 << 2) -#define NUMBER_ZERO 0x0030 -#define NUMBER_ONE 0x0031 - -#define isNum(n) ( 0x0030 <= n && n <= 0x0039 ) +#define NUMBER_COMMA 0x002C +#define isComma(ch) (ch == NUMBER_COMMA) +#define MAX_SAL_UINT32 0xFFFFFFFF +#define MAX_VALUE (MAX_SAL_UINT32 - 9) / 10 NumToText_CJK::NumToText_CJK() { numberChar = NULL; -} - -sal_Bool SAL_CALL NumToText_CJK::numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len, - sal_Unicode *dst, sal_Int32& count, sal_Unicode multiplierChar, sal_Int32** offset) -{ - if ( len == 1 ) { - **offset++ = count; - if (str[begin] != NUMBER_ZERO) { - if (!(numberFlag & NUMBER_OMIT_ONE) || str[begin] != NUMBER_ONE) - dst[count++] = numberChar[(sal_Int16)(str[begin] - NUMBER_ZERO)]; - if (multiplierChar > 0) - dst[count++] = multiplierChar; - } else if (!(numberFlag & NUMBER_OMIT_ZERO) && dst[count-1] != numberChar[0]) - dst[count++] = numberChar[0]; - return str[begin] != NUMBER_ZERO; - } else { - sal_Bool printPower = sal_False; - sal_Int16 last = 0; - for (sal_Int16 i = 1; numberMultiplier[i].power >= 0; i++) { - sal_Int32 tmp = len - numberMultiplier[i].power; - if (tmp > 0) { - printPower |= numberMaker(str, begin, tmp, dst, count, - numberMultiplier[i].multiplierChar, offset); - begin += tmp; - len -= tmp; - } - } - if (printPower) { - if (dst[count-1] == numberChar[0]) - count--; - if (multiplierChar > 0) - dst[count++] = multiplierChar; - } - return printPower; - } + bulletCount = 0; + number = 0; } OUString SAL_CALL NumToText_CJK::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, @@ -128,210 +97,151 @@ OUString SAL_CALL NumToText_CJK::transliterate( const OUString& inStr, sal_Int32 if (nCount > 0) { const sal_Unicode *str = inStr.getStr() + startPos; rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount * 2); - sal_Int32 i, len = 0, count = 0, begin, end; + rtl_uString *srcStr = x_rtl_uString_new_WithLength(nCount); // for keeping number without comma + sal_Int32 i, len = 0, count = 0; offset.realloc( nCount * 2 ); - sal_Int32 *p = offset.getArray(); + sal_Bool doDecimal = sal_False; + sal_Bool makeBullet = bulletCount > 0; for (i = 0; i <= nCount; i++) { - if (i < nCount && isNum(str[i])) { - if (len == 0) - begin = i; - len++; + if (i < nCount && isNumber(str[i])) { + if (doDecimal) { + newStr->buffer[count] = numberChar[str[i] - NUMBER_ZERO]; + offset[count++] = i + startPos; + } + else + srcStr->buffer[len++] = str[i]; } else { if (len > 0) { - for (end = begin + (len % numberMultiplier[0].power); - end <= i; begin = end, end += numberMultiplier[0].power) - numberMaker(str, begin, end - begin, newStr->buffer, count, - end == i ? 0 : numberMultiplier[0].multiplierChar, &p); + if (isComma(str[i]) && i < nCount-1 && isNumber(str[i+1])) + continue; // skip comma inside number string + if (makeBullet) { + sal_uInt32 value = 0; + for (sal_Int32 j = 0; j < len; j++) { + if (value < MAX_VALUE) + value = (value * 10) + (str[j] - NUMBER_ZERO); + else + throw RuntimeException(); // overfollow, number is too big + } + newStr->buffer[count] = value ? numberChar[(value-1) % bulletCount] : NUMBER_ZERO; + offset[count++] = i - len + startPos; + } + else { + sal_Int32 _count = count; + for (sal_Int32 begin = 0, end = len % MultiplierExponent_CJK[0]; + end <= len; begin = end, end += MultiplierExponent_CJK[0]) + numberMaker(srcStr->buffer, begin, end - begin, newStr->buffer, count, + end == len ? 0 : multiplierChar[0], offset, i - len + startPos); + if (_count == count && ! (numberFlag & NUMBER_OMIT_ONLY_ZERO)) { + newStr->buffer[count] = numberChar[0]; + offset[count++] = i - len + startPos; + } + } len = 0; } if (i < nCount) { - *p++ = count; - newStr->buffer[count++] = str[i]; + if (doDecimal = (!makeBullet && !doDecimal && + isDecimal(str[i]) && i < nCount-1 && isNumber(str[i+1]))) + newStr->buffer[count] = DecimalChar[number]; + else if (!makeBullet && isMinus(str[i]) && i < nCount-1 && isNumber(str[i+1])) + newStr->buffer[count] = MinusChar[number]; + else + newStr->buffer[count] = str[i]; + offset[count++] = i + startPos; } } } offset.realloc(count); - for (i = 0; i < count; i++) - offset[i] += startPos; return OUString(newStr->buffer, count); } return OUString(); } -static NumberMultiplier multiplier_Lower_zh[] = { - { 12, 0x5146 }, // fourth four digits group, ten billion - { 8, 0x4EBF }, // third four digits group, hundred million - { 4, 0x4E07 }, // second four digits group, ten thousand - { 3, 0x5343 }, // Unicode Chinese Lower Thousand - { 2, 0x767E }, // Unicode Chinese Lower Hundred - { 1, 0x5341 }, // Unicode Chinese Lower Ten - { 0, 0x0000 } -}; - -NumToTextLower_zh_CN::NumToTextLower_zh_CN() { - numberChar = NumberChar[NumberChar_Lower_zh]; - numberMultiplier = multiplier_Lower_zh; - numberFlag = 0; - transliterationName = "NumToTextLower_zh_CN"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_CN"; -} - - -static NumberMultiplier multiplier_Upper_zh[] = { - { 12, 0x5146 }, // fourth four digits group, ten billion - { 8, 0x4EBF }, // third four digits group, hundred million - { 4, 0x4E07 }, // second four digits group, ten thousand - { 3, 0x4EDF }, // Unicode Chinese Lower Thousand - { 2, 0x4F70 }, // Unicode Chinese Lower Hundred - { 1, 0x62FE }, // Unicode Chinese Lower Ten - { 0, 0x0000 } -}; - -NumToTextUpper_zh_CN::NumToTextUpper_zh_CN() { - numberChar = NumberChar[NumberChar_Upper_zh]; - numberMultiplier = multiplier_Upper_zh; - numberFlag = 0; - transliterationName = "NumToTextUpper_zh_CN"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_CN"; -} - -static NumberMultiplier multiplier_Lower_zh_TW[] = { - { 12, 0x5146 }, // fourth four digits group, ten billion - { 8, 0x5104 }, // third four digits group, hundred million - { 4, 0x842C }, // second four digits group, ten thousand - { 3, 0x5343 }, // Unicode Chinese Lower Thousand - { 2, 0x767E }, // Unicode Chinese Lower Hundred - { 1, 0x5341 }, // Unicode Chinese Lower Ten - { 0, 0x0000 } -}; - -NumToTextLower_zh_TW::NumToTextLower_zh_TW() { - numberChar = NumberChar[NumberChar_Lower_zh]; - numberMultiplier = multiplier_Lower_zh_TW; - numberFlag = 0; - transliterationName = "NumToTextLower_zh_TW"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_TW"; -} - -static NumberMultiplier multiplier_Upper_zh_TW[] = { - { 12, 0x5146 }, // fourth four digits group, ten billion - { 8, 0x5104 }, // third four digits group, hundred million - { 4, 0x842C }, // second four digits group, ten thousand - { 3, 0x4EDF }, // Unicode Chinese Lower Thousand - { 2, 0x4F70 }, // Unicode Chinese Lower Hundred - { 1, 0x62FE }, // Unicode Chinese Lower Ten - { 0, 0x0000 } -}; - -NumToTextUpper_zh_TW::NumToTextUpper_zh_TW() { - numberChar = NumberChar[NumberChar_Upper_zh_TW]; - numberMultiplier = multiplier_Upper_zh_TW; - numberFlag = 0; - transliterationName = "NumToTextUpper_zh_TW"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_TW"; -} - -NumToTextFormalLower_ko::NumToTextFormalLower_ko() { - numberChar = NumberChar[NumberChar_Lower_ko]; - numberMultiplier = multiplier_Lower_zh_TW; - numberFlag = NUMBER_OMIT_ZERO; - transliterationName = "NumToTextFormalLower_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalLower_ko"; -} - -NumToTextFormalUpper_ko::NumToTextFormalUpper_ko() { - numberChar = NumberChar[NumberChar_Upper_ko]; - numberMultiplier = multiplier_Lower_zh_TW; - numberFlag = NUMBER_OMIT_ZERO; - transliterationName = "NumToTextFormalUpper_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalUpper_ko"; -} - -NumToTextInformalLower_ko::NumToTextInformalLower_ko() { - numberChar = NumberChar[NumberChar_Lower_ko]; - numberMultiplier = multiplier_Lower_zh_TW; - numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; - transliterationName = "NumToTextInformalLower_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalLower_ko"; -} - -NumToTextInformalUpper_ko::NumToTextInformalUpper_ko() { - numberChar = NumberChar[NumberChar_Upper_ko]; - numberMultiplier = multiplier_Lower_zh_TW; - numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; - transliterationName = "NumToTextInformalUpper_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalUpper_ko"; -} - -static NumberMultiplier multiplier_Hangul_ko[] = { - { 12, 0xC870 }, // fourth four digits group, ten billion - { 8, 0xC5B5 }, // third four digits group, hundred million - { 4, 0xB9CC }, // second four digits group, ten thousand - { 3, 0xCC9C }, // Unicode Chinese Lower Thousand - { 2, 0xBC31 }, // Unicode Chinese Lower Hundred - { 1, 0xC2ED }, // Unicode Chinese Lower Ten - { 0, 0x0000 } -}; - -NumToTextFormalHangul_ko::NumToTextFormalHangul_ko() { - numberChar = NumberChar[NumberChar_Hangul_ko]; - numberMultiplier = multiplier_Hangul_ko; - numberFlag = NUMBER_OMIT_ZERO; - transliterationName = "NumToTextFormalHangul_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalHangul_ko"; -} - -NumToTextInformalHangul_ko::NumToTextInformalHangul_ko() { - numberChar = NumberChar[NumberChar_Hangul_ko]; - numberMultiplier = multiplier_Hangul_ko; - numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; - transliterationName = "NumToTextInformalHangul_ko"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalHangul_ko"; +sal_Bool SAL_CALL NumToText_CJK::numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len, + sal_Unicode *dst, sal_Int32& count, sal_Unicode multiChar, Sequence< sal_Int32 >& offset, sal_Int32 startPos) +{ + if ( len == 1 ) { + if (str[begin] != NUMBER_ZERO) { + if (!(numberFlag & NUMBER_OMIT_ONE) || multiChar == 0 || str[begin] != NUMBER_ONE) { + dst[count] = numberChar[str[begin] - NUMBER_ZERO]; + offset[count++] = begin + startPos; + } + if (multiChar > 0) { + dst[count] = multiChar; + offset[count++] = begin + startPos; + } + } else if (!(numberFlag & NUMBER_OMIT_ZERO) && dst[count-1] != numberChar[0]) { + dst[count] = numberChar[0]; + offset[count++] = begin + startPos; + } + return str[begin] != NUMBER_ZERO; + } else { + sal_Bool printPower = sal_False; + sal_Int16 last = 0; + for (sal_Int16 i = 1; i <= ExponentCount_CJK; i++) { + sal_Int32 tmp = len - (i == ExponentCount_CJK ? 0 : MultiplierExponent_CJK[i]); + if (tmp > 0) { + printPower |= numberMaker(str, begin, tmp, dst, count, + (i == ExponentCount_CJK ? 0 : multiplierChar[i]), offset, startPos); + begin += tmp; + len -= tmp; + } + } + if (printPower) { + if (dst[count-1] == numberChar[0]) + count--; + if (multiChar > 0) { + dst[count] = multiChar; + offset[count++] = begin + startPos; + } + } + return printPower; + } } -static NumberMultiplier multiplier_Traditional_ja[] = { - { 9, 0x62FE }, // billion // 10 * 100000000 - { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one" - { 7, 0x9621 }, // ten million // 1000 * 10000 - { 6, 0x767E }, // million // 100 * 10000 - { 5, 0x62FE }, // hundred thousand // 10 * 10000 - { 4, 0x842C }, // ten thousand // 1 * 10000 // needs a preceding "one" - { 3, 0x9621 }, // thousand // 1000 - { 2, 0x767E }, // hundred // 100 - { 1, 0x62FE }, // ten // 10 - { 0, 0x0000 } // one // 1 // needs a "one" -}; - -NumToTextKanjiLongTraditional_ja_JP::NumToTextKanjiLongTraditional_ja_JP() { - numberChar = NumberChar[NumberChar_Traditional_ja]; - numberMultiplier = multiplier_Traditional_ja; - numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; - transliterationName = "NumToTextKanjiLongTraditional_ja_JP"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongTraditional_ja_JP"; +#define TRANSLITERATION_NUMTOTEXT( name, _number, flag ) \ +NumToText##name::NumToText##name() \ +{ \ + number = NumberChar_##_number; \ + numberChar = NumberChar[NumberChar_##_number]; \ + multiplierChar = MultiplierChar_CJK[Multiplier_##_number]; \ + numberFlag = flag; \ + transliterationName = "NumToText"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \ } - -static NumberMultiplier multiplier_Modern_ja[] = { - { 9, 0x5341 }, // billion // 10 * 100000000 - { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one" - { 7, 0x5343 }, // ten million // 1000 * 10000 - { 6, 0x767E }, // million // 100 * 10000 - { 5, 0x5341 }, // hundred thousand // 10 * 10000 - { 4, 0x4E07 }, // ten thousand // 1 * 10000 // needs a preceding "one" - { 3, 0x5343 }, // thousand // 1000 - { 2, 0x767E }, // hundred // 100 - { 1, 0x5341 }, // ten // 10 - { 0, 0x0000 } // one // 1 // needs a "one" -}; - -NumToTextKanjiLongModern_ja_JP::NumToTextKanjiLongModern_ja_JP() { - numberChar = NumberChar[NumberChar_Modern_ja]; - numberMultiplier = multiplier_Modern_ja; - numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE; - transliterationName = "NumToTextKanjiLongModern_ja_JP"; - implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongModern_ja_JP"; +TRANSLITERATION_NUMTOTEXT( Lower_zh_CN, Lower_zh, 0 ) +TRANSLITERATION_NUMTOTEXT( Upper_zh_CN, Upper_zh, 0 ) +TRANSLITERATION_NUMTOTEXT( Lower_zh_TW, Lower_zh, 0 ) +TRANSLITERATION_NUMTOTEXT( Upper_zh_TW, Upper_zh_TW, 0 ) +#define Multiplier_Lower_ko Multiplier_Upper_zh_TW +#define Multiplier_Upper_ko Multiplier_Upper_zh_TW +TRANSLITERATION_NUMTOTEXT( FormalLower_ko, Lower_ko, NUMBER_OMIT_ZERO ) +TRANSLITERATION_NUMTOTEXT( FormalUpper_ko, Upper_ko, NUMBER_OMIT_ZERO ) +TRANSLITERATION_NUMTOTEXT( FormalHangul_ko, Hangul_ko, NUMBER_OMIT_ZERO ) +#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO ) +TRANSLITERATION_NUMTOTEXT( InformalLower_ko, Lower_ko, NUMBER_OMIT_ALL ) +TRANSLITERATION_NUMTOTEXT( InformalUpper_ko, Upper_ko, NUMBER_OMIT_ALL ) +TRANSLITERATION_NUMTOTEXT( InformalHangul_ko, Hangul_ko, NUMBER_OMIT_ALL ) +TRANSLITERATION_NUMTOTEXT( KanjiLongTraditional_ja_JP, Traditional_ja, NUMBER_OMIT_ALL ) +TRANSLITERATION_NUMTOTEXT( KanjiLongModern_ja_JP, Modern_ja, NUMBER_OMIT_ALL ) +TRANSLITERATION_NUMTOTEXT( Date_zh, Lower_zh, NUMBER_OMIT_ALL ) +#undef TRANSLITERATION_NUMTOTEXT + +#define TRANSLITERATION_NUMTOTEXT( name ) \ +NumToText##name::NumToText##name() \ +{ \ + numberChar = table_##name; \ + bulletCount = sizeof(table_##name) / sizeof(sal_Unicode); \ + transliterationName = "NumToText"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \ } +TRANSLITERATION_NUMTOTEXT( AIUFullWidth_ja_JP ) +TRANSLITERATION_NUMTOTEXT( AIUHalfWidth_ja_JP ) +TRANSLITERATION_NUMTOTEXT( IROHAFullWidth_ja_JP ) +TRANSLITERATION_NUMTOTEXT( IROHAHalfWidth_ja_JP ) +TRANSLITERATION_NUMTOTEXT( CircledNumber ) +#undef TRANSLITERATION_NUMTOTEXT } } } } diff --git a/i18npool/source/transliteration/texttonum.cxx b/i18npool/source/transliteration/texttonum.cxx new file mode 100644 index 000000000000..f2c7f17d9530 --- /dev/null +++ b/i18npool/source/transliteration/texttonum.cxx @@ -0,0 +1,204 @@ +/************************************************************************* + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2002 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2002 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#define TRANSLITERATION_ALL +#include <texttonum.hxx> +#include <data/numberchar.h> +#include <rtl/ustrbuf.hxx> + +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +OUString SAL_CALL TextToNum::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) throw(RuntimeException) +{ + sal_Int32 strLen = inStr.getLength() - startPos; + + if (nCount > strLen) + nCount = strLen; + + if (nCount > 0) { + const sal_Unicode *str = inStr.getStr() + startPos; + rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount * MultiplierExponent_CJK[0] + 1); + offset.realloc( nCount * MultiplierExponent_CJK[0] + 1 ); + sal_Int32 i = 0, count = 0, index; + + OUString numberChar, multiplierChar, decimalChar, minusChar; + if (number == 0) { + OUStringBuffer aBuf(NumberChar_Count * 10 + 1); + for (i = 0; i < NumberChar_Count; i++) + aBuf.append(NumberChar[i], 10); + numberChar = aBuf.makeStringAndClear(); + aBuf.ensureCapacity(Multiplier_Count * ExponentCount_CJK + 1); + for (i = 0; i < Multiplier_Count; i++) + aBuf.append(MultiplierChar_CJK[i], ExponentCount_CJK); + multiplierChar = aBuf.makeStringAndClear(); + decimalChar = OUString(DecimalChar); + minusChar = OUString(MinusChar); + } else { + numberChar = OUString(NumberChar[number], 10); + decimalChar = OUString::valueOf(DecimalChar[number]); + minusChar = OUString::valueOf(MinusChar[number]); + multiplierChar = OUString(MultiplierChar_CJK[multiplier], ExponentCount_CJK); \ + } + + while (i < nCount) { + if ((index = multiplierChar.indexOf(str[i])) >= 0) { + if (count == 0 || !isNumber(newStr->buffer[count-1])) { // add 1 in front of multiplier + newStr->buffer[count] = NUMBER_ONE; + offset[count++] = i; + } + index = MultiplierExponent_CJK[index % ExponentCount_CJK]; + numberMaker(index, index, str, i, nCount, newStr->buffer, count, offset, + numberChar, multiplierChar); + } else { + if ((index = numberChar.indexOf(str[i])) >= 0) + newStr->buffer[count] = (index % 10) + NUMBER_ZERO; + else if ((index = decimalChar.indexOf(str[i])) >= 0 && + (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 || + multiplierChar.indexOf(str[i+1]) >= 0))) + // Only when decimal point is followed by numbers, + // it will be convert to ASCII decimal point + newStr->buffer[count] = NUMBER_DECIMAL; + else if ((index = minusChar.indexOf(str[i])) >= 0 && + (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 || + multiplierChar.indexOf(str[i+1]) >= 0))) + // Only when minus is followed by numbers, + // it will be convert to ASCII minus sign + newStr->buffer[count] = NUMBER_MINUS; + else + newStr->buffer[count] = str[i]; + offset[count++] = i++; + } + } + + offset.realloc(count); + for (i = 0; i < count; i++) + offset[i] += startPos; + return OUString(newStr->buffer, count); + } + return OUString(); +} + +void SAL_CALL TextToNum::numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str, sal_Int32& i, + sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >& offset, + OUString& numberChar, OUString& multiplierChar) +{ + sal_Int16 curr = 0, num = 0, end = 0, shift = 0; + while (++i < nCount) { + if ((curr = numberChar.indexOf(str[i])) >= 0) { + if (num > 0) + break; + num = curr % 10; + } else if ((curr = multiplierChar.indexOf(str[i])) >= 0) { + curr = MultiplierExponent_CJK[curr % ExponentCount_CJK]; + if (prev > curr && num == 0) num = 1; // One may be omitted in informal format + shift = end = 0; + if (curr >= max) + max = curr; + else if (curr > prev) + shift = max - curr; + else + end = curr; + while (end++ < prev) { + dst[count] = NUMBER_ZERO + (end == prev ? num : 0); + offset[count++] = i; + } + if (shift) { + count -= max; + for (sal_Int16 j = 0; j < shift; j++, count++) { + dst[count] = dst[count + curr]; + offset[count] = offset[count + curr]; + } + max = curr; + } + numberMaker(max, curr, str, i, nCount, dst, count, offset, numberChar, multiplierChar); + return; + } else + break; + } + while (end++ < prev) { + dst[count] = NUMBER_ZERO + (end == prev ? num : 0); + offset[count++] = i - 1; + } +} + +TextToNum::TextToNum() +{ + number = multiplier = 0; + transliterationName = "TextToNum"; + implementationName = "com.sun.star.i18n.Transliteration.TextToNum"; +} + +#define TRANSLITERATION_TEXTTONUM( name, _number ) \ +TextToNum##name::TextToNum##name() \ +{ \ + number = NumberChar_##_number; \ + multiplier = Multiplier_##_number; \ + transliterationName = "TextToNum"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.TextToNum"#name; \ +} + +TRANSLITERATION_TEXTTONUM( Lower_zh_CN, Lower_zh ) +TRANSLITERATION_TEXTTONUM( Upper_zh_CN, Upper_zh ) +TRANSLITERATION_TEXTTONUM( Lower_zh_TW, Lower_zh ) +TRANSLITERATION_TEXTTONUM( Upper_zh_TW, Upper_zh_TW ) +#define Multiplier_Lower_ko Multiplier_Upper_zh_TW +#define Multiplier_Upper_ko Multiplier_Upper_zh_TW +TRANSLITERATION_TEXTTONUM( FormalLower_ko, Lower_ko ) +TRANSLITERATION_TEXTTONUM( FormalUpper_ko, Upper_ko ) +TRANSLITERATION_TEXTTONUM( FormalHangul_ko, Hangul_ko ) +TRANSLITERATION_TEXTTONUM( InformalLower_ko, Lower_ko ) +TRANSLITERATION_TEXTTONUM( InformalUpper_ko, Upper_ko ) +TRANSLITERATION_TEXTTONUM( InformalHangul_ko, Hangul_ko ) +TRANSLITERATION_TEXTTONUM( KanjiLongTraditional_ja_JP, Traditional_ja ) +TRANSLITERATION_TEXTTONUM( KanjiLongModern_ja_JP, Modern_ja ) +#undef TRANSLITERATION_TEXTTONUM + +} } } } |