summaryrefslogtreecommitdiff
path: root/i18npool/source
diff options
context:
space:
mode:
author Karl Hong <khong@openoffice.org> 2002-03-30 08:25:16 +0000
committer Karl Hong <khong@openoffice.org> 2002-03-30 08:25:16 +0000
commit 1c1bd3a6f1ba7c1efe8b52670a6f7f7477a9d55a (patch)
tree 0ef6785714f7f7820b60b6988c9db201c3134c82 /i18npool/source
parent 1e92b7f501133182090aee8ba1893e3fe84f330e (diff)
#98442#TextToNum, CharToNum transliteration
Diffstat (limited to 'i18npool/source')
-rw-r--r-- i18npool/source/registerservices/registerservices.cxx 131
-rw-r--r-- i18npool/source/transliteration/chartonum.cxx 125
-rw-r--r-- i18npool/source/transliteration/data/numberchar.h 107
-rw-r--r-- i18npool/source/transliteration/makefile.mk 8
-rw-r--r-- i18npool/source/transliteration/numtochar.cxx 66
-rw-r--r-- i18npool/source/transliteration/numtotext_cjk.cxx 356
-rw-r--r-- i18npool/source/transliteration/texttonum.cxx 204
7 files changed, 686 insertions, 311 deletions
diff --git a/i18npool/source/registerservices/registerservices.cxx b/i18npool/source/registerservices/registerservices.cxx
index 6c2df856a63e..82fded86eb03 100644
--- a/i18npool/source/registerservices/registerservices.cxx
+++ b/i18npool/source/registerservices/registerservices.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: registerservices.cxx,v $
*
- * $Revision: 1.7 $
+ * $Revision: 1.8 $
*
- * last change: $Author: er $ $Date: 2002-03-28 00:35:49 $
+ * last change: $Author: khong $ $Date: 2002-03-30 09:25:16 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -92,6 +92,8 @@
#include <textToPronounce_zh.hxx>
#include <numtotext_cjk.hxx>
#include <numtochar.hxx>
+#include <texttonum.hxx>
+#include <chartonum.hxx>
#include <calendarImpl.hxx>
#include <calendar_gregorian.hxx>
@@ -248,35 +250,68 @@ IMPL_CREATEINSTANCE( ignoreSpace_ja_JP)
IMPL_CREATEINSTANCE( TextToChuyin_zh_TW )
IMPL_CREATEINSTANCE( TextToPinyin_zh_CN )
-IMPL_CREATEINSTANCE( NumToTextLower_zh_CN )
-IMPL_CREATEINSTANCE( NumToTextUpper_zh_CN )
IMPL_CREATEINSTANCE( NumToCharLower_zh_CN )
IMPL_CREATEINSTANCE( NumToCharUpper_zh_CN )
-
IMPL_CREATEINSTANCE( NumToCharLower_zh_TW )
IMPL_CREATEINSTANCE( NumToCharUpper_zh_TW )
-IMPL_CREATEINSTANCE( NumToTextLower_zh_TW )
-IMPL_CREATEINSTANCE( NumToTextUpper_zh_TW )
-
+IMPL_CREATEINSTANCE( NumToCharFullwidth )
+IMPL_CREATEINSTANCE( NumToCharKanjiShort_ja_JP )
IMPL_CREATEINSTANCE( NumToCharHangul_ko )
IMPL_CREATEINSTANCE( NumToCharLower_ko )
IMPL_CREATEINSTANCE( NumToCharUpper_ko )
-IMPL_CREATEINSTANCE( NumToCharFullwidth )
-IMPL_CREATEINSTANCE( NumToCharKanjiShort_ja_JP )
-IMPL_CREATEINSTANCE( NumToTextFormalHangul_ko )
-IMPL_CREATEINSTANCE( NumToTextFormalLower_ko )
-IMPL_CREATEINSTANCE( NumToTextFormalUpper_ko )
-IMPL_CREATEINSTANCE( NumToTextInformalHangul_ko )
-IMPL_CREATEINSTANCE( NumToTextInformalUpper_ko )
-IMPL_CREATEINSTANCE( NumToTextInformalLower_ko )
-
IMPL_CREATEINSTANCE( NumToCharIndic_ar )
IMPL_CREATEINSTANCE( NumToCharEastIndic_ar )
IMPL_CREATEINSTANCE( NumToCharIndic_hi )
IMPL_CREATEINSTANCE( NumToChar_th )
+IMPL_CREATEINSTANCE( CharToNum )
+IMPL_CREATEINSTANCE( CharToNumLower_zh_CN )
+IMPL_CREATEINSTANCE( CharToNumUpper_zh_CN )
+IMPL_CREATEINSTANCE( CharToNumLower_zh_TW )
+IMPL_CREATEINSTANCE( CharToNumUpper_zh_TW )
+IMPL_CREATEINSTANCE( CharToNumFullwidth )
+IMPL_CREATEINSTANCE( CharToNumKanjiShort_ja_JP )
+IMPL_CREATEINSTANCE( CharToNumHangul_ko )
+IMPL_CREATEINSTANCE( CharToNumLower_ko )
+IMPL_CREATEINSTANCE( CharToNumUpper_ko )
+IMPL_CREATEINSTANCE( CharToNumIndic_ar )
+IMPL_CREATEINSTANCE( CharToNumEastIndic_ar )
+IMPL_CREATEINSTANCE( CharToNumIndic_hi )
+IMPL_CREATEINSTANCE( CharToNum_th )
+
+IMPL_CREATEINSTANCE( NumToTextLower_zh_CN )
+IMPL_CREATEINSTANCE( NumToTextUpper_zh_CN )
+IMPL_CREATEINSTANCE( NumToTextLower_zh_TW )
+IMPL_CREATEINSTANCE( NumToTextUpper_zh_TW )
IMPL_CREATEINSTANCE( NumToTextKanjiLongModern_ja_JP )
IMPL_CREATEINSTANCE( NumToTextKanjiLongTraditional_ja_JP )
+IMPL_CREATEINSTANCE( NumToTextFormalHangul_ko )
+IMPL_CREATEINSTANCE( NumToTextFormalLower_ko )
+IMPL_CREATEINSTANCE( NumToTextFormalUpper_ko )
+IMPL_CREATEINSTANCE( NumToTextInformalHangul_ko )
+IMPL_CREATEINSTANCE( NumToTextInformalUpper_ko )
+IMPL_CREATEINSTANCE( NumToTextInformalLower_ko )
+
+IMPL_CREATEINSTANCE( TextToNum )
+IMPL_CREATEINSTANCE( TextToNumLower_zh_CN )
+IMPL_CREATEINSTANCE( TextToNumUpper_zh_CN )
+IMPL_CREATEINSTANCE( TextToNumLower_zh_TW )
+IMPL_CREATEINSTANCE( TextToNumUpper_zh_TW )
+IMPL_CREATEINSTANCE( TextToNumKanjiLongModern_ja_JP )
+IMPL_CREATEINSTANCE( TextToNumKanjiLongTraditional_ja_JP )
+IMPL_CREATEINSTANCE( TextToNumFormalHangul_ko )
+IMPL_CREATEINSTANCE( TextToNumFormalLower_ko )
+IMPL_CREATEINSTANCE( TextToNumFormalUpper_ko )
+IMPL_CREATEINSTANCE( TextToNumInformalHangul_ko )
+IMPL_CREATEINSTANCE( TextToNumInformalUpper_ko )
+IMPL_CREATEINSTANCE( TextToNumInformalLower_ko )
+
+IMPL_CREATEINSTANCE( NumToTextDate_zh )
+IMPL_CREATEINSTANCE( NumToTextAIUFullWidth_ja_JP )
+IMPL_CREATEINSTANCE( NumToTextAIUHalfWidth_ja_JP )
+IMPL_CREATEINSTANCE( NumToTextIROHAFullWidth_ja_JP )
+IMPL_CREATEINSTANCE( NumToTextIROHAHalfWidth_ja_JP )
+IMPL_CREATEINSTANCE( NumToTextCircledNumber )
static const struct InstancesArray {
const sal_Char* pServiceNm;
@@ -484,31 +519,69 @@ static const struct InstancesArray {
IMPL_TRANSLITERATION_ITEM (ignoreSpace_ja_JP),
IMPL_TRANSLITERATION_ITEM (TextToPinyin_zh_CN),
IMPL_TRANSLITERATION_ITEM (TextToChuyin_zh_TW),
- IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_CN),
- IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_CN),
+
IMPL_TRANSLITERATION_ITEM (NumToCharUpper_zh_CN),
IMPL_TRANSLITERATION_ITEM (NumToCharLower_zh_CN),
- IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_TW),
- IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_TW),
IMPL_TRANSLITERATION_ITEM (NumToCharUpper_zh_TW),
IMPL_TRANSLITERATION_ITEM (NumToCharLower_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (NumToCharFullwidth),
+ IMPL_TRANSLITERATION_ITEM (NumToCharKanjiShort_ja_JP),
IMPL_TRANSLITERATION_ITEM (NumToCharLower_ko),
IMPL_TRANSLITERATION_ITEM (NumToCharUpper_ko),
IMPL_TRANSLITERATION_ITEM (NumToCharHangul_ko),
- IMPL_TRANSLITERATION_ITEM (NumToCharFullwidth),
- IMPL_TRANSLITERATION_ITEM (NumToCharKanjiShort_ja_JP),
- IMPL_TRANSLITERATION_ITEM (NumToTextInformalHangul_ko),
- IMPL_TRANSLITERATION_ITEM (NumToTextInformalLower_ko),
- IMPL_TRANSLITERATION_ITEM (NumToTextInformalUpper_ko),
- IMPL_TRANSLITERATION_ITEM (NumToTextFormalHangul_ko),
- IMPL_TRANSLITERATION_ITEM (NumToTextFormalLower_ko),
- IMPL_TRANSLITERATION_ITEM (NumToTextFormalUpper_ko),
IMPL_TRANSLITERATION_ITEM (NumToCharIndic_ar),
IMPL_TRANSLITERATION_ITEM (NumToCharEastIndic_ar),
IMPL_TRANSLITERATION_ITEM (NumToCharIndic_hi),
IMPL_TRANSLITERATION_ITEM (NumToChar_th),
+
+ IMPL_TRANSLITERATION_ITEM (CharToNum),
+ IMPL_TRANSLITERATION_ITEM (CharToNumUpper_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (CharToNumLower_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (CharToNumUpper_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (CharToNumLower_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (CharToNumFullwidth),
+ IMPL_TRANSLITERATION_ITEM (CharToNumKanjiShort_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (CharToNumLower_ko),
+ IMPL_TRANSLITERATION_ITEM (CharToNumUpper_ko),
+ IMPL_TRANSLITERATION_ITEM (CharToNumHangul_ko),
+ IMPL_TRANSLITERATION_ITEM (CharToNumIndic_ar),
+ IMPL_TRANSLITERATION_ITEM (CharToNumEastIndic_ar),
+ IMPL_TRANSLITERATION_ITEM (CharToNumIndic_hi),
+ IMPL_TRANSLITERATION_ITEM (CharToNum_th),
+
+ IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (NumToTextUpper_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (NumToTextLower_zh_TW),
IMPL_TRANSLITERATION_ITEM (NumToTextKanjiLongModern_ja_JP),
IMPL_TRANSLITERATION_ITEM (NumToTextKanjiLongTraditional_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (NumToTextInformalHangul_ko),
+ IMPL_TRANSLITERATION_ITEM (NumToTextInformalLower_ko),
+ IMPL_TRANSLITERATION_ITEM (NumToTextInformalUpper_ko),
+ IMPL_TRANSLITERATION_ITEM (NumToTextFormalHangul_ko),
+ IMPL_TRANSLITERATION_ITEM (NumToTextFormalLower_ko),
+ IMPL_TRANSLITERATION_ITEM (NumToTextFormalUpper_ko),
+
+ IMPL_TRANSLITERATION_ITEM (TextToNum),
+ IMPL_TRANSLITERATION_ITEM (TextToNumUpper_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (TextToNumLower_zh_CN),
+ IMPL_TRANSLITERATION_ITEM (TextToNumUpper_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (TextToNumLower_zh_TW),
+ IMPL_TRANSLITERATION_ITEM (TextToNumKanjiLongModern_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (TextToNumKanjiLongTraditional_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (TextToNumInformalHangul_ko),
+ IMPL_TRANSLITERATION_ITEM (TextToNumInformalLower_ko),
+ IMPL_TRANSLITERATION_ITEM (TextToNumInformalUpper_ko),
+ IMPL_TRANSLITERATION_ITEM (TextToNumFormalHangul_ko),
+ IMPL_TRANSLITERATION_ITEM (TextToNumFormalLower_ko),
+ IMPL_TRANSLITERATION_ITEM (TextToNumFormalUpper_ko),
+
+ IMPL_TRANSLITERATION_ITEM (NumToTextDate_zh),
+ IMPL_TRANSLITERATION_ITEM (NumToTextAIUFullWidth_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (NumToTextAIUHalfWidth_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (NumToTextIROHAFullWidth_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (NumToTextIROHAHalfWidth_ja_JP),
+ IMPL_TRANSLITERATION_ITEM (NumToTextCircledNumber),
// add here new services !!
{ 0, 0, 0 }
diff --git a/i18npool/source/transliteration/chartonum.cxx b/i18npool/source/transliteration/chartonum.cxx
new file mode 100644
index 000000000000..ea96c1423018
--- /dev/null
+++ b/i18npool/source/transliteration/chartonum.cxx
@@ -0,0 +1,125 @@
+/*************************************************************************
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2002 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at http://www.openoffice.org/license.html.
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
+ * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2002 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+
+#define TRANSLITERATION_ALL
+#include <chartonum.hxx>
+#include <data/numberchar.h>
+#include <rtl/ustrbuf.hxx>
+
+using namespace com::sun::star::uno;
+using namespace rtl;
+
+namespace com { namespace sun { namespace star { namespace i18n {
+
+// Converts digits, decimal points and minus signs found in the NumberChar tables to ASCII; offset[i] is set to startPos + i.
+OUString SAL_CALL CharToNum::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+        Sequence< sal_Int32 >& offset ) throw(RuntimeException)
+{
+    const sal_Unicode *str = inStr.getStr() + startPos;
+    OUStringBuffer aResult(nCount);     // owns its storage, so nothing has to be released by hand
+    offset.realloc(nCount);
+    sal_Int32 index;                    // indexOf() returns sal_Int32
+    OUString numberChar, decimalChar, minusChar;
+    if (number == 0) {                  // generic service: accept the characters of every table row
+        OUStringBuffer aBuf(NumberChar_Count * 10 + 1);
+        for (sal_Int32 i = 0; i < NumberChar_Count; i++)
+            aBuf.append(NumberChar[i], 10);
+        numberChar = aBuf.makeStringAndClear();
+        decimalChar = OUString(DecimalChar, NumberChar_Count);  // the tables carry no 0 terminator,
+        minusChar = OUString(MinusChar, NumberChar_Count);      // so the length is passed explicitly
+    } else {
+        numberChar = OUString(NumberChar[number], 10);
+        decimalChar = OUString::valueOf(DecimalChar[number]);
+        minusChar = OUString::valueOf(MinusChar[number]);
+    }
+    // A decimal point or minus sign is converted only when a digit follows it.
+    for (sal_Int32 i = 0; i < nCount; i++) {
+        if ((index = numberChar.indexOf(str[i])) >= 0)
+            aResult.append(static_cast< sal_Unicode >(NUMBER_ZERO + (index % 10)));  // 10 digits per table row
+        else if (decimalChar.indexOf(str[i]) >= 0 &&
+                i < nCount-1 && numberChar.indexOf(str[i+1]) >= 0)
+            aResult.append(NUMBER_DECIMAL);
+        else if (minusChar.indexOf(str[i]) >= 0 &&
+                i < nCount-1 && numberChar.indexOf(str[i+1]) >= 0)
+            aResult.append(NUMBER_MINUS);
+        else
+            aResult.append(str[i]);
+        offset[i] = startPos + i;
+    }
+    return aResult.makeStringAndClear();
+}
+
+// Constructor of the generic (locale-independent) CharToNum service.
+CharToNum::CharToNum() {
+    implementationName = "com.sun.star.i18n.Transliteration.CharToNum";
+    transliterationName = "CharToNum";
+    number = 0;     // 0 makes transliterate() match the characters of every NumberChar row
+}
+
+// Defines the constructor of one CharToNum service: 'suffix' completes the class and service names, 'table' picks the NumberChar_* row.
+#define TRANSLITERATION_CHARTONUM( suffix, table ) \
+CharToNum##suffix::CharToNum##suffix() { \
+    implementationName = "com.sun.star.i18n.Transliteration.CharToNum"#suffix; \
+    transliterationName = "CharToNum"#suffix; \
+    number = NumberChar_##table; \
+}
+TRANSLITERATION_CHARTONUM( Fullwidth, FullWidth )
+TRANSLITERATION_CHARTONUM( Lower_zh_CN, Lower_zh )
+TRANSLITERATION_CHARTONUM( Lower_zh_TW, Lower_zh )
+TRANSLITERATION_CHARTONUM( Upper_zh_CN, Upper_zh )
+TRANSLITERATION_CHARTONUM( Upper_zh_TW, Upper_zh_TW )
+TRANSLITERATION_CHARTONUM( KanjiShort_ja_JP, Modern_ja )
+TRANSLITERATION_CHARTONUM( Lower_ko, Lower_ko )
+TRANSLITERATION_CHARTONUM( Upper_ko, Upper_ko )
+TRANSLITERATION_CHARTONUM( Hangul_ko, Hangul_ko )
+TRANSLITERATION_CHARTONUM( Indic_ar, Indic_ar )
+TRANSLITERATION_CHARTONUM( EastIndic_ar, EastIndic_ar )
+TRANSLITERATION_CHARTONUM( Indic_hi, Indic_hi )
+TRANSLITERATION_CHARTONUM( _th, th )
+#undef TRANSLITERATION_CHARTONUM
+
+} } } }
diff --git a/i18npool/source/transliteration/data/numberchar.h b/i18npool/source/transliteration/data/numberchar.h
index 9778dadd38a7..e17f8640d770 100644
--- a/i18npool/source/transliteration/data/numberchar.h
+++ b/i18npool/source/transliteration/data/numberchar.h
@@ -2,9 +2,9 @@
*
* $RCSfile: numberchar.h,v $
*
- * $Revision: 1.1 $
+ * $Revision: 1.2 $
*
- * last change: $Author: bustamam $ $Date: 2002-03-26 13:21:45 $
+ * last change: $Author: khong $ $Date: 2002-03-30 09:24:47 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -42,16 +42,33 @@
namespace com { namespace sun { namespace star { namespace i18n {
+// Row indices into the NumberChar, DecimalChar and MinusChar tables; keep them in step with the row order of those tables.
+static const sal_Int16 NumberChar_HalfWidth = 0;
+static const sal_Int16 NumberChar_FullWidth = 1;
+static const sal_Int16 NumberChar_Lower_zh = 2;
+static const sal_Int16 NumberChar_Upper_zh = 3;
+static const sal_Int16 NumberChar_Upper_zh_TW = 4;
+static const sal_Int16 NumberChar_Modern_ja = 5;
+static const sal_Int16 NumberChar_Traditional_ja= 6;
+static const sal_Int16 NumberChar_Lower_ko = 7;
+static const sal_Int16 NumberChar_Upper_ko = 8;
+static const sal_Int16 NumberChar_Hangul_ko = 9;
+static const sal_Int16 NumberChar_Indic_ar = 10;
+static const sal_Int16 NumberChar_EastIndic_ar = 11;
+static const sal_Int16 NumberChar_Indic_hi = 12;
+static const sal_Int16 NumberChar_th = 13;
+static const sal_Int16 NumberChar_Count = 14;   // number of rows, not a row index
static const sal_Unicode NumberChar[][10] = {
- { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Ascii
+// 0 1 2 3 4 5 6 7 8 9
+ { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Half Width (Ascii)
{ 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19 }, // Full Width
{ 0x3007, 0x4E00, 0x4E8c, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Chinese Lower
{ 0x96F6, 0x58F9, 0x8D30, 0x53C1, 0x8086, 0x4F0D, 0x9646, 0x67D2, 0x634C, 0x7396 }, // S. Chinese Upper
{ 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x8086, 0x4F0D, 0x9678, 0x67D2, 0x634C, 0x7396 }, // T. Chinese Upper
{ 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Modern
- { 0x3007, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad.
+ { 0x96F6, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad.
{ 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Lower
- { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper
+ { 0xF9B2, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper
{ 0xC601, 0xC77C, 0xC774, 0xC0BC, 0xC0AC, 0xC624, 0xC721, 0xCE60, 0xD314, 0xAD6C }, // Korean Hangul
{ 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669 }, // Arabic Indic
{ 0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4, 0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9 }, // Est. Arabic Indic
@@ -59,20 +76,72 @@ static const sal_Unicode NumberChar[][10] = {
{ 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59 } // Thai
};
-static const sal_Int16 NumberChar_HalfWidth = 0;
-static const sal_Int16 NumberChar_FullWidth = 1;
-static const sal_Int16 NumberChar_Lower_zh = 2;
-static const sal_Int16 NumberChar_Upper_zh = 3;
-static const sal_Int16 NumberChar_Upper_zh_TW = 4;
-static const sal_Int16 NumberChar_Modern_ja = 5;
-static const sal_Int16 NumberChar_Traditional_ja= 6;
-static const sal_Int16 NumberChar_Lower_ko = 7;
-static const sal_Int16 NumberChar_Upper_ko = 8;
-static const sal_Int16 NumberChar_Hangul_ko = 9;
-static const sal_Int16 NumberChar_Indic_ar = 11;
-static const sal_Int16 NumberChar_EastIndic_ar = 12;
-static const sal_Int16 NumberChar_Indic_hi = 13;
-static const sal_Int16 NumberChar_th = 14;
+// Decimal point character per NumberChar_* row. The array has no terminating 0: index it or pass an explicit length.
+static sal_Unicode DecimalChar[] = {
+    0x002E, // Half Width (Ascii)
+    0xFF0E, // Full Width
+    0xFF0E, // Chinese Lower
+    0x70B9, // S. Chinese Upper
+    0x9EDE, // T. Chinese Upper
+    0xFF0E, // Japanese Modern
+    0xFF0E, // Japanese Trad.
+    0xFF0E, // Korean Lower
+    0x9EDE, // Korean Upper
+    0xC810, // Korean Hangul
+    0x002E, // Arabic Indic
+    0x002E, // Est. Arabic Indic
+    0x002E, // Indic
+    0x002E // Thai
+};
+// Minus sign character per NumberChar_* row. The array has no terminating 0: index it or pass an explicit length.
+static sal_Unicode MinusChar[] = {
+    0x002D, // Half Width (Ascii)
+    0xFF0D, // Full Width
+    0xFF0D, // Chinese Lower
+    0x8D1F, // S. Chinese Upper
+    0x5069, // T. Chinese Upper
+    0xFF0D, // Japanese Modern
+    0xFF0D, // Japanese Trad.
+    0xFF0D, // Korean Lower
+    0x5069, // Korean Upper
+    0xFF0D, // Korean Hangul ???
+    0x002D, // Arabic Indic
+    0x002D, // Est. Arabic Indic
+    0x002D, // Indic
+    0x002D, // Thai
+};
+// ASCII reference characters and classification helpers; the is*() arguments are parenthesized so that any expression can be passed.
+#define NUMBER_ZERO     NumberChar[NumberChar_HalfWidth][0] // 0x0030
+#define NUMBER_ONE      NumberChar[NumberChar_HalfWidth][1] // 0x0031
+#define NUMBER_NINE     NumberChar[NumberChar_HalfWidth][9] // 0x0039
+#define NUMBER_DECIMAL  DecimalChar[0]                      // 0x002E
+#define NUMBER_MINUS    MinusChar[0]                        // 0x002D
+#define isNumber(n)     ( NUMBER_ZERO <= (n) && (n) <= NUMBER_NINE )    // note: evaluates n twice
+#define isDecimal(n)    ( (n) == NUMBER_DECIMAL )
+#define isMinus(n)      ( (n) == NUMBER_MINUS )
+// Number of entries in MultiplierExponent_CJK and in each row of MultiplierChar_CJK.
+const sal_Int16 ExponentCount_CJK = 6;
+// Row indices into MultiplierChar_CJK.
+const sal_Int16 Multiplier_Lower_zh = 0;
+const sal_Int16 Multiplier_Upper_zh = 1;
+const sal_Int16 Multiplier_Lower_zh_TW = 2;
+const sal_Int16 Multiplier_Upper_zh_TW = 3;
+const sal_Int16 Multiplier_Hangul_ko = 4;
+const sal_Int16 Multiplier_Modern_ja = 5;
+const sal_Int16 Multiplier_Traditional_ja = 6;
+const sal_Int16 Multiplier_Count = 7;   // number of rows, not a row index
+static sal_Int16 MultiplierExponent_CJK[ExponentCount_CJK] = {  // presumably the power of ten for each MultiplierChar_CJK column -- TODO confirm
+    12, 8, 4, 3, 2, 1
+};
+static sal_Unicode MultiplierChar_CJK[][ExponentCount_CJK] = {    // multiplier characters; rows appear to follow the Multiplier_* constants
+    { 0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341 },   // S. Chinese Lower
+    { 0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE },   // S. Chinese Upper
+    { 0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341 },   // T. Chinese & Korean Lower
+    { 0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE },   // T. Chinese & Korean Upper
+    { 0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED },   // Korean Hangul
+    { 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341 },   // Japanese Modern
+    { 0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE },   // Japanese Traditional
+};
} } } }
diff --git a/i18npool/source/transliteration/makefile.mk b/i18npool/source/transliteration/makefile.mk
index 0c48e5035f11..c9002ec0ac5b 100644
--- a/i18npool/source/transliteration/makefile.mk
+++ b/i18npool/source/transliteration/makefile.mk
@@ -2,9 +2,9 @@
#*
#* $RCSfile: makefile.mk,v $
#*
-#* $Revision: 1.2 $
+#* $Revision: 1.3 $
#*
-#* last change: $Author: er $ $Date: 2002-03-26 17:13:19 $
+#* last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $
#*
#* The Contents of this file are made available subject to the terms of
#* either of the following licenses
@@ -109,7 +109,9 @@ SLOFILES= \
$(SLO)$/ignoreMiddleDot_ja_JP.obj \
$(SLO)$/textToPronounce_zh.obj \
$(SLO)$/numtochar.obj \
- $(SLO)$/numtotext_cjk.obj
+ $(SLO)$/numtotext_cjk.obj \
+ $(SLO)$/chartonum.obj \
+ $(SLO)$/texttonum.obj
# MACOSX: manually initialization some static data members
.IF "$(OS)"=="MACOSX"
diff --git a/i18npool/source/transliteration/numtochar.cxx b/i18npool/source/transliteration/numtochar.cxx
index 0ef28b154ef7..7a2ef006d3bd 100644
--- a/i18npool/source/transliteration/numtochar.cxx
+++ b/i18npool/source/transliteration/numtochar.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: numtochar.cxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: er $ $Date: 2002-03-26 17:13:19 $
+ * last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -71,52 +71,44 @@ using namespace rtl;
namespace com { namespace sun { namespace star { namespace i18n {
-OUString SAL_CALL NumToChar::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) throw(RuntimeException) {
-
- // Create a string buffer which can hold nCount + 1 characters.
- rtl_uString *newStr;
- x_rtl_uString_new_WithLength( &newStr, nCount ); // defined in x_rtl_ustring.h The reference count is 0 now.
-
- // Prepare pointers of unicode character arrays.
+OUString SAL_CALL NumToChar::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >& offset ) throw(RuntimeException)
+{
const sal_Unicode *src = inStr.getStr() + startPos;
- sal_Unicode *dst = newStr->buffer;
+ rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount);
+ offset.realloc(nCount);
- // Allocate the same length as inStr to offset argument.
- offset.realloc(inStr.getLength());
- sal_Int32 *p = offset.getArray();
- sal_Int32 position = startPos;
-
- for (sal_Int32 index = 0; index < nCount; index++) {
- sal_Unicode ch = src[index];
- dst[index] = (0x0030 <= ch && ch <= 0x0039) ? num2char[ ch - 0x0030 ] : ch;
- *p++ = position++;
+ for (sal_Int32 i = 0; i < nCount; i++) {
+ sal_Unicode ch = src[i];
+ newStr->buffer[i] = (isNumber(ch) ? NumberChar[number][ ch - NUMBER_ZERO ] :
+ (isDecimal(ch) ? DecimalChar[number] : (isMinus(ch) ? MinusChar[number] : ch)));
+ offset[i] = startPos + i;
}
-
- return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1.
+ return OUString(newStr->buffer, nCount);
}
-#define TRANSLITERATION_NUMTOCHAR( number, name ) \
+#define TRANSLITERATION_NUMTOCHAR( name, _number ) \
NumToChar##name::NumToChar##name() \
{ \
- num2char = NumberChar[number]; \
+ number = NumberChar_##_number; \
transliterationName = "NumToChar"#name; \
implementationName = "com.sun.star.i18n.Transliteration.NumToChar"#name; \
}
-TRANSLITERATION_NUMTOCHAR( NumberChar_HalfWidth, )
-TRANSLITERATION_NUMTOCHAR( NumberChar_FullWidth, Fullwidth)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_CN)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_zh, Lower_zh_TW)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh, Upper_zh_CN)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_zh_TW, Upper_zh_TW)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Modern_ja, KanjiShort_ja_JP)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Lower_ko, Lower_ko)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Upper_ko, Upper_ko)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Hangul_ko, Hangul_ko)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_ar, Indic_ar)
-TRANSLITERATION_NUMTOCHAR( NumberChar_EastIndic_ar, EastIndic_ar)
-TRANSLITERATION_NUMTOCHAR( NumberChar_Indic_hi, Indic_hi)
-TRANSLITERATION_NUMTOCHAR( NumberChar_th, _th)
+TRANSLITERATION_NUMTOCHAR( , HalfWidth )
+TRANSLITERATION_NUMTOCHAR( Fullwidth, FullWidth )
+TRANSLITERATION_NUMTOCHAR( Lower_zh_CN, Lower_zh )
+TRANSLITERATION_NUMTOCHAR( Lower_zh_TW, Lower_zh )
+TRANSLITERATION_NUMTOCHAR( Upper_zh_CN, Upper_zh )
+TRANSLITERATION_NUMTOCHAR( Upper_zh_TW, Upper_zh_TW )
+TRANSLITERATION_NUMTOCHAR( KanjiShort_ja_JP, Modern_ja )
+TRANSLITERATION_NUMTOCHAR( Lower_ko, Lower_ko )
+TRANSLITERATION_NUMTOCHAR( Upper_ko, Upper_ko )
+TRANSLITERATION_NUMTOCHAR( Hangul_ko, Hangul_ko )
+TRANSLITERATION_NUMTOCHAR( Indic_ar, Indic_ar )
+TRANSLITERATION_NUMTOCHAR( EastIndic_ar, EastIndic_ar )
+TRANSLITERATION_NUMTOCHAR( Indic_hi, Indic_hi )
+TRANSLITERATION_NUMTOCHAR( _th, th )
#undef TRANSLITERATION_NUMTOCHAR
} } } }
diff --git a/i18npool/source/transliteration/numtotext_cjk.cxx b/i18npool/source/transliteration/numtotext_cjk.cxx
index 302024ecb16b..9a79bd2aea72 100644
--- a/i18npool/source/transliteration/numtotext_cjk.cxx
+++ b/i18npool/source/transliteration/numtotext_cjk.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: numtotext_cjk.cxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: er $ $Date: 2002-03-26 17:13:19 $
+ * last change: $Author: khong $ $Date: 2002-03-30 09:24:46 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -62,6 +62,7 @@
#define TRANSLITERATION_ALL
#include <numtotext_cjk.hxx>
#include <data/numberchar.h>
+#include <data/bullet.h>
using namespace com::sun::star::uno;
using namespace rtl;
@@ -70,49 +71,17 @@ namespace com { namespace sun { namespace star { namespace i18n {
#define NUMBER_OMIT_ZERO (1 << 0)
#define NUMBER_OMIT_ONE (1 << 1)
+#define NUMBER_OMIT_ONLY_ZERO (1 << 2)
-#define NUMBER_ZERO 0x0030
-#define NUMBER_ONE 0x0031
-
-#define isNum(n) ( 0x0030 <= n && n <= 0x0039 )
+#define NUMBER_COMMA 0x002C                     // ASCII comma
+#define isComma(ch) ((ch) == NUMBER_COMMA)      // argument parenthesized so any expression can be passed
+#define MAX_SAL_UINT32 0xFFFFFFFF
+#define MAX_VALUE ((MAX_SAL_UINT32 - 9) / 10)   // below this, value * 10 + 9 still fits in 32 bits; parenthesized so it expands as one operand
NumToText_CJK::NumToText_CJK() {
numberChar = NULL;
-}
-
-sal_Bool SAL_CALL NumToText_CJK::numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len,
- sal_Unicode *dst, sal_Int32& count, sal_Unicode multiplierChar, sal_Int32** offset)
-{
- if ( len == 1 ) {
- **offset++ = count;
- if (str[begin] != NUMBER_ZERO) {
- if (!(numberFlag & NUMBER_OMIT_ONE) || str[begin] != NUMBER_ONE)
- dst[count++] = numberChar[(sal_Int16)(str[begin] - NUMBER_ZERO)];
- if (multiplierChar > 0)
- dst[count++] = multiplierChar;
- } else if (!(numberFlag & NUMBER_OMIT_ZERO) && dst[count-1] != numberChar[0])
- dst[count++] = numberChar[0];
- return str[begin] != NUMBER_ZERO;
- } else {
- sal_Bool printPower = sal_False;
- sal_Int16 last = 0;
- for (sal_Int16 i = 1; numberMultiplier[i].power >= 0; i++) {
- sal_Int32 tmp = len - numberMultiplier[i].power;
- if (tmp > 0) {
- printPower |= numberMaker(str, begin, tmp, dst, count,
- numberMultiplier[i].multiplierChar, offset);
- begin += tmp;
- len -= tmp;
- }
- }
- if (printPower) {
- if (dst[count-1] == numberChar[0])
- count--;
- if (multiplierChar > 0)
- dst[count++] = multiplierChar;
- }
- return printPower;
- }
+ bulletCount = 0;
+ number = 0;
}
OUString SAL_CALL NumToText_CJK::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
@@ -128,210 +97,151 @@ OUString SAL_CALL NumToText_CJK::transliterate( const OUString& inStr, sal_Int32
if (nCount > 0) {
const sal_Unicode *str = inStr.getStr() + startPos;
rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount * 2);
- sal_Int32 i, len = 0, count = 0, begin, end;
+ rtl_uString *srcStr = x_rtl_uString_new_WithLength(nCount); // for keeping number without comma
+ sal_Int32 i, len = 0, count = 0;
offset.realloc( nCount * 2 );
- sal_Int32 *p = offset.getArray();
+ sal_Bool doDecimal = sal_False;
+ sal_Bool makeBullet = bulletCount > 0;
for (i = 0; i <= nCount; i++) {
- if (i < nCount && isNum(str[i])) {
- if (len == 0)
- begin = i;
- len++;
+ if (i < nCount && isNumber(str[i])) {
+ if (doDecimal) {
+ newStr->buffer[count] = numberChar[str[i] - NUMBER_ZERO];
+ offset[count++] = i + startPos;
+ }
+ else
+ srcStr->buffer[len++] = str[i];
} else {
if (len > 0) {
- for (end = begin + (len % numberMultiplier[0].power);
- end <= i; begin = end, end += numberMultiplier[0].power)
- numberMaker(str, begin, end - begin, newStr->buffer, count,
- end == i ? 0 : numberMultiplier[0].multiplierChar, &p);
+ if (isComma(str[i]) && i < nCount-1 && isNumber(str[i+1]))
+ continue; // skip comma inside number string
+ if (makeBullet) { // bullet mode: the whole digit run selects a single entry of the bullet table
+ sal_uInt32 value = 0;
+ for (sal_Int32 j = 0; j < len; j++) {
+ if (value < MAX_VALUE)
+ value = (value * 10) + (srcStr->buffer[j] - NUMBER_ZERO); // parse the comma-free digits collected in srcStr, not the start of the raw input
+ else
+ throw RuntimeException(); // overflow, number is too big
+ }
+ newStr->buffer[count] = value ? numberChar[(value-1) % bulletCount] : NUMBER_ZERO; // values are 1-based and wrap around the table; 0 is emitted as NUMBER_ZERO
+ offset[count++] = i - len + startPos;
+ }
+ else {
+ sal_Int32 _count = count;
+ for (sal_Int32 begin = 0, end = len % MultiplierExponent_CJK[0];
+ end <= len; begin = end, end += MultiplierExponent_CJK[0])
+ numberMaker(srcStr->buffer, begin, end - begin, newStr->buffer, count,
+ end == len ? 0 : multiplierChar[0], offset, i - len + startPos);
+ if (_count == count && ! (numberFlag & NUMBER_OMIT_ONLY_ZERO)) {
+ newStr->buffer[count] = numberChar[0];
+ offset[count++] = i - len + startPos;
+ }
+ }
len = 0;
}
if (i < nCount) {
- *p++ = count;
- newStr->buffer[count++] = str[i];
+ if (doDecimal = (!makeBullet && !doDecimal &&
+ isDecimal(str[i]) && i < nCount-1 && isNumber(str[i+1])))
+ newStr->buffer[count] = DecimalChar[number];
+ else if (!makeBullet && isMinus(str[i]) && i < nCount-1 && isNumber(str[i+1]))
+ newStr->buffer[count] = MinusChar[number];
+ else
+ newStr->buffer[count] = str[i];
+ offset[count++] = i + startPos;
}
}
}
offset.realloc(count);
- for (i = 0; i < count; i++)
- offset[i] += startPos;
return OUString(newStr->buffer, count);
}
return OUString();
}
-static NumberMultiplier multiplier_Lower_zh[] = {
- { 12, 0x5146 }, // fourth four digits group, ten billion
- { 8, 0x4EBF }, // third four digits group, hundred million
- { 4, 0x4E07 }, // second four digits group, ten thousand
- { 3, 0x5343 }, // Unicode Chinese Lower Thousand
- { 2, 0x767E }, // Unicode Chinese Lower Hundred
- { 1, 0x5341 }, // Unicode Chinese Lower Ten
- { 0, 0x0000 }
-};
-
-NumToTextLower_zh_CN::NumToTextLower_zh_CN() {
- numberChar = NumberChar[NumberChar_Lower_zh];
- numberMultiplier = multiplier_Lower_zh;
- numberFlag = 0;
- transliterationName = "NumToTextLower_zh_CN";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_CN";
-}
-
-
-static NumberMultiplier multiplier_Upper_zh[] = {
- { 12, 0x5146 }, // fourth four digits group, ten billion
- { 8, 0x4EBF }, // third four digits group, hundred million
- { 4, 0x4E07 }, // second four digits group, ten thousand
- { 3, 0x4EDF }, // Unicode Chinese Lower Thousand
- { 2, 0x4F70 }, // Unicode Chinese Lower Hundred
- { 1, 0x62FE }, // Unicode Chinese Lower Ten
- { 0, 0x0000 }
-};
-
-NumToTextUpper_zh_CN::NumToTextUpper_zh_CN() {
- numberChar = NumberChar[NumberChar_Upper_zh];
- numberMultiplier = multiplier_Upper_zh;
- numberFlag = 0;
- transliterationName = "NumToTextUpper_zh_CN";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_CN";
-}
-
-static NumberMultiplier multiplier_Lower_zh_TW[] = {
- { 12, 0x5146 }, // fourth four digits group, ten billion
- { 8, 0x5104 }, // third four digits group, hundred million
- { 4, 0x842C }, // second four digits group, ten thousand
- { 3, 0x5343 }, // Unicode Chinese Lower Thousand
- { 2, 0x767E }, // Unicode Chinese Lower Hundred
- { 1, 0x5341 }, // Unicode Chinese Lower Ten
- { 0, 0x0000 }
-};
-
-NumToTextLower_zh_TW::NumToTextLower_zh_TW() {
- numberChar = NumberChar[NumberChar_Lower_zh];
- numberMultiplier = multiplier_Lower_zh_TW;
- numberFlag = 0;
- transliterationName = "NumToTextLower_zh_TW";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextLower_zh_TW";
-}
-
-static NumberMultiplier multiplier_Upper_zh_TW[] = {
- { 12, 0x5146 }, // fourth four digits group, ten billion
- { 8, 0x5104 }, // third four digits group, hundred million
- { 4, 0x842C }, // second four digits group, ten thousand
- { 3, 0x4EDF }, // Unicode Chinese Lower Thousand
- { 2, 0x4F70 }, // Unicode Chinese Lower Hundred
- { 1, 0x62FE }, // Unicode Chinese Lower Ten
- { 0, 0x0000 }
-};
-
-NumToTextUpper_zh_TW::NumToTextUpper_zh_TW() {
- numberChar = NumberChar[NumberChar_Upper_zh_TW];
- numberMultiplier = multiplier_Upper_zh_TW;
- numberFlag = 0;
- transliterationName = "NumToTextUpper_zh_TW";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextUpper_zh_TW";
-}
-
-NumToTextFormalLower_ko::NumToTextFormalLower_ko() {
- numberChar = NumberChar[NumberChar_Lower_ko];
- numberMultiplier = multiplier_Lower_zh_TW;
- numberFlag = NUMBER_OMIT_ZERO;
- transliterationName = "NumToTextFormalLower_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalLower_ko";
-}
-
-NumToTextFormalUpper_ko::NumToTextFormalUpper_ko() {
- numberChar = NumberChar[NumberChar_Upper_ko];
- numberMultiplier = multiplier_Lower_zh_TW;
- numberFlag = NUMBER_OMIT_ZERO;
- transliterationName = "NumToTextFormalUpper_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalUpper_ko";
-}
-
-NumToTextInformalLower_ko::NumToTextInformalLower_ko() {
- numberChar = NumberChar[NumberChar_Lower_ko];
- numberMultiplier = multiplier_Lower_zh_TW;
- numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE;
- transliterationName = "NumToTextInformalLower_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalLower_ko";
-}
-
-NumToTextInformalUpper_ko::NumToTextInformalUpper_ko() {
- numberChar = NumberChar[NumberChar_Upper_ko];
- numberMultiplier = multiplier_Lower_zh_TW;
- numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE;
- transliterationName = "NumToTextInformalUpper_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalUpper_ko";
-}
-
-static NumberMultiplier multiplier_Hangul_ko[] = {
- { 12, 0xC870 }, // fourth four digits group, ten billion
- { 8, 0xC5B5 }, // third four digits group, hundred million
- { 4, 0xB9CC }, // second four digits group, ten thousand
- { 3, 0xCC9C }, // Unicode Chinese Lower Thousand
- { 2, 0xBC31 }, // Unicode Chinese Lower Hundred
- { 1, 0xC2ED }, // Unicode Chinese Lower Ten
- { 0, 0x0000 }
-};
-
-NumToTextFormalHangul_ko::NumToTextFormalHangul_ko() {
- numberChar = NumberChar[NumberChar_Hangul_ko];
- numberMultiplier = multiplier_Hangul_ko;
- numberFlag = NUMBER_OMIT_ZERO;
- transliterationName = "NumToTextFormalHangul_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextFormalHangul_ko";
-}
-
-NumToTextInformalHangul_ko::NumToTextInformalHangul_ko() {
- numberChar = NumberChar[NumberChar_Hangul_ko];
- numberMultiplier = multiplier_Hangul_ko;
- numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE;
- transliterationName = "NumToTextInformalHangul_ko";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextInformalHangul_ko";
+sal_Bool SAL_CALL NumToText_CJK::numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len, // renders the digits str[begin, begin+len) into dst and records source positions in offset; returns whether any digit was non-zero
+ sal_Unicode *dst, sal_Int32& count, sal_Unicode multiChar, Sequence< sal_Int32 >& offset, sal_Int32 startPos)
+{
+ if ( len == 1 ) { // base case: one digit, optionally followed by its multiplier character
+ if (str[begin] != NUMBER_ZERO) {
+ if (!(numberFlag & NUMBER_OMIT_ONE) || multiChar == 0 || str[begin] != NUMBER_ONE) { // a "one" is dropped only when NUMBER_OMIT_ONE is set and a multiplier follows it
+ dst[count] = numberChar[str[begin] - NUMBER_ZERO];
+ offset[count++] = begin + startPos;
+ }
+ if (multiChar > 0) {
+ dst[count] = multiChar;
+ offset[count++] = begin + startPos;
+ }
+ } else if (!(numberFlag & NUMBER_OMIT_ZERO) && (count == 0 || dst[count-1] != numberChar[0])) { // emit one zero per run of zeros; the count == 0 test avoids reading dst[-1] when nothing has been written yet
+ dst[count] = numberChar[0];
+ offset[count++] = begin + startPos;
+ }
+ return str[begin] != NUMBER_ZERO;
+ } else {
+ sal_Bool printPower = sal_False;
+ // split the group along the exponent table: each pass renders the leading digits above exponent i (unused local 'last' dropped)
+ for (sal_Int16 i = 1; i <= ExponentCount_CJK; i++) {
+ sal_Int32 tmp = len - (i == ExponentCount_CJK ? 0 : MultiplierExponent_CJK[i]); // the final pass uses exponent 0 and takes whatever digits remain
+ if (tmp > 0) {
+ printPower |= numberMaker(str, begin, tmp, dst, count,
+ (i == ExponentCount_CJK ? 0 : multiplierChar[i]), offset, startPos);
+ begin += tmp;
+ len -= tmp;
+ }
+ }
+ if (printPower) { // the group's own multiplier is written only if the group contained a non-zero digit
+ if (dst[count-1] == numberChar[0])
+ count--; // drop a trailing zero character before appending the multiplier
+ if (multiChar > 0) {
+ dst[count] = multiChar;
+ offset[count++] = begin + startPos;
+ }
+ }
+ return printPower;
+ }
}
-static NumberMultiplier multiplier_Traditional_ja[] = {
- { 9, 0x62FE }, // billion // 10 * 100000000
- { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one"
- { 7, 0x9621 }, // ten million // 1000 * 10000
- { 6, 0x767E }, // million // 100 * 10000
- { 5, 0x62FE }, // hundred thousand // 10 * 10000
- { 4, 0x842C }, // ten thousand // 1 * 10000 // needs a preceding "one"
- { 3, 0x9621 }, // thousand // 1000
- { 2, 0x767E }, // hundred // 100
- { 1, 0x62FE }, // ten // 10
- { 0, 0x0000 } // one // 1 // needs a "one"
-};
-
-NumToTextKanjiLongTraditional_ja_JP::NumToTextKanjiLongTraditional_ja_JP() {
- numberChar = NumberChar[NumberChar_Traditional_ja];
- numberMultiplier = multiplier_Traditional_ja;
- numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE;
- transliterationName = "NumToTextKanjiLongTraditional_ja_JP";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongTraditional_ja_JP";
+#define TRANSLITERATION_NUMTOTEXT( name, _number, flag ) /* defines the constructor NumToText<name>() of a number-to-text transliteration */ \
+NumToText##name::NumToText##name() \
+{ \
+ number = NumberChar_##_number; /* table index, also used for DecimalChar[] / MinusChar[] in transliterate() */ \
+ numberChar = NumberChar[NumberChar_##_number]; /* digit characters of this style */ \
+ multiplierChar = MultiplierChar_CJK[Multiplier_##_number]; /* multiplier characters of this style */ \
+ numberFlag = flag; /* combination of the NUMBER_OMIT_* bits */ \
+ transliterationName = "NumToText"#name; \
+ implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \
}
-
-static NumberMultiplier multiplier_Modern_ja[] = {
- { 9, 0x5341 }, // billion // 10 * 100000000
- { 8, 0x5104 }, // hundred million // 1 * 100000000 // needs a preceding "one"
- { 7, 0x5343 }, // ten million // 1000 * 10000
- { 6, 0x767E }, // million // 100 * 10000
- { 5, 0x5341 }, // hundred thousand // 10 * 10000
- { 4, 0x4E07 }, // ten thousand // 1 * 10000 // needs a preceding "one"
- { 3, 0x5343 }, // thousand // 1000
- { 2, 0x767E }, // hundred // 100
- { 1, 0x5341 }, // ten // 10
- { 0, 0x0000 } // one // 1 // needs a "one"
-};
-
-NumToTextKanjiLongModern_ja_JP::NumToTextKanjiLongModern_ja_JP() {
- numberChar = NumberChar[NumberChar_Modern_ja];
- numberMultiplier = multiplier_Modern_ja;
- numberFlag = NUMBER_OMIT_ZERO | NUMBER_OMIT_ONE;
- transliterationName = "NumToTextKanjiLongModern_ja_JP";
- implementationName = "com.sun.star.i18n.Transliteration.NumToTextKanjiLongModern_ja_JP";
+TRANSLITERATION_NUMTOTEXT( Lower_zh_CN, Lower_zh, 0 ) // flag 0: no digit is omitted
+TRANSLITERATION_NUMTOTEXT( Upper_zh_CN, Upper_zh, 0 )
+TRANSLITERATION_NUMTOTEXT( Lower_zh_TW, Lower_zh, 0 ) // NOTE(review): the removed constructor used a zh_TW-specific multiplier table; this now resolves to Multiplier_Lower_zh - confirm that is intended
+TRANSLITERATION_NUMTOTEXT( Upper_zh_TW, Upper_zh_TW, 0 )
+#define Multiplier_Lower_ko Multiplier_Upper_zh_TW /* alias so that Multiplier_##_number in the macro resolves for Lower_ko */
+#define Multiplier_Upper_ko Multiplier_Upper_zh_TW /* alias so that Multiplier_##_number in the macro resolves for Upper_ko */
+TRANSLITERATION_NUMTOTEXT( FormalLower_ko, Lower_ko, NUMBER_OMIT_ZERO ) // formal Korean styles: zeros are not written
+TRANSLITERATION_NUMTOTEXT( FormalUpper_ko, Upper_ko, NUMBER_OMIT_ZERO )
+TRANSLITERATION_NUMTOTEXT( FormalHangul_ko, Hangul_ko, NUMBER_OMIT_ZERO )
+#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO ) /* all three omission bits together */
+TRANSLITERATION_NUMTOTEXT( InformalLower_ko, Lower_ko, NUMBER_OMIT_ALL ) // informal styles additionally omit a "one" before a multiplier and a lone zero
+TRANSLITERATION_NUMTOTEXT( InformalUpper_ko, Upper_ko, NUMBER_OMIT_ALL )
+TRANSLITERATION_NUMTOTEXT( InformalHangul_ko, Hangul_ko, NUMBER_OMIT_ALL )
+TRANSLITERATION_NUMTOTEXT( KanjiLongTraditional_ja_JP, Traditional_ja, NUMBER_OMIT_ALL )
+TRANSLITERATION_NUMTOTEXT( KanjiLongModern_ja_JP, Modern_ja, NUMBER_OMIT_ALL )
+TRANSLITERATION_NUMTOTEXT( Date_zh, Lower_zh, NUMBER_OMIT_ALL ) // Lower_zh digits with every omission enabled
+#undef TRANSLITERATION_NUMTOTEXT /* the macro name is redefined right below with a different parameter list */
+
+#define TRANSLITERATION_NUMTOTEXT( name ) /* bullet flavour: defines the constructor NumToText<name>() of a table-driven style */ \
+NumToText##name::NumToText##name() \
+{ \
+ numberChar = table_##name; /* table_<name> is presumably declared in data/bullet.h - confirm */ \
+ bulletCount = sizeof(table_##name) / sizeof(sal_Unicode); /* entry count; a non-zero bulletCount switches transliterate() into bullet mode */ \
+ transliterationName = "NumToText"#name; \
+ implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \
}
+TRANSLITERATION_NUMTOTEXT( AIUFullWidth_ja_JP ) // one constructor per bullet table
+TRANSLITERATION_NUMTOTEXT( AIUHalfWidth_ja_JP )
+TRANSLITERATION_NUMTOTEXT( IROHAFullWidth_ja_JP )
+TRANSLITERATION_NUMTOTEXT( IROHAHalfWidth_ja_JP )
+TRANSLITERATION_NUMTOTEXT( CircledNumber )
+#undef TRANSLITERATION_NUMTOTEXT
} } } }
diff --git a/i18npool/source/transliteration/texttonum.cxx b/i18npool/source/transliteration/texttonum.cxx
new file mode 100644
index 000000000000..f2c7f17d9530
--- /dev/null
+++ b/i18npool/source/transliteration/texttonum.cxx
@@ -0,0 +1,204 @@
+/*************************************************************************
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2002 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at http://www.openoffice.org/license.html.
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
+ * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2002 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+
+#define TRANSLITERATION_ALL
+#include <texttonum.hxx>
+#include <data/numberchar.h>
+#include <rtl/ustrbuf.hxx>
+
+using namespace com::sun::star::uno;
+using namespace rtl;
+
+namespace com { namespace sun { namespace star { namespace i18n {
+
+OUString SAL_CALL TextToNum::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, // converts number text in inStr[startPos, startPos+nCount) into digit characters; other characters are copied through
+ Sequence< sal_Int32 >& offset ) throw(RuntimeException) // offset[k] receives the inStr index that produced output character k
+{
+ sal_Int32 strLen = inStr.getLength() - startPos;
+
+ if (nCount > strLen)
+ nCount = strLen; // clamp the request to the characters actually available
+
+ if (nCount > 0) {
+ const sal_Unicode *str = inStr.getStr() + startPos;
+ rtl_uString *newStr = x_rtl_uString_new_WithLength(nCount * MultiplierExponent_CJK[0] + 1); // NOTE(review): newStr is never released in this function - confirm the ownership rules of x_rtl_uString_new_WithLength
+ offset.realloc( nCount * MultiplierExponent_CJK[0] + 1 );
+ sal_Int32 i = 0, count = 0, index;
+
+ OUString numberChar, multiplierChar, decimalChar, minusChar;
+ if (number == 0) { // generic mode: build lookup strings covering every table
+ OUStringBuffer aBuf(NumberChar_Count * 10 + 1);
+ for (i = 0; i < NumberChar_Count; i++)
+ aBuf.append(NumberChar[i], 10); // ten digit characters per table, so index % 10 is the digit value
+ numberChar = aBuf.makeStringAndClear();
+ aBuf.ensureCapacity(Multiplier_Count * ExponentCount_CJK + 1);
+ for (i = 0; i < Multiplier_Count; i++)
+ aBuf.append(MultiplierChar_CJK[i], ExponentCount_CJK); // ExponentCount_CJK multiplier characters per table
+ multiplierChar = aBuf.makeStringAndClear();
+ decimalChar = OUString(DecimalChar);
+ minusChar = OUString(MinusChar);
+ } else { // specific mode: only the tables selected by number / multiplier
+ numberChar = OUString(NumberChar[number], 10);
+ decimalChar = OUString::valueOf(DecimalChar[number]);
+ minusChar = OUString::valueOf(MinusChar[number]);
+ multiplierChar = OUString(MultiplierChar_CJK[multiplier], ExponentCount_CJK);
+ }
+ i = 0; // restart at the first input character: i served as the loop counter while the generic lookup strings were built
+ while (i < nCount) {
+ if ((index = multiplierChar.indexOf(str[i])) >= 0) {
+ if (count == 0 || !isNumber(newStr->buffer[count-1])) { // add 1 in front of multiplier
+ newStr->buffer[count] = NUMBER_ONE;
+ offset[count++] = i;
+ }
+ index = MultiplierExponent_CJK[index % ExponentCount_CJK]; // position inside its table -> exponent of the multiplier
+ numberMaker(index, index, str, i, nCount, newStr->buffer, count, offset,
+ numberChar, multiplierChar); // advances i past everything it consumes
+ } else {
+ if ((index = numberChar.indexOf(str[i])) >= 0)
+ newStr->buffer[count] = (index % 10) + NUMBER_ZERO;
+ else if ((index = decimalChar.indexOf(str[i])) >= 0 &&
+ (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
+ multiplierChar.indexOf(str[i+1]) >= 0)))
+ // Only when the decimal point is followed by a number
+ // is it converted to the ASCII decimal point
+ newStr->buffer[count] = NUMBER_DECIMAL;
+ else if ((index = minusChar.indexOf(str[i])) >= 0 &&
+ (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
+ multiplierChar.indexOf(str[i+1]) >= 0)))
+ // Only when the minus is followed by a number
+ // is it converted to the ASCII minus sign
+ newStr->buffer[count] = NUMBER_MINUS;
+ else
+ newStr->buffer[count] = str[i]; // anything else is copied unchanged
+ offset[count++] = i++;
+ }
+ }
+
+ offset.realloc(count);
+ for (i = 0; i < count; i++)
+ offset[i] += startPos; // positions were recorded relative to str; make them relative to inStr
+ return OUString(newStr->buffer, count);
+ }
+ return OUString();
+}
+
+void SAL_CALL TextToNum::numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str, sal_Int32& i, // expands the text following the multiplier at str[i] into digits appended to dst; prev is that multiplier's exponent, max the largest exponent seen so far
+ sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >& offset,
+ OUString& numberChar, OUString& multiplierChar)
+{
+ sal_Int16 curr = 0, num = 0, end = 0, shift = 0;
+ while (++i < nCount) { // i is a reference, so the caller continues from wherever this scan stops
+ if ((curr = numberChar.indexOf(str[i])) >= 0) {
+ if (num > 0)
+ break; // a non-zero digit is already pending: stop and leave this character to the caller
+ num = curr % 10; // digit value of the character within its ten-character table
+ } else if ((curr = multiplierChar.indexOf(str[i])) >= 0) {
+ curr = MultiplierExponent_CJK[curr % ExponentCount_CJK]; // position inside its table -> MultiplierExponent_CJK entry
+ if (prev > curr && num == 0) num = 1; // One may be omitted in informal format
+ shift = end = 0;
+ if (curr >= max)
+ max = curr; // new largest exponent so far
+ else if (curr > prev)
+ shift = max - curr; // larger than the previous multiplier but below max: the digits written below get moved afterwards
+ else
+ end = curr; // smaller multiplier: only prev - curr digits are written for the previous one
+ while (end++ < prev) { // writes zeros, with the pending digit num in the last position
+ dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
+ offset[count++] = i;
+ }
+ if (shift) { // NOTE(review): appears to fold the digits just written into the higher group emitted earlier - confirm the index arithmetic
+ count -= max;
+ for (sal_Int16 j = 0; j < shift; j++, count++) {
+ dst[count] = dst[count + curr];
+ offset[count] = offset[count + curr];
+ }
+ max = curr;
+ }
+ numberMaker(max, curr, str, i, nCount, dst, count, offset, numberChar, multiplierChar); // continue after this multiplier, which becomes the new prev
+ return;
+ } else
+ break; // neither digit nor multiplier: the number text ends here
+ }
+ while (end++ < prev) { // end of the number: fill the remaining positions, pending digit last
+ dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
+ offset[count++] = i - 1;
+ }
+}
+
+TextToNum::TextToNum() // generic variant, not bound to one particular number table
+{
+ number = multiplier = 0; // number == 0 makes transliterate() match against every NumberChar / MultiplierChar_CJK table
+ transliterationName = "TextToNum";
+ implementationName = "com.sun.star.i18n.Transliteration.TextToNum";
+}
+
+#define TRANSLITERATION_TEXTTONUM( name, _number ) /* defines the constructor TextToNum<name>() bound to one number table */ \
+TextToNum##name::TextToNum##name() \
+{ \
+ number = NumberChar_##_number; /* index into NumberChar / DecimalChar / MinusChar used by transliterate() */ \
+ multiplier = Multiplier_##_number; /* index into MultiplierChar_CJK used by transliterate() */ \
+ transliterationName = "TextToNum"#name; \
+ implementationName = "com.sun.star.i18n.Transliteration.TextToNum"#name; \
+}
+
+TRANSLITERATION_TEXTTONUM( Lower_zh_CN, Lower_zh ) // one constructor per supported style; the second argument picks the tables
+TRANSLITERATION_TEXTTONUM( Upper_zh_CN, Upper_zh )
+TRANSLITERATION_TEXTTONUM( Lower_zh_TW, Lower_zh )
+TRANSLITERATION_TEXTTONUM( Upper_zh_TW, Upper_zh_TW )
+#define Multiplier_Lower_ko Multiplier_Upper_zh_TW /* alias so that Multiplier_##_number in the macro resolves for Lower_ko */
+#define Multiplier_Upper_ko Multiplier_Upper_zh_TW /* alias so that Multiplier_##_number in the macro resolves for Upper_ko */
+TRANSLITERATION_TEXTTONUM( FormalLower_ko, Lower_ko ) // formal and informal variants are bound to the same tables
+TRANSLITERATION_TEXTTONUM( FormalUpper_ko, Upper_ko )
+TRANSLITERATION_TEXTTONUM( FormalHangul_ko, Hangul_ko )
+TRANSLITERATION_TEXTTONUM( InformalLower_ko, Lower_ko )
+TRANSLITERATION_TEXTTONUM( InformalUpper_ko, Upper_ko )
+TRANSLITERATION_TEXTTONUM( InformalHangul_ko, Hangul_ko )
+TRANSLITERATION_TEXTTONUM( KanjiLongTraditional_ja_JP, Traditional_ja )
+TRANSLITERATION_TEXTTONUM( KanjiLongModern_ja_JP, Modern_ja )
+#undef TRANSLITERATION_TEXTTONUM
+
+} } } }