summaryrefslogtreecommitdiff
path: root/transex3
diff options
context:
space:
mode:
authorNils Fuhrmann <nf@openoffice.org>2001-12-05 10:30:49 +0000
committerNils Fuhrmann <nf@openoffice.org>2001-12-05 10:30:49 +0000
commit3a6ea1f063ec40f1b15be61bdc3b8ef73c357368 (patch)
tree980e8562ab49f001d6fb94f9184518fed517aa58 /transex3
parent1d56f010dde669ddbdfea9a9769dff609c6c1c52 (diff)
Converter for half width katakana to full width katakana
Diffstat (limited to 'transex3')
-rw-r--r--transex3/source/hw2fw.cxx169
-rw-r--r--transex3/source/makefile.mk11
-rw-r--r--transex3/source/txtconv.cxx25
3 files changed, 194 insertions, 11 deletions
diff --git a/transex3/source/hw2fw.cxx b/transex3/source/hw2fw.cxx
new file mode 100644
index 000000000000..4989319e2e90
--- /dev/null
+++ b/transex3/source/hw2fw.cxx
@@ -0,0 +1,169 @@
+#include <tools/string.hxx>
+
+// One entry of a character translation table: the character code to look
+// for and the character code that replaces it.
+struct hw_pair
+{
+ sal_Unicode nFrom; // character code to be matched
+ sal_Unicode nTo; // character code it is replaced with
+};
+
+// Builds the brace initializer for one hw_pair: a = nFrom, b = nTo.
+#define MAKE_PAIR(a,b) { a, b }
+
+// Translation table from the half-width katakana range 0xFF65..0xFF9F to the
+// corresponding full-width characters.
+//
+// The entries must stay in ascending, gap-free order of nFrom starting at
+// 0xFF65: ImplReplaceFullWidth does not search this table, it indexes it
+// directly with (character - 0xff65).
+//
+// The two sound marks at the end map to the COMBINING marks 0x3099 / 0x309A;
+// ImplReplaceFullWidth afterwards tries to merge them with the preceding
+// character using aCombine3099 / aCombine309A.
+static struct hw_pair aHWPairs[] =
+{
+ MAKE_PAIR( 0xFF65, 0x30FB ), // HALFWIDTH KATAKANA MIDDLE DOT --> KATAKANA MIDDLE DOT
+ MAKE_PAIR( 0xFF66, 0x30F2 ), // HALFWIDTH KATAKANA LETTER WO --> KATAKANA LETTER WO
+ MAKE_PAIR( 0xFF67, 0x30A1 ), // HALFWIDTH KATAKANA LETTER SMALL A --> KATAKANA LETTER SMALL A
+ MAKE_PAIR( 0xFF68, 0x30A3 ), // HALFWIDTH KATAKANA LETTER SMALL I --> KATAKANA LETTER SMALL I
+ MAKE_PAIR( 0xFF69, 0x30A5 ), // HALFWIDTH KATAKANA LETTER SMALL U --> KATAKANA LETTER SMALL U
+ MAKE_PAIR( 0xFF6A, 0x30A7 ), // HALFWIDTH KATAKANA LETTER SMALL E --> KATAKANA LETTER SMALL E
+ MAKE_PAIR( 0xFF6B, 0x30A9 ), // HALFWIDTH KATAKANA LETTER SMALL O --> KATAKANA LETTER SMALL O
+ MAKE_PAIR( 0xFF6C, 0x30E3 ), // HALFWIDTH KATAKANA LETTER SMALL YA --> KATAKANA LETTER SMALL YA
+ MAKE_PAIR( 0xFF6D, 0x30E5 ), // HALFWIDTH KATAKANA LETTER SMALL YU --> KATAKANA LETTER SMALL YU
+ MAKE_PAIR( 0xFF6E, 0x30E7 ), // HALFWIDTH KATAKANA LETTER SMALL YO --> KATAKANA LETTER SMALL YO
+ MAKE_PAIR( 0xFF6F, 0x30C3 ), // HALFWIDTH KATAKANA LETTER SMALL TU --> KATAKANA LETTER SMALL TU
+ MAKE_PAIR( 0xFF70, 0x30FC ), // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK --> KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ MAKE_PAIR( 0xFF71, 0x30A2 ), // HALFWIDTH KATAKANA LETTER A --> KATAKANA LETTER A
+ MAKE_PAIR( 0xFF72, 0x30A4 ), // HALFWIDTH KATAKANA LETTER I --> KATAKANA LETTER I
+ MAKE_PAIR( 0xFF73, 0x30A6 ), // HALFWIDTH KATAKANA LETTER U --> KATAKANA LETTER U
+ MAKE_PAIR( 0xFF74, 0x30A8 ), // HALFWIDTH KATAKANA LETTER E --> KATAKANA LETTER E
+ MAKE_PAIR( 0xFF75, 0x30AA ), // HALFWIDTH KATAKANA LETTER O --> KATAKANA LETTER O
+ MAKE_PAIR( 0xFF76, 0x30AB ), // HALFWIDTH KATAKANA LETTER KA --> KATAKANA LETTER KA
+ MAKE_PAIR( 0xFF77, 0x30AD ), // HALFWIDTH KATAKANA LETTER KI --> KATAKANA LETTER KI
+ MAKE_PAIR( 0xFF78, 0x30AF ), // HALFWIDTH KATAKANA LETTER KU --> KATAKANA LETTER KU
+ MAKE_PAIR( 0xFF79, 0x30B1 ), // HALFWIDTH KATAKANA LETTER KE --> KATAKANA LETTER KE
+ MAKE_PAIR( 0xFF7A, 0x30B3 ), // HALFWIDTH KATAKANA LETTER KO --> KATAKANA LETTER KO
+ MAKE_PAIR( 0xFF7B, 0x30B5 ), // HALFWIDTH KATAKANA LETTER SA --> KATAKANA LETTER SA
+ MAKE_PAIR( 0xFF7C, 0x30B7 ), // HALFWIDTH KATAKANA LETTER SI --> KATAKANA LETTER SI
+ MAKE_PAIR( 0xFF7D, 0x30B9 ), // HALFWIDTH KATAKANA LETTER SU --> KATAKANA LETTER SU
+ MAKE_PAIR( 0xFF7E, 0x30BB ), // HALFWIDTH KATAKANA LETTER SE --> KATAKANA LETTER SE
+ MAKE_PAIR( 0xFF7F, 0x30BD ), // HALFWIDTH KATAKANA LETTER SO --> KATAKANA LETTER SO
+ MAKE_PAIR( 0xFF80, 0x30BF ), // HALFWIDTH KATAKANA LETTER TA --> KATAKANA LETTER TA
+ MAKE_PAIR( 0xFF81, 0x30C1 ), // HALFWIDTH KATAKANA LETTER TI --> KATAKANA LETTER TI
+ MAKE_PAIR( 0xFF82, 0x30C4 ), // HALFWIDTH KATAKANA LETTER TU --> KATAKANA LETTER TU
+ MAKE_PAIR( 0xFF83, 0x30C6 ), // HALFWIDTH KATAKANA LETTER TE --> KATAKANA LETTER TE
+ MAKE_PAIR( 0xFF84, 0x30C8 ), // HALFWIDTH KATAKANA LETTER TO --> KATAKANA LETTER TO
+ MAKE_PAIR( 0xFF85, 0x30CA ), // HALFWIDTH KATAKANA LETTER NA --> KATAKANA LETTER NA
+ MAKE_PAIR( 0xFF86, 0x30CB ), // HALFWIDTH KATAKANA LETTER NI --> KATAKANA LETTER NI
+ MAKE_PAIR( 0xFF87, 0x30CC ), // HALFWIDTH KATAKANA LETTER NU --> KATAKANA LETTER NU
+ MAKE_PAIR( 0xFF88, 0x30CD ), // HALFWIDTH KATAKANA LETTER NE --> KATAKANA LETTER NE
+ MAKE_PAIR( 0xFF89, 0x30CE ), // HALFWIDTH KATAKANA LETTER NO --> KATAKANA LETTER NO
+ MAKE_PAIR( 0xFF8A, 0x30CF ), // HALFWIDTH KATAKANA LETTER HA --> KATAKANA LETTER HA
+ MAKE_PAIR( 0xFF8B, 0x30D2 ), // HALFWIDTH KATAKANA LETTER HI --> KATAKANA LETTER HI
+ MAKE_PAIR( 0xFF8C, 0x30D5 ), // HALFWIDTH KATAKANA LETTER HU --> KATAKANA LETTER HU
+ MAKE_PAIR( 0xFF8D, 0x30D8 ), // HALFWIDTH KATAKANA LETTER HE --> KATAKANA LETTER HE
+ MAKE_PAIR( 0xFF8E, 0x30DB ), // HALFWIDTH KATAKANA LETTER HO --> KATAKANA LETTER HO
+ MAKE_PAIR( 0xFF8F, 0x30DE ), // HALFWIDTH KATAKANA LETTER MA --> KATAKANA LETTER MA
+ MAKE_PAIR( 0xFF90, 0x30DF ), // HALFWIDTH KATAKANA LETTER MI --> KATAKANA LETTER MI
+ MAKE_PAIR( 0xFF91, 0x30E0 ), // HALFWIDTH KATAKANA LETTER MU --> KATAKANA LETTER MU
+ MAKE_PAIR( 0xFF92, 0x30E1 ), // HALFWIDTH KATAKANA LETTER ME --> KATAKANA LETTER ME
+ MAKE_PAIR( 0xFF93, 0x30E2 ), // HALFWIDTH KATAKANA LETTER MO --> KATAKANA LETTER MO
+ MAKE_PAIR( 0xFF94, 0x30E4 ), // HALFWIDTH KATAKANA LETTER YA --> KATAKANA LETTER YA
+ MAKE_PAIR( 0xFF95, 0x30E6 ), // HALFWIDTH KATAKANA LETTER YU --> KATAKANA LETTER YU
+ MAKE_PAIR( 0xFF96, 0x30E8 ), // HALFWIDTH KATAKANA LETTER YO --> KATAKANA LETTER YO
+ MAKE_PAIR( 0xFF97, 0x30E9 ), // HALFWIDTH KATAKANA LETTER RA --> KATAKANA LETTER RA
+ MAKE_PAIR( 0xFF98, 0x30EA ), // HALFWIDTH KATAKANA LETTER RI --> KATAKANA LETTER RI
+ MAKE_PAIR( 0xFF99, 0x30EB ), // HALFWIDTH KATAKANA LETTER RU --> KATAKANA LETTER RU
+ MAKE_PAIR( 0xFF9A, 0x30EC ), // HALFWIDTH KATAKANA LETTER RE --> KATAKANA LETTER RE
+ MAKE_PAIR( 0xFF9B, 0x30ED ), // HALFWIDTH KATAKANA LETTER RO --> KATAKANA LETTER RO
+ MAKE_PAIR( 0xFF9C, 0x30EF ), // HALFWIDTH KATAKANA LETTER WA --> KATAKANA LETTER WA
+ MAKE_PAIR( 0xFF9D, 0x30F3 ), // HALFWIDTH KATAKANA LETTER N --> KATAKANA LETTER N
+ MAKE_PAIR( 0xFF9E, 0x3099 ), // HALFWIDTH KATAKANA VOICED SOUND MARK --> COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
+ MAKE_PAIR( 0xFF9F, 0x309A ) // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK --> COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+};
+
+
+// Composition table for the combining voiced sound mark (0x3099).
+// nFrom is the base character that precedes the mark, nTo is the single
+// precomposed character that replaces "base + mark".
+// ImplReplaceFullWidth searches this table linearly; a base character that
+// is not listed keeps the separate combining mark behind it.
+// 0x30f0, 0x30f1 and 0x30fd are never produced by aHWPairs, so those entries
+// only match characters that were already full-width in the input.
+static struct hw_pair aCombine3099[] =
+{
+ { 0x30a6, 0x30f4 }, // U --> VU
+ { 0x30ab, 0x30ac }, // KA --> GA
+ { 0x30ad, 0x30ae }, // KI --> GI
+ { 0x30af, 0x30b0 }, // KU --> GU
+ { 0x30b1, 0x30b2 }, // KE --> GE
+ { 0x30b3, 0x30b4 }, // KO --> GO
+ { 0x30b5, 0x30b6 }, // SA --> ZA
+ { 0x30b7, 0x30b8 }, // SI --> ZI
+ { 0x30b9, 0x30ba }, // SU --> ZU
+ { 0x30bb, 0x30bc }, // SE --> ZE
+ { 0x30bd, 0x30be }, // SO --> ZO
+ { 0x30bf, 0x30c0 }, // TA --> DA
+ { 0x30c1, 0x30c2 }, // TI --> DI
+ { 0x30c4, 0x30c5 }, // TU --> DU
+ { 0x30c6, 0x30c7 }, // TE --> DE
+ { 0x30c8, 0x30c9 }, // TO --> DO
+ { 0x30cf, 0x30d0 }, // HA --> BA
+ { 0x30d2, 0x30d3 }, // HI --> BI
+ { 0x30d5, 0x30d6 }, // HU --> BU
+ { 0x30d8, 0x30d9 }, // HE --> BE
+ { 0x30db, 0x30dc }, // HO --> BO
+ { 0x30ef, 0x30f7 }, // WA --> VA
+ { 0x30f0, 0x30f8 }, // WI --> VI
+ { 0x30f1, 0x30f9 }, // WE --> VE
+ { 0x30f2, 0x30fa }, // WO --> VO
+ { 0x30fd, 0x30fe } // KATAKANA ITERATION MARK --> KATAKANA VOICED ITERATION MARK
+};
+
+// Composition table for the combining semi-voiced sound mark (0x309A).
+// nFrom is the base character that precedes the mark, nTo is the single
+// precomposed character that replaces "base + mark".
+// Searched linearly by ImplReplaceFullWidth.
+static struct hw_pair aCombine309A[] =
+{
+ { 0x30cf, 0x30d1 }, // HA --> PA
+ { 0x30d2, 0x30d4 }, // HI --> PI
+ { 0x30d5, 0x30d7 }, // HU --> PU
+ { 0x30d8, 0x30da }, // HE --> PE
+ { 0x30db, 0x30dd } // HO --> PO
+};
+
+// Converts half-width katakana (0xFF65..0xFF9F) in a character buffer to
+// full-width katakana, in place.
+//
+// Every half-width character is translated through aHWPairs. A half-width
+// voiced / semi-voiced sound mark is first translated to the combining mark
+// 0x3099 / 0x309A; if the character written directly before it has an entry
+// in aCombine3099 / aCombine309A, both are merged into the single
+// precomposed character and the text becomes one character shorter.
+// Otherwise the combining mark is kept as a character of its own.
+// All other characters are copied unchanged.
+//
+// pString  buffer to convert; it is modified in place
+// nLen     number of characters of pString to process
+// returns  number of characters after the conversion (never more than
+//          nLen). If the text became shorter, a 0 is stored directly behind
+//          the converted text.
+//
+// Note: despite its name this function replaces half-width characters, not
+// full-width ones.
+int ImplReplaceFullWidth( sal_Unicode* pString, int nLen )
+{
+    sal_Unicode* pRead = pString;
+    sal_Unicode* pWrite = pString;
+    int nNewLen = nLen;
+
+    while( (pRead - pString) < nLen )
+    {
+        const sal_Unicode cIn = *pRead;
+
+        if( cIn >= 0xff65 && cIn <= 0xff9f )
+        {
+            // aHWPairs is ordered and gap-free, so it can be indexed directly
+            const sal_Unicode cOut = aHWPairs[ cIn - 0xff65 ].nTo;
+            *pWrite = cOut;
+
+            // pick the composition table that belongs to a combining mark
+            const struct hw_pair* pTable = NULL;
+            int nTableEntries = 0;
+            if( cOut == 0x3099 )
+            {
+                // replace 0x3099 combinations
+                pTable = aCombine3099;
+                nTableEntries = sizeof(aCombine3099)/sizeof(aCombine3099[0]);
+            }
+            else if( cOut == 0x309a )
+            {
+                // replace 0x309a combinations
+                pTable = aCombine309A;
+                nTableEntries = sizeof(aCombine309A)/sizeof(aCombine309A[0]);
+            }
+
+            // A mark at the very start of the buffer has no preceding
+            // character: looking at pWrite[-1] there would read (and, on an
+            // accidental match, write) in front of the buffer.
+            if( pTable && pWrite > pString )
+            {
+                const sal_Unicode cPrev = pWrite[-1];
+                for( int i = 0; i < nTableEntries; i++ )
+                {
+                    if( cPrev == pTable[i].nFrom )
+                    {
+                        // overwrite the base character with the precomposed
+                        // one; the mark itself is dropped
+                        pWrite--;
+                        *pWrite = pTable[i].nTo;
+                        nNewLen--;
+                        break;
+                    }
+                }
+            }
+        }
+        else if( pWrite != pRead )
+        {
+            // earlier merges moved the write position back: close the gap
+            *pWrite = cIn;
+        }
+
+        pRead++;
+        pWrite++;
+    }
+
+    // terminate the shortened text
+    if( pWrite < pRead )
+        *pWrite = 0;
+
+    return nNewLen;
+}
+
+// Converts all half-width katakana in rString to full-width katakana.
+// The conversion runs directly on the string's buffer via
+// ImplReplaceFullWidth; the buffer is then released with the new length
+// that function reports, which may be shorter than before.
+void ConvertHalfwitdhToFullwidth( String& rString )
+{
+    sal_Unicode* pBuffer = rString.GetBufferAccess();
+    const int nConvertedLen = ImplReplaceFullWidth( pBuffer, rString.Len() );
+    rString.ReleaseBufferAccess( nConvertedLen );
+}
diff --git a/transex3/source/makefile.mk b/transex3/source/makefile.mk
index 6430c1061426..97222d61f844 100644
--- a/transex3/source/makefile.mk
+++ b/transex3/source/makefile.mk
@@ -2,9 +2,9 @@
#
# $RCSfile: makefile.mk,v $
#
-# $Revision: 1.19 $
+# $Revision: 1.20 $
#
-# last change: $Author: nf $ $Date: 2001-08-23 14:13:41 $
+# last change: $Author: nf $ $Date: 2001-12-05 11:30:49 $
#
# The Contents of this file are made available subject to the terms of
# either of the following licenses
@@ -84,7 +84,8 @@ OBJFILES= \
$(OBJ)$/wtratree.obj \
$(OBJ)$/wtranode.obj \
$(OBJ)$/srciter.obj \
- $(OBJ)$/utf8conv.obj
+ $(OBJ)$/utf8conv.obj \
+ $(OBJ)$/hw2fw.obj
LIB1TARGET= $(LB)$/transex.lib
LIB1ARCHIV= $(LB)$/libtransex.a
@@ -148,7 +149,7 @@ APP8STDLIBS=$(BTSTRPLIB) $(STATIC_LIBS)
# encoding converter for text files
#APP9TARGET= txtconv
#APP9STACK= 16000
-#APP9OBJS= $(OBJ)$/utf8conv.obj $(OBJ)$/txtconv.obj
+#APP9OBJS= $(OBJ)$/utf8conv.obj $(OBJ)$/txtconv.obj $(OBJ)$/hw2fw.obj
#APP9STDLIBS=$(STATIC_LIBS)
# localizer for l10n framework
@@ -156,7 +157,7 @@ APP9TARGET= localize
APP9STACK= 16000
APP9OBJS= $(OBJ)$/localize.obj $(OBJ)$/utf8conv.obj $(OBJ)$/srciter.obj $(OBJ)$/export2.obj
APP9STDLIBS+=$(BTSTRPLIB) $(STATIC_LIBS)
-#APP9LIBS+= $(LB)$/$(TARGET).lib
+APP9LIBS+= $(LB)$/$(TARGET).lib
DEPOBJFILES=$(APP1OBJS) $(APP2OBJS) $(APP3OBJS) $(APP4OBJS) $(APP5OBJS) $(APP6OBJS) $(APP7OBJS) $(APP8OBJS) $(APP9OBJS)
diff --git a/transex3/source/txtconv.cxx b/transex3/source/txtconv.cxx
index b0bab36aeb37..7e96562c38c1 100644
--- a/transex3/source/txtconv.cxx
+++ b/transex3/source/txtconv.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: txtconv.cxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: nf $ $Date: 2001-05-22 14:11:52 $
+ * last change: $Author: nf $ $Date: 2001-12-05 11:30:10 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -65,6 +65,8 @@
// local includes
#include "utf8conv.hxx"
+extern void ConvertHalfwitdhToFullwidth( String& rString );
+
/*****************************************************************************/
void Help()
/*****************************************************************************/
@@ -91,6 +93,7 @@ void Help()
fprintf( stdout, " MS_1254 => Turkish\n" );
fprintf( stdout, " MS_1255 => Hebrew\n" );
fprintf( stdout, " MS_1256 => Arabic\n" );
+ fprintf( stdout, " HW2FW => Only with -t, converts half to full width katakana" );
fprintf( stdout, "\n" );
}
@@ -110,6 +113,8 @@ int _cdecl main( int argc, char *argv[] )
if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
rtl_TextEncoding nEncoding;
+ BOOL bHW2FW = FALSE;
+
ByteString sCharset( argv[ 2 ] );
sCharset.ToUpperAscii();
@@ -125,6 +130,7 @@ int _cdecl main( int argc, char *argv[] )
else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255;
else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256;
else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257;
+ else if (( sCharset == "HW2FW" ) && ( ByteString( argv[ 1 ] ) == "-t" )) bHW2FW = TRUE;
else {
Help();
@@ -159,10 +165,17 @@ int _cdecl main( int argc, char *argv[] )
while ( !aGSI.IsEof()) {
aGSI.ReadLine( sGSILine );
- if ( ByteString( argv[ 1 ] ) == "-t" )
- sGSILine = UTF8Converter::ConvertToUTF8( sGSILine, nEncoding );
- else
- sGSILine = UTF8Converter::ConvertFromUTF8( sGSILine, nEncoding );
+ if ( bHW2FW ) {
+ String sConverter( sGSILine, RTL_TEXTENCODING_UTF8 );
+ ConvertHalfwitdhToFullwidth( sConverter );
+ sGSILine = ByteString( sConverter, RTL_TEXTENCODING_UTF8 );
+ }
+ else {
+ if ( ByteString( argv[ 1 ] ) == "-t" )
+ sGSILine = UTF8Converter::ConvertToUTF8( sGSILine, nEncoding );
+ else
+ sGSILine = UTF8Converter::ConvertFromUTF8( sGSILine, nEncoding );
+ }
if ( aOutput.IsOpen())
aOutput.WriteLine( sGSILine );