summaryrefslogtreecommitdiff
path: root/i18npool/source/textconversion/genconv_dict.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/textconversion/genconv_dict.cxx')
-rw-r--r--i18npool/source/textconversion/genconv_dict.cxx348
1 files changed, 330 insertions, 18 deletions
diff --git a/i18npool/source/textconversion/genconv_dict.cxx b/i18npool/source/textconversion/genconv_dict.cxx
index dfd6acffe2a7..9b292d4b73df 100644
--- a/i18npool/source/textconversion/genconv_dict.cxx
+++ b/i18npool/source/textconversion/genconv_dict.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: genconv_dict.cxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: vg $ $Date: 2003-04-17 17:53:26 $
+ * last change: $Author: rt $ $Date: 2004-09-17 13:56:53 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -68,19 +68,25 @@
using namespace ::rtl;
+void make_hhc_char(FILE *sfp, FILE *cfp);
+void make_stc_char(FILE *sfp, FILE *cfp);
+void make_stc_word(FILE *sfp, FILE *cfp);
+
/* Main Procedure */
-int SAL_CALL main(int argc, char* argv[]) {
+int SAL_CALL main(int argc, char* argv[])
+{
FILE *sfp, *cfp;
- if (argc < 3) exit(-1);
+ if (argc < 4) exit(-1);
+
- sfp = fopen(argv[1], "rb"); // open the source file for read;
+ sfp = fopen(argv[2], "rb"); // open the source file for read;
if (sfp == NULL)
printf("Open the dictionary source file failed.");
// create the C source file to write
- cfp = fopen(argv[2], "wb");
+ cfp = fopen(argv[3], "wb");
if (cfp == NULL) {
fclose(sfp);
printf("Can't create the C source file.");
@@ -95,6 +101,24 @@ int SAL_CALL main(int argc, char* argv[]) {
fprintf(cfp, "#include <textconversion.hxx>\n");
fprintf(cfp, "\nnamespace com { namespace sun { namespace star { namespace i18n {\n");
+ if (strcmp(argv[1], "hhc_char") == 0)
+ make_hhc_char(sfp, cfp);
+ else if (strcmp(argv[1], "stc_char") == 0)
+ make_stc_char(sfp, cfp);
+ else if (strcmp(argv[1], "stc_word") == 0)
+ make_stc_word(sfp, cfp);
+
+ fprintf (cfp, "} } } }\n");
+
+ fclose(sfp);
+ fclose(cfp);
+
+ return 0;
+} // end of main
+
+// Hangul/Hanja character conversion
+void make_hhc_char(FILE *sfp, FILE *cfp)
+{
sal_Int32 count, address, i, j, k;
sal_Unicode Hanja2HangulData[0x10000];
for (i = 0; i < 0x10000; i++) {
@@ -105,26 +129,26 @@ int SAL_CALL main(int argc, char* argv[]) {
// generate main dict. data array
fprintf(cfp, "\nstatic const sal_Unicode Hangul2HanjaData[] = {");
- sal_Char str[1024];
+ sal_Char Cstr[1024];
count = 0;
address = 0;
- while (fgets(str, 1024, sfp)) {
+ while (fgets(Cstr, 1024, sfp)) {
// input file is in UTF-8 encoding (Hangul:Hanja)
// don't convert last new line character to Ostr.
- OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
- const sal_Unicode *str = Ostr.getStr();
+ OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
+ const sal_Unicode *Ustr = Ostr.getStr();
sal_Int32 len = Ostr.getLength();
- Hangul2HanjaData[count][0] = str[0];
+ Hangul2HanjaData[count][0] = Ustr[0];
Hangul2HanjaData[count][1] = address;
Hangul2HanjaData[count][2] = len - 2;
count++;
for (i = 2; i < len; i++) {
- Hanja2HangulData[str[i]] = str[0];
+ Hanja2HangulData[Ustr[i]] = Ustr[0];
if (address++ % 16 == 0)
fprintf(cfp, "\n\t");
- fprintf(cfp, "0x%04x, ", str[i]);
+ fprintf(cfp, "0x%04x, ", Ustr[i]);
}
}
fprintf(cfp, "\n};\n");
@@ -177,10 +201,298 @@ int SAL_CALL main(int argc, char* argv[]) {
fprintf (cfp, "\tconst sal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(Hangul_Index); }\n");
fprintf (cfp, "\tconst sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n");
fprintf (cfp, "\tconst sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n");
- fprintf (cfp, "} } } }\n");
+}
- fclose(sfp);
- fclose(cfp);
+// Reports whether any of the 0x100 entries of the given 256-character page
+// (page index 0..0xFF) of a 0x10000-entry mapping table is non-zero.
+static bool stc_char_page_used(const sal_Unicode *table, sal_Int32 page)
+{
+    for (sal_Int32 k = 0; k < 0x100; k++) {
+        if (table[page * 0x100 + k] != 0)
+            return true;
+    }
+    return false;
+}
+
+// Writes one mapping table to cfp as a two-level lookup:
+//   - indexName: 0x100 sal_uInt16 entries, one per 256-character page. A page
+//     with at least one mapping gets the offset of its data block (multiples
+//     of 0x100, handed out in page order); a page without mappings gets 0xFFFF.
+//   - dataName: the 0x100 entries of every used page, in page order, with
+//     unmapped characters written as 0xFFFF.
+static void write_stc_char_table(FILE *cfp, const sal_Unicode *table,
+                                 const char *indexName, const char *dataName)
+{
+    sal_Int32 row, col;
+
+    fprintf(cfp, "\nstatic const sal_uInt16 %s[] = {", indexName);
+    sal_Int32 address = 0;
+    for (row = 0; row < 0x10; row++) {
+        fprintf(cfp, "\n\t");
+        for (col = 0; col < 0x10; col++) {
+            const bool used = stc_char_page_used(table, row * 0x10 + col);
+            fprintf(cfp, "0x%04x, ",
+                    used ? static_cast<unsigned int>((address++) * 0x100) : 0xFFFFu);
+        }
+    }
+    fprintf(cfp, "\n};\n");
+
+    fprintf(cfp, "\nstatic const sal_Unicode %s[] = {", dataName);
+    for (sal_Int32 page = 0; page < 0x100; page++) {
+        if (!stc_char_page_used(table, page))
+            continue;
+        // 16 output rows of 16 values each cover the whole page.
+        for (row = 0; row < 0x10; row++) {
+            fprintf(cfp, "\n\t");
+            for (col = 0; col < 0x10; col++) {
+                const sal_Unicode c = table[page * 0x100 + row * 0x10 + col];
+                fprintf(cfp, "0x%04x, ", c ? static_cast<unsigned int>(c) : 0xFFFFu);
+            }
+        }
+    }
+    fprintf(cfp, "\n};\n");
+}
+
+// Simplified/Traditional Chinese character conversion
+//
+// Reads the UTF-8 dictionary from sfp line by line and writes the generated
+// lookup tables and their accessor functions to cfp.
+//
+// Each usable line holds the Simplified character, a one-character separator,
+// and one or more target characters. A 'v' separator stores the first target
+// in the S2V (variant) table; any other separator stores it in the S2T table
+// and also seeds S2V when that slot is still empty. Every target character of
+// the line is mapped back to the Simplified character in the T2S table.
+//
+// Lines with fewer than three UTF-16 code units after the line ending has
+// been removed are skipped, because they cannot hold a source character, a
+// separator and a target.
+void make_stc_char(FILE *sfp, FILE *cfp)
+{
+    sal_Unicode SChinese2TChineseData[0x10000];
+    sal_Unicode SChinese2VChineseData[0x10000];
+    sal_Unicode TChinese2SChineseData[0x10000];
+    memset(SChinese2TChineseData, 0, sizeof(SChinese2TChineseData));
+    memset(SChinese2VChineseData, 0, sizeof(SChinese2VChineseData));
+    memset(TChinese2SChineseData, 0, sizeof(TChinese2SChineseData));
+
+    sal_Char Cstr[1024];
+    while (fgets(Cstr, 1024, sfp)) {
+        // input file is in UTF-8 encoding (SChinese:TChinese)
+        // Strip the line ending explicitly: the final line may have no
+        // newline at all, and CRLF files leave a '\r' in front of the '\n'.
+        size_t bytes = strlen(Cstr);
+        while (bytes > 0 && (Cstr[bytes - 1] == '\n' || Cstr[bytes - 1] == '\r'))
+            bytes--;
+        if (bytes == 0)
+            continue;
+
+        OUString Ostr(Cstr, static_cast<sal_Int32>(bytes), RTL_TEXTENCODING_UTF8);
+        const sal_Unicode *Ustr = Ostr.getStr();
+        const sal_Int32 len = Ostr.getLength();
+        if (len < 3)
+            continue;   // Ustr[1] and Ustr[2] below would be out of range
+
+        if (Ustr[1] == sal_Unicode('v'))
+            SChinese2VChineseData[Ustr[0]] = Ustr[2];
+        else {
+            SChinese2TChineseData[Ustr[0]] = Ustr[2];
+            if (SChinese2VChineseData[Ustr[0]] == 0)
+                SChinese2VChineseData[Ustr[0]] = Ustr[2];
+        }
+        for (sal_Int32 i = 2; i < len; i++)
+            TChinese2SChineseData[Ustr[i]] = Ustr[0];
+    }
+
+    write_stc_char_table(cfp, SChinese2TChineseData, "STC_CharIndex_S2T", "STC_CharData_S2T");
+    write_stc_char_table(cfp, SChinese2VChineseData, "STC_CharIndex_S2V", "STC_CharData_S2V");
+    write_stc_char_table(cfp, TChinese2SChineseData, "STC_CharIndex_T2S", "STC_CharData_T2S");
+
+    // create function to return arrays
+    fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n");
+    fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n");
+    fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n");
+    fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n");
+    fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n");
+    fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n");
+}
+
+
+// Describes one word stored in the STC_WordData buffer built by
+// make_stc_word; arrays of these records are sorted with Index_comp.
+struct Index {
+    sal_uInt16   address;   // offset of the word's first character in STC_WordData
+    sal_Int32    len;       // number of characters in the word
+    sal_Unicode *data;      // pointer to the word's first character
+};
+
+extern "C" {
+// qsort comparator for Index records: shorter words sort first, and words
+// of equal length are ordered by their character values, position by position.
+int Index_comp(const void* s1, const void* s2)
+{
+    const Index *lhs = static_cast<const Index*>(s1);
+    const Index *rhs = static_cast<const Index*>(s2);
+
+    int diff = lhs->len - rhs->len;
+    if (diff != 0)
+        return diff;
+
+    // Lengths are equal here, so one bound serves both operands.
+    for (int pos = 0; pos < lhs->len; pos++) {
+        diff = lhs->data[pos] - rhs->data[pos];
+        if (diff != 0)
+            return diff;
+    }
+    return 0;
+}
+};
+
+// Size in bytes of the line buffer used to read stc_word.dic. fgets() returns
+// at most STC_WORD_LINE_SIZE - 1 bytes per call, which also bounds the length
+// of any single word taken from a line.
+enum { STC_WORD_LINE_SIZE = 1024 };
+
+// Sorts the word entries of one conversion direction and writes, to cfp:
+//   - STC_WordEntry_<dir>: the STC_WordData offset of every word, ordered by
+//     Index_comp (length first, then character values);
+//   - STC_WordIndex_<dir>: for each word length, the position in
+//     STC_WordEntry_<dir> of the first entry seen with that length or a longer
+//     one, followed by a final slot holding the total number of entries;
+//   - STC_WordIndex_<dir>_Count (the greatest word length) and the accessors.
+// With no entries, only accessors returning NULL / 0 are written.
+// countTrailer is the text written after the "..._Count = N;" statement.
+static void write_stc_word_index(FILE *cfp, Index *entries, sal_Int32 entryCount,
+                                 const char *dir, const char *countTrailer)
+{
+    if (entryCount <= 0) {
+        fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_%s() { return NULL; }\n", dir);
+        fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_%s(sal_Int32& count) { count = 0; return NULL; }\n", dir);
+        return;
+    }
+
+    // A word is shorter than STC_WORD_LINE_SIZE - 1 characters, and the index
+    // needs (longest length + 2) slots, so this size can never be exceeded.
+    sal_uInt16 lengthIndex[STC_WORD_LINE_SIZE + 1];
+
+    qsort(entries, entryCount, sizeof(Index), Index_comp);
+
+    fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_%s[] = {", dir);
+    sal_Int32 slots = 0;
+    sal_Int32 curLen = 0;
+    sal_Int32 i;
+    for (i = 0; i < entryCount; i++) {
+        if (i % 32 == 0) fprintf(cfp, "\n\t");
+        fprintf(cfp, "0x%04x, ", static_cast<unsigned int>(entries[i].address));
+        if (entries[i].len != curLen) {
+            // Entries are sorted by length, so a change means a longer word:
+            // point every length up to the new one at this entry.
+            curLen = entries[i].len;
+            while (slots <= curLen)
+                lengthIndex[slots++] = static_cast<sal_uInt16>(i);
+        }
+    }
+    fprintf(cfp, "\n};\n");
+    lengthIndex[slots++] = static_cast<sal_uInt16>(i);   // end marker: entry count
+
+    fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_%s[] = {", dir);
+    for (i = 0; i < slots; i++) {
+        if (i % 16 == 0) fprintf(cfp, "\n\t");
+        fprintf(cfp, "0x%04x, ", static_cast<unsigned int>(lengthIndex[i]));
+    }
+    fprintf(cfp, "\n};\n");
+
+    fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_%s_Count = %d;%s",
+            dir, static_cast<int>(curLen), countTrailer);
+    fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_%s() { return STC_WordEntry_%s; }\n", dir, dir);
+    fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_%s(sal_Int32& count) { count = STC_WordIndex_%s_Count; return STC_WordIndex_%s; }\n", dir, dir, dir);
+}
+
+// Simplified/Traditional Chinese word conversion
+//
+// Reads the UTF-8 word dictionary from sfp and writes the word data, the
+// per-direction entry/index tables and their accessor functions to cfp.
+//
+// Each line is "<simplified><sep><traditional>", where the first separator
+// found (searched in the order '=', '>', '<') selects the directions:
+//   '='  both S2T and T2S,   '>'  S2T only,   '<'  T2S only.
+// Both halves are stored back to back in STC_WordData, each terminated by 0.
+//
+// On a malformed line, or when the data would not fit into 0xFFFF characters,
+// a message with the 1-based line number goes to stderr and the function
+// returns without writing any word tables.
+void make_stc_word(FILE *sfp, FILE *cfp)
+{
+    sal_Unicode STC_WordData[0x10000];
+    Index *STC_WordEntry_S2T = static_cast<Index*>(malloc(0x10000 * sizeof(Index)));
+    Index *STC_WordEntry_T2S = static_cast<Index*>(malloc(0x10000 * sizeof(Index)));
+    if (STC_WordEntry_S2T == NULL || STC_WordEntry_T2S == NULL) {
+        fprintf(stderr, "Out of memory while building the stc_word tables\n");
+        free(STC_WordEntry_S2T);   // free(NULL) is a no-op
+        free(STC_WordEntry_T2S);
+        return;
+    }
+
+    sal_Int32 count_S2T = 0, count_T2S = 0;
+    sal_Int32 line = 0, char_total = 0;
+    sal_Int32 i;
+    bool failed = false;
+    sal_Char Cstr[STC_WORD_LINE_SIZE];
+
+    while (fgets(Cstr, STC_WORD_LINE_SIZE, sfp)) {
+        // input file is in UTF-8 encoding (SChinese:TChinese)
+        // Strip the line ending explicitly: the final line may have no
+        // newline at all, and CRLF files leave a '\r' in front of the '\n'.
+        size_t bytes = strlen(Cstr);
+        while (bytes > 0 && (Cstr[bytes - 1] == '\n' || Cstr[bytes - 1] == '\r'))
+            bytes--;
+
+        OUString Ostr(Cstr, static_cast<sal_Int32>(bytes), RTL_TEXTENCODING_UTF8);
+        const sal_Unicode *Ustr = Ostr.getStr();
+        const sal_Int32 len = Ostr.getLength();
+        if (char_total + len + 1 > 0xFFFF) {
+            fprintf(stderr, "Word Dictionary stc_word.dic is too big (line %d)", static_cast<int>(line + 1));
+            failed = true;
+            break;
+        }
+
+        // Locate the separator; later kinds are only searched for when the
+        // earlier ones are missing or sit at position 0 (empty source word).
+        sal_Int32 eq = -1, gt = -1, lt = -1;
+        sal_Int32 sep = eq = Ostr.indexOf(sal_Unicode('='));
+        if (sep <= 0)
+            sep = gt = Ostr.indexOf(sal_Unicode('>'));
+        if (sep <= 0)
+            sep = lt = Ostr.indexOf(sal_Unicode('<'));
+        if (sep <= 0) {
+            fprintf(stderr, "Invalid entry in stc_word.dic (line %d)", static_cast<int>(line + 1));
+            failed = true;
+            break;
+        }
+
+        if (eq > 0 || gt > 0) {
+            Index &entry = STC_WordEntry_S2T[count_S2T++];
+            entry.address = static_cast<sal_uInt16>(char_total);
+            entry.len = sep;
+            entry.data = &STC_WordData[char_total];
+        }
+        if (eq > 0 || lt > 0) {
+            Index &entry = STC_WordEntry_T2S[count_T2S++];
+            entry.address = static_cast<sal_uInt16>(char_total + sep + 1);
+            entry.len = len - sep - 1;
+            entry.data = &STC_WordData[char_total + sep + 1];
+        }
+
+        // Copy the line, turning the separator into the terminator of the
+        // first word, then terminate the second word as well.
+        for (i = 0; i < len; i++)
+            STC_WordData[char_total++] = (i == sep) ? 0 : Ustr[i];
+        STC_WordData[char_total++] = 0;
+        line++;
+    }
+
+    if (failed) {
+        free(STC_WordEntry_S2T);
+        free(STC_WordEntry_T2S);
+        return;
+    }
+
+    if (char_total > 0) {
+        fprintf(cfp, "\nstatic const sal_Unicode STC_WordData[] = {");
+        for (i = 0; i < char_total; i++) {
+            if (i % 32 == 0) fprintf(cfp, "\n\t");
+            fprintf(cfp, "0x%04x, ", static_cast<unsigned int>(STC_WordData[i]));
+        }
+        fprintf(cfp, "\n};\n");
+
+        fprintf(cfp, "\nstatic sal_Int32 STC_WordData_Count = %d;\n", static_cast<int>(char_total));
+
+        // create function to return arrays
+        fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n");
+    } else {
+        fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n");
+    }
+
+    write_stc_word_index(cfp, STC_WordEntry_S2T, count_S2T, "S2T", "\n");
+    write_stc_word_index(cfp, STC_WordEntry_T2S, count_T2S, "T2S", "\n\n");
+
+    free(STC_WordEntry_S2T);
+    free(STC_WordEntry_T2S);
+}
- return 0;
-} // End of main