INTEGRATION: CWS langsupporter (1.1.2); FILE ADDED

2004/09/22 07:24:46 obr 1.1.2.2: #i33873# replace fprintf with fputs 2004/09/22 06:52:43 obr 1.1.2.1: #i33873# ulf file converter tool
author: Oliver Bolte <obo@openoffice.org> 2004-10-18 13:08:04 +0000
committer: Oliver Bolte <obo@openoffice.org> 2004-10-18 13:08:04 +0000
commit: 29a93687ec4942b7f507ee114042e45ef3f8da01 (patch)
tree: ee3528f6681b012f9b6cf1482005f4966550a5e5 /setup_native/source
parent: 319fbe258bf2ba1fa7a182c8add996a96932f07e (diff)
1 files changed, 387 insertions, 0 deletions
diff --git a/setup_native/source/ulfconv/ulfconv.cxx b/setup_native/source/ulfconv/ulfconv.cxx
new file mode 100644
index 000000000000..6d50227b4c2a
--- /dev/null
+++ b/setup_native/source/ulfconv/ulfconv.cxx
@@ -0,0 +1,387 @@
+/*************************************************************************
+ *
+ *  $RCSfile: ulfconv.cxx,v $
+ *
+ *  $Revision: 1.2 $
+ *
+ *  last change: $Author: obo $ $Date: 2004-10-18 14:08:04 $
+ *
+ *  The Contents of this file are made available subject to the terms of
+ *  either of the following licenses
+ *
+ *         - GNU Lesser General Public License Version 2.1
+ *         - Sun Industry Standards Source License Version 1.1
+ *
+ *  Sun Microsystems Inc., October, 2000
+ *
+ *  GNU Lesser General Public License Version 2.1
+ *  =============================================
+ *  Copyright 2000 by Sun Microsystems, Inc.
+ *  901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License version 2.1, as published by the Free Software Foundation.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *  MA  02111-1307  USA
+ *
+ *
+ *  Sun Industry Standards Source License Version 1.1
+ *  =================================================
+ *  The contents of this file are subject to the Sun Industry Standards
+ *  Source License Version 1.1 (the "License"); You may not use this file
+ *  except in compliance with the License. You may obtain a copy of the
+ *  License at http://www.openoffice.org/license.html.
+ *
+ *  Software provided under this License is provided on an "AS IS" basis,
+ *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
+ *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
+ *  See the License for the specific provisions governing your rights and
+ *  obligations concerning the Software.
+ *
+ *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ *  Copyright: 2000 by Sun Microsystems, Inc.
+ *
+ *  All Rights Reserved.
+ *
+ *  Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#if !(defined NETBSD || defined FREEBSD || defined MACOSX || WNT)
+#include <alloca.h>
+#endif
+
+#ifdef WNT
+#include <malloc.h>
+#define alloca _alloca
+#endif
+
+#include <rtl/ustring.hxx>
+
+#include <map>
+#include <string>
+
+/*****************************************************************************/
+/* typedefs */
+/*****************************************************************************/
+
+typedef std::map< const std::string, rtl_TextEncoding > EncodingMap; /* language string -> text encoding; filled by read_encoding_table() */
+/* One table row: a lookup key and the text encoding stored for it. */
+typedef struct {
+    const char  *key;               /* string handed to _pair_compare() */
+    const rtl_TextEncoding value;   /* encoding delivered for that key */
+} _pair;
+/* Lookup helpers for arrays of _pair rows; both are defined further down. */
+static int _pair_compare (const char *key, const _pair *pair);
+static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
+
+/* Codepage strings that read_encoding_table() accepts, with the
+ * rtl_TextEncoding each one selects. The table is searched with
+ * _pair_search(), a binary search, so rows must stay ordered by key as
+ * _pair_compare() sees them; the existing rows are in string order
+ * ("1258" comes before "874"), not numeric order. */
+const _pair _ms_encoding_list[] = {
+    { "0",       RTL_TEXTENCODING_UTF8        },
+    { "1250",    RTL_TEXTENCODING_MS_1250     },
+    { "1251",    RTL_TEXTENCODING_MS_1251     },
+    { "1252",    RTL_TEXTENCODING_MS_1252     },
+    { "1253",    RTL_TEXTENCODING_MS_1253     },
+    { "1254",    RTL_TEXTENCODING_MS_1254     },
+    { "1255",    RTL_TEXTENCODING_MS_1255     },
+    { "1256",    RTL_TEXTENCODING_MS_1256     },
+    { "1257",    RTL_TEXTENCODING_MS_1257     },
+    { "1258",    RTL_TEXTENCODING_MS_1258     },
+    { "874",     RTL_TEXTENCODING_MS_874      },
+    { "932",     RTL_TEXTENCODING_MS_932      },
+    { "936",     RTL_TEXTENCODING_MS_936      },
+    { "949",     RTL_TEXTENCODING_MS_949      },
+    { "950",     RTL_TEXTENCODING_MS_950      }
+};
+
+
+/*****************************************************************************/
+/* fgets replacement that works with Unix line ends on Windows */
+/*****************************************************************************/
+
+/*
+ * Reads at most n-1 characters from fp into s, stopping after the first
+ * '\n' (which is stored) or at end of file, and NUL-terminates the result.
+ * Returns s when at least one character was stored, NULL otherwise.
+ */
+char * my_fgets(char *s, int n, FILE *fp)
+{
+    int count = 0;
+
+    while( count < n-1 )
+    {
+        const int ch = getc(fp);
+        if( ch == EOF )
+            break;
+
+        s[count++] = (char) ch;
+
+        /* a line end finishes the read, but stays part of the result */
+        if( (char) ch == '\n' )
+            break;
+    }
+
+    if( count == 0 )
+        return NULL;
+
+    s[count] = '\0';
+    return s;
+}
+
+/*****************************************************************************/
+/* compare function for binary search */
+/*****************************************************************************/
+
+/*
+ * Comparison step used by _pair_search(): compares key with pair->key via
+ * rtl_str_compareIgnoreAsciiCase() and hands that result back unchanged.
+ */
+static int
+_pair_compare (const char *key, const _pair *pair)
+{
+    return rtl_str_compareIgnoreAsciiCase( key, pair->key );
+}
+
+/*****************************************************************************/
+/* binary search on encoding tables */
+/*****************************************************************************/
+
+/*
+ * Binary search for key in the first `member` rows of `base`, ordering
+ * rows with _pair_compare(). Returns the matching row, or NULL when key
+ * or base is NULL, member is 0, or no row compares equal.
+ */
+static const _pair*
+_pair_search (const char *key, const _pair *base, unsigned int member )
+{
+    /* nothing to search without a key, a table and at least one row */
+    if ( !key || !base || !member )
+        return NULL;
+
+    /* rows still to be examined form the half-open range [first, last) */
+    unsigned int first = 0;
+    unsigned int last  = member;
+
+    while ( first < last )
+    {
+        const unsigned int middle = (first + last) / 2;
+        const _pair *candidate = base + middle;
+        const int order = _pair_compare( key, candidate );
+
+        if ( order == 0 )
+            return candidate;
+
+        if ( order < 0 )
+            last = middle;
+        else
+            first = middle + 1;
+    }
+
+    return NULL;
+}
+
+
+/************************************************************************
+ * read_encoding_table
+ ************************************************************************/
+
+/*
+ * Loads an encoding table file into aEncodingMap.
+ *
+ * Each line that does not start with '#' is expected to hold a language
+ * string followed by white space and a codepage string. The codepage is
+ * looked up in _ms_encoding_list; when it is known, the pair
+ * (language, encoding) is inserted into aEncodingMap. Lines without a
+ * codepage field and lines naming an unknown codepage are skipped.
+ *
+ * Prints a message and exits with status 2 when the file cannot be opened.
+ */
+void read_encoding_table(char * file, EncodingMap& aEncodingMap)
+{
+    FILE * fp = fopen(file, "r");
+    if ( ! fp  ) {
+        fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
+        exit(2);
+    }
+
+    char buffer[512];
+    while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
+
+        // strip comment lines
+        if ( buffer[0] == '#' )
+            continue;
+
+        // find end of language string; stop at the terminator as well so a
+        // line without any white space cannot run the scan off the buffer
+        char * cp = buffer;
+        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
+            ++cp;
+        if ( *cp == '\0' )
+            continue;
+        *cp = '\0';
+
+        // find start of codepage string (isspace('\0') is false, so this
+        // scan stops at the end of the line by itself)
+        for ( ++cp; isspace((unsigned char) *cp); ++cp )
+            ;
+        if ( *cp == '\0' )
+            continue;
+        char * codepage = cp;
+
+        // find end of codepage string; the last line of a file may end
+        // without a newline, so the terminator ends the field too
+        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
+            ++cp;
+        *cp = '\0';
+
+        // find the correct mapping for codepage
+        const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair );
+        const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
+
+        if ( encoding != NULL ) {
+            const std::string language(buffer);
+            aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
+        }
+    }
+
+    fclose(fp);
+}
+
+/************************************************************************
+ * print_legacy_mixed
+ ************************************************************************/
+
+/*
+ * Writes aString to ostream converted to the encoding that aEncodingMap
+ * holds for `language`. When the map has no entry for that language,
+ * nothing is written to ostream and a warning goes to stderr instead.
+ */
+void print_legacy_mixed(
+    FILE * ostream,
+    const rtl::OUString& aString,
+    const std::string& language,
+    EncodingMap& aEncodingMap)
+{
+    const EncodingMap::iterator found = aEncodingMap.find(language);
+
+    if ( found == aEncodingMap.end() ) {
+        fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
+        return;
+    }
+
+    const rtl::OString aConverted( OUStringToOString(aString, found->second) );
+    fputs(aConverted.getStr(), ostream);
+}
+
+/************************************************************************
+ * print_java_style
+ ************************************************************************/
+
+/*
+ * Writes aString to ostream one code unit at a time: values below 128 are
+ * written as a single character, all others as a backslash, the letter u
+ * and four lower-case hex digits (high byte first, then low byte).
+ */
+void print_java_style(FILE * ostream, const rtl::OUString& aString)
+{
+    const int nLength = aString.getLength();
+    int nPos = 0;
+
+    while ( nPos < nLength ) {
+        const sal_Unicode uc = aString[nPos++];
+
+        if ( uc >= 128 ) {
+            fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
+            continue;
+        }
+
+        fprintf(ostream, "%c", (char) uc);
+    }
+}
+
+/************************************************************************
+ * main
+ ************************************************************************/
+
+/*
+ * ulfconv entry point.
+ *
+ * Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]
+ *
+ * Copies the input (the named file, or stdin) to the output (the -o file,
+ * or stdout). In lines of the form  <lang> = "<text>"  the text is decoded
+ * as UTF-8 and written again: with print_java_style() when no encoding
+ * table was loaded, otherwise with print_legacy_mixed() using the encoding
+ * registered for <lang>. Every other line is copied unchanged.
+ *
+ * Exits with status 2 on usage errors, when a file cannot be opened, or
+ * when closing the -o output file fails; returns 0 otherwise.
+ */
+int main( int argc, char * const argv[] )
+{
+    EncodingMap aEncodingMap;
+
+    FILE *istream = stdin;
+    FILE *ostream = stdout;
+
+    int errflg = 0;
+    int argi;
+
+    for( argi=1; argi < argc; argi++ )
+    {
+        /* an option is exactly "-x"; checking [1] first keeps a lone "-"
+           from being read past its terminator */
+        if( argv[argi][0] == '-' && argv[argi][1] != '\0' && argv[argi][2] == '\0' )
+        {
+            switch(argv[argi][1]) {
+            case 'o':
+                if (argi+1 >= argc || argv[argi+1][0] == '-')
+                {
+                    /* %c needs the option letter, not the argument pointer */
+                    fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
+                    errflg++;
+                    break;
+                }
+
+                ++argi;
+                ostream = fopen(argv[argi], "w");
+                if ( ostream == NULL ) {
+                    fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
+                    exit(2);
+                }
+                break;
+            case 't':
+                if (argi+1 >= argc || argv[argi+1][0] == '-')
+                {
+                    fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
+                    errflg++;
+                    break;
+                }
+
+                read_encoding_table(argv[++argi], aEncodingMap);
+                break;
+            default:
+                fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
+                errflg++;
+                break;
+            }
+        }
+        else
+        {
+            /* first non-option argument: the input file, handled below */
+            break;
+        }
+    }
+
+    if (errflg) {
+      fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
+      exit(2);
+    }
+
+    /* read from the named file instead of stdin when one was given */
+    if ( argi < argc ) {
+
+        istream = fopen(argv[argi], "r");
+        if ( istream  == NULL ) {
+            fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
+            exit(2);
+        }
+    }
+
+    /* read line by line from the input */
+    char buffer[512];
+    while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
+
+        /* only convert lines containing  = "  followed by a closing quote;
+           a line without the closing quote is copied unchanged instead of
+           computing a length from a NULL pointer */
+        char * cp = strstr(buffer, " = \"");
+        char * closing = ( cp != NULL ) ? strrchr(cp + 4, '\"') : NULL;
+
+        if ( closing != NULL ) {
+            rtl::OUString aString;
+
+            /* the language is everything up to the first white space; the
+               blank matched above guarantees the scan ends inside the line */
+            int n;
+            for ( n=0; ! isspace((unsigned char) buffer[n]); n++ )
+                ;
+
+            std::string lang(buffer, n);
+
+            /* skip the four matched characters to reach the quoted text */
+            cp += 4;
+            rtl_string2UString( &aString.pData, cp, closing - cp,
+                RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
+
+            fprintf(ostream, "%s = \"", lang.c_str());
+
+            if ( aEncodingMap.empty() ) {
+                print_java_style(ostream, aString);
+            } else {
+                print_legacy_mixed(ostream, aString, lang, aEncodingMap);
+            }
+
+            fprintf(ostream, "\"\n");
+
+
+        } else {
+            /* the line is data, not a format string: a '%' in it must be
+               copied literally */
+            fputs(buffer, ostream);
+        }
+    }
+
+    if ( istream != stdin )
+        fclose(istream);
+
+    /* closing flushes buffered output; report a failed write to the -o file */
+    if ( ostream != stdout && fclose(ostream) != 0 ) {
+        fprintf(stderr, "ulfconv: %s\n", strerror(errno));
+        exit(2);
+    }
+
+    return 0;
+}
author	Oliver Bolte <obo@openoffice.org>	2004-10-18 13:08:04 +0000
committer	Oliver Bolte <obo@openoffice.org>	2004-10-18 13:08:04 +0000
commit	29a93687ec4942b7f507ee114042e45ef3f8da01 (patch)
tree	ee3528f6681b012f9b6cf1482005f4966550a5e5 /setup_native/source
parent	319fbe258bf2ba1fa7a182c8add996a96932f07e (diff)