diff options
author | Oliver Bolte <obo@openoffice.org> | 2004-10-18 13:08:04 +0000 |
---|---|---|
committer | Oliver Bolte <obo@openoffice.org> | 2004-10-18 13:08:04 +0000 |
commit | 29a93687ec4942b7f507ee114042e45ef3f8da01 (patch) | |
tree | ee3528f6681b012f9b6cf1482005f4966550a5e5 /setup_native/source | |
parent | 319fbe258bf2ba1fa7a182c8add996a96932f07e (diff) |
INTEGRATION: CWS langsupporter (1.1.2); FILE ADDED
2004/09/22 07:24:46 obr 1.1.2.2: #i33873# replace fprintf with fputs
2004/09/22 06:52:43 obr 1.1.2.1: #i33873# ulf file converter tool
Diffstat (limited to 'setup_native/source')
-rw-r--r-- | setup_native/source/ulfconv/ulfconv.cxx | 387 |
1 files changed, 387 insertions, 0 deletions
diff --git a/setup_native/source/ulfconv/ulfconv.cxx b/setup_native/source/ulfconv/ulfconv.cxx new file mode 100644 index 000000000000..6d50227b4c2a --- /dev/null +++ b/setup_native/source/ulfconv/ulfconv.cxx @@ -0,0 +1,387 @@ +/************************************************************************* + * + * $RCSfile: ulfconv.cxx,v $ + * + * $Revision: 1.2 $ + * + * last change: $Author: obo $ $Date: 2004-10-18 14:08:04 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +#if !(defined NETBSD || defined FREEBSD || defined MACOSX || WNT) +#include <alloca.h> +#endif + +#ifdef WNT +#include <malloc.h> +#define alloca _alloca +#endif + +#include <rtl/ustring.hxx> + +#include <map> +#include <string> + +/*****************************************************************************/ +/* typedefs +/*****************************************************************************/ + +typedef std::map< const std::string, rtl_TextEncoding > EncodingMap; + +typedef struct { + const char *key; + const rtl_TextEncoding value; +} _pair; + +static int _pair_compare (const char *key, const _pair *pair); +static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member ); + + +const _pair _ms_encoding_list[] = { + { "0", RTL_TEXTENCODING_UTF8 }, + { "1250", RTL_TEXTENCODING_MS_1250 }, + { "1251", RTL_TEXTENCODING_MS_1251 }, + { "1252", RTL_TEXTENCODING_MS_1252 }, + { "1253", RTL_TEXTENCODING_MS_1253 }, + { "1254", RTL_TEXTENCODING_MS_1254 }, + { "1255", RTL_TEXTENCODING_MS_1255 }, + { "1256", RTL_TEXTENCODING_MS_1256 }, + { "1257", RTL_TEXTENCODING_MS_1257 }, + { "1258", RTL_TEXTENCODING_MS_1258 }, + { "874", RTL_TEXTENCODING_MS_874 }, + { "932", RTL_TEXTENCODING_MS_932 }, + { "936", RTL_TEXTENCODING_MS_936 }, + { "949", RTL_TEXTENCODING_MS_949 }, + { "950", RTL_TEXTENCODING_MS_950 } +}; + + +/*****************************************************************************/ +/* fgets that work with unix line ends on Windows +/*****************************************************************************/ + +char * my_fgets(char *s, int n, FILE *fp) +{ + int i; + for( i=0; i < n-1; i++ ) + { + int c = getc(fp); + + if( c == EOF ) + break; + + s[i] = (char) c; + + if( s[i] == '\n' ) + { + i++; + break; + } + } + + if( i>0 ) + { + s[i] = '\0'; + return s; + } + else + { + return NULL; + } +} + +/*****************************************************************************/ +/* compare function for binary search +/*****************************************************************************/ + +static int +_pair_compare (const char *key, const _pair *pair) +{ + int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); + return result; +} + +/*****************************************************************************/ +/* binary search on encoding tables +/*****************************************************************************/ + +static const _pair* +_pair_search (const char *key, const _pair *base, unsigned int member ) +{ + unsigned int lower = 0; + unsigned int upper = member; + unsigned int current; + int comparison; + + /* check for validity of input */ + if ( (key == NULL) || (base == NULL) || (member == 0) ) + return NULL; + + /* binary search */ + while ( lower < upper ) + { + current = (lower + upper) / 2; + comparison = _pair_compare( key, base + current ); + if (comparison < 0) + upper = current; + else + if (comparison > 0) + lower = current + 1; + else + return base + current; + } + + return NULL; +} + + +/************************************************************************ + * read_encoding_table + ************************************************************************/ + +void read_encoding_table(char * file, EncodingMap& aEncodingMap) +{ + int nlang = 0; + + FILE * fp = fopen(file, "r"); + if ( ! fp ) { + fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno)); + exit(2); + } + + char buffer[512]; + while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) { + + // strip comment lines + if ( buffer[0] == '#' ) + continue; + + // find end of language string + char * cp; + for ( cp = buffer; ! isspace(*cp); cp++ ) + ; + *cp = '\0'; + + // find start of codepage string + for ( ++cp; isspace(*cp); ++cp ) + ; + char * codepage = cp; + + // find end of codepage string + for ( ++cp; ! isspace(*cp); ++cp ) + ; + *cp = '\0'; + + // find the correct mapping for codepage + const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair ); + const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members ); + + if ( encoding != NULL ) { + const std::string language(buffer); + aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) ); + } + } +} + +/************************************************************************ + * print_legacy_mixed + ************************************************************************/ + +void print_legacy_mixed( + FILE * ostream, + const rtl::OUString& aString, + const std::string& language, + EncodingMap& aEncodingMap) +{ + EncodingMap::iterator iter = aEncodingMap.find(language); + + if ( iter != aEncodingMap.end() ) { + fputs(OUStringToOString(aString, iter->second).getStr(), ostream); + } else { + fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str()); + } +} + +/************************************************************************ + * print_java_style + ************************************************************************/ + +void print_java_style(FILE * ostream, const rtl::OUString& aString) +{ + int imax = aString.getLength(); + for (int i = 0; i < imax; i++) { + sal_Unicode uc = aString[i]; + if ( uc < 128 ) { + fprintf(ostream, "%c", (char) uc); + } else { + fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF ); + } + } +} + +/************************************************************************ + * main + ************************************************************************/ + +int main( int argc, char * const argv[] ) +{ + EncodingMap aEncodingMap; + + FILE *istream = stdin; + FILE *ostream = stdout; + + int errflg = 0; + int argi; + + for( argi=1; argi < argc; argi++ ) + { + if( argv[argi][0] == '-' && argv[argi][2] == '\0' ) + { + switch(argv[argi][1]) { + case 'o': + if (argi+1 >= argc || argv[argi+1][0] == '-') + { + fprintf(stderr, "Option -%c requires an operand\n", argv[argi]); + errflg++; + break; + } + + ++argi; + ostream = fopen(argv[argi], "w"); + if ( ostream == NULL ) { + fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno)); + exit(2); + } + break; + case 't': + if (argi+1 >= argc || argv[argi+1][0] == '-') + { + fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); + errflg++; + break; + } + + read_encoding_table(argv[++argi], aEncodingMap); + break; + default: + fprintf(stderr, "Unrecognized option: -%c\n", argv[argi]); + errflg++; + } + } + else + { + break; + } + } + + if (errflg) { + fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n"); + exit(2); + } + + /* assign input file to stdin */ + if ( argi < argc ) { + + istream = fopen(argv[argi], "r"); + if ( istream == NULL ) { + fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno)); + exit(2); + } + } + + /* read line by line from stdin */ + char buffer[512]; + while ( NULL != fgets(buffer, sizeof(buffer), istream) ) { + + /* only handle lines containing " = " */ + char * cp = strstr(buffer, " = \""); + if ( cp ) { + rtl::OUString aString; + + /* find end of lang string */ + int n; + for ( n=0; ! isspace(buffer[n]); n++ ) + ; + + std::string line = buffer; + std::string lang(line, 0, n); + + cp += 4; + rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp, + RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS ); + + fprintf(ostream, "%s = \"", lang.c_str()); + + if ( aEncodingMap.empty() ) { + print_java_style(ostream, aString); + } else { + print_legacy_mixed(ostream, aString, lang, aEncodingMap); + } + + fprintf(ostream, "\"\n"); + + + } else { + fprintf(ostream, buffer); + } + } +} |