diff options
author | Bustamam Harun <bustamam@openoffice.org> | 2002-03-26 05:31:13 +0000 |
---|---|---|
committer | Bustamam Harun <bustamam@openoffice.org> | 2002-03-26 05:31:13 +0000 |
commit | 88eb5cc80cb71dd13cb0d147f765e9808c3d157f (patch) | |
tree | c524a9fb7fc9ee348649257b4d5145969acba4fb /i18npool | |
parent | 736e7a711af5437ef5b7dfb2ed050c0053fdaaaa (diff) |
#97583# Add Character Classification
Diffstat (limited to 'i18npool')
5 files changed, 1749 insertions, 0 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode.cxx b/i18npool/source/characterclassification/cclass_unicode.cxx new file mode 100644 index 000000000000..044e919c54e8 --- /dev/null +++ b/i18npool/source/characterclassification/cclass_unicode.cxx @@ -0,0 +1,236 @@ +/************************************************************************* + * + * $RCSfile: cclass_unicode.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 06:31:13 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <cclass_unicode.hxx> +#include <com/sun/star/i18n/UnicodeScript.hpp> +#include <com/sun/star/i18n/UnicodeType.hpp> +#include <unicode.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::rtl; + +namespace com { namespace sun { namespace star { namespace i18n { +// ---------------------------------------------------- +// class cclass_Unicode +// ----------------------------------------------------; + +cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ), + pTable( NULL ), + pStart( NULL ), + pCont( NULL ), + nStartTypes( 0 ), + nContTypes( 0 ), + eState( ssGetChar ), + cGroupSep( ',' ), + cDecimalSep( '.' ) +{ + trans = new Transliteration_casemapping(); + cClass = "com.sun.star.i18n.CharacterClassification_Unicode"; +} + +cclass_Unicode::~cclass_Unicode() { + destroyParserTable(); + delete trans; +} + + +OUString SAL_CALL +cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { + Sequence< sal_Int32 > offset; + trans->setMappingType(MappingTypeToUpper, rLocale); + return trans->transliterate(Text, nPos, nCount, offset); +} + +OUString SAL_CALL +cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { + Sequence< sal_Int32 > offset; + trans->setMappingType(MappingTypeToLower, rLocale); + return trans->transliterate(Text, nPos, nCount, offset); +} + +OUString SAL_CALL +cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { + Sequence< sal_Int32 > offset; + trans->setMappingType(MappingTypeToTitle, rLocale); + return trans->transliterate(Text, nPos, nCount, offset); +} + +sal_Int16 SAL_CALL +cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { + if ( Text.getLength() <= nPos ) return 0; + return unicode::getUnicodeType(Text[nPos]); +} + +sal_Int16 SAL_CALL +cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { + if ( Text.getLength() <= nPos ) return 0; + return unicode::getUnicodeDirection(Text[nPos]); +} + + +sal_Int16 SAL_CALL +cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { + if ( Text.getLength() <= nPos ) return 0; + return unicode::getUnicodeScriptType(Text[nPos], (ScriptTypeList*) 0, 0); +} + + +sal_Int32 SAL_CALL +cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& rLocale ) throw(RuntimeException) { + if ( Text.getLength() <= nPos ) return 0; + return unicode::getCharType(Text[nPos]); +} + +sal_Int32 SAL_CALL +cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { + if ( Text.getLength() <= nPos ) return 0; + + if ( Text.getLength() < nPos + nCount ) + nCount = Text.getLength() - nPos; + + sal_Int32 result = 0; + for (int i = 0; i < nCount; i++) + result |= unicode::getCharType(Text[nPos+i]); + return result; +} + +ParseResult SAL_CALL cclass_Unicode::parseAnyToken( + const OUString& Text, + sal_Int32 nPos, + const Locale& rLocale, + sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, + sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) + throw(RuntimeException) +{ + ParseResult r; + if ( Text.getLength() <= nPos ) + return r; + + setupParserTable( rLocale, + startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont ); + parseText( r, Text, nPos ); + + return r; +} + + +ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken( + sal_Int32 nTokenType, + const OUString& Text, + sal_Int32 nPos, + const Locale& rLocale, + sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, + sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) + throw(RuntimeException) +{ + ParseResult r; + if ( Text.getLength() <= nPos ) + return r; + + setupParserTable( rLocale, + startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont ); + parseText( r, Text, nPos, nTokenType ); + + return r; +} + +OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException ) +{ + return OUString::createFromAscii(cClass); +} + + +sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException ) +{ + return !rServiceName.compareToAscii(cClass); +} + +Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException ) +{ + Sequence< OUString > aRet(1); + aRet[0] = OUString::createFromAscii(cClass); + return aRet; +} + +} } } } + +/************************************************************************** + + Source Code Control System - Updates + + $Log: not supported by cvs2svn $ + Revision 1.4 2001/10/19 21:24:42 bustamam.harun + #84725# add XServiceInfo implementation + + Revision 1.3 2001/05/18 17:58:15 er + #79771# optimize: disentangled: cclass_Unicode not derived from CharacterClassificationImpl; reuse instance if locale didn't change; OUString instead of String + + Revision 1.2 2001/04/12 23:55:36 bustamam.harun + Fix compile problem on Solaris: change String to OUString + + Revision 1.1 2001/03/27 21:10:36 bustamam.harun + Rename characterclassification to cclass_Unicode + + Revision 1.8 2001/01/29 17:06:50 er + cclass_Unicode with service manager + + Revision 1.7 2000/10/29 17:02:41 er + i18n API moved from com.sun.star.lang to com.sun.star.i18n + + Revision 1.6 2000/08/11 14:52:52 er + removed queryInterface/aquire/release, using WeakImplHelper instead + + Revision 1.5 2000/07/19 10:59:59 er + toUpper, toLower, toTitle: nCount characters are converted; other: optimizations + + Revision 1.4 2000/07/06 15:46:57 gmu + implemented parsing functions + + Revision 1.1 2000/07/06 08:52:43 er + new: cclass_Unicode with parser + + +**************************************************************************/ diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx new file mode 100644 index 000000000000..23ba5a75c941 --- /dev/null +++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx @@ -0,0 +1,1006 @@ +/************************************************************************* + * + * $RCSfile: cclass_unicode_parser.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 06:31:13 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <cclass_unicode.hxx> +#include <unicode.hxx> + +#ifndef _TOOLS_INTN_HXX +#include <tools/intn.hxx> +#endif +#ifndef _ISOLANG_HXX +#include <tools/isolang.hxx> +#endif +#ifndef _TOOLS_SOLMATH_HXX +#include <tools/solmath.hxx> +#endif + +#ifndef _COM_SUN_STAR_I18N_KPARSETOKENS_HPP_ +#include <com/sun/star/i18n/KParseTokens.hpp> +#endif +#ifndef _COM_SUN_STAR_I18N_KPARSETYPE_HPP_ +#include <com/sun/star/i18n/KParseType.hpp> +#endif +#ifndef _COM_SUN_STAR_I18N_UNICODETYPE_HPP_ +#include <com/sun/star/i18n/UnicodeType.hpp> +#endif +#ifndef _COM_SUN_STAR_I18N_XLOCALEDATA_HPP_ +#include <com/sun/star/i18n/XLocaleData.hpp> +#endif + +#include <string.h> // memcpy() + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000; +const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000; + +#define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT) + +// Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]* + +const sal_uInt8 cclass_Unicode::nDefCnt = 128; +const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] = +{ +// (...) == Calc formula compiler specific, commented out and modified + + /* \0 */ TOKEN_EXCLUDED, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) + TOKEN_ILLEGAL, + /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + TOKEN_ILLEGAL, + /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP, + /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP) + /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) + /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE) + /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 39 ' */ TOKEN_NAME_SEP, + /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, + /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE) + /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, + /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE) + /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + //for ( i = 48; i < 58; i++ ) + /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, + /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD) + /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) + /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + //for ( i = 65; i < 91; i++ ) + /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, + /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + //for ( i = 97; i < 123; i++ ) + /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD, + /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) + /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED) +}; + + +const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = +{ + /* \0 */ KParseTokens::ASC_OTHER, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + /* 9 \t */ KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + /* 11 \v */ KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + KParseTokens::ASC_CONTROL, + /* 32 */ KParseTokens::ASC_OTHER, + /* 33 ! */ KParseTokens::ASC_OTHER, + /* 34 " */ KParseTokens::ASC_OTHER, + /* 35 # */ KParseTokens::ASC_OTHER, + /* 36 $ */ KParseTokens::ASC_DOLLAR, + /* 37 % */ KParseTokens::ASC_OTHER, + /* 38 & */ KParseTokens::ASC_OTHER, + /* 39 ' */ KParseTokens::ASC_OTHER, + /* 40 ( */ KParseTokens::ASC_OTHER, + /* 41 ) */ KParseTokens::ASC_OTHER, + /* 42 * */ KParseTokens::ASC_OTHER, + /* 43 + */ KParseTokens::ASC_OTHER, + /* 44 , */ KParseTokens::ASC_OTHER, + /* 45 - */ KParseTokens::ASC_OTHER, + /* 46 . */ KParseTokens::ASC_DOT, + /* 47 / */ KParseTokens::ASC_OTHER, + //for ( i = 48; i < 58; i++ ) + /* 48 0 */ KParseTokens::ASC_DIGIT, + /* 49 1 */ KParseTokens::ASC_DIGIT, + /* 50 2 */ KParseTokens::ASC_DIGIT, + /* 51 3 */ KParseTokens::ASC_DIGIT, + /* 52 4 */ KParseTokens::ASC_DIGIT, + /* 53 5 */ KParseTokens::ASC_DIGIT, + /* 54 6 */ KParseTokens::ASC_DIGIT, + /* 55 7 */ KParseTokens::ASC_DIGIT, + /* 56 8 */ KParseTokens::ASC_DIGIT, + /* 57 9 */ KParseTokens::ASC_DIGIT, + /* 58 : */ KParseTokens::ASC_COLON, + /* 59 ; */ KParseTokens::ASC_OTHER, + /* 60 < */ KParseTokens::ASC_OTHER, + /* 61 = */ KParseTokens::ASC_OTHER, + /* 62 > */ KParseTokens::ASC_OTHER, + /* 63 ? */ KParseTokens::ASC_OTHER, + /* 64 @ */ KParseTokens::ASC_OTHER, + //for ( i = 65; i < 91; i++ ) + /* 65 A */ KParseTokens::ASC_UPALPHA, + /* 66 B */ KParseTokens::ASC_UPALPHA, + /* 67 C */ KParseTokens::ASC_UPALPHA, + /* 68 D */ KParseTokens::ASC_UPALPHA, + /* 69 E */ KParseTokens::ASC_UPALPHA, + /* 70 F */ KParseTokens::ASC_UPALPHA, + /* 71 G */ KParseTokens::ASC_UPALPHA, + /* 72 H */ KParseTokens::ASC_UPALPHA, + /* 73 I */ KParseTokens::ASC_UPALPHA, + /* 74 J */ KParseTokens::ASC_UPALPHA, + /* 75 K */ KParseTokens::ASC_UPALPHA, + /* 76 L */ KParseTokens::ASC_UPALPHA, + /* 77 M */ KParseTokens::ASC_UPALPHA, + /* 78 N */ KParseTokens::ASC_UPALPHA, + /* 79 O */ KParseTokens::ASC_UPALPHA, + /* 80 P */ KParseTokens::ASC_UPALPHA, + /* 81 Q */ KParseTokens::ASC_UPALPHA, + /* 82 R */ KParseTokens::ASC_UPALPHA, + /* 83 S */ KParseTokens::ASC_UPALPHA, + /* 84 T */ KParseTokens::ASC_UPALPHA, + /* 85 U */ KParseTokens::ASC_UPALPHA, + /* 86 V */ KParseTokens::ASC_UPALPHA, + /* 87 W */ KParseTokens::ASC_UPALPHA, + /* 88 X */ KParseTokens::ASC_UPALPHA, + /* 89 Y */ KParseTokens::ASC_UPALPHA, + /* 90 Z */ KParseTokens::ASC_UPALPHA, + /* 91 [ */ KParseTokens::ASC_OTHER, + /* 92 \ */ KParseTokens::ASC_OTHER, + /* 93 ] */ KParseTokens::ASC_OTHER, + /* 94 ^ */ KParseTokens::ASC_OTHER, + /* 95 _ */ KParseTokens::ASC_UNDERSCORE, + /* 96 ` */ KParseTokens::ASC_OTHER, + //for ( i = 97; i < 123; i++ ) + /* 97 a */ KParseTokens::ASC_LOALPHA, + /* 98 b */ KParseTokens::ASC_LOALPHA, + /* 99 c */ KParseTokens::ASC_LOALPHA, + /* 100 d */ KParseTokens::ASC_LOALPHA, + /* 101 e */ KParseTokens::ASC_LOALPHA, + /* 102 f */ KParseTokens::ASC_LOALPHA, + /* 103 g */ KParseTokens::ASC_LOALPHA, + /* 104 h */ KParseTokens::ASC_LOALPHA, + /* 105 i */ KParseTokens::ASC_LOALPHA, + /* 106 j */ KParseTokens::ASC_LOALPHA, + /* 107 k */ KParseTokens::ASC_LOALPHA, + /* 108 l */ KParseTokens::ASC_LOALPHA, + /* 109 m */ KParseTokens::ASC_LOALPHA, + /* 110 n */ KParseTokens::ASC_LOALPHA, + /* 111 o */ KParseTokens::ASC_LOALPHA, + /* 112 p */ KParseTokens::ASC_LOALPHA, + /* 113 q */ KParseTokens::ASC_LOALPHA, + /* 114 r */ KParseTokens::ASC_LOALPHA, + /* 115 s */ KParseTokens::ASC_LOALPHA, + /* 116 t */ KParseTokens::ASC_LOALPHA, + /* 117 u */ KParseTokens::ASC_LOALPHA, + /* 118 v */ KParseTokens::ASC_LOALPHA, + /* 119 w */ KParseTokens::ASC_LOALPHA, + /* 120 x */ KParseTokens::ASC_LOALPHA, + /* 121 y */ KParseTokens::ASC_LOALPHA, + /* 122 z */ KParseTokens::ASC_LOALPHA, + /* 123 { */ KParseTokens::ASC_OTHER, + /* 124 | */ KParseTokens::ASC_OTHER, + /* 125 } */ KParseTokens::ASC_OTHER, + /* 126 ~ */ KParseTokens::ASC_OTHER, + /* 127 */ KParseTokens::ASC_OTHER +}; + + +// static +const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) +{ + if ( !pStr ) + return NULL; + while ( *pStr ) + { + if ( *pStr == c ) + return pStr; + pStr++; + } + return NULL; +} + + +sal_Int32 cclass_Unicode::getParseTokensType( sal_Unicode c ) +{ + if ( c < nDefCnt ) + return pParseTokensType[ sal_uInt8(c) ]; + else + { + + //! all KParseTokens::UNI_... must be matched + switch ( unicode::getUnicodeType( c ) ) + { + case UnicodeType::UPPERCASE_LETTER : + return KParseTokens::UNI_UPALPHA; + break; + case UnicodeType::LOWERCASE_LETTER : + return KParseTokens::UNI_LOALPHA; + break; + case UnicodeType::TITLECASE_LETTER : + return KParseTokens::UNI_TITLE_ALPHA; + break; + case UnicodeType::MODIFIER_LETTER : + return KParseTokens::UNI_MODIFIER_LETTER; + break; + case UnicodeType::OTHER_LETTER : + return KParseTokens::UNI_OTHER_LETTER; + break; + case UnicodeType::DECIMAL_DIGIT_NUMBER : + return KParseTokens::UNI_DIGIT; + break; + case UnicodeType::LETTER_NUMBER : + return KParseTokens::UNI_LETTER_NUMBER; + break; + case UnicodeType::OTHER_NUMBER : + return KParseTokens::UNI_OTHER_NUMBER; + break; + } + + return KParseTokens::UNI_OTHER; + } +} + +sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale ) +{ + sal_Bool bChanged = (aParserLocale.Language != rLocale.Language + || aParserLocale.Country != rLocale.Country + || aParserLocale.Variant != rLocale.Variant); + if ( bChanged ) + { + aParserLocale.Language = rLocale.Language; + aParserLocale.Country = rLocale.Country; + aParserLocale.Variant = rLocale.Variant; + } + if ( !xLocaleData.is() && xMSF.is() ) + { + Reference < + XInterface > xI = + xMSF->createInstance( OUString( + RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) ); + if ( xI.is() ) + { + Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) ); + x >>= xLocaleData; + } + } + return bChanged; +} + + +void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) +{ + BOOL bIntlEqual = (rLocale.Language == aParserLocale.Language && + rLocale.Country == aParserLocale.Country && + rLocale.Variant == aParserLocale.Variant); + if ( !pTable || !bIntlEqual || + startCharTokenType != nStartTypes || + contCharTokenType != nContTypes || + userDefinedCharactersStart != aStartChars || + userDefinedCharactersCont != aContChars ) + initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont ); +} + + +void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) +{ + // (Re)Init + setupInternational( rLocale ); + // Memory of pTable is reused. + if ( !pTable ) + pTable = new UPT_FLAG_TYPE[nDefCnt]; + memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt ); + // Start and cont tables only need reallocation if different length. + if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() ) + { + delete [] pStart; + pStart = NULL; + } + if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() ) + { + delete [] pCont; + pCont = NULL; + } + nStartTypes = startCharTokenType; + nContTypes = contCharTokenType; + aStartChars = userDefinedCharactersStart; + aContChars = userDefinedCharactersCont; + + // specials + if( xLocaleData.is() ) + { + LocaleDataItem aItem = + xLocaleData->getLocaleItem( aParserLocale ); +//!TODO: theoretically separators may be a string, adjustment would have to be +//! done here and in parsing and in SolarMath::StringToDouble + cGroupSep = aItem.thousandSeparator.getStr()[0]; + cDecimalSep = aItem.decimalSeparator.getStr()[0]; + } + + if ( cGroupSep < nDefCnt ) + pTable[cGroupSep] |= TOKEN_VALUE; + if ( cDecimalSep < nDefCnt ) + pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE; + + // Modify characters according to KParseTokens definitions. + { + using namespace KParseTokens; + sal_uInt8 i; + + if ( !(nStartTypes & ASC_UPALPHA) ) + for ( i = 65; i < 91; i++ ) + pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character + if ( !(nContTypes & ASC_UPALPHA) ) + for ( i = 65; i < 91; i++ ) + pTable[i] &= ~TOKEN_WORD; // not allowed as cont character + + if ( !(nStartTypes & ASC_LOALPHA) ) + for ( i = 97; i < 123; i++ ) + pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character + if ( !(nContTypes & ASC_LOALPHA) ) + for ( i = 97; i < 123; i++ ) + pTable[i] &= ~TOKEN_WORD; // not allowed as cont character + + if ( nStartTypes & ASC_DIGIT ) + for ( i = 48; i < 58; i++ ) + pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character + if ( !(nContTypes & ASC_DIGIT) ) + for ( i = 48; i < 58; i++ ) + pTable[i] &= ~TOKEN_WORD; // not allowed as cont character + + if ( !(nStartTypes & ASC_UNDERSCORE) ) + pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character + if ( !(nContTypes & ASC_UNDERSCORE) ) + pTable[95] &= ~TOKEN_WORD; // not allowed as cont character + + if ( nStartTypes & ASC_DOLLAR ) + pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character + if ( nContTypes & ASC_DOLLAR ) + pTable[36] |= TOKEN_WORD; // allowed as cont character + + if ( nStartTypes & ASC_DOT ) + pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character + if ( nContTypes & ASC_DOT ) + pTable[46] |= TOKEN_WORD; // allowed as cont character + + if ( nStartTypes & ASC_COLON ) + pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character + if ( nContTypes & ASC_COLON ) + pTable[58] |= TOKEN_WORD; // allowed as cont character + + if ( nStartTypes & ASC_CONTROL ) + for ( i = 1; i < 32; i++ ) + pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character + if ( nContTypes & ASC_CONTROL ) + for ( i = 1; i < 32; i++ ) + pTable[i] |= TOKEN_WORD; // allowed as cont character + + if ( nStartTypes & ASC_ANY_BUT_CONTROL ) + for ( i = 32; i < nDefCnt; i++ ) + pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character + if ( nContTypes & ASC_ANY_BUT_CONTROL ) + for ( i = 32; i < nDefCnt; i++ ) + pTable[i] |= TOKEN_WORD; // allowed as cont character + + } + + // Merge in (positively override with) user defined characters. + // StartChars + sal_Int32 nLen = aStartChars.getLength(); + if ( nLen ) + { + if ( !pStart ) + pStart = new UPT_FLAG_TYPE[ nLen ]; + const sal_Unicode* p = aStartChars.getStr(); + for ( sal_Int32 j=0; j<nLen; j++, p++ ) + { + pStart[j] = TOKEN_CHAR_WORD; + if ( *p < nDefCnt ) + pTable[*p] |= TOKEN_CHAR_WORD; + } + } + // ContChars + nLen = aContChars.getLength(); + if ( nLen ) + { + if ( !pCont ) + pCont = new UPT_FLAG_TYPE[ nLen ]; + const sal_Unicode* p = aContChars.getStr(); + for ( sal_Int32 j=0; j<nLen; j++ ) + { + pCont[j] = TOKEN_WORD; + if ( *p < nDefCnt ) + pTable[*p] |= TOKEN_WORD; + } + } +} + + +void cclass_Unicode::destroyParserTable() +{ + if ( pCont ) + delete [] pCont; + if ( pStart ) + delete [] pStart; + if ( pTable ) + delete [] pTable; +} + + +UPT_FLAG_TYPE cclass_Unicode::getFlags( sal_Unicode c ) +{ + UPT_FLAG_TYPE nMask; + if ( c < nDefCnt ) + nMask = pTable[ sal_uInt8(c) ]; + else + nMask = getFlagsExtended( c ); + switch ( eState ) + { + case ssGetChar : + case ssGetWordFirstChar : + if ( !(nMask & TOKEN_CHAR_WORD) ) + { + nMask |= getStartCharsFlags( c ); + if ( nMask & TOKEN_CHAR_WORD ) + nMask &= ~TOKEN_EXCLUDED; + } + break; + case ssGetValue : + case ssGetWord : + if ( !(nMask & TOKEN_WORD) ) + { + nMask |= getContCharsFlags( c ); + if ( nMask & TOKEN_WORD ) + nMask &= ~TOKEN_EXCLUDED; + } + break; + // other cases aren't needed + } + return nMask; +} + + +UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( sal_Unicode c ) +{ + if ( c == cGroupSep ) + return TOKEN_VALUE; + else if ( c == cDecimalSep ) + return TOKEN_CHAR_VALUE | TOKEN_VALUE; + using namespace i18n; + BOOL bStart = (eState == ssGetChar || eState == ssGetWordFirstChar); + sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes); + + //! all KParseTokens::UNI_... must be matched + switch ( unicode::getUnicodeType( c ) ) + { + case UnicodeType::UPPERCASE_LETTER : + return (nTypes & KParseTokens::UNI_UPALPHA) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL; + break; + case UnicodeType::LOWERCASE_LETTER : + return (nTypes & KParseTokens::UNI_LOALPHA) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL; + break; + case UnicodeType::TITLECASE_LETTER : + return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL; + break; + case UnicodeType::MODIFIER_LETTER : + return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL; + break; + case UnicodeType::OTHER_LETTER : + return (nTypes & KParseTokens::UNI_OTHER_LETTER) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL; + break; + case UnicodeType::DECIMAL_DIGIT_NUMBER : + return ((nTypes & KParseTokens::UNI_DIGIT) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; + break; + case UnicodeType::LETTER_NUMBER : + return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; + break; + case UnicodeType::OTHER_NUMBER : + return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ? + (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : + TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; + break; + case UnicodeType::SPACE_SEPARATOR : + return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ? + TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD )); + break; + } + + return TOKEN_ILLEGAL; +} + + +UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c ) +{ + if ( pStart ) + { + const sal_Unicode* pStr = aStartChars.getStr(); + const sal_Unicode* p = StrChr( pStr, c ); + if ( p ) + return pStart[ p - pStr ]; + } + return TOKEN_ILLEGAL; +} + + +UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c ) +{ + if ( pCont ) + { + const sal_Unicode* pStr = aContChars.getStr(); + const sal_Unicode* p = StrChr( pStr, c ); + if ( p ) + return pCont[ p - pStr ]; + } + return TOKEN_ILLEGAL; +} + + +void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType ) +{ + using namespace i18n; + const sal_Unicode* const pStart = rText.getStr() + nPos; + const sal_Unicode* pSym = pStart; + const sal_Unicode* pSrc = pSym; + OUString aSymbol; + sal_Unicode c = *pSrc; + sal_Unicode cLast = 0; + BOOL bQuote = FALSE; + BOOL bMightBeWord = TRUE; + BOOL bMightBeWordLast = TRUE; + eState = ssGetChar; + while ( (c != 0) && (eState != ssStop) ) + { + pSrc++; + UPT_FLAG_TYPE nMask = getFlags( c ); + if ( nMask & TOKEN_EXCLUDED ) + eState = ssBounce; + if ( bMightBeWord ) + { // only relevant for ssGetValue fall back + if ( eState == ssGetChar ) + bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0); + else + bMightBeWord = ((nMask & TOKEN_WORD) != 0); + } + sal_Int32 nParseTokensType = getParseTokensType( c ); + switch (eState) + { + case ssGetChar : + { + if ( nMask & TOKEN_CHAR_VALUE ) + { //! must be first, may fall back to ssGetWord via bMightBeWord + eState = ssGetValue; + if ( (nMask & TOKEN_VALUE_DIGIT) && 128 <= c ) + r.TokenType = KParseType::UNI_NUMBER; + else + r.TokenType = KParseType::ASC_NUMBER; + } + else if ( nMask & TOKEN_CHAR_WORD ) + { + eState = ssGetWord; + r.TokenType = KParseType::IDENTNAME; + } + else if ( nMask & TOKEN_NAME_SEP ) + { + eState = ssGetWordFirstChar; + bQuote = TRUE; + pSym++; + nParseTokensType = 0; // will be taken of first real character + r.TokenType = KParseType::SINGLE_QUOTE_NAME; + } + else if ( nMask & TOKEN_CHAR_STRING ) + { + eState = ssGetString; + pSym++; + nParseTokensType = 0; // will be taken of first real character + r.TokenType = KParseType::DOUBLE_QUOTE_STRING; + } + else if ( nMask & TOKEN_CHAR_DONTCARE ) + { + if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS ) + { + r.LeadingWhiteSpace++; + pSym++; + nParseTokensType = 0; // wait until real character + bMightBeWord = TRUE; + } + else + eState = ssBounce; + } + else if ( nMask & TOKEN_CHAR_BOOL ) + { + eState = ssGetBool; + r.TokenType = KParseType::BOOLEAN; + } + else if ( nMask & TOKEN_CHAR ) + { //! must be last + eState = ssStop; + r.TokenType = KParseType::ONE_SINGLE_CHAR; + } + else + eState = ssBounce; // not known + } + break; + case ssGetValue : + { + if ( (nMask & TOKEN_VALUE_DIGIT) && 128 <= c ) + r.TokenType = KParseType::UNI_NUMBER; + if ( nMask & TOKEN_VALUE ) + ; // keep it going + else if ( c == 'E' || c == 'e' ) + { + UPT_FLAG_TYPE nNext = getFlags( *pSrc ); + if ( nNext & TOKEN_VALUE_EXP ) + ; // keep it going + else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) + { // might be a numerical name (1.2efg) + eState = ssGetWord; + r.TokenType = KParseType::IDENTNAME; + } + else + eState = ssStopBack; + } + else if ( nMask & TOKEN_VALUE_SIGN ) + { + if ( (cLast == 'E') || (cLast == 'e') ) + { + UPT_FLAG_TYPE nNext = getFlags( *pSrc ); + if ( nNext & TOKEN_VALUE_EXP_VALUE ) + ; // keep it going + else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) + { // might be a numerical name (1.2e+fg) + eState = ssGetWord; + r.TokenType = KParseType::IDENTNAME; + } + else + eState = ssStopBack; + } + else if ( bMightBeWord ) + { // might be a numerical name (1.2+fg) + eState = ssGetWord; + r.TokenType = KParseType::IDENTNAME; + } + else + eState = ssStopBack; + } + else if ( bMightBeWord && (nMask & TOKEN_WORD) ) + { // might be a numerical name (1995.A1) + eState = ssGetWord; + r.TokenType = KParseType::IDENTNAME; + } + else + eState = ssStopBack; + } + break; + case ssGetWordFirstChar : + eState = ssGetWord; + // fall thru + case ssGetWord : + { + if ( nMask & TOKEN_WORD ) + ; // keep it going + else if ( nMask & TOKEN_NAME_SEP ) + { + if ( bQuote ) + { + if ( cLast == '\\' ) + { // escaped + aSymbol += OUString( pSym, pSrc - pSym - 2 ); + aSymbol += OUString(&c); + } + else + { + eState = ssStop; + aSymbol += OUString( pSym, pSrc - pSym - 1 ); + } + pSym = pSrc; + } + else + eState = ssStopBack; + } + else if ( bQuote ) + ; // keep it going + else + eState = ssStopBack; + } + break; + case ssGetString : + { + if ( nMask & TOKEN_STRING_SEP ) + { + if ( cLast == '\\' ) + { // escaped + aSymbol += OUString( pSym, pSrc - pSym - 2 ); + aSymbol += OUString(&c); + } + else if ( c == *pSrc && + !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) ) + { // "" => literal " escaped + aSymbol += OUString( pSym, pSrc - pSym ); + pSrc++; + } + else + { + eState = ssStop; + aSymbol += OUString( pSym, pSrc - pSym - 1 ); + } + pSym = pSrc; + } + } + break; + case ssGetBool : + { + if ( (nMask & TOKEN_BOOL) ) + eState = ssStop; // maximum 2: <, >, <>, <=, >= + else + eState = ssStopBack; + } + break; + } + if ( !(r.TokenType & nTokenType) ) + { + if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) + && (nTokenType & KParseType::IDENTNAME) && bMightBeWord ) + ; // keep a number which might be a word + else if ( r.LeadingWhiteSpace == (pSrc - pStart) ) + ; // keep ignored white space + else + eState = ssBounce; + } + if ( eState == ssBounce ) + { + r.TokenType = 0; + eState = ssStopBack; + } + if ( eState == ssStopBack ) + { // put back + pSrc--; + bMightBeWord = bMightBeWordLast; + eState = ssStop; + } + if ( eState != ssStop ) + { + if ( !r.StartFlags ) + r.StartFlags |= nParseTokensType; + else + r.ContFlags |= nParseTokensType; + } + bMightBeWordLast = bMightBeWord; + cLast = c; + c = *pSrc; + } + // r.CharLen is the length in characters (not code points) of the parsed + // token not including any leading white space, change this calculation if + // multi-code-point Unicode characters are to be supported. + r.CharLen = pSrc - pStart - r.LeadingWhiteSpace; + r.EndPos = nPos + (pSrc - pStart); + if ( r.TokenType & KParseType::ASC_NUMBER ) + { + int nErrno; + r.Value = SolarMath::StringToDouble( pStart + r.LeadingWhiteSpace, + cGroupSep, cDecimalSep, nErrno ); + if ( bMightBeWord ) + r.TokenType |= KParseType::IDENTNAME; + } + else if ( r.TokenType & KParseType::UNI_NUMBER ) + { +//!!!!! +//!TODO: r.Value = implementation +//!!!!! + if ( bMightBeWord ) + r.TokenType |= KParseType::IDENTNAME; + } + else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) ) + { + if ( pSym < pSrc ) + { //! open quote + aSymbol += OUString( pSym, pSrc - pSym ); + r.TokenType |= KParseType::MISSING_QUOTE; + } + r.DequotedNameOrString = aSymbol; + } +} + +} } } } diff --git a/i18npool/source/characterclassification/characterclassificationImpl.cxx b/i18npool/source/characterclassification/characterclassificationImpl.cxx new file mode 100644 index 000000000000..08bac1c8a316 --- /dev/null +++ b/i18npool/source/characterclassification/characterclassificationImpl.cxx @@ -0,0 +1,248 @@ +/************************************************************************* + * + * $RCSfile: characterclassificationImpl.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 06:31:13 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <characterclassificationImpl.hxx> +#ifndef _RTL_USTRBUF_HXX_ +#include <rtl/ustrbuf.hxx> +#endif + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +CharacterClassificationImpl::CharacterClassificationImpl( + const Reference < lang::XMultiServiceFactory >& rxMSF ) : xMSF( rxMSF ) +{ + if (createLocaleSpecificCharacterClassification(OUString::createFromAscii("Unicode"), Locale())) + xUCI = cachedItem->xCI; +} + +CharacterClassificationImpl::~CharacterClassificationImpl() { + // Clear lookuptable + for (cachedItem = (lookupTableItem*)lookupTable.First(); + cachedItem; cachedItem = (lookupTableItem*)lookupTable.Next()) + delete cachedItem; + lookupTable.Clear(); +} + + +OUString SAL_CALL +CharacterClassificationImpl::toUpper( const OUString& Text, sal_Int32 nPos, + sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->toUpper(Text, nPos, nCount, rLocale); +} + +OUString SAL_CALL +CharacterClassificationImpl::toLower( const OUString& Text, sal_Int32 nPos, + sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->toLower(Text, nPos, nCount, rLocale); +} + +OUString SAL_CALL +CharacterClassificationImpl::toTitle( const OUString& Text, sal_Int32 nPos, + sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->toTitle(Text, nPos, nCount, rLocale); +} + +sal_Int16 SAL_CALL +CharacterClassificationImpl::getType( const OUString& Text, sal_Int32 nPos ) + throw(RuntimeException) +{ + if (xUCI.is()) + return xUCI->getType(Text, nPos); + throw RuntimeException(); +} + +sal_Int16 SAL_CALL +CharacterClassificationImpl::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) + throw(RuntimeException) +{ + if (xUCI.is()) + return xUCI->getCharacterDirection(Text, nPos); + throw RuntimeException(); +} + +sal_Int16 SAL_CALL +CharacterClassificationImpl::getScript( const OUString& Text, sal_Int32 nPos ) + throw(RuntimeException) +{ + if (xUCI.is()) + return xUCI->getScript(Text, nPos); + throw RuntimeException(); +} + +sal_Int32 SAL_CALL +CharacterClassificationImpl::getCharacterType( const OUString& Text, sal_Int32 nPos, + const Locale& rLocale ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->getCharacterType(Text, nPos, rLocale); +} + +sal_Int32 SAL_CALL +CharacterClassificationImpl::getStringType( const OUString& Text, sal_Int32 nPos, + sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->getStringType(Text, nPos, nCount, rLocale); +} + +ParseResult SAL_CALL CharacterClassificationImpl::parseAnyToken( + const OUString& Text, sal_Int32 nPos, const Locale& rLocale, + sal_Int32 startCharTokenType, const OUString& userDefinedCharactersStart, + sal_Int32 contCharTokenType, const OUString& userDefinedCharactersCont ) + throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->parseAnyToken(Text, nPos, rLocale, + startCharTokenType,userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont); +} + + +ParseResult SAL_CALL CharacterClassificationImpl::parsePredefinedToken( + sal_Int32 nTokenType, const OUString& Text, sal_Int32 nPos, + const Locale& rLocale, sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) throw(RuntimeException) +{ + return getLocaleSpecificCharacterClassification(rLocale)->parsePredefinedToken( + nTokenType, Text, nPos, rLocale, startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont); +} + +sal_Bool SAL_CALL CharacterClassificationImpl::createLocaleSpecificCharacterClassification(const OUString& serviceName, const Locale& rLocale) +{ + // to share service between same Language but different Country code, like zh_CN and zh_SG + for (cachedItem = (lookupTableItem*)lookupTable.First(); + cachedItem; cachedItem = (lookupTableItem*)lookupTable.Next()) { + if (serviceName == cachedItem->aName) { + lookupTable.Insert( cachedItem = new lookupTableItem(rLocale, serviceName, cachedItem->xCI) ); + return sal_True; + } + } + + Reference < XInterface > xI = xMSF->createInstance( + OUString::createFromAscii("com.sun.star.i18n.CharacterClassification_") + serviceName); + + Reference < XCharacterClassification > xCI; + if ( xI.is() ) { + xI->queryInterface(::getCppuType((const Reference< XCharacterClassification>*)0) ) >>= xCI; + if (xCI.is()) { + lookupTable.Insert( cachedItem = new lookupTableItem(rLocale, serviceName, xCI) ); + return sal_True; + } + } + return sal_False; +} + +Reference < XCharacterClassification > SAL_CALL +CharacterClassificationImpl::getLocaleSpecificCharacterClassification(const Locale& rLocale) + throw(RuntimeException) +{ + // reuse instance if locale didn't change + if (cachedItem && cachedItem->equals(rLocale)) + return cachedItem->xCI; + else if (xMSF.is()) { + for (cachedItem = (lookupTableItem*)lookupTable.First(); + cachedItem; cachedItem = (lookupTableItem*)lookupTable.Next()) { + if (cachedItem->equals(rLocale)) + return cachedItem->xCI; + } + + static sal_Unicode under = (sal_Unicode)'_'; + static OUString tw(OUString::createFromAscii("TW")); + sal_Int32 l = rLocale.Language.getLength(); + sal_Int32 c = rLocale.Country.getLength(); + sal_Int32 v = rLocale.Variant.getLength(); + OUStringBuffer aBuf(l+c+v+3); + + // load service with name <base>_<lang>_<country>_<varian> + if ((l > 0 && c > 0 && v > 0 && + createLocaleSpecificCharacterClassification(aBuf.append(rLocale.Language).append(under).append( + rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear(), rLocale)) || + // load service with name <base>_<lang>_<country> + (l > 0 && c > 0 && + createLocaleSpecificCharacterClassification(aBuf.append(rLocale.Language).append(under).append( + rLocale.Country).makeStringAndClear(), rLocale)) || + (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 && + (rLocale.Country.compareToAscii("HK") == 0 || + rLocale.Country.compareToAscii("MO") == 0) && + // if the country code is HK or MO, one more step to try TW. + createLocaleSpecificCharacterClassification(aBuf.append(rLocale.Language).append(under).append( + tw).makeStringAndClear(), rLocale)) || + (l > 0 && + // load service with name <base>_<lang> + createLocaleSpecificCharacterClassification(rLocale.Language, rLocale))) { + return cachedItem->xCI; + } else if (xUCI.is()) { + lookupTable.Insert( cachedItem = new lookupTableItem(rLocale, OUString::createFromAscii("Unicode"), xUCI) ); + return cachedItem->xCI; + } + } + throw RuntimeException(); +} + +const sal_Char cClass[] = "com.sun.star.i18n.CharacterClassification"; + +OUString SAL_CALL +CharacterClassificationImpl::getImplementationName(void) + throw( RuntimeException ) +{ + return OUString::createFromAscii(cClass); +} + +sal_Bool SAL_CALL +CharacterClassificationImpl::supportsService(const rtl::OUString& rServiceName) + throw( RuntimeException ) +{ + return !rServiceName.compareToAscii(cClass); +} + +Sequence< OUString > SAL_CALL +CharacterClassificationImpl::getSupportedServiceNames(void) throw( RuntimeException ) +{ + Sequence< OUString > aRet(1); + aRet[0] = OUString::createFromAscii(cClass); + return aRet; +} + +} } } } diff --git a/i18npool/source/characterclassification/makefile.mk b/i18npool/source/characterclassification/makefile.mk new file mode 100644 index 000000000000..f5adcee50e60 --- /dev/null +++ b/i18npool/source/characterclassification/makefile.mk @@ -0,0 +1,67 @@ +#************************************************************************* +#* +#* $RCSfile: makefile.mk,v $ +#* +#* $Revision: 1.1 $ +#* +#* last change: $Author: bustamam $ $Date: 2002-03-26 06:31:13 $ +#* +#* The Contents of this file are made available subject to the terms of +#* either of the following licenses +#* +#* - Sun Industry Standards Source License Version 1.1 +#* +#* Sun Microsystems Inc., October, 2000 +#* +#* Sun Industry Standards Source License Version 1.1 +#* ================================================= +#* The contents of this file are subject to the Sun Industry Standards +#* Source License Version 1.1 (the "License"); You may not use this file +#* except in compliance with the License. You may obtain a copy of the +#* License at http://www.openoffice.org/license.html. +#* +#* Software provided under this License is provided on an "AS IS" basis, +#* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +#* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +#* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +#* See the License for the specific provisions governing your rights and +#* obligations concerning the Software. +#* +#* The Initial Developer of the Original Code is: Sun Microsystems, Inc. +#* +#* Copyright: 2000 by Sun Microsystems, Inc. +#* +#* All Rights Reserved. +#* +#* Contributor(s): _______________________________________ +#* +#* +#************************************************************************/ +PRJ=..$/.. + +PRJNAME=i18npool +TARGET=characterclassification + +ENABLE_EXCEPTIONS=TRUE + +# --- Settings ----------------------------------------------------- + +.INCLUDE : svpre.mk +.INCLUDE : settings.mk +.INCLUDE : sv.mk + +# --- Files -------------------------------------------------------- + +SLOFILES= \ + $(SLO)$/characterclassificationImpl.obj \ + $(SLO)$/cclass_unicode.obj \ + $(SLO)$/cclass_unicode_parser.obj \ + $(SLO)$/unicode.obj \ + $(SLO)$/scripttypedetector.obj + +# --- Targets ------------------------------------------------------ + +.INCLUDE : target.mk + + + diff --git a/i18npool/source/characterclassification/scripttypedetector.cxx b/i18npool/source/characterclassification/scripttypedetector.cxx new file mode 100644 index 000000000000..52f80008d62d --- /dev/null +++ b/i18npool/source/characterclassification/scripttypedetector.cxx @@ -0,0 +1,192 @@ +/************************************************************************* + * + * $RCSfile: scripttypedetector.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: bustamam $ $Date: 2002-03-26 06:31:13 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#include <drafts/com/sun/star/i18n/CTLScriptType.hpp> +#include <drafts/com/sun/star/i18n/ScriptDirection.hpp> +#include <com/sun/star/i18n/UnicodeScript.hpp> +#include <scripttypedetector.hxx> +#include <unicode.hxx> + +// ---------------------------------------------------- +// class ScriptTypeDetector +// ----------------------------------------------------; + +using namespace drafts::com::sun::star::i18n; +using namespace com::sun::star::i18n; + +ScriptTypeDetector::ScriptTypeDetector() +{ +} + +ScriptTypeDetector::~ScriptTypeDetector() +{ +} + +static sal_Int16 scriptDirection[] = { + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT = 0, + ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT = 1, + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER = 2, + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_SEPARATOR = 3, + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_TERMINATOR = 4, + ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_ARABIC_NUMBER = 5, + ScriptDirection::NEUTRAL, // DirectionProperty_COMMON_NUMBER_SEPARATOR = 6, + ScriptDirection::NEUTRAL, // DirectionProperty_BLOCK_SEPARATOR = 7, + ScriptDirection::NEUTRAL, // DirectionProperty_SEGMENT_SEPARATOR = 8, + ScriptDirection::NEUTRAL, // DirectionProperty_WHITE_SPACE_NEUTRAL = 9, + ScriptDirection::NEUTRAL, // DirectionProperty_OTHER_NEUTRAL = 10, + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_EMBEDDING = 11, + ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_OVERRIDE = 12, + ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_ARABIC = 13, + ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_EMBEDDING = 14, + ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_OVERRIDE = 15, + ScriptDirection::NEUTRAL, // DirectionProperty_POP_DIRECTIONAL_FORMAT = 16, + ScriptDirection::NEUTRAL, // DirectionProperty_DIR_NON_SPACING_MARK = 17, + ScriptDirection::NEUTRAL, // DirectionProperty_BOUNDARY_NEUTRAL = 18, +}; + +sal_Int16 SAL_CALL +ScriptTypeDetector::getScriptDirection( const ::rtl::OUString& Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection ) throw (::com::sun::star::uno::RuntimeException) +{ + sal_Int16 dir = scriptDirection[unicode::getUnicodeDirection(Text[nPos])]; + return (dir == ScriptDirection::NEUTRAL) ? defaultScriptDirection : dir; +} + +// return value '-1' means either the direction on nPos is not same as scriptDirection or nPos is out of range. +sal_Int32 SAL_CALL +ScriptTypeDetector::beginOfScriptDirection( const ::rtl::OUString& Text, sal_Int32 nPos, sal_Int16 scriptDirection ) throw (::com::sun::star::uno::RuntimeException) +{ + sal_Int32 cPos = nPos; + + if (cPos < Text.getLength()) { + for (; cPos >= 0; cPos--) { + if (scriptDirection != getScriptDirection(Text, cPos, scriptDirection)) + break; + } + return cPos == nPos ? -1 : cPos + 1; + } +} + +sal_Int32 SAL_CALL +ScriptTypeDetector::endOfScriptDirection( const ::rtl::OUString& Text, sal_Int32 nPos, sal_Int16 scriptDirection ) throw (::com::sun::star::uno::RuntimeException) +{ + sal_Int32 cPos = nPos; + sal_Int32 len = Text.getLength(); + + if (cPos >=0) { + for (; cPos < len; cPos++) { + if (scriptDirection != getScriptDirection(Text, cPos, scriptDirection)) + break; + } + } + return cPos == nPos ? -1 : cPos; +} + +sal_Int16 SAL_CALL +ScriptTypeDetector::getCTLScriptType( const ::rtl::OUString& Text, sal_Int32 nPos ) throw (::com::sun::star::uno::RuntimeException) +{ + static ScriptTypeList typeList[] = { + { UnicodeScript_kHebrew, CTLScriptType::CTL_HEBREW }, // 10 + { UnicodeScript_kArabic, CTLScriptType::CTL_ARABIC }, // 11 + { UnicodeScript_kDevanagari, CTLScriptType::CTL_INDIC }, // 14 + { UnicodeScript_kThai, CTLScriptType::CTL_THAI }, // 24 + { UnicodeScript_kScriptCount, CTLScriptType::CTL_UNKNOW } // 88 + }; + + return unicode::getUnicodeScriptType(Text[nPos], typeList, CTLScriptType::CTL_UNKNOW); +} + +// Begin of Script Type is inclusive. +sal_Int32 SAL_CALL +ScriptTypeDetector::beginOfCTLScriptType( const ::rtl::OUString& Text, sal_Int32 nPos ) throw (::com::sun::star::uno::RuntimeException) +{ + if (nPos < 0) + return 0; + else if (nPos >= Text.getLength()) + return Text.getLength(); + else { + sal_Int16 cType = getCTLScriptType(Text, nPos); + for (nPos--; nPos >= 0; nPos--) { + if (cType != getCTLScriptType(Text, nPos)) + break; + } + return nPos + 1; + } +} + +// End of the Script Type is exclusive, the return value pointing to the begin of next script type +sal_Int32 SAL_CALL +ScriptTypeDetector::endOfCTLScriptType( const ::rtl::OUString& Text, sal_Int32 nPos ) throw (::com::sun::star::uno::RuntimeException) +{ + if (nPos < 0) + return 0; + else if (nPos >= Text.getLength()) + return Text.getLength(); + else { + sal_Int16 cType = getCTLScriptType(Text, nPos); + sal_Int32 len = Text.getLength(); + for (nPos++; nPos < len; nPos++) { + if (cType != getCTLScriptType(Text, nPos)) + break; + } + return nPos; + } +} + +const sal_Char sDetector[] = "draft.com.sun.star.i18n.ScriptTypeDetector"; + +rtl::OUString SAL_CALL +ScriptTypeDetector::getImplementationName() throw( ::com::sun::star::uno::RuntimeException ) +{ + return ::rtl::OUString::createFromAscii(sDetector); +} + +sal_Bool SAL_CALL +ScriptTypeDetector::supportsService(const rtl::OUString& ServiceName) throw( ::com::sun::star::uno::RuntimeException ) +{ + return !ServiceName.compareToAscii(sDetector); +} + +::com::sun::star::uno::Sequence< rtl::OUString > SAL_CALL +ScriptTypeDetector::getSupportedServiceNames() throw( ::com::sun::star::uno::RuntimeException ) +{ + ::com::sun::star::uno::Sequence< ::rtl::OUString > aRet(1); + aRet[0] = ::rtl::OUString::createFromAscii(sDetector); + return aRet; +} + |