/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>
#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>


module com { module sun { module star { module i18n {


/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\";
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent group separator>
EXPONENT=(E|e)[SIGN]1*ASC_DIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASC_DIGIT *(GRPSEP 1*ASC_DIGIT))[DECSEP]1*ASC_DIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

*/


/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    /** Convert lower case alpha to upper case alpha, starting at
        position nPos for nCount code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale to be used for the conversion.
     */
    string  toUpper( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    /** Convert upper case alpha to lower case alpha, starting at
        position nPos for nCount code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale to be used for the conversion.
     */
    string  toLower( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    /** Convert to title case, starting at
        position nPos for nCount code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale to be used for the conversion.
     */
    string  toTitle( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    /** Get UnicodeType of character at position nPos.

        @param aText
            Text containing the character to examine.

        @param nPos
            Position of the character in aText.
     */
    short   getType( [in] string aText,
                     [in] long nPos );

    /** Get DirectionProperty of character at position nPos.

        @param aText
            Text containing the character to examine.

        @param nPos
            Position of the character in aText.
     */
    short   getCharacterDirection( [in] string aText,
                                   [in] long nPos );

    /** Get UnicodeScript of character at position nPos.

        @param aText
            Text containing the character to examine.

        @param nPos
            Position of the character in aText.
     */
    short   getScript( [in] string aText,
                       [in] long nPos );

    /** Get KCharacterType of character at position nPos.

        @param aText
            Text containing the character to examine.

        @param nPos
            Position of the character in aText.

        @param aLocale
            Locale to be used for the classification.
     */
    long    getCharacterType( [in] string aText,
                              [in] long nPos,
                              [in] com::sun::star::lang::Locale aLocale );

    /** Get accumulated KCharacterTypes of string starting
        at position nPos of length nCount code points.

        @param aText
            Text containing the characters to examine.

        @param nPos
            Position in aText where the examination starts.

        @param nCount
            Number of code points to examine.

        @param aLocale
            Locale to be used for the classification.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long    getStringType( [in] string aText,
                           [in] long nPos,
                           [in] long nCount,
                           [in] com::sun::star::lang::Locale aLocale );

    /**
        Parse a string for a token starting at position nPos.

        A name or identifier must match the KParseTokens criteria
        passed in nStartCharFlags and nContCharFlags and may
        additionally contain characters of
        aUserDefinedCharactersStart and/or
        aUserDefinedCharactersCont.

        @returns
            A filled ParseResult structure. If no
            unambiguous token could be parsed,
            ParseResult::TokenType will be set to
            0 (zero), other fields will contain the
            values parsed so far.

            If a token may represent either a numeric value or a name
            according to the passed Start/Cont-Flags/Chars, both
            KParseType::ASC_NUMBER (or
            KParseType::UNI_NUMBER) and
            KParseType::IDENTNAME are set in
            ParseResult::TokenType.

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of KParseTokens constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of KParseTokens constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @code{.cpp}
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        OUString aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
        @endcode
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    /**
        Parse a string for a token of type nTokenType
        starting at position nPos.

        Other parameters are the same as in parseAnyToken().
        If the actual token does not match the passed
        nTokenType a ParseResult::TokenType
        set to 0 (zero) is returned.

        @param nTokenType
            One or more of the KParseType constants.

        @param aText
            See #parseAnyToken
        @param nPos
            See #parseAnyToken
        @param aLocale
            See #parseAnyToken
        @param nStartCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersStart
            See #parseAnyToken
        @param nContCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersCont
            See #parseAnyToken

        @code{.cpp}
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        OUString aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        OUString aContChars( " " );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it only is one
        // and no more else is following it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.getLength();
        @endcode
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

}; }; }; };

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */