diff options
-rw-r--r-- | include/unotools/charclass.hxx | 25 | ||||
-rw-r--r-- | linguistic/source/misc.cxx | 4 | ||||
-rw-r--r-- | unotools/source/i18n/charclass.cxx | 83 |
3 files changed, 80 insertions, 32 deletions
diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx index c76c7ae35f0b..7cb35ba2a594 100644 --- a/include/unotools/charclass.hxx +++ b/include/unotools/charclass.hxx @@ -31,28 +31,29 @@ namespace com::sun::star::uno { class XComponentContext; } namespace com::sun::star::i18n { class XCharacterClassification; } -const sal_Int32 nCharClassAlphaType = +inline constexpr sal_Int32 nCharClassAlphaType = css::i18n::KCharacterType::UPPER | css::i18n::KCharacterType::LOWER | css::i18n::KCharacterType::TITLE_CASE; -const sal_Int32 nCharClassAlphaTypeMask = +inline constexpr sal_Int32 nCharClassAlphaTypeMask = nCharClassAlphaType | + css::i18n::KCharacterType::LETTER | // Alpha is also always a LETTER css::i18n::KCharacterType::PRINTABLE | css::i18n::KCharacterType::BASE_FORM; -const sal_Int32 nCharClassLetterType = +inline constexpr sal_Int32 nCharClassLetterType = nCharClassAlphaType | css::i18n::KCharacterType::LETTER; -const sal_Int32 nCharClassLetterTypeMask = +inline constexpr sal_Int32 nCharClassLetterTypeMask = nCharClassAlphaTypeMask | css::i18n::KCharacterType::LETTER; -const sal_Int32 nCharClassNumericType = +inline constexpr sal_Int32 nCharClassNumericType = css::i18n::KCharacterType::DIGIT; -const sal_Int32 nCharClassNumericTypeMask = +inline constexpr sal_Int32 nCharClassNumericTypeMask = nCharClassNumericType | css::i18n::KCharacterType::PRINTABLE | css::i18n::KCharacterType::BASE_FORM; @@ -86,14 +87,14 @@ public: /// isalpha() on ascii values of entire string static bool isAsciiAlpha( std::u16string_view rStr ); - /// whether type is pure numeric or not, e.g. return of getStringType + /// whether type is pure numeric or not, e.g. return of getCharacterType() static bool isNumericType( sal_Int32 nType ) { return ((nType & nCharClassNumericType) != 0) && ((nType & ~nCharClassNumericTypeMask) == 0); } - /// whether type is pure alphanumeric or not, e.g. return of getStringType + /// whether type is pure alphanumeric or not, e.g. return of getCharacterType() static bool isAlphaNumericType( sal_Int32 nType ) { return ((nType & (nCharClassAlphaType | @@ -102,14 +103,14 @@ public: nCharClassNumericTypeMask)) == 0); } - /// whether type is pure letter or not, e.g. return of getStringType + /// whether type is pure letter or not, e.g. return of getCharacterType() static bool isLetterType( sal_Int32 nType ) { return ((nType & nCharClassLetterType) != 0) && ((nType & ~nCharClassLetterTypeMask) == 0); } - /// whether type is pure letternumeric or not, e.g. return of getStringType + /// whether type is pure letternumeric or not, e.g. return of getCharacterType() static bool isLetterNumericType( sal_Int32 nType ) { return ((nType & (nCharClassLetterType | @@ -141,7 +142,6 @@ public: css::i18n::DirectionProperty getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const; css::i18n::UnicodeScript getScript( const OUString& rStr, sal_Int32 nPos ) const; sal_Int32 getCharacterType( const OUString& rStr, sal_Int32 nPos ) const; - sal_Int32 getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const; css::i18n::ParseResult parseAnyToken( const OUString& rStr, @@ -167,10 +167,13 @@ public: bool isDigit( const OUString& rStr, sal_Int32 nPos ) const; bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const; bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const; + bool isUpper( const OUString& rStr, sal_Int32 nPos ) const; bool isLetter( const OUString& rStr ) const; bool isNumeric( const OUString& rStr ) const; bool isLetterNumeric( const OUString& rStr ) const; + bool isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const; + private: const css::lang::Locale & getMyLocale() const; diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx index dc4c17ce8430..c315ad1270ee 100644 --- a/linguistic/source/misc.cxx +++ b/linguistic/source/misc.cxx @@ -558,9 +558,7 @@ uno::Reference< XHyphenatedWord > RebuildHyphensAndControlChars( bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, LanguageType nLanguage ) { CharClass aCC(( LanguageTag( nLanguage ) )); - sal_Int32 nFlags = aCC.getStringType( rText, nPos, nLen ); - return (nFlags & KCharacterType::UPPER) - && !(nFlags & KCharacterType::LOWER); + return aCC.isUpper( rText, nPos, nLen ); } CapType capitalType(const OUString& aTerm, CharClass const * pCC) diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx index ace153d03a74..4573687c4d29 100644 --- a/unotools/source/i18n/charclass.cxx +++ b/unotools/source/i18n/charclass.cxx @@ -134,7 +134,14 @@ bool CharClass::isLetter( const OUString& rStr ) const { try { - return isLetterType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) ); + sal_Int32 nPos = 0; + while (nPos < rStr.getLength()) + { + if (!isLetter( rStr, nPos)) + return false; + rStr.iterateCodePoints( &nPos); + } + return true; } catch ( const Exception& ) { @@ -165,7 +172,14 @@ bool CharClass::isNumeric( const OUString& rStr ) const { try { - return isNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) ); + sal_Int32 nPos = 0; + while (nPos < rStr.getLength()) + { + if (!isDigit( rStr, nPos)) + return false; + rStr.iterateCodePoints( &nPos); + } + return true; } catch ( const Exception& ) { @@ -183,7 +197,7 @@ bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const try { return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & - (nCharClassAlphaType | KCharacterType::DIGIT)) != 0; + (nCharClassAlphaType | nCharClassNumericType)) != 0; } catch ( const Exception& ) { @@ -201,7 +215,7 @@ bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const try { return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & - (nCharClassLetterType | KCharacterType::DIGIT)) != 0; + (nCharClassLetterType | nCharClassNumericType)) != 0; } catch ( const Exception& ) { @@ -214,7 +228,53 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const { try { - return isLetterNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) ); + sal_Int32 nPos = 0; + while (nPos < rStr.getLength()) + { + if (!isLetterNumeric( rStr, nPos)) + return false; + rStr.iterateCodePoints( &nPos); + } + return true; + } + catch ( const Exception& ) + { + TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); + } + return false; +} + +bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const +{ + sal_Unicode c = rStr[nPos]; + if ( c < 128 ) + return rtl::isAsciiUpperCase(c); + + try + { + return (xCC->getCharacterType( rStr, nPos, getMyLocale()) & + KCharacterType::UPPER) != 0; + } + catch ( const Exception& ) + { + TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); + } + return false; +} + +bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const +{ + try + { + assert(nPos >= 0 && nCount >= 0); + sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength()); + while (nPos < nLen) + { + if (!isUpper( rStr, nPos)) + return false; + rStr.iterateCodePoints( &nPos); + } + return true; } catch ( const Exception& ) { @@ -314,19 +374,6 @@ sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) co return 0; } -sal_Int32 CharClass::getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const -{ - try - { - return xCC->getStringType( rStr, nPos, nCount, getMyLocale() ); - } - catch ( const Exception& ) - { - TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); - } - return 0; -} - css::i18n::ParseResult CharClass::parseAnyToken( const OUString& rStr, sal_Int32 nPos, |