summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/unotools/charclass.hxx 25
-rw-r--r-- linguistic/source/misc.cxx 4
-rw-r--r-- unotools/source/i18n/charclass.cxx 83
3 files changed, 80 insertions, 32 deletions
diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index c76c7ae35f0b..7cb35ba2a594 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -31,28 +31,29 @@
namespace com::sun::star::uno { class XComponentContext; }
namespace com::sun::star::i18n { class XCharacterClassification; }
-const sal_Int32 nCharClassAlphaType =
+inline constexpr sal_Int32 nCharClassAlphaType =
css::i18n::KCharacterType::UPPER |
css::i18n::KCharacterType::LOWER |
css::i18n::KCharacterType::TITLE_CASE;
-const sal_Int32 nCharClassAlphaTypeMask =
+inline constexpr sal_Int32 nCharClassAlphaTypeMask =
nCharClassAlphaType |
+ css::i18n::KCharacterType::LETTER | // Alpha is also always a LETTER
css::i18n::KCharacterType::PRINTABLE |
css::i18n::KCharacterType::BASE_FORM;
-const sal_Int32 nCharClassLetterType =
+inline constexpr sal_Int32 nCharClassLetterType =
nCharClassAlphaType |
css::i18n::KCharacterType::LETTER;
-const sal_Int32 nCharClassLetterTypeMask =
+inline constexpr sal_Int32 nCharClassLetterTypeMask =
nCharClassAlphaTypeMask |
css::i18n::KCharacterType::LETTER;
-const sal_Int32 nCharClassNumericType =
+inline constexpr sal_Int32 nCharClassNumericType =
css::i18n::KCharacterType::DIGIT;
-const sal_Int32 nCharClassNumericTypeMask =
+inline constexpr sal_Int32 nCharClassNumericTypeMask =
nCharClassNumericType |
css::i18n::KCharacterType::PRINTABLE |
css::i18n::KCharacterType::BASE_FORM;
@@ -86,14 +87,14 @@ public:
/// isalpha() on ascii values of entire string
static bool isAsciiAlpha( std::u16string_view rStr );
- /// whether type is pure numeric or not, e.g. return of getStringType
+ /// whether type is pure numeric or not, e.g. return of getCharacterType()
static bool isNumericType( sal_Int32 nType )
{
return ((nType & nCharClassNumericType) != 0) &&
((nType & ~nCharClassNumericTypeMask) == 0);
}
- /// whether type is pure alphanumeric or not, e.g. return of getStringType
+ /// whether type is pure alphanumeric or not, e.g. return of getCharacterType()
static bool isAlphaNumericType( sal_Int32 nType )
{
return ((nType & (nCharClassAlphaType |
@@ -102,14 +103,14 @@ public:
nCharClassNumericTypeMask)) == 0);
}
- /// whether type is pure letter or not, e.g. return of getStringType
+ /// whether type is pure letter or not, e.g. return of getCharacterType()
static bool isLetterType( sal_Int32 nType )
{
return ((nType & nCharClassLetterType) != 0) &&
((nType & ~nCharClassLetterTypeMask) == 0);
}
- /// whether type is pure letternumeric or not, e.g. return of getStringType
+ /// whether type is pure letternumeric or not, e.g. return of getCharacterType()
static bool isLetterNumericType( sal_Int32 nType )
{
return ((nType & (nCharClassLetterType |
@@ -141,7 +142,6 @@ public:
css::i18n::DirectionProperty getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const;
css::i18n::UnicodeScript getScript( const OUString& rStr, sal_Int32 nPos ) const;
sal_Int32 getCharacterType( const OUString& rStr, sal_Int32 nPos ) const;
- sal_Int32 getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;
css::i18n::ParseResult parseAnyToken(
const OUString& rStr,
@@ -167,10 +167,13 @@ public:
bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
+ bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetter( const OUString& rStr ) const;
bool isNumeric( const OUString& rStr ) const;
bool isLetterNumeric( const OUString& rStr ) const;
+ bool isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;
+
private:
const css::lang::Locale & getMyLocale() const;
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index dc4c17ce8430..c315ad1270ee 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -558,9 +558,7 @@ uno::Reference< XHyphenatedWord > RebuildHyphensAndControlChars(
bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, LanguageType nLanguage )
{
CharClass aCC(( LanguageTag( nLanguage ) ));
- sal_Int32 nFlags = aCC.getStringType( rText, nPos, nLen );
- return (nFlags & KCharacterType::UPPER)
- && !(nFlags & KCharacterType::LOWER);
+ return aCC.isUpper( rText, nPos, nLen );
}
CapType capitalType(const OUString& aTerm, CharClass const * pCC)
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index ace153d03a74..4573687c4d29 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -134,7 +134,14 @@ bool CharClass::isLetter( const OUString& rStr ) const
{
try
{
- return isLetterType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
+ sal_Int32 nPos = 0;
+ while (nPos < rStr.getLength())
+ {
+ if (!isLetter( rStr, nPos))
+ return false;
+ rStr.iterateCodePoints( &nPos);
+ }
+ return true;
}
catch ( const Exception& )
{
@@ -165,7 +172,14 @@ bool CharClass::isNumeric( const OUString& rStr ) const
{
try
{
- return isNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
+ sal_Int32 nPos = 0;
+ while (nPos < rStr.getLength())
+ {
+ if (!isDigit( rStr, nPos))
+ return false;
+ rStr.iterateCodePoints( &nPos);
+ }
+ return true;
}
catch ( const Exception& )
{
@@ -183,7 +197,7 @@ bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
- (nCharClassAlphaType | KCharacterType::DIGIT)) != 0;
+ (nCharClassAlphaType | nCharClassNumericType)) != 0;
}
catch ( const Exception& )
{
@@ -201,7 +215,7 @@ bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
- (nCharClassLetterType | KCharacterType::DIGIT)) != 0;
+ (nCharClassLetterType | nCharClassNumericType)) != 0;
}
catch ( const Exception& )
{
@@ -214,7 +228,53 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
{
try
{
- return isLetterNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
+ sal_Int32 nPos = 0;
+ while (nPos < rStr.getLength())
+ {
+ if (!isLetterNumeric( rStr, nPos))
+ return false;
+ rStr.iterateCodePoints( &nPos);
+ }
+ return true;
+ }
+ catch ( const Exception& )
+ {
+ TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+ }
+ return false;
+}
+
+bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const // true if the character at nPos is upper case; false if the lookup throws
+{
+ sal_Unicode c = rStr[nPos]; // reads one sal_Unicode unit at nPos
+ if ( c < 128 ) // ASCII range: answered locally, xCC is not consulted
+ return rtl::isAsciiUpperCase(c);
+
+ try
+ {
+ return (xCC->getCharacterType( rStr, nPos, getMyLocale()) & // non-ASCII: ask xCC for the character type in this object's locale
+ KCharacterType::UPPER) != 0; // only the UPPER bit is tested
+ }
+ catch ( const Exception& )
+ {
+ TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); // warn about the exception, then fall through to the default result
+ }
+ return false; // reached only when the lookup above threw
+}
+
+bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
+{
+ try
+ {
+ assert(nPos >= 0 && nCount >= 0);
+ sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength());
+ while (nPos < nLen)
+ {
+ if (!isUpper( rStr, nPos))
+ return false;
+ rStr.iterateCodePoints( &nPos);
+ }
+ return true;
}
catch ( const Exception& )
{
@@ -314,19 +374,6 @@ sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) co
return 0;
}
-sal_Int32 CharClass::getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
-{
- try
- {
- return xCC->getStringType( rStr, nPos, nCount, getMyLocale() );
- }
- catch ( const Exception& )
- {
- TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
- }
- return 0;
-}
-
css::i18n::ParseResult CharClass::parseAnyToken(
const OUString& rStr,
sal_Int32 nPos,