diff options
author | Baole Fang <baole.fang@gmail.com> | 2023-06-23 11:47:54 -0400 |
---|---|---|
committer | خالد حسني <khaled@libreoffice.org> | 2023-06-23 22:20:58 +0200 |
commit | caab94a3e0387bde05538cff91ff13446f330785 (patch) | |
tree | 350f091ac061e14916bc86b845e4d5dbc4473e77 /unotools | |
parent | 9bb6f3083b8b2a763417ca1cdce21865a41ddd08 (diff) |
tdf#142437: Fix word boundary detection in autocorrect
Marks (combining and spacing) were incorrectly treated as word separators,
because isLetterNumeric() matches only letters and numbers.
The new isBase() matches any character that has the BASE_FORM character class, which covers letters, numbers, and marks.
Change-Id: I27ec2f7fb8d360791a280d10aba9b6d16e7cfb71
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153509
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
Diffstat (limited to 'unotools')
-rw-r--r-- | unotools/source/i18n/charclass.cxx | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index be3a9f4f0ee0..423f9530f2cb 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -253,6 +253,23 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
     return false;
 }
 
+bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const // Tests the single UTF-16 unit rStr[nPos]: true if it is ASCII alphanumeric, or if its character type shares a bit with nCharClassBaseType.
+{ // Per the commit message, the base class is meant to cover letters, numbers and marks, so marks stop acting as word separators.
+    sal_Unicode c = rStr[nPos]; // indexed directly; this function does no range check on nPos
+    if ( c < 128 ) // ASCII range is decided locally; xCC is not consulted
+        return rtl::isAsciiAlphanumeric( c );
+
+    try
+    {
+        return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0; // any overlap with the base-type mask counts as a match
+    }
+    catch ( const Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); // exception is reported, not rethrown; control falls through to the return below
+    }
+    return false; // reached only after an Exception was caught above
+}
+
 bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
 {
     sal_Unicode c = rStr[nPos]; |