summaryrefslogtreecommitdiff
path: root/unotools
diff options
context:
space:
mode:
author Baole Fang <baole.fang@gmail.com> 2023-06-23 11:47:54 -0400
committer خالد حسني <khaled@libreoffice.org> 2023-06-23 22:20:58 +0200
commit caab94a3e0387bde05538cff91ff13446f330785 (patch)
tree 350f091ac061e14916bc86b845e4d5dbc4473e77 /unotools
parent 9bb6f3083b8b2a763417ca1cdce21865a41ddd08 (diff)
tdf#142437: Fix word boundary detection in autocorrect
Marks (combining and spacing) were incorrectly treated as word separators, because isLetterNumeric() matches only letters and numbers. The new isBase() matches any character with the BASE_FORM character class, which covers letters, numbers, and marks.

Change-Id: I27ec2f7fb8d360791a280d10aba9b6d16e7cfb71
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153509
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
Diffstat (limited to 'unotools')
-rw-r--r-- unotools/source/i18n/charclass.cxx 17
1 files changed, 17 insertions, 0 deletions
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index be3a9f4f0ee0..423f9530f2cb 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -253,6 +253,23 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
return false;
}
+// Reports whether the character at position nPos of rStr is a base character.
+// Code units below 128 are answered directly by rtl::isAsciiAlphanumeric().
+// Every other code unit is classified through xCC->getCharacterType(), using
+// the locale returned by getMyLocale(), and the result is tested against
+// nCharClassBaseType. If that lookup throws an Exception, the exception is
+// logged as a warning in the "unotools.i18n" area and false is returned.
+bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
+{
+    const sal_Unicode cUnit = rStr[nPos];
+    bool bBase = false;
+
+    if ( cUnit >= 128 )
+    {
+        // Non-ASCII: defer to the character classification service.
+        try
+        {
+            const auto nType = xCC->getCharacterType( rStr, nPos, getMyLocale() );
+            bBase = ( nType & nCharClassBaseType ) != 0;
+        }
+        catch ( const Exception& )
+        {
+            // Best effort: log the failure and fall through with bBase == false.
+            TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+        }
+    }
+    else
+    {
+        // ASCII shortcut: no service call needed.
+        bBase = rtl::isAsciiAlphanumeric( cUnit );
+    }
+
+    return bBase;
+}
+
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
{
sal_Unicode c = rStr[nPos];