summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLászló Németh <nemeth@numbertext.org>2024-06-27 10:06:03 +0200
committerChristian Lohmaier <lohmaier+LibreOffice@googlemail.com>2024-07-08 17:09:02 +0200
commitfc2bba731459b5ba2ed88fc8212f90b6ae08c15a (patch)
treed46581b263ef538f383075eda55369e9b84fbb71
parent0c3b1fec87b1d1f32832e2265918f68f93e2aca7 (diff)
tdf#161737 i18npool: fix fake spelling alarms with NNBSP
Fix word breaking by excluding the narrow no-break space (NNBSP) at the end of words for spell checking. This was a problem e.g. for French, where an automatically or manually inserted narrow no-break space is used to get correct typography before exclamation and question marks, and also after and before guillemets (if the OpenType/Graphite font doesn't have this feature). Regression from commit 44699b3de37f07090ac6fee1cd97aa76036e9700 "tdf#49885 BreakIterator rule upgrades". Note: this also fixes the problem where digits separated by an NNBSP thousand separator weren't handled by spell checking, raising false spelling alarms when "Check words with numbers" was enabled in Tools->Options->Languages and Locales->Writing Aids. (TODO: in the case of thousand separators, remove NNBSP in the linguistic module or in the spell checking dictionaries, to allow checking numbers with thousand separators and with the correct suffix.) Change-Id: I36e10add7e0ba840f207a375ccc8668dbfef9572 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169618 Tested-by: Jenkins Reviewed-by: László Németh <nemeth@numbertext.org> (cherry picked from commit 6e002da1615b52cda4e9331e87878458b1fe9677) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169593 Reviewed-by: Christian Lohmaier <lohmaier+LibreOffice@googlemail.com>
-rw-r--r--i18npool/qa/cppunit/test_breakiterator.cxx30
-rw-r--r--i18npool/source/breakiterator/data/dict_word.txt2
2 files changed, 31 insertions, 1 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index e790c17e1155..6fbde026f565 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -992,6 +992,36 @@ void TestBreakIterator::testWordBoundaries()
CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.startPos);
CPPUNIT_ASSERT_EQUAL(sal_Int32(11), aBounds.endPos);
}
+
+ // tdf#161737: narrow no-break space at the end of words resulted in false spelling alarms
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ OUString aTest(u"L’espace fine insécable\u202F!"_ustr);
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 14, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.startPos);
+ // This was 24 (word + NNBSP)
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(23), aBounds.endPos);
+ }
+
+ // tdf#161737: narrow no-break space between digits resulted in false spelling alarms.
+ // As a quick fix, treat NNBSP as a word-part character only for editing, and not for spell checking.
+ // TODO: remove NNBSP in the linguistic module or in the spell checking dictionaries, to allow
+ // checking numbers with thousand separators and with the correct suffix
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ OUString aTest(u"1\u202F000\u202F000"_ustr);
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 2, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ // This was 0 (word + NNBSP)
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos);
+ // This was 8 (word + NNBSP)
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+ }
}
void TestBreakIterator::testSentenceBoundaries()
diff --git a/i18npool/source/breakiterator/data/dict_word.txt b/i18npool/source/breakiterator/data/dict_word.txt
index f804b0eec214..deeec7dd659e 100644
--- a/i18npool/source/breakiterator/data/dict_word.txt
+++ b/i18npool/source/breakiterator/data/dict_word.txt
@@ -54,7 +54,7 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
-$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
+$ExtendNumLet = [\p{Word_Break = ExtendNumLet}-[:name = NARROW NO-BREAK SPACE:]];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];