diff options
author | László Németh <nemeth@numbertext.org> | 2024-06-27 10:06:03 +0200 |
---|---|---|
committer | Christian Lohmaier <lohmaier+LibreOffice@googlemail.com> | 2024-07-08 17:09:02 +0200 |
commit | fc2bba731459b5ba2ed88fc8212f90b6ae08c15a (patch) | |
tree | d46581b263ef538f383075eda55369e9b84fbb71 | |
parent | 0c3b1fec87b1d1f32832e2265918f68f93e2aca7 (diff) |
tdf#161737 i18npool: fix fake spelling alarms with NNBSP
Fix word break by excluding narrow no-break space at the
end of the words for spell checking.
This was a problem e.g. for French, where an (automatically? or
manually) inserted narrow no-break space is used to get correct
typography before exclamation and question marks, and also after and
before guillemets (if the OpenType/Graphite font doesn't have this
feature).
Regression from commit 44699b3de37f07090ac6fee1cd97aa76036e9700
"tdf#49885 BreakIterator rule upgrades".
Note: this also fixes the problem that digits separated by an
NNBSP thousand separator weren't handled by spell checking,
raising false spelling alarms when "Check words with numbers"
was enabled in Tools->Options->Languages and Locales->Writing Aids.
(TODO: in the case of thousand separators, have the linguistic
module or the spell checking dictionaries remove NBSP, to allow
checking numbers with thousand separators and with correct suffix.)
Change-Id: I36e10add7e0ba840f207a375ccc8668dbfef9572
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169618
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
(cherry picked from commit 6e002da1615b52cda4e9331e87878458b1fe9677)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169593
Reviewed-by: Christian Lohmaier <lohmaier+LibreOffice@googlemail.com>
-rw-r--r-- | i18npool/qa/cppunit/test_breakiterator.cxx | 30 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word.txt | 2 |
2 files changed, 31 insertions, 1 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index e790c17e1155..6fbde026f565 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -992,6 +992,36 @@ void TestBreakIterator::testWordBoundaries() CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.startPos); CPPUNIT_ASSERT_EQUAL(sal_Int32(11), aBounds.endPos); } + + // tdf#161737: narrow no-break space at the end of words resulted spelling mistakes + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + OUString aTest(u"L’espace fine insécable\u202F!"_ustr); + aBounds + = m_xBreak->getWordBoundary(aTest, 14, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.startPos); + // This was 24 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(23), aBounds.endPos); + } + + // tdf#161737: narrow no-break space between digits resulted spelling mistakes + // as a quick fix, limit NBSP as word-part character only for editing, and not for spell checking + // TODO: remove NBSP by the linguistic module or by the spell checking dictionaries to allow + // to check numbers with thousand separators and with correct suffix + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + OUString aTest(u"1\u202F000\u202F000"_ustr); + aBounds + = m_xBreak->getWordBoundary(aTest, 2, aLocale, i18n::WordType::DICTIONARY_WORD, false); + // This was 0 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos); + // This was 8 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + } } void TestBreakIterator::testSentenceBoundaries() diff --git a/i18npool/source/breakiterator/data/dict_word.txt b/i18npool/source/breakiterator/data/dict_word.txt index f804b0eec214..deeec7dd659e 100644 --- a/i18npool/source/breakiterator/data/dict_word.txt +++ b/i18npool/source/breakiterator/data/dict_word.txt @@ -54,7 +54,7 @@ $Double_Quote = [\p{Word_Break = Double_Quote}]; $MidNumLet = 
[\p{Word_Break = MidNumLet}]; $MidNum = [\p{Word_Break = MidNum}]; $Numeric = [\p{Word_Break = Numeric}]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$ExtendNumLet = [\p{Word_Break = ExtendNumLet}-[:name = NARROW NO-BREAK SPACE:]]; $WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [\p{Extended_Pictographic}]; |