summaryrefslogtreecommitdiff
path: root/i18npool/qa/cppunit
diff options
context:
space:
mode:
authorKhaled Hosny <khaled@aliftype.com>2022-08-17 10:45:24 +0200
committerخالد حسني <khaled@aliftype.com>2022-08-19 19:02:01 +0200
commit2a78fbf4e4a49f2b52aa1352aac41ee024d0cf72 (patch)
treeec3e33f3aaaa4fb7f5b929f137d76762e2fb449b /i18npool/qa/cppunit
parentf505d8fee149f089090b208db99a7b4107af76e9 (diff)
tdf#91764: Combining marks from “complex” scripts can’t be searched for
Don’t skip search results that are in the middle of a grapheme cluster (AKA cell in LO speak). It is not clear why it was done like this, as these checks are present all the way back to the first commit of this file: commit 36eb193f4809221af42c01c5ac226a97cf74ec21 Author: Rüdiger Timm <rt@openoffice.org> Date: Tue Apr 8 15:01:00 2003 +0000 INTEGRATION: CWS calc06 (1.1.2); FILE ADDED 2003/03/26 15:54:42 er 1.1.2.1: #i3393# moved from i18n module, cleaned out tools module usage, and added support for regexp But ignoring such results and only for so-called “complex” scripts seems arbitrary, and as the linked issue shows, people want to be able to search for combining marks. Furthermore, it prevents searching for a base character followed by a combining mark, unless ignoring diacritics is enabled. Change-Id: I530788d928861ddfa18dd7b813d0a13f53c0b77b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138410 Tested-by: Jenkins Reviewed-by: خالد حسني <khaled@aliftype.com>
Diffstat (limited to 'i18npool/qa/cppunit')
-rw-r--r--i18npool/qa/cppunit/test_textsearch.cxx121
1 files changed, 121 insertions, 0 deletions
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
index 1d72a8d83f18..f224e58c3809 100644
--- a/i18npool/qa/cppunit/test_textsearch.cxx
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -38,12 +38,14 @@ public:
void testSearches();
void testWildcardSearch();
void testApostropheSearch();
+ void testTdf138410();
CPPUNIT_TEST_SUITE(TestTextSearch);
CPPUNIT_TEST(testICU);
CPPUNIT_TEST(testSearches);
CPPUNIT_TEST(testWildcardSearch);
CPPUNIT_TEST(testApostropheSearch);
+ CPPUNIT_TEST(testTdf138410);
CPPUNIT_TEST_SUITE_END();
private:
uno::Reference<util::XTextSearch> m_xSearch;
@@ -402,6 +404,125 @@ void TestTextSearch::testApostropheSearch()
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
}
+void TestTextSearch::testTdf138410()
+{
+ OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628");
+ sal_Int32 startPos = 0, endPos = str.getLength();
+ // tdf#91764: str is base+mark, base+mark, base (indices 0-4), a space (5), then the same three bases without marks (6-8).
+ util::SearchOptions aOptions;
+ aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+
+ util::SearchResult aRes;
+
+ // A) base alone
+ // The search string will be found whether it is followed by a mark in the
+ // text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
+
+ // set options
+ aOptions.searchString = u"\u0643";
+ aOptions.transliterateFlags = 0;
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+ // search backwards (matches are expected with startOffset > endOffset)
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+ // check with transliteration
+ aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+ // search backwards
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+ // B) base+mark
+ // The search string will be found when followed by a mark in the text, or
+ // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
+ // not.
+
+ // set options
+ aOptions.searchString = u"\u0643\u064f";
+ aOptions.transliterateFlags = 0;
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
+
+ // search backwards
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]);
+
+ // check with transliteration
+ aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+ // search backwards
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+ // C) mark alone
+ // The search string will be found only when IGNORE_DIACRITICS_CTL is not
+ // set.
+
+ // set options
+ aOptions.searchString = u"\u064f";
+ aOptions.transliterateFlags = 0;
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
+
+ // search backwards
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]);
+
+ // with ignore marks the mark will not be found
+ aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+ m_xSearch->setOptions(aOptions);
+
+ // search forward
+ aRes = m_xSearch->searchForward(str, startPos, endPos);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+ // search backwards
+ aRes = m_xSearch->searchBackward(str, endPos, startPos);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+}
+
void TestTextSearch::setUp()
{
BootstrapFixtureBase::setUp();