diff options
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/qa/cppunit/test_textsearch.cxx | 121 | ||||
-rw-r--r-- | i18npool/source/search/textsearch.cxx | 79 | ||||
-rw-r--r-- | i18npool/source/search/textsearch.hxx | 4 |
3 files changed, 148 insertions, 56 deletions
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx index 1d72a8d83f18..f224e58c3809 100644 --- a/i18npool/qa/cppunit/test_textsearch.cxx +++ b/i18npool/qa/cppunit/test_textsearch.cxx @@ -38,12 +38,14 @@ public: void testSearches(); void testWildcardSearch(); void testApostropheSearch(); + void testTdf138410(); CPPUNIT_TEST_SUITE(TestTextSearch); CPPUNIT_TEST(testICU); CPPUNIT_TEST(testSearches); CPPUNIT_TEST(testWildcardSearch); CPPUNIT_TEST(testApostropheSearch); + CPPUNIT_TEST(testTdf138410); CPPUNIT_TEST_SUITE_END(); private: uno::Reference<util::XTextSearch> m_xSearch; @@ -402,6 +404,125 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); } +void TestTextSearch::testTdf138410() +{ + OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"); + sal_Int32 startPos = 0, endPos = str.getLength(); + + util::SearchOptions aOptions; + aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE; + + util::SearchResult aRes; + + // A) base alone + // The search string will be found whether it is followed by a mark in the + // text or not, and whether IGNORE_DIACRITICS_CTL is set or not. 
+ + // set options + aOptions.searchString = u"\u0643"; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // check with transliteration + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // B) base+mark + // The search string will be found when followed by a mark in the text, or + // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or + // not. 
+ + // set options + aOptions.searchString = u"\u0643\u064f"; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]); + + // check with transliteration + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // C) mark alone + // The search string will be found only when IGNORE_DIACRITICS_CTL is not + // set. 
+ + // set options + aOptions.searchString = u"\u064f"; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]); + + // with ignore marks the mark will not be found + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions); +} + void TestTextSearch::setUp() { BootstrapFixtureBase::setUp(); diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index c80afc19890f..a16c3e1cc4c7 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -214,13 +214,6 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); } - // When start or end of search string is a complex script type, we need to - // make sure the result boundary is not located in the middle of cell. 
- checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == - ScriptType::COMPLEX)); - checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, - sSrchStr.getLength()-1) == ScriptType::COMPLEX)); - if ( bReplaceApostrophe ) sSrchStr = sSrchStr.replace(u'\u2019', '\''); @@ -305,13 +298,6 @@ static sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 return static_cast<sal_Int32>(std::distance(rOff.begin(), pOff)); } -bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) -{ - sal_Int32 nDone; - return nPos == xBreak->previousCharacters(searchStr, nPos+1, - aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); -} - SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) { std::unique_lock g(m_aMutex); @@ -737,11 +723,6 @@ SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startP nCmpIdx <= nEnd; nCmpIdx += GetDiff( searchStr[nCmpIdx + sSearchKey.getLength()-1])) { - // if the match would be the completed cells, skip it. - if ( (checkCTLStart && !isCellStart( searchStr, nCmpIdx )) || (checkCTLEnd - && !isCellStart( searchStr, nCmpIdx + sSearchKey.getLength())) ) - continue; - nSuchIdx = sSearchKey.getLength() - 1; while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == searchStr[nCmpIdx + nSuchIdx]) { @@ -804,41 +785,28 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP while (nCmpIdx >= nEnd) { - // if the match would be the completed cells, skip it. 
- if ( (!checkCTLStart || isCellStart( searchStr, nCmpIdx - - sSearchKey.getLength() )) && (!checkCTLEnd || - isCellStart( searchStr, nCmpIdx))) + nSuchIdx = 0; + while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == + searchStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) + nSuchIdx++; + if( nSuchIdx >= sSearchKey.getLength() ) { - nSuchIdx = 0; - while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == - searchStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) - nSuchIdx++; - if( nSuchIdx >= sSearchKey.getLength() ) + if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) { - if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) - { - sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); - bool bAtStart = !nFndStt; - bool bAtEnd = nCmpIdx == startPos; - bool bDelimBehind = bAtEnd || IsDelimiter( searchStr, nCmpIdx ); - bool bDelimBefore = bAtStart || // begin of paragraph - IsDelimiter( searchStr, nFndStt-1 ); - // * 1 -> only one word in the paragraph - // * 2 -> at begin of paragraph - // * 3 -> at end of paragraph - // * 4 -> inside the paragraph - if( ( bAtStart && bAtEnd ) || // 1 - ( bAtStart && bDelimBehind ) || // 2 - ( bAtEnd && bDelimBefore ) || // 3 - ( bDelimBefore && bDelimBehind )) // 4 - { - aRet.subRegExpressions = 1; - aRet.startOffset = { nCmpIdx }; - aRet.endOffset = { nCmpIdx - sSearchKey.getLength() }; - return aRet; - } - } - else + sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); + bool bAtStart = !nFndStt; + bool bAtEnd = nCmpIdx == startPos; + bool bDelimBehind = bAtEnd || IsDelimiter( searchStr, nCmpIdx ); + bool bDelimBefore = bAtStart || // begin of paragraph + IsDelimiter( searchStr, nFndStt-1 ); + // * 1 -> only one word in the paragraph + // * 2 -> at begin of paragraph + // * 3 -> at end of paragraph + // * 4 -> inside the paragraph + if( ( bAtStart && bAtEnd ) || // 1 + ( bAtStart && bDelimBehind ) || // 2 + ( bAtEnd && bDelimBefore ) || // 3 + ( bDelimBefore && bDelimBehind )) // 4 { 
aRet.subRegExpressions = 1; aRet.startOffset = { nCmpIdx }; @@ -846,6 +814,13 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP return aRet; } } + else + { + aRet.subRegExpressions = 1; + aRet.startOffset = { nCmpIdx }; + aRet.endOffset = { nCmpIdx - sSearchKey.getLength() }; + return aRet; + } } nSuchIdx = GetDiff( searchStr[nCmpIdx - sSearchKey.getLength()] ); if( nCmpIdx < nSuchIdx ) diff --git a/i18npool/source/search/textsearch.hxx b/i18npool/source/search/textsearch.hxx index 0a4da19dfadc..43a643537a3a 100644 --- a/i18npool/source/search/textsearch.hxx +++ b/i18npool/source/search/textsearch.hxx @@ -130,10 +130,6 @@ class TextSearch: public cppu::WeakImplHelper bool IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const; - bool checkCTLStart, checkCTLEnd; - /// @throws css::uno::RuntimeException - bool isCellStart(const OUString& searchStr, sal_Int32 nPos); - public: explicit TextSearch( const css::uno::Reference < css::uno::XComponentContext >& rxContext ); |