/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace ::com::sun::star::util; using namespace ::com::sun::star::uno; using namespace ::com::sun::star::lang; namespace utl { SearchParam::SearchParam( const OUString &rText, SearchType eType, bool bCaseSensitive, sal_uInt32 cWildEscChar, bool bWildMatchSel ) { sSrchStr = rText; m_eSrchType = eType; m_cWildEscChar = cWildEscChar; m_bCaseSense = bCaseSensitive; m_bWildMatchSel = bWildMatchSel; } SearchParam::SearchParam( const SearchParam& rParam ) { sSrchStr = rParam.sSrchStr; m_eSrchType = rParam.m_eSrchType; m_cWildEscChar = rParam.m_cWildEscChar; m_bCaseSense = rParam.m_bCaseSense; m_bWildMatchSel = rParam.m_bWildMatchSel; } SearchParam::~SearchParam() {} static bool lcl_Equals( const i18nutil::SearchOptions2& rSO1, const i18nutil::SearchOptions2& rSO2 ) { return rSO1.AlgorithmType2 == rSO2.AlgorithmType2 && rSO1.WildcardEscapeCharacter == rSO2.WildcardEscapeCharacter && rSO1.searchFlag == rSO2.searchFlag && rSO1.searchString == rSO2.searchString && rSO1.replaceString == rSO2.replaceString && rSO1.changedChars == rSO2.changedChars && rSO1.deletedChars == rSO2.deletedChars && rSO1.insertedChars == rSO2.insertedChars && rSO1.Locale.Language == rSO2.Locale.Language && rSO1.Locale.Country == rSO2.Locale.Country && rSO1.Locale.Variant == rSO2.Locale.Variant && rSO1.transliterateFlags == rSO2.transliterateFlags; } namespace { struct CachedTextSearch { std::mutex mutex; i18nutil::SearchOptions2 Options; css::uno::Reference< css::util::XTextSearch2 > xTextSearch; }; } Reference TextSearch::getXTextSearch( const i18nutil::SearchOptions2& rPara ) { static CachedTextSearch theCachedTextSearch; std::scoped_lock aGuard(theCachedTextSearch.mutex); if ( lcl_Equals(theCachedTextSearch.Options, rPara) ) return theCachedTextSearch.xTextSearch; Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext(); theCachedTextSearch.xTextSearch.set( ::TextSearch2::create(xContext) ); theCachedTextSearch.xTextSearch->setOptions2( rPara.toUnoSearchOptions2() ); theCachedTextSearch.Options = rPara; return theCachedTextSearch.xTextSearch; } TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang ) { if( LANGUAGE_NONE == eLang ) eLang = LANGUAGE_SYSTEM; css::lang::Locale aLocale( LanguageTag::convertToLocale( eLang ) ); Init( rParam, aLocale); } TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass ) { Init( rParam, rCClass.getLanguageTag().getLocale() ); } TextSearch::TextSearch( const i18nutil::SearchOptions2& rPara ) { xTextSearch = getXTextSearch( rPara ); } void TextSearch::Init( const SearchParam & rParam, const css::lang::Locale& rLocale ) { // convert SearchParam to the UNO SearchOptions2 i18nutil::SearchOptions2 aSOpt; switch( rParam.GetSrchType() ) { case SearchParam::SearchType::Wildcard: aSOpt.AlgorithmType2 = SearchAlgorithms2::WILDCARD; aSOpt.WildcardEscapeCharacter = rParam.GetWildEscChar(); if (rParam.IsWildMatchSel()) aSOpt.searchFlag |= SearchFlags::WILD_MATCH_SELECTION; break; case SearchParam::SearchType::Regexp: aSOpt.AlgorithmType2 = SearchAlgorithms2::REGEXP; break; case SearchParam::SearchType::Normal: aSOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE; break; default: for (;;) std::abort(); } aSOpt.searchString = rParam.GetSrchStr(); aSOpt.replaceString = ""; aSOpt.Locale = rLocale; aSOpt.transliterateFlags = TransliterationFlags::NONE; if( !rParam.IsCaseSensitive() ) { aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE; aSOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE; } xTextSearch = getXTextSearch( aSOpt ); } void TextSearch::SetLocale( const i18nutil::SearchOptions2& rOptions, const css::lang::Locale& rLocale ) { i18nutil::SearchOptions2 aSOpt( rOptions ); aSOpt.Locale = rLocale; xTextSearch = getXTextSearch( aSOpt ); } TextSearch::~TextSearch() { } /* * General search methods. These methods will call the respective * methods, such as ordinary string searching or regular expression * matching, using the method pointer. */ bool TextSearch::SearchForward( const OUString &rStr, sal_Int32* pStart, sal_Int32* pEnd, css::util::SearchResult* pRes) { bool bRet = false; try { if( xTextSearch.is() ) { SearchResult aRet( xTextSearch->searchForward( rStr, *pStart, *pEnd )); if( aRet.subRegExpressions > 0 ) { bRet = true; // the XTextsearch returns in startOffset the higher position // and the endposition is always exclusive. // The caller of this function will have in startPos the // lower pos. and end *pStart = aRet.startOffset[ 0 ]; *pEnd = aRet.endOffset[ 0 ]; if( pRes ) *pRes = std::move(aRet); } } } catch ( Exception& ) { TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); } return bRet; } bool TextSearch::searchForward( const OUString &rStr ) { sal_Int32 pStart = 0; sal_Int32 pEnd = rStr.getLength(); bool bResult = SearchForward(rStr, &pStart, &pEnd); return bResult; } bool TextSearch::SearchBackward( const OUString & rStr, sal_Int32* pStart, sal_Int32* pEnd, SearchResult* pRes ) { bool bRet = false; try { if( xTextSearch.is() ) { SearchResult aRet( xTextSearch->searchBackward( rStr, *pStart, *pEnd )); if( aRet.subRegExpressions ) { bRet = true; // the XTextsearch returns in startOffset the higher position // and the endposition is always exclusive. // The caller of this function will have in startPos the // lower pos. and end *pEnd = aRet.startOffset[ 0 ]; *pStart = aRet.endOffset[ 0 ]; if( pRes ) *pRes = std::move(aRet); } } } catch ( Exception& ) { TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); } return bRet; } // static void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const SearchResult& rResult ) { if( rResult.subRegExpressions <= 0 ) return; sal_Unicode sFndChar; sal_Int32 i; OUStringBuffer sBuff(rReplaceStr.getLength()*4); for(i = 0; i < rReplaceStr.getLength(); i++) { if( rReplaceStr[i] == '&') { sal_Int32 nStart = rResult.startOffset[0]; sal_Int32 nLength = rResult.endOffset[0] - rResult.startOffset[0]; sBuff.append(rStr.substr(nStart, nLength)); } else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '$') { sFndChar = rReplaceStr[ i + 1 ]; switch(sFndChar) { // placeholder for a backward reference? case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int j = sFndChar - '0'; // index if(j < rResult.subRegExpressions) { sal_Int32 nSttReg = rResult.startOffset[j]; sal_Int32 nRegLen = rResult.endOffset[j]; if (nSttReg < 0 || nRegLen < 0) // A "not found" optional capture { nSttReg = nRegLen = 0; // Copy empty string } else if (nRegLen >= nSttReg) { nRegLen = nRegLen - nSttReg; } else { nRegLen = nSttReg - nRegLen; nSttReg = rResult.endOffset[j]; } // Copy reference from found string sBuff.append(rStr.substr(nSttReg, nRegLen)); } i += 1; } break; default: sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1])); i += 1; break; } } else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '\\') { sFndChar = rReplaceStr[ i+1 ]; switch(sFndChar) { case '\\': case '&': case '$': sBuff.append(sFndChar); i+=1; break; case 't': sBuff.append('\t'); i += 1; break; default: sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1])); i += 1; break; } } else { sBuff.append(rReplaceStr[i]); } } rReplaceStr = sBuff.makeStringAndClear(); } bool TextSearch::SimilaritySearch(const OUString& rString, const OUString& rSearchString, ::std::pair& rSimilarityScore) { sal_Int32 nScore = 0; sal_Int32 nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, true); if (nFirstScore == -1) nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, false); if (nFirstScore == -1) { if (rSearchString.getLength() == 1) { if (rString.startsWith(rSearchString)) nFirstScore = nScore; else if (rString.endsWith(rSearchString)) nFirstScore = nScore + 1; nScore += 2; } else if (rString.getLength() == 1 && rSearchString.getLength() < SMALL_STRING_THRESHOLD) { if (rSearchString.startsWith(rString)) nFirstScore = nScore; else if (rSearchString.endsWith(rString)) nFirstScore = nScore + 1; nScore += 2; } } sal_Int32 nSecondScore = GetWeightedLevenshteinDistance(rString, rSearchString); if (nFirstScore == -1 && nSecondScore >= WLD_THRESHOLD) return false; rSimilarityScore.first = (nFirstScore == -1) ? nScore : nFirstScore; rSimilarityScore.second = nSecondScore; return true; } sal_Int32 TextSearch::GetSubstringSimilarity(std::u16string_view rString, std::u16string_view rSearchString, sal_Int32& nInitialScore, const bool bFromStart) { sal_Int32 nScore = -1; for (sal_Int32 length = rSearchString.length(); length > 1; length--) { sal_Int32 nStartPos = bFromStart ? 0 : rSearchString.length() - length; std::u16string_view rSearchSubString = rSearchString.substr(nStartPos, length); if (rString.starts_with(rSearchSubString)) { nScore = nInitialScore; break; } else if (rString.ends_with(rSearchSubString)) { nScore = nInitialScore + 1; break; } else if (rString.find(rSearchSubString) != std::u16string_view::npos) { nScore = nInitialScore + 2; break; } nInitialScore += 3; } return nScore; } sal_Int32 TextSearch::GetWeightedLevenshteinDistance(const OUString& rString, const OUString& rSearchString) { sal_Int32 n = rString.getLength(); sal_Int32 m = rSearchString.getLength(); std::vector> ScoreDP(n + 1, std::vector(m + 1)); for (sal_Int32 i = 0; i <= n; i++) { ScoreDP[i][0] = i; } for (sal_Int32 j = 0; j <= m; j++) { ScoreDP[0][j] = j; } for (sal_Int32 i = 1; i <= n; i++) { for (sal_Int32 j = 1; j <= m; j++) { sal_Int32& minE = ScoreDP[i][j]; minE = ScoreDP[i - 1][j] + 1; minE = std::min(minE, ScoreDP[i][j - 1] + 1); if (rString[i - 1] != rSearchString[j - 1]) minE = std::min(minE, ScoreDP[i - 1][j - 1] + 2); else minE = std::min(minE, ScoreDP[i - 1][j - 1]); } } return ScoreDP[n][m]; } } // namespace utl /* vim:set shiftwidth=4 softtabstop=4 expandtab: */