summaryrefslogtreecommitdiff
path: root/unotools/source
diff options
context:
space:
mode:
authorAhmedHamed <ahmedhamed3699@gmail.com>2024-07-05 21:41:26 +0300
committerAndreas Heinisch <andreas.heinisch@yahoo.de>2024-09-03 09:59:16 +0200
commitd05e0be5f4ab2bdb31d6cf14528a9a8ee5ac965c (patch)
tree1dc8194db01b3376bc7adddfe0d56abefaf926f3 /unotools/source
parent5b0256f30ee154edb28b999bc4faba2453fc32b8 (diff)
tdf#161543 Enhance the searching functionality in FD & FW
Change-Id: I1a21595228f886c942ae46d90e41705443d31550 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170073 Reviewed-by: Heiko Tietze <heiko.tietze@documentfoundation.org> Reviewed-by: Andreas Heinisch <andreas.heinisch@yahoo.de> Tested-by: Jenkins
Diffstat (limited to 'unotools/source')
-rw-r--r--unotools/source/i18n/textsearch.cxx97
1 files changed, 97 insertions, 0 deletions
diff --git a/unotools/source/i18n/textsearch.cxx b/unotools/source/i18n/textsearch.cxx
index 9c4573c38538..5958240cda44 100644
--- a/unotools/source/i18n/textsearch.cxx
+++ b/unotools/source/i18n/textsearch.cxx
@@ -353,6 +353,103 @@ void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, std::u16string_vi
rReplaceStr = sBuff.makeStringAndClear();
}
+bool TextSearch::SimilaritySearch(const OUString& rString, const OUString& rSearchString,
+ ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore)
+{
+ sal_Int32 nScore = 0;
+ sal_Int32 nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, true);
+ if (nFirstScore == -1)
+ nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, false);
+ if (nFirstScore == -1)
+ {
+ if (rSearchString.getLength() == 1)
+ {
+ if (rString.startsWith(rSearchString))
+ nFirstScore = nScore;
+ else if (rString.endsWith(rSearchString))
+ nFirstScore = nScore + 1;
+ nScore += 2;
+ }
+ else if (rString.getLength() == 1 && rSearchString.getLength() < SMALL_STRING_THRESHOLD)
+ {
+ if (rSearchString.startsWith(rString))
+ nFirstScore = nScore;
+ else if (rSearchString.endsWith(rString))
+ nFirstScore = nScore + 1;
+ nScore += 2;
+ }
+ }
+ sal_Int32 nSecondScore = GetWeightedLevenshteinDistance(rString, rSearchString);
+
+ if (nFirstScore == -1 && nSecondScore >= WLD_THRESHOLD)
+ return false;
+
+ rSimilarityScore.first = (nFirstScore == -1) ? nScore : nFirstScore;
+ rSimilarityScore.second = nSecondScore;
+ return true;
+}
+
+sal_Int32 TextSearch::GetSubstringSimilarity(std::u16string_view rString,
+ std::u16string_view rSearchString,
+ sal_Int32& nInitialScore, const bool bFromStart)
+{
+ sal_Int32 nScore = -1;
+ for (sal_Int32 length = rSearchString.length(); length > 1; length--)
+ {
+ sal_Int32 nStartPos = bFromStart ? 0 : rSearchString.length() - length;
+ std::u16string_view rSearchSubString = rSearchString.substr(nStartPos, length);
+ if (rString.starts_with(rSearchSubString))
+ {
+ nScore = nInitialScore;
+ break;
+ }
+ else if (rString.ends_with(rSearchSubString))
+ {
+ nScore = nInitialScore + 1;
+ break;
+ }
+ else if (rString.find(rSearchSubString) != std::u16string_view::npos)
+ {
+ nScore = nInitialScore + 2;
+ break;
+ }
+ nInitialScore += 3;
+ }
+ return nScore;
+}
+
+sal_Int32 TextSearch::GetWeightedLevenshteinDistance(const OUString& rString,
+ const OUString& rSearchString)
+{
+ sal_Int32 n = rString.getLength();
+ sal_Int32 m = rSearchString.getLength();
+ std::vector<std::vector<sal_Int32>> ScoreDP(n + 1, std::vector<sal_Int32>(m + 1));
+
+ for (sal_Int32 i = 0; i <= n; i++)
+ {
+ ScoreDP[i][0] = i;
+ }
+ for (sal_Int32 j = 0; j <= m; j++)
+ {
+ ScoreDP[0][j] = j;
+ }
+
+ for (sal_Int32 i = 1; i <= n; i++)
+ {
+ for (sal_Int32 j = 1; j <= m; j++)
+ {
+ sal_Int32& minE = ScoreDP[i][j];
+ minE = ScoreDP[i - 1][j] + 1;
+ minE = std::min(minE, ScoreDP[i][j - 1] + 1);
+ if (rString[i - 1] != rSearchString[j - 1])
+ minE = std::min(minE, ScoreDP[i - 1][j - 1] + 2);
+ else
+ minE = std::min(minE, ScoreDP[i - 1][j - 1]);
+ }
+ }
+ return ScoreDP[n][m];
+}
+
} // namespace utl
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */