1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
#define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
#include <unotools/unotoolsdllapi.h>
#include <i18nlangtag/lang.h>
#include <rtl/ustring.hxx>
#include <com/sun/star/uno/Reference.h>
#include <ostream>
// Numeric thresholds; neither macro is used anywhere in this header.
// NOTE(review): the names suggest a cut-off for the weighted Levenshtein
// distance (WLD) and a length limit for "small" strings, presumably consumed
// by the similarity search in the implementation file -- confirm there.
#define WLD_THRESHOLD 3
#define SMALL_STRING_THRESHOLD 4
// Forward declarations. In this header CharClass, Locale, SearchResult and
// SearchOptions2 only appear behind references or pointers, and XTextSearch2
// only as the argument of css::uno::Reference<>.
// SearchOptions and TransliterationFlags are declared here but not referenced
// further down in this header.
class CharClass;
namespace com::sun::star::lang { struct Locale; }
namespace com::sun::star::util { class XTextSearch2; }
namespace com::sun::star::util { struct SearchResult; }
namespace i18nutil {
struct SearchOptions;
struct SearchOptions2;
}
enum class TransliterationFlags;
namespace utl
{
// Parameter bundle for one search request: the search string, the kind of
// matching to perform, and the flags that modify the matching.
class UNOTOOLS_DLLPUBLIC SearchParam
{
public:
    /// Kind of matching applied to the search string.
    enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 };

    /** Map the boolean configuration/document settings to a SearchType.

        A wildcard request wins over a regular expression request.

        @param bWildcard
            True if wildcard matching is requested.
        @param rbRegExp
            True if regular expression matching is requested. When bWildcard
            is true as well, it is reset to false so that the caller's
            settings agree with the returned type.
        @return
            Wildcard if bWildcard is set, otherwise Regexp or Normal
            depending on rbRegExp.
     */
    static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp )
    {
        if (!bWildcard)
            return rbRegExp ? SearchType::Regexp : SearchType::Normal;

        // Wildcard takes precedence: withdraw a conflicting regexp request.
        if (rbRegExp)
            rbRegExp = false;
        return SearchType::Wildcard;
    }

    /** Map a SearchType back to the boolean configuration/document settings.

        Both output flags are always written. Only Wildcard sets rbWildcard
        and only Regexp sets rbRegExp; every other value clears both.
     */
    static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp )
    {
        rbWildcard = (eSearchType == SearchType::Wildcard);
        rbRegExp = (eSearchType == SearchType::Regexp);
    }

private:
    OUString sSrchStr;              // text to search for
    SearchType m_eSrchType;         // kind of search, see SearchType
    sal_uInt32 m_cWildEscChar;      // escape character for wildcard patterns
    bool m_bCaseSense : 1;          // reported by IsCaseSensitive()
    bool m_bWildMatchSel : 1;       // wildcard pattern has to match the entire selection

public:
    // Constructors and destructor are defined out of line.
    SearchParam( const OUString &rText,
                 SearchType eSrchType,
                 bool bCaseSensitive = true,
                 sal_uInt32 cWildEscChar = '\\',
                 bool bWildMatchSel = false );
    SearchParam( const SearchParam& );
    ~SearchParam();

    // Read-only accessors for the stored settings.
    const OUString& GetSrchStr() const { return sSrchStr; }
    SearchType GetSrchType() const { return m_eSrchType; }
    bool IsCaseSensitive() const { return m_bCaseSense; }
    bool IsWildMatchSel() const { return m_bWildMatchSel; }

    // The escape character is stored unsigned; it is handed out as a signed
    // value for API use.
    sal_Int32 GetWildEscChar() const { return static_cast<sal_Int32>(m_cWildEscChar); }
};
// Debug output helper, e.g. for SAL_DEBUG: writes a short tag for a SearchType.
// The output format is not guaranteed to be stable.
template<typename charT, typename traits>
inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType)
{
    using SearchType = SearchParam::SearchType;
    switch (eType)
    {
        case SearchType::Normal:
            return stream << "N";
        case SearchType::Regexp:
            return stream << "RE";
        case SearchType::Wildcard:
            return stream << "WC";
        case SearchType::Unknown:
            return stream << "UNK";
        default:
            // Not one of the named enumerators: print the raw number and a '?'.
            return stream << static_cast<int>(eType) << '?';
    }
}
// Utility class for searching a substring in a string.
// The following metrics are supported:
// - ordinary text (Boyer-Moore)
// - regular expressions
// - weighted Levenshtein distance
// - wildcards '*' and '?'
// This class allows forward and backward searching.
class UNOTOOLS_DLLPUBLIC TextSearch
{
// Returns an XTextSearch2 reference for the given options.
// NOTE(review): whether instances are created anew or shared/cached is
// decided in the implementation, which is not visible in this header.
static css::uno::Reference< css::util::XTextSearch2 >
getXTextSearch( const i18nutil::SearchOptions2& rPara );
// The XTextSearch2 reference held by this object.
css::uno::Reference < css::util::XTextSearch2 >
xTextSearch;
// Common set-up helper taking the search parameters and a locale.
// NOTE(review): presumably called by the SearchParam-based constructors --
// confirm in the implementation file.
void Init( const SearchParam & rParam,
const css::lang::Locale& rLocale );
public:
// rPara carries the search string and the search settings.
// The first two constructors (the ones taking a SearchParam) are deprecated!
TextSearch( const SearchParam & rPara, LanguageType nLanguage );
TextSearch( const SearchParam & rPara, const CharClass& rCClass );
TextSearch( const i18nutil::SearchOptions2& rPara );
~TextSearch();
/* Search for the search string in the (selected) text:
rStr - the text in which we search
pStart - start position for the search
pEnd - end position for the search
pRes - optional (may be nullptr, the default); receives the search result
RETURN values == true: something is found
- pStart start pos of the found text,
- pEnd end pos of the found text,
- pRes - the search result with all found
positions. Is only filled with more positions
if the regular expression handles groups.
== false: nothing found, pStart, pEnd unchanged.
Definitions: start pos always inclusive, end pos always exclusive!
The positions must always be given in the direction of the search!
search forward: start <= end
search backward: end <= start
*/
bool SearchForward( const OUString &rStr,
sal_Int32* pStart, sal_Int32* pEnd,
css::util::SearchResult* pRes = nullptr );
/**
* @brief searchForward Search forward beginning from the start to the end
* of the given text
* @param rStr The text in which we search
* @return True if the search term is found in the text
*/
bool searchForward( const OUString &rStr );
// Backward counterpart of SearchForward with the same parameters; see the
// contract described above SearchForward (for a backward search end <= start).
bool SearchBackward( const OUString &rStr,
sal_Int32* pStart, sal_Int32* pEnd,
css::util::SearchResult* pRes = nullptr );
// Takes search options together with a locale.
// NOTE(review): presumably re-targets this searcher to rLocale using rOpt --
// the effect is defined in the implementation, not visible here.
void SetLocale( const i18nutil::SearchOptions2& rOpt,
const css::lang::Locale& rLocale );
/* Replace back references in the replace string (rReplaceStr, modified in place)
by the sub expressions from the search result (rResult); rStr is the searched text. */
static void ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const css::util::SearchResult& rResult );
/**
* @brief Search for a string in another one based on similarity
* @param rString The string we compare with
* @param rSearchString The search term
* @param rSimilarityScore The similarity score (passed by reference to be filled)
* @return True if the search term is found, false otherwise
*/
static bool SimilaritySearch(const OUString& rString, const OUString& rSearchString,
::std::pair<sal_Int32, sal_Int32>& rSimilarityScore);
/**
* @brief Get similarity score between two strings
* according to the length of the common substring and its position
* @param rString The string we compare with
* @param rSearchString The search term
* @param nInitialScore The initial score (passed by non-const reference)
* @param bFromStart True if the search is from the start
* @return Score if the search term is found in the text, -1 otherwise
*/
static sal_Int32 GetSubstringSimilarity(std::u16string_view rString,
std::u16string_view rSearchString,
sal_Int32& nInitialScore, const bool bFromStart);
// Takes the string to compare with and the search term, returns a sal_Int32.
// NOTE(review): by its name this is the weighted Levenshtein distance between
// the two strings; the weights and any limits are defined in the
// implementation -- confirm there.
static sal_Int32 GetWeightedLevenshteinDistance(const OUString& rString,
const OUString& rSearchString);
};
} // namespace utl
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|