summaryrefslogtreecommitdiff
path: root/sc
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2018-10-29 23:49:22 +0100
committerEike Rathke <erack@redhat.com>2018-10-30 02:35:28 +0100
commit9574076674acfed2e7f24a86929c10b7a539a35c (patch)
tree1581b47b128f85e8b9f4dc750a0fca0af3056be7 /sc
parentbd96f02f51ccaa6ad02f459b07f810d23c868f98 (diff)
Rewrite REGEX() to directly use icu::RegexMatcher, tdf#113977
This not only avoids all the overhead of i18npool's TextSearch and Transliteration efforts (which are useful in the Find&Replace dialog but which we don't use here) that, given how they are implemented, could possibly even get in the way, but also gives us direct control over all possible flags; plus, ICU's replace features are more convenient to use and have better error checking to report back, and there is the advantage that ICU's regex description can be taken verbatim. Change-Id: Iaa06c26a3ff6882a0057d1ca92b1605073429bca Reviewed-on: https://gerrit.libreoffice.org/62531 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Eike Rathke <erack@redhat.com>
Diffstat (limited to 'sc')
-rw-r--r--sc/source/core/tool/interpr1.cxx67
1 files changed, 47 insertions, 20 deletions
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index 6c3012ad2ad9..3123335ea3b9 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -37,6 +37,7 @@
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <unicode/uchar.h>
+#include <unicode/regex.h>
#include <patattr.hxx>
#include <global.hxx>
@@ -9242,30 +9243,56 @@ void ScInterpreter::ScRegex()
return;
}
- sal_Int32 nPos = 0;
- sal_Int32 nEndPos = aText.getLength();
- utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp);
- css::util::SearchResult aResult;
- utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass);
- const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult);
- if (!bMatch)
- PushError( FormulaError::NotAvailable);
- else
+ const icu::UnicodeString aIcuExpression(
+ reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
+ UErrorCode status = U_ZERO_ERROR;
+ icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status);
+ if (U_FAILURE(status))
{
- assert(aResult.subRegExpressions >= 1);
- if (!bReplacement)
- PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0]));
- else
+ // Invalid regex.
+ PushIllegalArgument();
+ return;
+ }
+ // Guard against pathological patterns, limit steps of engine, see
+ // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
+ aRegexMatcher.setTimeLimit ( 23*1000, status);
+
+ const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
+ aRegexMatcher.reset( aIcuText);
+
+ if (!bReplacement)
+ {
+ // Find first occurrence.
+ if (!aRegexMatcher.find())
+ {
+ PushError( FormulaError::NotAvailable);
+ return;
+ }
+ // Extract matched text.
+ icu::UnicodeString aMatch( aRegexMatcher.group( status));
+ if (U_FAILURE(status))
{
- /* TODO: global replacement of multiple occurrences, introduce
- * extra parameter with flag 'g'? Loop over positions after
- * nEndPos until none left? How to keep the offsets in sync
- * after replacement? That should be done by
- * ReplaceBackReferences(). */
- aSearch.ReplaceBackReferences( aReplacement, aText, aResult);
- PushString( aReplacement);
+ // Some error.
+ PushIllegalArgument();
+ return;
}
+ OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
+ PushString( aResult);
+ return;
+ }
+
+ // Replace first occurrence of match with replacement.
+ const icu::UnicodeString aIcuReplacement(
+ reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
+ icu::UnicodeString aReplaced( aRegexMatcher.replaceFirst( aIcuReplacement, status));
+ if (U_FAILURE(status))
+ {
+ // Some error, e.g. extraneous $1 without group.
+ PushIllegalArgument();
+ return;
}
+ OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
+ PushString( aResult);
}
}