diff options
author | Eike Rathke <erack@redhat.com> | 2018-10-29 23:49:22 +0100 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2018-10-30 02:35:28 +0100 |
commit | 9574076674acfed2e7f24a86929c10b7a539a35c (patch) | |
tree | 1581b47b128f85e8b9f4dc750a0fca0af3056be7 | |
parent | bd96f02f51ccaa6ad02f459b07f810d23c868f98 (diff) |
Rewrite REGEX() to directly use icu::RegexMatcher, tdf#113977
This not only avoids all the overhead of i18npool's TextSearch and
Transliteration efforts (which are useful in the Find&Replace
dialog but we don't use here) that could possibly even get in the
way, given how they are implemented, but also gives us direct control
over all possible flags; plus ICU's replace features are more
convenient to use and have better error checking to report back
and there is the advantage that ICU's regex description can be
taken verbatim.
Change-Id: Iaa06c26a3ff6882a0057d1ca92b1605073429bca
Reviewed-on: https://gerrit.libreoffice.org/62531
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
-rw-r--r-- | sc/source/core/tool/interpr1.cxx | 67 |
1 files changed, 47 insertions, 20 deletions
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index 6c3012ad2ad9..3123335ea3b9 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -37,6 +37,7 @@ #include <rtl/ustring.hxx> #include <sal/log.hxx> #include <unicode/uchar.h> +#include <unicode/regex.h> #include <patattr.hxx> #include <global.hxx> @@ -9242,30 +9243,56 @@ void ScInterpreter::ScRegex() return; } - sal_Int32 nPos = 0; - sal_Int32 nEndPos = aText.getLength(); - utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp); - css::util::SearchResult aResult; - utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass); - const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult); - if (!bMatch) - PushError( FormulaError::NotAvailable); - else + const icu::UnicodeString aIcuExpression( + reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength()); + UErrorCode status = U_ZERO_ERROR; + icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status); + if (U_FAILURE(status)) { - assert(aResult.subRegExpressions >= 1); - if (!bReplacement) - PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0])); - else + // Invalid regex. + PushIllegalArgument(); + return; + } + // Guard against pathological patterns, limit steps of engine, see + // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00 + aRegexMatcher.setTimeLimit ( 23*1000, status); + + const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength()); + aRegexMatcher.reset( aIcuText); + + if (!bReplacement) + { + // Find first occurrence. + if (!aRegexMatcher.find()) + { + PushError( FormulaError::NotAvailable); + return; + } + // Extract matched text. 
+ icu::UnicodeString aMatch( aRegexMatcher.group( status)); + if (U_FAILURE(status)) { - /* TODO: global replacement of multiple occurrences, introduce - * extra parameter with flag 'g'? Loop over positions after - * nEndPos until none left? How to keep the offsets in sync - * after replacement? That should be done by - * ReplaceBackReferences(). */ - aSearch.ReplaceBackReferences( aReplacement, aText, aResult); - PushString( aReplacement); + // Some error. + PushIllegalArgument(); + return; } + OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length()); + PushString( aResult); + return; + } + + // Replace first occurrence of match with replacement. + const icu::UnicodeString aIcuReplacement( + reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength()); + icu::UnicodeString aReplaced( aRegexMatcher.replaceFirst( aIcuReplacement, status)); + if (U_FAILURE(status)) + { + // Some error, e.g. extraneous $1 without group. + PushIllegalArgument(); + return; } + OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length()); + PushString( aResult); } } |