diff options
author | Eike Rathke <erack@redhat.com> | 2018-11-29 01:11:57 +0100 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2018-11-29 11:30:40 +0100 |
commit | e3af4947fd4b8d1411212775e8ffe42e330364c3 (patch) | |
tree | 333ad617d667a272d6fcd6f1360fac716a575a5a | |
parent | 00df4a5ae395607eab1f83aacfc1fb05eb93ecc9 (diff) |
Support occurrence number as REGEX() 4th argument, tdf#113977 follow-up
REGEX( Text ; Expression [ ; [ Replacement ] [ ; Flags|Occurrence ] ] )
REGEX(Text;Expression) extracts the first match of Expression in
Text. If there is no match, #N/A is returned.
REGEX(Text;Expression;Replacement) replaces the first match of
Expression in Text with Replacement and returns the whole resulting
text (the match is not extracted). If there is no match, Text is
returned unmodified.
REGEX(Text;Expression;Replacement;"g") replaces all matches of
Expression in Text with Replacement and returns the whole resulting
text (the matches are not extracted). If there is no match, Text is
returned unmodified.
REGEX(Text;Expression;;Occurrence) extracts the n-th match of
Expression in Text. If there is no n-th match, #N/A is returned.
If Occurrence is 0, Text is returned unmodified.
REGEX(Text;Expression;Replacement;Occurrence) replaces the n-th
match of Expression in Text with Replacement and returns the whole
resulting text (the match is not extracted). If there is no n-th
match, Text is returned unmodified. If Occurrence is 0, Text is
returned unmodified.
Change-Id: Iadb705e4c76415c57bf510489410ec029344cca7
Reviewed-on: https://gerrit.libreoffice.org/64199
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
-rw-r--r-- | sc/inc/scfuncs.hrc | 4 | ||||
-rw-r--r-- | sc/source/core/tool/interpr1.cxx | 206 |
2 files changed, 140 insertions, 70 deletions
diff --git a/sc/inc/scfuncs.hrc b/sc/inc/scfuncs.hrc index e09a12f4e4c7..c08df5adc072 100644 --- a/sc/inc/scfuncs.hrc +++ b/sc/inc/scfuncs.hrc @@ -3826,8 +3826,8 @@ const char* SC_OPCODE_REGEX_ARY[] = NC_("SC_OPCODE_REGEX", "The regular expression pattern to be matched."), NC_("SC_OPCODE_REGEX", "Replacement"), NC_("SC_OPCODE_REGEX", "The replacement text and references to capture groups."), - NC_("SC_OPCODE_REGEX", "Flags"), - NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement.") + NC_("SC_OPCODE_REGEX", "Flags or Occurrence"), + NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement. Or number of occurrence to match or replace.") }; // -=*# Resource for function BASE #*=- diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index c659f0f93ae3..a07f5a7ce1c6 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -9226,17 +9226,48 @@ void ScInterpreter::ScSearch() void ScInterpreter::ScRegex() { - sal_uInt8 nParamCount = GetByte(); - if (MustHaveParamCount( nParamCount, 2, 4)) + const sal_uInt8 nParamCount = GetByte(); + if (!MustHaveParamCount( nParamCount, 2, 4)) + return; + + // Flags are supported only for replacement, search match flags can be + // individually and much more flexible set in the regular expression + // pattern using (?ismwx-ismwx) + bool bGlobalReplacement = false; + sal_Int32 nOccurrence = 1; // default first occurrence, if any + if (nParamCount == 4) { - // Flags are supported only for replacement, search match flags can be - // individually and much more flexible set in the regular expression - // pattern using (?ismwx-ismwx) - bool bGlobalReplacement = false; - if (nParamCount == 4) + // Argument can be either string or double. 
+ double fOccurrence; + svl::SharedString aFlagsString; + bool bDouble; + if (!IsMissing()) + bDouble = GetDoubleOrString( fOccurrence, aFlagsString); + else + { + // For an omitted argument keep the default. + PopError(); + bDouble = true; + fOccurrence = nOccurrence; + } + if (nGlobalError != FormulaError::NONE) + { + PushError( nGlobalError); + return; + } + if (bDouble) + { + if (!CheckStringPositionArgument( fOccurrence)) + { + PushError( FormulaError::IllegalArgument); + return; + } + nOccurrence = static_cast<sal_Int32>(fOccurrence); + } + else { + const OUString aFlags( aFlagsString.getString()); // Empty flags string is valid => no flag set. - OUString aFlags( GetString().getString()); if (aFlags.getLength() > 1) { // Only one flag supported. @@ -9255,87 +9286,126 @@ void ScInterpreter::ScRegex() } } } + } - bool bReplacement = false; - OUString aReplacement; - if (nParamCount >= 3) + bool bReplacement = false; + OUString aReplacement; + if (nParamCount >= 3) + { + // A missing argument is not an empty string to replace the match. + // nOccurrence==0 forces no replacement, so simply discard the + // argument. + if (IsMissing() || nOccurrence == 0) + PopError(); + else { - // A missing argument is not an empty string to replace the match. - if (IsMissing()) - Pop(); - else - { - aReplacement = GetString().getString(); - bReplacement = true; - } + aReplacement = GetString().getString(); + bReplacement = true; } - // If bGlobalReplacement==true and bReplacement==false then - // bGlobalReplacement is silently ignored. + } + // If bGlobalReplacement==true and bReplacement==false then + // bGlobalReplacement is silently ignored. 
- OUString aExpression = GetString().getString(); - OUString aText = GetString().getString(); + OUString aExpression = GetString().getString(); + OUString aText = GetString().getString(); - if (nGlobalError != FormulaError::NONE) - { - PushError( nGlobalError); - return; - } + if (nGlobalError != FormulaError::NONE) + { + PushError( nGlobalError); + return; + } - const icu::UnicodeString aIcuExpression( - reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength()); - UErrorCode status = U_ZERO_ERROR; - icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status); + // 0-th match or replacement is none, return original string early. + if (nOccurrence == 0) + { + PushString( aText); + return; + } + + const icu::UnicodeString aIcuExpression( + reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength()); + UErrorCode status = U_ZERO_ERROR; + icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status); + if (U_FAILURE(status)) + { + // Invalid regex. + PushIllegalArgument(); + return; + } + // Guard against pathological patterns, limit steps of engine, see + // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00 + aRegexMatcher.setTimeLimit( 23*1000, status); + + const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength()); + aRegexMatcher.reset( aIcuText); + + if (!bReplacement) + { + // Find n-th occurrence. + sal_Int32 nCount = 0; + while (aRegexMatcher.find( status) && U_SUCCESS(status) && ++nCount < nOccurrence) + ; if (U_FAILURE(status)) { - // Invalid regex. + // Some error. 
PushIllegalArgument(); return; } - // Guard against pathological patterns, limit steps of engine, see - // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00 - aRegexMatcher.setTimeLimit ( 23*1000, status); - - const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength()); - aRegexMatcher.reset( aIcuText); - - if (!bReplacement) + // n-th match found? + if (nCount != nOccurrence) { - // Find first occurrence. - if (!aRegexMatcher.find()) - { - PushError( FormulaError::NotAvailable); - return; - } - // Extract matched text. - icu::UnicodeString aMatch( aRegexMatcher.group( status)); - if (U_FAILURE(status)) - { - // Some error. - PushIllegalArgument(); - return; - } - OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length()); - PushString( aResult); + PushError( FormulaError::NotAvailable); return; } - - // Replace first occurrence of match with replacement. - const icu::UnicodeString aIcuReplacement( - reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength()); - icu::UnicodeString aReplaced; - if (bGlobalReplacement) - aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status); - else - aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status); + // Extract matched text. + icu::UnicodeString aMatch( aRegexMatcher.group( status)); if (U_FAILURE(status)) { - // Some error, e.g. extraneous $1 without group. + // Some error. 
PushIllegalArgument(); return; } - OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length()); + OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length()); PushString( aResult); + return; + } + + const icu::UnicodeString aIcuReplacement( + reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength()); + icu::UnicodeString aReplaced; + if (bGlobalReplacement) + // Replace all occurrences of match with replacement. + aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status); + else if (nOccurrence == 1) + // Replace first occurrence of match with replacement. + aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status); + else + { + // Replace n-th occurrence of match with replacement. + sal_Int32 nCount = 0; + while (aRegexMatcher.find( status) && U_SUCCESS(status)) + { + // XXX NOTE: After several RegexMatcher::find() the + // RegexMatcher::appendReplacement() still starts at the + // beginning (or after the last appendReplacement() position + // which is none here) and copies the original text up to the + // current found match and then replaces the found match. + if (++nCount == nOccurrence) + { + aRegexMatcher.appendReplacement( aReplaced, aIcuReplacement, status); + break; + } + } + aRegexMatcher.appendTail( aReplaced); + } + if (U_FAILURE(status)) + { + // Some error, e.g. extraneous $1 without group. + PushIllegalArgument(); + return; } + OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length()); + PushString( aResult); } void ScInterpreter::ScMid() |