summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2013-03-08 21:36:44 +0100
committerFridrich Strba <fridrich@documentfoundation.org>2013-03-12 13:19:29 +0000
commit5dd1542bb372627167b44f1b020eab5a405fa6fa (patch)
tree66bb4901496f595cd9caea4d0c75ed00edefce4f /i18npool
parentd4b3e398f95225951f86d3e8f7474f7a0fb86b92 (diff)
more regex fixes, fdo#60259 related
fdo#60259 prevent crash when searching backward for $ anchor regex Old code wasn't prepared that searching for $ may actually return a result set pointing behind the search string which it does with the ICU regex engine. (cherry picked from commit c00601dab0f5533b152cd63cec0a89bfec1ba95f) Conflicts: i18npool/source/search/textsearch.cxx regex: handle zero-length matches, fdo#60259 related Also in backward search ignore all zero-length matches except the text end single $ anchor search. The anchor search is a valid match, treat it as such in Writer. This still doesn't solve the backward $ backward search, the convoluted Writer code in that place apparently never worked, someone more familiar with those internals should straighten out the mess. (cherry picked from commit 3bc5cb3c485d67f1ce0541d349d11637f52ebda5) regex: don't loop 10000 identical matches to find a single $ anchor (cherry picked from commit ccc349d3abb70ef38cd2b7706da51b060a385908) make forward replacement of $ work again, fdo#60259 related broken with 3bc5cb3c485d67f1ce0541d349d11637f52ebda5 (cherry picked from commit d8dcfa0e5dbecf77c4d6a8d49caf61b339cf9b43) Change-Id: I6b5eb28d0a54ceecb6873a3c05f18f70312ab1a2 Reviewed-on: https://gerrit.libreoffice.org/2679 Reviewed-by: Fridrich Strba <fridrich@documentfoundation.org> Tested-by: Fridrich Strba <fridrich@documentfoundation.org>
Diffstat (limited to 'i18npool')
-rw-r--r--i18npool/source/search/textsearch.cxx99
1 files changed, 70 insertions, 29 deletions
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index dceb4d759915..997b01dd6afa 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -124,14 +124,12 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
sSrchStr = aSrchPara.searchString;
// use transliteration here
- if ( xTranslit.is() &&
- aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
+ if ( xTranslit.is() && aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
sSrchStr = xTranslit->transliterateString2String(
- aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
+ aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
- if ( xTranslit2.is() &&
- aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
- sSrchStr2 = xTranslit2->transliterateString2String(
+ if ( xTranslit2.is() && aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
+ sSrchStr2 = xTranslit2->transliterateString2String(
aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
// When start or end of search string is a complex script type, we need to
@@ -204,22 +202,32 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
newStartPos = FindPosInSeq_Impl( offset, startPos );
if( endPos < searchStr.getLength() )
- newEndPos = FindPosInSeq_Impl( offset, endPos );
+ newEndPos = FindPosInSeq_Impl( offset, endPos );
else
newEndPos = in_str.getLength();
sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
- for ( int k = 0; k < sres.startOffset.getLength(); k++ )
+ // Map offsets back to untransliterated string.
+ const sal_Int32 nOffsets = offset.getLength();
+ if (nOffsets)
{
- if (sres.startOffset[k])
- sres.startOffset[k] = offset[sres.startOffset[k]];
- // JP 20.6.2001: end is ever exclusive and then don't return
- // the position of the next character - return the
- // next position behind the last found character!
- // "a b c" find "b" must return 2,3 and not 2,4!!!
- if (sres.endOffset[k])
- sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
+ // For regex nGroups is the number of groups+1 with group 0 being
+ // the entire match.
+ const sal_Int32 nGroups = sres.startOffset.getLength();
+ for ( sal_Int32 k = 0; k < nGroups; k++ )
+ {
+ const sal_Int32 nStart = sres.startOffset[k];
+ if (nStart > 0)
+ sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
+ // JP 20.6.2001: end is ever exclusive and then don't return
+ // the position of the next character - return the
+ // next position behind the last found character!
+ // "a b c" find "b" must return 2,3 and not 2,4!!!
+ const sal_Int32 nStop = sres.endOffset[k];
+ if (nStop > 0)
+ sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
+ }
}
}
else
@@ -291,24 +299,34 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
// JP 20.6.2001: also the start and end positions must be corrected!
if( startPos < searchStr.getLength() )
newStartPos = FindPosInSeq_Impl( offset, startPos );
- else
- newStartPos = in_str.getLength();
+ else
+ newStartPos = in_str.getLength();
if( endPos )
- newEndPos = FindPosInSeq_Impl( offset, endPos );
+ newEndPos = FindPosInSeq_Impl( offset, endPos );
sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
- for ( int k = 0; k < sres.startOffset.getLength(); k++ )
+ // Map offsets back to untransliterated string.
+ const sal_Int32 nOffsets = offset.getLength();
+ if (nOffsets)
{
- if (sres.startOffset[k])
- sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
- // JP 20.6.2001: end is ever exclusive and then don't return
- // the position of the next character - return the
- // next position behind the last found character!
- // "a b c" find "b" must return 2,3 and not 2,4!!!
- if (sres.endOffset[k])
- sres.endOffset[k] = offset[sres.endOffset[k]];
+ // For regex nGroups is the number of groups+1 with group 0 being
+ // the entire match.
+ const sal_Int32 nGroups = sres.startOffset.getLength();
+ for ( sal_Int32 k = 0; k < nGroups; k++ )
+ {
+ const sal_Int32 nStart = sres.startOffset[k];
+ if (nStart > 0)
+ sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
+ // JP 20.6.2001: end is ever exclusive and then don't return
+ // the position of the next character - return the
+ // next position behind the last found character!
+ // "a b c" find "b" must return 2,3 and not 2,4!!!
+ const sal_Int32 nStop = sres.endOffset[k];
+ if (nStop > 0)
+ sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
+ }
}
}
else
@@ -734,6 +752,11 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
int nEndOfs = pRegexMatcher->end( nIcuErr);
if( nStartOfs < nEndOfs)
break;
+ // If the zero-length match is behind the string, do not match it again
+ // and again until startPos reaches there. A match behind the string is
+ // a "$" anchor.
+ if (nStartOfs == endPos)
+ break;
// try at next position if there was a zero-length match
if( ++startPos >= endPos)
return aRet;
@@ -779,17 +802,35 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
// find the last match
int nLastPos = 0;
int nFoundEnd = 0;
+ int nGoodPos = 0, nGoodEnd = 0;
+ bool bFirst = true;
do {
nLastPos = pRegexMatcher->start( nIcuErr);
nFoundEnd = pRegexMatcher->end( nIcuErr);
+ if (nLastPos < nFoundEnd)
+ {
+ // remember last non-zero-length match
+ nGoodPos = nLastPos;
+ nGoodEnd = nFoundEnd;
+ }
if( nFoundEnd >= startPos)
break;
+ bFirst = false;
if( nFoundEnd == nLastPos)
++nFoundEnd;
} while( pRegexMatcher->find( nFoundEnd, nIcuErr));
+ // Ignore all zero-length matches except "$" anchor on first match.
+ if (nGoodPos == nGoodEnd)
+ {
+ if (bFirst && nLastPos == startPos)
+ nGoodPos = nLastPos;
+ else
+ return aRet;
+ }
+
// find last match again to get its details
- pRegexMatcher->find( nLastPos, nIcuErr);
+ pRegexMatcher->find( nGoodPos, nIcuErr);
// fill in the details of the last match
const int nGroupCount = pRegexMatcher->groupCount();