summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
authorPetr Mladek <pmladek@suse.cz>2013-05-01 13:05:06 +0200
committerPetr Mladek <pmladek@suse.cz>2013-05-01 13:05:06 +0200
commit93e11e4fb697aaea63c99308f980e4c932bd1efa (patch)
tree1892e49932d8d7b7656ae504113a40e4e39f876f /i18npool
parenta5b6a379a2d628040db98060c2adfe8f2fac5607 (diff)
parentc6786add5a58268e11aa027c47054344040db1bc (diff)
Merge tag 'libreoffice-4.0.3.2' into suse-4.0
Tag libreoffice-4.0.3.2 Conflicts: basic/qa/cppunit/test_vba.cxx basic/source/runtime/step2.cxx dictionaries helpcontent2 instsetoo_native/util/openoffice.lst libvisio/UnpackedTarball_visio.mk sc/source/ui/vba/vbaapplication.cxx sc/source/ui/vba/vbavalidation.cxx solenv/inc/minor.mk sw/qa/extras/ooxmlimport/ooxmlimport.cxx translations Change-Id: Ic1f06489175f3db92d6bbcebb9732fadc1c61fed
Diffstat (limited to 'i18npool')
-rw-r--r--i18npool/inc/i18npool/languagetag.hxx24
-rw-r--r--i18npool/qa/cppunit/test_textsearch.cxx2
-rw-r--r--i18npool/source/languagetag/languagetag.cxx7
-rw-r--r--i18npool/source/localedata/data/es_PE.xml36
-rw-r--r--i18npool/source/localedata/data/es_PY.xml2
-rw-r--r--i18npool/source/localedata/data/es_VE.xml4
-rw-r--r--i18npool/source/search/textsearch.cxx136
7 files changed, 147 insertions, 64 deletions
diff --git a/i18npool/inc/i18npool/languagetag.hxx b/i18npool/inc/i18npool/languagetag.hxx
index bb86b72262f0..1e0ae65c4b4a 100644
--- a/i18npool/inc/i18npool/languagetag.hxx
+++ b/i18npool/inc/i18npool/languagetag.hxx
@@ -204,10 +204,30 @@ public:
*/
LanguageTag & makeFallback();
- /* Test equality of two LangageTag. */
+ /** Test equality of two LanguageTag, possibly resolving system locale.
+
+ @param bResolveSystem
+ If TRUE, resolve empty language tags denoting the system
+ locale to the real locale used before comparing.
+ If FALSE, the behavior is identical to operator==(), system
+ locales are not resolved first.
+ */
+ bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const;
+
+ /** Test equality of two LanguageTag.
+
+ Does NOT resolve system, i.e. if the system locale is en-US
+ LanguageTag("")==LanguageTag("en-US") returns false! Use
+ equals(...,true) instead if system locales shall be resolved.
+ */
bool operator==( const LanguageTag & rLanguageTag ) const;
- /* Test inequality of two LangageTag. */
+ /** Test inequality of two LanguageTag.
+
+ Does NOT resolve system, i.e. if the system locale is en-US
+ LanguageTag("")!=LanguageTag("en-US") returns true! Use
+ !equals(...,true) instead if system locales shall be resolved.
+ */
bool operator!=( const LanguageTag & rLanguageTag ) const;
private:
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
index c26550b950ca..d7a6c3349146 100644
--- a/i18npool/qa/cppunit/test_textsearch.cxx
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -101,7 +101,7 @@ void TestTextSearch::testSearches()
sal_Int32 startPos = 2, endPos = 20 ;
OUString searchStr( "(ab)*a(c|d)+" );
sal_Int32 fStartRes = 10, fEndRes = 18 ;
- sal_Int32 bStartRes = 18, bEndRes = 14 ;
+ sal_Int32 bStartRes = 18, bEndRes = 10 ;
// set options
util::SearchOptions aOptions;
diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx
index bab244371063..9055d2b5dc20 100644
--- a/i18npool/source/languagetag/languagetag.cxx
+++ b/i18npool/source/languagetag/languagetag.cxx
@@ -995,6 +995,13 @@ LanguageTag & LanguageTag::makeFallback()
}
+bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
+{
+ // Compare full language tag strings.
+ return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
+}
+
+
bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
{
// Compare full language tag strings but SYSTEM unresolved.
diff --git a/i18npool/source/localedata/data/es_PE.xml b/i18npool/source/localedata/data/es_PE.xml
index 2f75eb76d61d..a0a9bc44916f 100644
--- a/i18npool/source/localedata/data/es_PE.xml
+++ b/i18npool/source/localedata/data/es_PE.xml
@@ -34,10 +34,10 @@
<LC_CTYPE unoid="generic">
<Separators>
<DateSeparator>/</DateSeparator>
- <ThousandSeparator>.</ThousandSeparator>
- <DecimalSeparator>,</DecimalSeparator>
+ <ThousandSeparator>,</ThousandSeparator>
+ <DecimalSeparator>.</DecimalSeparator>
<TimeSeparator>:</TimeSeparator>
- <Time100SecSeparator>,</Time100SecSeparator>
+ <Time100SecSeparator>.</Time100SecSeparator>
<ListSeparator>;</ListSeparator>
<LongDateDayOfWeekSeparator> </LongDateDayOfWeekSeparator>
<LongDateDaySeparator> de </LongDateDaySeparator>
@@ -136,10 +136,10 @@
<FormatCode>[HH]:MM:SS</FormatCode>
</FormatElement>
<FormatElement msgid="TimeFormatskey6" default="false" type="short" usage="TIME" formatindex="44">
- <FormatCode>MM:SS,00</FormatCode>
+ <FormatCode>MM:SS.00</FormatCode>
</FormatElement>
<FormatElement msgid="TimeFormatskey7" default="false" type="medium" usage="TIME" formatindex="45">
- <FormatCode>[HH]:MM:SS,00</FormatCode>
+ <FormatCode>[HH]:MM:SS.00</FormatCode>
</FormatElement>
<FormatElement msgid="DateTimeFormatskey1" default="true" type="medium" usage="DATE_TIME" formatindex="46">
<FormatCode>DD/MM/AA HH:MM</FormatCode>
@@ -154,46 +154,46 @@
<FormatCode>0</FormatCode>
</FormatElement>
<FormatElement msgid="FixedFormatskey3" default="false" type="medium" usage="FIXED_NUMBER" formatindex="2">
- <FormatCode>0,00</FormatCode>
+ <FormatCode>0.00</FormatCode>
</FormatElement>
<FormatElement msgid="FixedFormatskey4" default="false" type="short" usage="FIXED_NUMBER" formatindex="3">
- <FormatCode>#.##0</FormatCode>
+ <FormatCode>#,##0</FormatCode>
</FormatElement>
<FormatElement msgid="FixedFormatskey5" default="false" type="medium" usage="FIXED_NUMBER" formatindex="4">
- <FormatCode>#.##0,00</FormatCode>
+ <FormatCode>#,##0.00</FormatCode>
</FormatElement>
<FormatElement msgid="FixedFormatskey6" default="false" type="medium" usage="FIXED_NUMBER" formatindex="5">
- <FormatCode>#.###,00</FormatCode>
+ <FormatCode>#,###.00</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey1" default="true" type="short" usage="CURRENCY" formatindex="12">
- <FormatCode>[CURRENCY]#.##0;[CURRENCY]-#.##0</FormatCode>
+ <FormatCode>[CURRENCY]#,##0;[CURRENCY]-#,##0</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey2" default="false" type="medium" usage="CURRENCY" formatindex="13">
- <FormatCode>[CURRENCY]#.##0,00;[CURRENCY]-#.##0,00</FormatCode>
+ <FormatCode>[CURRENCY]#,##0.00;[CURRENCY]-#,##0.00</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey3" default="false" type="medium" usage="CURRENCY" formatindex="14">
- <FormatCode>[CURRENCY]#.##0;[RED][CURRENCY]-#.##0</FormatCode>
+ <FormatCode>[CURRENCY]#,##0;[RED][CURRENCY]-#,##0</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey4" default="true" type="medium" usage="CURRENCY" formatindex="15">
- <FormatCode>[CURRENCY]#.##0,00;[RED][CURRENCY]-#.##0,00</FormatCode>
+ <FormatCode>[CURRENCY]#,##0.00;[RED][CURRENCY]-#,##0.00</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey5" default="false" type="medium" usage="CURRENCY" formatindex="16">
- <FormatCode>#.##0,00 CCC</FormatCode>
+ <FormatCode>#,##0.00 CCC</FormatCode>
</FormatElement>
<FormatElement msgid="CurrencyFormatskey6" default="false" type="medium" usage="CURRENCY" formatindex="17">
- <FormatCode>[CURRENCY]#.##0,--;[RED][CURRENCY]-#.##0,--</FormatCode>
+ <FormatCode>[CURRENCY]#,##0.--;[RED][CURRENCY]-#,##0.--</FormatCode>
</FormatElement>
<FormatElement msgid="PercentFormatskey1" default="true" type="short" usage="PERCENT_NUMBER" formatindex="8">
<FormatCode>0%</FormatCode>
</FormatElement>
<FormatElement msgid="PercentFormatskey2" default="true" type="long" usage="PERCENT_NUMBER" formatindex="9">
- <FormatCode>0,00%</FormatCode>
+ <FormatCode>0.00%</FormatCode>
</FormatElement>
<FormatElement msgid="ScientificFormatskey1" default="true" type="medium" usage="SCIENTIFIC_NUMBER" formatindex="6">
- <FormatCode>0,00E+000</FormatCode>
+ <FormatCode>0.00E+000</FormatCode>
</FormatElement>
<FormatElement msgid="ScientificFormatskey2" default="false" type="medium" usage="SCIENTIFIC_NUMBER" formatindex="7">
- <FormatCode>0,00E+00</FormatCode>
+ <FormatCode>0.00E+00</FormatCode>
</FormatElement>
</LC_FORMAT>
<LC_COLLATION ref="en_US"/>
diff --git a/i18npool/source/localedata/data/es_PY.xml b/i18npool/source/localedata/data/es_PY.xml
index 89de1629e1fa..acbc1ecac3db 100644
--- a/i18npool/source/localedata/data/es_PY.xml
+++ b/i18npool/source/localedata/data/es_PY.xml
@@ -31,7 +31,7 @@
<PlatformID>generic</PlatformID>
</Platform>
</LC_INFO>
- <LC_CTYPE ref="es_PE"/>
+ <LC_CTYPE ref="es_AR"/>
<LC_FORMAT ref="es_AR" replaceTo="[$G-3C0A]"/>
<LC_COLLATION ref="en_US"/>
<LC_SEARCH ref="en_US"/>
diff --git a/i18npool/source/localedata/data/es_VE.xml b/i18npool/source/localedata/data/es_VE.xml
index a82138a77f47..532a3d8587f6 100644
--- a/i18npool/source/localedata/data/es_VE.xml
+++ b/i18npool/source/localedata/data/es_VE.xml
@@ -31,8 +31,8 @@
<PlatformID>generic</PlatformID>
</Platform>
</LC_INFO>
- <LC_CTYPE ref="es_PY"/>
- <LC_FORMAT ref="es_PE" replaceTo="[$Bs-200A]"/>
+ <LC_CTYPE ref="es_AR"/>
+ <LC_FORMAT ref="es_AR" replaceTo="[$Bs-200A]"/>
<LC_COLLATION ref="en_US"/>
<LC_SEARCH ref="en_US"/>
<LC_INDEX ref="es_ES"/>
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 314dd5b0e0b5..997b01dd6afa 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -60,7 +60,7 @@ static sal_Int32 COMPLEX_TRANS_MASK_TMP =
TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
TransliterationModules_ignoreProlongedSoundMark_ja_JP;
static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
-static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK;
+static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK | TransliterationModules_IGNORE_CASE | TransliterationModules_UPPERCASE_LOWERCASE | TransliterationModules_LOWERCASE_UPPERCASE);
// The above 2 transliterations are simple but need to take effect in
// complex transliteration
@@ -124,14 +124,12 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
sSrchStr = aSrchPara.searchString;
// use transliteration here
- if ( xTranslit.is() &&
- aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
+ if ( xTranslit.is() && aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
sSrchStr = xTranslit->transliterateString2String(
- aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
+ aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
- if ( xTranslit2.is() &&
- aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
- sSrchStr2 = xTranslit2->transliterateString2String(
+ if ( xTranslit2.is() && aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
+ sSrchStr2 = xTranslit2->transliterateString2String(
aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
// When start or end of search string is a complex script type, we need to
@@ -204,22 +202,32 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
newStartPos = FindPosInSeq_Impl( offset, startPos );
if( endPos < searchStr.getLength() )
- newEndPos = FindPosInSeq_Impl( offset, endPos );
+ newEndPos = FindPosInSeq_Impl( offset, endPos );
else
newEndPos = in_str.getLength();
sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
- for ( int k = 0; k < sres.startOffset.getLength(); k++ )
+ // Map offsets back to untransliterated string.
+ const sal_Int32 nOffsets = offset.getLength();
+ if (nOffsets)
{
- if (sres.startOffset[k])
- sres.startOffset[k] = offset[sres.startOffset[k]];
- // JP 20.6.2001: end is ever exclusive and then don't return
- // the position of the next character - return the
- // next position behind the last found character!
- // "a b c" find "b" must return 2,3 and not 2,4!!!
- if (sres.endOffset[k])
- sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
+ // For regex nGroups is the number of groups+1 with group 0 being
+ // the entire match.
+ const sal_Int32 nGroups = sres.startOffset.getLength();
+ for ( sal_Int32 k = 0; k < nGroups; k++ )
+ {
+ const sal_Int32 nStart = sres.startOffset[k];
+ if (nStart > 0)
+ sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
+ // JP 20.6.2001: end is ever exclusive and then don't return
+ // the position of the next character - return the
+ // next position behind the last found character!
+ // "a b c" find "b" must return 2,3 and not 2,4!!!
+ const sal_Int32 nStop = sres.endOffset[k];
+ if (nStop > 0)
+ sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
+ }
}
}
else
@@ -291,24 +299,34 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
// JP 20.6.2001: also the start and end positions must be corrected!
if( startPos < searchStr.getLength() )
newStartPos = FindPosInSeq_Impl( offset, startPos );
- else
- newStartPos = in_str.getLength();
+ else
+ newStartPos = in_str.getLength();
if( endPos )
- newEndPos = FindPosInSeq_Impl( offset, endPos );
+ newEndPos = FindPosInSeq_Impl( offset, endPos );
sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
- for ( int k = 0; k < sres.startOffset.getLength(); k++ )
+ // Map offsets back to untransliterated string.
+ const sal_Int32 nOffsets = offset.getLength();
+ if (nOffsets)
{
- if (sres.startOffset[k])
- sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
- // JP 20.6.2001: end is ever exclusive and then don't return
- // the position of the next character - return the
- // next position behind the last found character!
- // "a b c" find "b" must return 2,3 and not 2,4!!!
- if (sres.endOffset[k])
- sres.endOffset[k] = offset[sres.endOffset[k]];
+ // For regex nGroups is the number of groups+1 with group 0 being
+ // the entire match.
+ const sal_Int32 nGroups = sres.startOffset.getLength();
+ for ( sal_Int32 k = 0; k < nGroups; k++ )
+ {
+ const sal_Int32 nStart = sres.startOffset[k];
+ if (nStart > 0)
+ sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
+ // JP 20.6.2001: end is ever exclusive and then don't return
+ // the position of the next character - return the
+ // next position behind the last found character!
+ // "a b c" find "b" must return 2,3 and not 2,4!!!
+ const sal_Int32 nStop = sres.endOffset[k];
+ if (nStop > 0)
+ sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
+ }
}
}
else
@@ -675,21 +693,30 @@ void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOp
// REG_NOSUB is not used anywhere => not implemented
// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
// LEV_RELAXED is only used for SearchAlgorithm==Approximate
- // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
- if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
+ // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
+ // probably because the transliteration flag IGNORE_CASE handles it as well.
+ if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
+ || (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
UErrorCode nIcuErr = U_ZERO_ERROR;
// assumption: transliteration didn't mangle regexp control chars
IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
#ifndef DISABLE_WORDBOUND_EMULATION
// for convenience, specific syntax elements of the old regex engine are emulated
- // by using regular word boundary matching \b to replace \< and \>
- static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
- static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
- static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
- aChevronMatcher.reset( aIcuSearchPatStr);
- aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
- aChevronMatcher.reset();
+ // - by replacing \< with "word-break followed by a look-ahead word-char"
+ static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
+ static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
+ static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
+ aChevronMatcherB.reset( aIcuSearchPatStr);
+ aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
+ aChevronMatcherB.reset();
+ // - by replacing \> with "look-behind word-char followed by a word-break"
+ static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
+ static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
+ static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
+ aChevronMatcherE.reset( aIcuSearchPatStr);
+ aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
+ aChevronMatcherE.reset();
#endif
pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
if( nIcuErr)
@@ -725,6 +752,11 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
int nEndOfs = pRegexMatcher->end( nIcuErr);
if( nStartOfs < nEndOfs)
break;
+ // If the zero-length match is behind the string, do not match it again
+ // and again until startPos reaches there. A match behind the string is
+ // a "$" anchor.
+ if (nStartOfs == endPos)
+ break;
// try at next position if there was a zero-length match
if( ++startPos >= endPos)
return aRet;
@@ -769,12 +801,36 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
// find the last match
int nLastPos = 0;
+ int nFoundEnd = 0;
+ int nGoodPos = 0, nGoodEnd = 0;
+ bool bFirst = true;
do {
nLastPos = pRegexMatcher->start( nIcuErr);
- } while( pRegexMatcher->find( nLastPos + 1, nIcuErr));
+ nFoundEnd = pRegexMatcher->end( nIcuErr);
+ if (nLastPos < nFoundEnd)
+ {
+ // remember last non-zero-length match
+ nGoodPos = nLastPos;
+ nGoodEnd = nFoundEnd;
+ }
+ if( nFoundEnd >= startPos)
+ break;
+ bFirst = false;
+ if( nFoundEnd == nLastPos)
+ ++nFoundEnd;
+ } while( pRegexMatcher->find( nFoundEnd, nIcuErr));
+
+ // Ignore all zero-length matches except "$" anchor on first match.
+ if (nGoodPos == nGoodEnd)
+ {
+ if (bFirst && nLastPos == startPos)
+ nGoodPos = nLastPos;
+ else
+ return aRet;
+ }
// find last match again to get its details
- pRegexMatcher->find( nLastPos, nIcuErr);
+ pRegexMatcher->find( nGoodPos, nIcuErr);
// fill in the details of the last match
const int nGroupCount = pRegexMatcher->groupCount();