diff options
author | obo <obo@openoffice.org> | 2010-06-09 16:35:27 +0200 |
---|---|---|
committer | obo <obo@openoffice.org> | 2010-06-09 16:35:27 +0200 |
commit | ff33409c01b082dc5a0e501b0f4c2f97131fcd52 (patch) | |
tree | 71de51c5ae2992d1a4da21373ea85d5e2161338a /i18npool/source/transliteration | |
parent | 279b08c3fe4416cdf68a0f564b7122d399819671 (diff) | |
parent | 4e653694b634c30f052ba3288f71e6905377b9c6 (diff) |
CWS-TOOLING: integrate CWS tl74
Notes
Notes:
split repo tag: libs-gui_ooo/DEV300_m82
Diffstat (limited to 'i18npool/source/transliteration')
-rwxr-xr-x[-rw-r--r--] | i18npool/source/transliteration/transliteration_body.cxx | 263 |
1 files changed, 258 insertions, 5 deletions
diff --git a/i18npool/source/transliteration/transliteration_body.cxx b/i18npool/source/transliteration/transliteration_body.cxx index a869f6713b99..b58347826470 100644..100755 --- a/i18npool/source/transliteration/transliteration_body.cxx +++ b/i18npool/source/transliteration/transliteration_body.cxx @@ -28,7 +28,17 @@ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_i18npool.hxx" +#include <rtl/ustrbuf.hxx> #include <i18nutil/casefolding.hxx> +#include <i18nutil/unicode.hxx> + +#include <comphelper/processfactory.hxx> +#include <osl/diagnose.h> + + +#include "characterclassificationImpl.hxx" +#include "breakiteratorImpl.hxx" + #define TRANSLITERATION_ALL #include "transliteration_body.hxx" @@ -36,8 +46,11 @@ using namespace ::com::sun::star::uno; using namespace ::com::sun::star::lang; using namespace ::rtl; +#define A2OU(x) OUString::createFromAscii(x) + namespace com { namespace sun { namespace star { namespace i18n { + Transliteration_body::Transliteration_body() { nMappingType = 0; @@ -68,9 +81,35 @@ Transliteration_body::transliterateRange( const OUString& str1, const OUString& return ostr; } + +static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar ) +{ + sal_uInt8 nRes = nMappingType; + + // take care of TOGGLE_CASE transliteration: + // nMappingType should not be a combination of flags, thuse we decide now + // which one to use. + if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower)) + { + const sal_Int16 nType = unicode::getUnicodeType( cChar ); + if (nType & 0x02 /* lower case*/) + nRes = MappingTypeLowerToUpper; + else + { + OSL_ENSURE( nType & 0x01 /* upper case */, "uppercase character expected! 'Toggle case' failed?" ); + nRes = MappingTypeUpperToLower; + } + } + + return nRes; +} + + OUString SAL_CALL -Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, - Sequence< sal_Int32 >& offset) throw(RuntimeException) +Transliteration_body::transliterate( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset) + throw(RuntimeException) { #if 0 /* Performance optimization: @@ -139,7 +178,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nOffCount = 0, i; for (i = 0; i < nCount; i++) { - const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType); + // take care of TOGGLE_CASE transliteration: + sal_uInt8 nTmpMappingType = nMappingType; + if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower)) + nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] ); + + const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType ); nOffCount += map.nmap; } rtl_uString* pStr = x_rtl_uString_new_WithLength( nOffCount, 1 ); // our x_rtl_ustring.h @@ -152,7 +196,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 * pArr = offset.getArray(); for (i = 0; i < nCount; i++) { - const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType); + // take care of TOGGLE_CASE transliteration: + sal_uInt8 nTmpMappingType = nMappingType; + if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower)) + nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] ); + + const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType ); for (sal_Int32 k = 0; k < map.nmap; k++) { pArr[j] = i + startPos; @@ -184,7 +233,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 j = 0; for ( sal_Int32 i = 0; i < nCount; i++) { - const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType); + // take care of TOGGLE_CASE transliteration: + sal_uInt8 nTmpMappingType = nMappingType; + if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower)) + nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] ); + + const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType ); for (sal_Int32 k = 0; k < map.nmap; k++) { out[j++] = map.map[k]; @@ -258,4 +312,203 @@ Transliteration_l2u::Transliteration_l2u() implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u"; } +Transliteration_togglecase::Transliteration_togglecase() +{ + // usually nMappingType must NOT be a combiantion of different flages here, + // but we take care of that problem in Transliteration_body::transliterate above + // before that value is used. There we will decide which of both is to be used on + // a per character basis. + nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower; + transliterationName = "toggle(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase"; +} + +Transliteration_titlecase::Transliteration_titlecase() +{ + nMappingType = MappingTypeToTitle; + transliterationName = "title(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase"; +} + +rtl::OUString SAL_CALL Transliteration_titlecase::transliterate( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& /*offset*/ ) + throw(RuntimeException) +{ + Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory(); + CharacterClassificationImpl aCharClassImpl( xMSF ); + + // possible problem: the locale is not exactly specific for each word in the text... + OUString aRes( aCharClassImpl.toTitle( inStr, startPos, nCount, aLocale ) ); + return aRes; +} + +Transliteration_sentencecase::Transliteration_sentencecase() +{ + nMappingType = MappingTypeToTitle; // though only to be applied to the first word... + transliterationName = "sentence(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase"; +} + +rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) + throw(RuntimeException) +{ + // inspired from Transliteration_body::transliterate + sal_Int32 nOffCount = 0, i; + bool bPoint = true; + if (useOffset) + { + for( i = 0; i < nCount; ++i ) { + sal_Unicode c = inStr.getStr()[ i + startPos ]; + if( sal_Unicode('.') == c || sal_Unicode('!') == c || sal_Unicode('?') == c ) { + bPoint = true; + nOffCount++; + } + else if( unicode::isAlpha( c ) || unicode::isDigit( c ) ) + { + const Mapping* map = 0; + if( bPoint && unicode::isLower( c )) + { + map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeLowerToUpper); + bPoint = false; + } + else if (!bPoint && unicode::isUpper( c )) + { + map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeUpperToLower); + } + + if(map == 0) + { + nOffCount++; + } + else + { + nOffCount += map->nmap; + } + } + else + { + nOffCount++; + } + } + } + + bPoint = true; + rtl::OUStringBuffer result; + + if (useOffset) + { + result.ensureCapacity(nOffCount); + if ( nOffCount != offset.getLength() ) + offset.realloc( nOffCount ); + } + + + sal_Int32 j = 0; + sal_Int32 * pArr = offset.getArray(); + for( i = 0; i < nCount; ++i ) { + sal_Unicode c = inStr.getStr()[ i + startPos ]; + if( sal_Unicode('.') == c || sal_Unicode('!') == c || sal_Unicode('?') == c ) { + bPoint = true; + result.append(c); + pArr[j++] = i + startPos; + } + else if( unicode::isAlpha( c ) || unicode::isDigit( c ) ) + { + const Mapping* map = 0; + if( bPoint && unicode::isLower( c )) + { + map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeLowerToUpper); + } + else if (!bPoint && unicode::isUpper( c )) + { + map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeUpperToLower); + } + + if(map == 0) + { + result.append( c ); + pArr[j++] = i + startPos; + } + else + { + for (sal_Int32 k = 0; k < map->nmap; k++) + { + result.append( map->map[k] ); + pArr[j++] = i + startPos; + } + } + bPoint = false; + } + else + { + result.append( c ); + pArr[j++] = i + startPos; + } + } + return result.makeStringAndClear(); +} + +#if 0 +// TL: alternative implemntation try. But breakiterator has its problem too since +// beginOfSentence does not work as expected with '.'. See comment below. +// For the time being I will leave this code here as a from-scratch sample if the +// breakiterator works better at some point... +rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate( + const OUString& inStr, sal_Int32 nStartPos, sal_Int32 nCount, + Sequence< sal_Int32 >& /*offset*/ ) + throw(RuntimeException) +{ + OUString aRes( inStr.copy( nStartPos, nCount ) ); + + if (nStartPos >= 0 && nStartPos < inStr.getLength() && nCount > 0) + { + Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory(); + BreakIteratorImpl brk( xMSF ); + + sal_Int32 nSentenceStart = -1, nOldSentenceStart = -1; + sal_Int32 nPos = nStartPos + nCount - 1; + while (nPos >= nStartPos && nPos != -1) + { + // possible problem: the locale is not exactly specific for each sentence in the text, + // but it is the only one we have... + nOldSentenceStart = nSentenceStart; + nSentenceStart = brk.beginOfSentence( inStr, nPos, aLocale ); + + // since the breakiterator completely ignores '.' characvters as end-of-sentence when + // the next word is lower case we need to take care of that ourself. The drawback: + // la mid-sentence abbreviation like e.g. will now be identified as end-of-sentence. :-( + // Well, at least the other product does it in the same way... + sal_Int32 nFullStopPos = inStr.lastIndexOf( (sal_Unicode)'.', nPos ); + nPos = nSentenceStart; + if (nFullStopPos > 0 && nFullStopPos > nSentenceStart) + { + Boundary aBd2 = brk.nextWord( inStr, nFullStopPos, aLocale, WordType::DICTIONARY_WORD ); + nSentenceStart = aBd2.startPos; + nPos = nFullStopPos; + } + + if (nSentenceStart < nOldSentenceStart || nOldSentenceStart == -1) + { + // the sentence start might be a quotation mark or some kind of bracket, thus + // we need the first dictionary word starting or following this position + // Boundary aBd1 = brk.nextWord( inStr, nSentenceStart, aLocale, WordType::DICTIONARY_WORD ); + Boundary aBd2 = brk.getWordBoundary( inStr, nSentenceStart, aLocale, WordType::DICTIONARY_WORD, true ); + // OUString aWord1( inStr.copy( aBd1.startPos, aBd1.endPos - aBd1.startPos + 1 ) ); + OUString aWord2( inStr.copy( aBd2.startPos, aBd2.endPos - aBd2.startPos + 1 ) ); + } + else + break; // prevent endless loop + + // continue with previous sentence + if (nPos != -1) + --nPos; + } + } + return aRes; +} +#endif + } } } } |