summaryrefslogtreecommitdiff
path: root/i18npool/source/transliteration/transliteration_body.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/transliteration/transliteration_body.cxx')
-rwxr-xr-x[-rw-r--r--]i18npool/source/transliteration/transliteration_body.cxx263
1 files changed, 258 insertions, 5 deletions
diff --git a/i18npool/source/transliteration/transliteration_body.cxx b/i18npool/source/transliteration/transliteration_body.cxx
index a869f6713b99..b58347826470 100644..100755
--- a/i18npool/source/transliteration/transliteration_body.cxx
+++ b/i18npool/source/transliteration/transliteration_body.cxx
@@ -28,7 +28,17 @@
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"
+#include <rtl/ustrbuf.hxx>
#include <i18nutil/casefolding.hxx>
+#include <i18nutil/unicode.hxx>
+
+#include <comphelper/processfactory.hxx>
+#include <osl/diagnose.h>
+
+
+#include "characterclassificationImpl.hxx"
+#include "breakiteratorImpl.hxx"
+
#define TRANSLITERATION_ALL
#include "transliteration_body.hxx"
@@ -36,8 +46,11 @@ using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang;
using namespace ::rtl;
+#define A2OU(x) OUString::createFromAscii(x)
+
namespace com { namespace sun { namespace star { namespace i18n {
+
Transliteration_body::Transliteration_body()
{
nMappingType = 0;
@@ -68,9 +81,35 @@ Transliteration_body::transliterateRange( const OUString& str1, const OUString&
return ostr;
}
+
+static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar )
+{
+ sal_uInt8 nRes = nMappingType;
+
+ // take care of TOGGLE_CASE transliteration:
+ // nMappingType should not be a combination of flags, thuse we decide now
+ // which one to use.
+ if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
+ {
+ const sal_Int16 nType = unicode::getUnicodeType( cChar );
+ if (nType & 0x02 /* lower case*/)
+ nRes = MappingTypeLowerToUpper;
+ else
+ {
+ OSL_ENSURE( nType & 0x01 /* upper case */, "uppercase character expected! 'Toggle case' failed?" );
+ nRes = MappingTypeUpperToLower;
+ }
+ }
+
+ return nRes;
+}
+
+
OUString SAL_CALL
-Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
- Sequence< sal_Int32 >& offset) throw(RuntimeException)
+Transliteration_body::transliterate(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >& offset)
+ throw(RuntimeException)
{
#if 0
/* Performance optimization:
@@ -139,7 +178,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos,
sal_Int32 nOffCount = 0, i;
for (i = 0; i < nCount; i++)
{
- const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
+ // take care of TOGGLE_CASE transliteration:
+ sal_uInt8 nTmpMappingType = nMappingType;
+ if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
+ nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
+
+ const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
nOffCount += map.nmap;
}
rtl_uString* pStr = x_rtl_uString_new_WithLength( nOffCount, 1 ); // our x_rtl_ustring.h
@@ -152,7 +196,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos,
sal_Int32 * pArr = offset.getArray();
for (i = 0; i < nCount; i++)
{
- const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
+ // take care of TOGGLE_CASE transliteration:
+ sal_uInt8 nTmpMappingType = nMappingType;
+ if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
+ nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
+
+ const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
for (sal_Int32 k = 0; k < map.nmap; k++)
{
pArr[j] = i + startPos;
@@ -184,7 +233,12 @@ Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos,
sal_Int32 j = 0;
for ( sal_Int32 i = 0; i < nCount; i++)
{
- const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
+ // take care of TOGGLE_CASE transliteration:
+ sal_uInt8 nTmpMappingType = nMappingType;
+ if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
+ nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
+
+ const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
for (sal_Int32 k = 0; k < map.nmap; k++)
{
out[j++] = map.map[k];
@@ -258,4 +312,203 @@ Transliteration_l2u::Transliteration_l2u()
implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
}
+Transliteration_togglecase::Transliteration_togglecase()
+{
+ // usually nMappingType must NOT be a combiantion of different flages here,
+ // but we take care of that problem in Transliteration_body::transliterate above
+ // before that value is used. There we will decide which of both is to be used on
+ // a per character basis.
+ nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower;
+ transliterationName = "toggle(generic)";
+ implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
+}
+
+Transliteration_titlecase::Transliteration_titlecase()
+{
+ nMappingType = MappingTypeToTitle;
+ transliterationName = "title(generic)";
+ implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
+}
+
+rtl::OUString SAL_CALL Transliteration_titlecase::transliterate(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >& /*offset*/ )
+ throw(RuntimeException)
+{
+ Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
+ CharacterClassificationImpl aCharClassImpl( xMSF );
+
+ // possible problem: the locale is not exactly specific for each word in the text...
+ OUString aRes( aCharClassImpl.toTitle( inStr, startPos, nCount, aLocale ) );
+ return aRes;
+}
+
+Transliteration_sentencecase::Transliteration_sentencecase()
+{
+ nMappingType = MappingTypeToTitle; // though only to be applied to the first word...
+ transliterationName = "sentence(generic)";
+ implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
+}
+
+rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >& offset )
+ throw(RuntimeException)
+{
+ // inspired from Transliteration_body::transliterate
+ sal_Int32 nOffCount = 0, i;
+ bool bPoint = true;
+ if (useOffset)
+ {
+ for( i = 0; i < nCount; ++i ) {
+ sal_Unicode c = inStr.getStr()[ i + startPos ];
+ if( sal_Unicode('.') == c || sal_Unicode('!') == c || sal_Unicode('?') == c ) {
+ bPoint = true;
+ nOffCount++;
+ }
+ else if( unicode::isAlpha( c ) || unicode::isDigit( c ) )
+ {
+ const Mapping* map = 0;
+ if( bPoint && unicode::isLower( c ))
+ {
+ map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeLowerToUpper);
+ bPoint = false;
+ }
+ else if (!bPoint && unicode::isUpper( c ))
+ {
+ map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeUpperToLower);
+ }
+
+ if(map == 0)
+ {
+ nOffCount++;
+ }
+ else
+ {
+ nOffCount += map->nmap;
+ }
+ }
+ else
+ {
+ nOffCount++;
+ }
+ }
+ }
+
+ bPoint = true;
+ rtl::OUStringBuffer result;
+
+ if (useOffset)
+ {
+ result.ensureCapacity(nOffCount);
+ if ( nOffCount != offset.getLength() )
+ offset.realloc( nOffCount );
+ }
+
+
+ sal_Int32 j = 0;
+ sal_Int32 * pArr = offset.getArray();
+ for( i = 0; i < nCount; ++i ) {
+ sal_Unicode c = inStr.getStr()[ i + startPos ];
+ if( sal_Unicode('.') == c || sal_Unicode('!') == c || sal_Unicode('?') == c ) {
+ bPoint = true;
+ result.append(c);
+ pArr[j++] = i + startPos;
+ }
+ else if( unicode::isAlpha( c ) || unicode::isDigit( c ) )
+ {
+ const Mapping* map = 0;
+ if( bPoint && unicode::isLower( c ))
+ {
+ map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeLowerToUpper);
+ }
+ else if (!bPoint && unicode::isUpper( c ))
+ {
+ map = &casefolding::getValue(&c, 0, 1, aLocale, MappingTypeUpperToLower);
+ }
+
+ if(map == 0)
+ {
+ result.append( c );
+ pArr[j++] = i + startPos;
+ }
+ else
+ {
+ for (sal_Int32 k = 0; k < map->nmap; k++)
+ {
+ result.append( map->map[k] );
+ pArr[j++] = i + startPos;
+ }
+ }
+ bPoint = false;
+ }
+ else
+ {
+ result.append( c );
+ pArr[j++] = i + startPos;
+ }
+ }
+ return result.makeStringAndClear();
+}
+
+#if 0
+// TL: alternative implemntation try. But breakiterator has its problem too since
+// beginOfSentence does not work as expected with '.'. See comment below.
+// For the time being I will leave this code here as a from-scratch sample if the
+// breakiterator works better at some point...
+rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate(
+ const OUString& inStr, sal_Int32 nStartPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >& /*offset*/ )
+ throw(RuntimeException)
+{
+ OUString aRes( inStr.copy( nStartPos, nCount ) );
+
+ if (nStartPos >= 0 && nStartPos < inStr.getLength() && nCount > 0)
+ {
+ Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
+ BreakIteratorImpl brk( xMSF );
+
+ sal_Int32 nSentenceStart = -1, nOldSentenceStart = -1;
+ sal_Int32 nPos = nStartPos + nCount - 1;
+ while (nPos >= nStartPos && nPos != -1)
+ {
+ // possible problem: the locale is not exactly specific for each sentence in the text,
+ // but it is the only one we have...
+ nOldSentenceStart = nSentenceStart;
+ nSentenceStart = brk.beginOfSentence( inStr, nPos, aLocale );
+
+ // since the breakiterator completely ignores '.' characvters as end-of-sentence when
+ // the next word is lower case we need to take care of that ourself. The drawback:
+ // la mid-sentence abbreviation like e.g. will now be identified as end-of-sentence. :-(
+ // Well, at least the other product does it in the same way...
+ sal_Int32 nFullStopPos = inStr.lastIndexOf( (sal_Unicode)'.', nPos );
+ nPos = nSentenceStart;
+ if (nFullStopPos > 0 && nFullStopPos > nSentenceStart)
+ {
+ Boundary aBd2 = brk.nextWord( inStr, nFullStopPos, aLocale, WordType::DICTIONARY_WORD );
+ nSentenceStart = aBd2.startPos;
+ nPos = nFullStopPos;
+ }
+
+ if (nSentenceStart < nOldSentenceStart || nOldSentenceStart == -1)
+ {
+ // the sentence start might be a quotation mark or some kind of bracket, thus
+ // we need the first dictionary word starting or following this position
+ // Boundary aBd1 = brk.nextWord( inStr, nSentenceStart, aLocale, WordType::DICTIONARY_WORD );
+ Boundary aBd2 = brk.getWordBoundary( inStr, nSentenceStart, aLocale, WordType::DICTIONARY_WORD, true );
+ // OUString aWord1( inStr.copy( aBd1.startPos, aBd1.endPos - aBd1.startPos + 1 ) );
+ OUString aWord2( inStr.copy( aBd2.startPos, aBd2.endPos - aBd2.startPos + 1 ) );
+ }
+ else
+ break; // prevent endless loop
+
+ // continue with previous sentence
+ if (nPos != -1)
+ --nPos;
+ }
+ }
+ return aRes;
+}
+#endif
+
} } } }