summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Lange [tl] <tl@openoffice.org>2010-08-17 16:46:36 +0200
committerThomas Lange [tl] <tl@openoffice.org>2010-08-17 16:46:36 +0200
commitafb2f41ca982b7e0cbc8a5b4952ea3edfbb2a883 (patch)
tree18c12cb36d61768453728529f4c10faf4c3b4642
parentf47d90f206047b44993671316cfad21eb471a846 (diff)
cws sw33bf08: #i113584#, #i113587# transliteration fixed
-rwxr-xr-xediteng/source/editeng/impedit.hxx3
-rwxr-xr-xediteng/source/editeng/impedit4.cxx369
2 files changed, 312 insertions, 60 deletions
diff --git a/editeng/source/editeng/impedit.hxx b/editeng/source/editeng/impedit.hxx
index ede5acacc698..c8f5e2948665 100755
--- a/editeng/source/editeng/impedit.hxx
+++ b/editeng/source/editeng/impedit.hxx
@@ -51,6 +51,7 @@
#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
#include <com/sun/star/i18n/WordType.hpp>
#include <com/sun/star/i18n/XExtendedInputSequenceChecker.hpp>
+#include <com/sun/star/uno/Sequence.hxx>
#include <i18npool/lang.h>
#include <vos/ref.hxx>
@@ -1002,6 +1003,8 @@ public:
void SetAutoCompleteText( const String& rStr, sal_Bool bUpdateTipWindow );
EditSelection TransliterateText( const EditSelection& rSelection, sal_Int32 nTransliterationMode );
+ short ReplaceTextOnly( ContentNode* pNode, USHORT nCurrentStart, xub_StrLen nLen, const String& rText, const ::com::sun::star::uno::Sequence< sal_Int32 >& rOffsets );
+
void SetAsianCompressionMode( USHORT n );
USHORT GetAsianCompressionMode() const { return nAsianCompressionMode; }
diff --git a/editeng/source/editeng/impedit4.cxx b/editeng/source/editeng/impedit4.cxx
index 482cd6d71338..6f216f7b22ca 100755
--- a/editeng/source/editeng/impedit4.cxx
+++ b/editeng/source/editeng/impedit4.cxx
@@ -76,6 +76,9 @@
#include <com/sun/star/linguistic2/XThesaurus.hpp>
#include <com/sun/star/linguistic2/XMeaning.hpp>
#include <com/sun/star/i18n/ScriptType.hpp>
+#include <com/sun/star/i18n/WordType.hpp>
+#include <com/sun/star/i18n/TransliterationModules.hpp>
+#include <com/sun/star/i18n/TransliterationModulesExtra.hpp>
#include <unotools/transliterationwrapper.hxx>
#include <unotools/textsearch.hxx>
#include <comphelper/processfactory.hxx>
@@ -83,6 +86,8 @@
#include <svtools/rtfkeywd.hxx>
#include <editeng/edtdlg.hxx>
+#include <vector>
+
using namespace ::com::sun::star;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::beans;
@@ -2798,8 +2803,23 @@ void ImpEditEngine::SetAutoCompleteText( const String& rStr, sal_Bool bClearTipW
#endif // !SVX_LIGHT
}
+
+struct TransliterationChgData // one pending text replacement, recorded first and applied later
+{
+ USHORT nStart; // index in the node where the text to be replaced starts
+ xub_StrLen nLen; // length of the original (not yet transliterated) text span
+ EditSelection aSelection; // selection covering the original span [nStart, nStart + nLen) in its node
+ String aNewText; // transliterated text that is to replace the original span
+ uno::Sequence< sal_Int32 > aOffsets; // offsets sequence filled in by the transliterate() call for aNewText
+};
+
+
EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection, sal_Int32 nTransliterationMode )
{
+ uno::Reference < i18n::XBreakIterator > _xBI( ImplGetBreakIterator() );
+ if (!_xBI.is())
+ return rSelection;
+
EditSelection aSel( rSelection );
aSel.Adjust( aEditDoc );
@@ -2808,8 +2828,8 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection,
EditSelection aNewSel( aSel );
- USHORT nStartNode = aEditDoc.GetPos( aSel.Min().GetNode() );
- USHORT nEndNode = aEditDoc.GetPos( aSel.Max().GetNode() );
+ const USHORT nStartNode = aEditDoc.GetPos( aSel.Min().GetNode() );
+ const USHORT nEndNode = aEditDoc.GetPos( aSel.Max().GetNode() );
BOOL bChanges = FALSE;
BOOL bLenChanged = FALSE;
@@ -2832,83 +2852,266 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection,
USHORT nCurrentEnd = nEndPos;
sal_uInt16 nLanguage = LANGUAGE_SYSTEM;
- do
- {
- if ( bConsiderLanguage )
+ // since we don't use Hiragana/Katakana or half-width/full-width transliterations here
+ // it is fine to use ANYWORD_IGNOREWHITESPACES. (ANY_WORD btw is broken and will
+ // occasionally miss words in consecutive sentences). Also with ANYWORD_IGNOREWHITESPACES
+ // text like 'just-in-time' will be converted to 'Just-In-Time' which seems to be the
+ // proper thing to do.
+ const sal_Int16 nWordType = i18n::WordType::ANYWORD_IGNOREWHITESPACES;
+
+ //! In order to have less trouble with changing text size, e.g. because
+ //! of ligatures or ß (German sharp s) being resolved, we need to process
+ //! the text replacements from end to start.
+ //! This way the offsets for the yet to be changed words will be
+ //! left unchanged by the already replaced text.
+ //! For this we temporarily save the changes to be done in this vector
+ std::vector< TransliterationChgData > aChanges;
+ TransliterationChgData aChgData;
+
+ if (nTransliterationMode == i18n::TransliterationModulesExtra::TITLE_CASE)
+ {
+ // for 'capitalize every word' we need to iterate over each word
+
+ i18n::Boundary aSttBndry;
+ i18n::Boundary aEndBndry;
+ aSttBndry = _xBI->getWordBoundary(
+ *pNode, nStartPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nStartPos + 1 ) ) ),
+ nWordType, TRUE /*prefer forward direction*/);
+ aEndBndry = _xBI->getWordBoundary(
+ *pNode, nEndPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nEndPos + 1 ) ) ),
+ nWordType, FALSE /*prefer backward direction*/);
+
+ // prevent backtracking to the previous word if selection is at word boundary
+ if (aSttBndry.endPos <= nStartPos)
+ {
+ aSttBndry = _xBI->nextWord(
+ *pNode, aSttBndry.endPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, aSttBndry.endPos + 1 ) ) ),
+ nWordType);
+ }
+ // prevent advancing to the next word if selection is at word boundary
+ if (aEndBndry.startPos >= nEndPos)
{
- nLanguage = GetLanguage( EditPaM( pNode, nCurrentStart+1 ), &nCurrentEnd );
- if ( nCurrentEnd > nEndPos )
- nCurrentEnd = nEndPos;
+ aEndBndry = _xBI->previousWord(
+ *pNode, aEndBndry.startPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, aEndBndry.startPos + 1 ) ) ),
+ nWordType);
}
- xub_StrLen nLen = nCurrentEnd - nCurrentStart;
+ i18n::Boundary aCurWordBndry( aSttBndry );
+ while (aCurWordBndry.startPos <= aEndBndry.startPos)
+ {
+ nCurrentStart = (xub_StrLen)aCurWordBndry.startPos;
+ nCurrentEnd = (xub_StrLen)aCurWordBndry.endPos;
+ sal_Int32 nLen = nCurrentEnd - nCurrentStart;
+ DBG_ASSERT( nLen > 0, "invalid word length of 0" );
+#if OSL_DEBUG_LEVEL > 1
+ String aText( pNode->Copy( nCurrentStart, nLen ) );
+#endif
+
+ Sequence< sal_Int32 > aOffsets;
+ String aNewText( aTranslitarationWrapper.transliterate( *pNode,
+ GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ),
+ nCurrentStart, nLen, &aOffsets ));
+
+ if (!pNode->Equals( aNewText, nCurrentStart, nLen ))
+ {
+ aChgData.nStart = nCurrentStart;
+ aChgData.nLen = nLen;
+ aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) );
+ aChgData.aNewText = aNewText;
+ aChgData.aOffsets = aOffsets;
+ aChanges.push_back( aChgData );
+ }
+#if OSL_DEBUG_LEVEL > 1
+ String aSelTxt ( GetSelected( aChgData.aSelection ) );
+ (void) aSelTxt;
+#endif
+
+ aCurWordBndry = _xBI->nextWord( *pNode, nCurrentEnd,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ),
+ nWordType);
+ }
+ DBG_ASSERT( nCurrentEnd >= aEndBndry.endPos, "failed to reach end of transliteration" );
+ }
+ else if (nTransliterationMode == i18n::TransliterationModulesExtra::SENTENCE_CASE)
+ {
+ // for 'sentence case' we need to iterate sentence by sentence
+
+ sal_Int32 nLastStart = _xBI->beginOfSentence(
+ *pNode, nEndPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nEndPos + 1 ) ) ) );
+ sal_Int32 nLastEnd = _xBI->endOfSentence(
+ *pNode, nLastStart,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nLastStart + 1 ) ) ) );
- Sequence <sal_Int32> aOffsets;
- String aNewText( aTranslitarationWrapper.transliterate( *pNode, nLanguage, nCurrentStart, nLen, &aOffsets ) );
+ // extend nCurrentStart, nCurrentEnd to the current sentence boundaries
+ nCurrentStart = _xBI->beginOfSentence(
+ *pNode, nStartPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nStartPos + 1 ) ) ) );
+ nCurrentEnd = _xBI->endOfSentence(
+ *pNode, nCurrentStart,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) );
- if( ( nLen != aNewText.Len() ) || !pNode->Equals( aNewText, nCurrentStart, nLen ) )
+ // prevent backtracking to the previous sentence if selection starts at end of a sentence
+ if (nCurrentEnd <= nStartPos)
{
- bChanges = TRUE;
- if ( nLen != aNewText.Len() )
- bLenChanged = TRUE;
+ // now nCurrentStart is probably located on a non-letter word. (unless we
+ // are in Asian text with no spaces...)
+ // Thus to get the real sentence start we should locate the next real word,
+ // that is one found by DICTIONARY_WORD
+ i18n::Boundary aBndry = _xBI->nextWord( *pNode, nCurrentEnd,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ),
+ i18n::WordType::DICTIONARY_WORD);
+
+ // now get new current sentence boundaries
+ nCurrentStart = _xBI->beginOfSentence(
+ *pNode, aBndry.startPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, aBndry.startPos + 1 ) ) ) );
+ nCurrentEnd = _xBI->endOfSentence(
+ *pNode, nCurrentStart,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) );
+ }
+ // prevent advancing to the next sentence if selection ends at start of a sentence
+ if (nLastStart >= nEndPos)
+ {
+ // now nLastStart is probably located on a non-letter word. (unless we
+ // are in Asian text with no spaces...)
+ // Thus to get the real sentence end we should locate the previous real word,
+ // that is one found by DICTIONARY_WORD
+ i18n::Boundary aBndry = _xBI->previousWord( *pNode, nLastStart,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nLastStart + 1 ) ) ),
+ i18n::WordType::DICTIONARY_WORD);
+ nLastEnd = _xBI->endOfSentence(
+ *pNode, aBndry.startPos,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, aBndry.startPos + 1 ) ) ) );
+ if (nCurrentEnd > nLastEnd)
+ nCurrentEnd = nLastEnd;
+ }
-#ifndef SVX_LIGHT
- // Create UndoAction on Demand....
- if ( !pUndo && IsUndoEnabled() && !IsInUndo() )
- {
- ESelection aESel( CreateESel( aSel ) );
- pUndo = new EditUndoTransliteration( this, aESel, nTransliterationMode );
+ while (nCurrentStart < nLastEnd)
+ {
+ sal_Int32 nLen = nCurrentEnd - nCurrentStart;
+ DBG_ASSERT( nLen > 0, "invalid word length of 0" );
+#if OSL_DEBUG_LEVEL > 1
+ String aText( pNode->Copy( nCurrentStart, nLen ) );
+#endif
- if ( ( nStartNode == nEndNode ) && !aSel.Min().GetNode()->GetCharAttribs().HasAttrib( aSel.Min().GetIndex(), aSel.Max().GetIndex() ) )
- pUndo->SetText( aSel.Min().GetNode()->Copy( aSel.Min().GetIndex(), aSel.Max().GetIndex()-aSel.Min().GetIndex() ) );
- else
- pUndo->SetText( CreateBinTextObject( aSel, NULL ) );
+ Sequence< sal_Int32 > aOffsets;
+ String aNewText( aTranslitarationWrapper.transliterate( *pNode,
+ GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ),
+ nCurrentStart, nLen, &aOffsets ));
+
+ if (!pNode->Equals( aNewText, nCurrentStart, nLen ))
+ {
+ aChgData.nStart = nCurrentStart;
+ aChgData.nLen = nLen;
+ aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) );
+ aChgData.aNewText = aNewText;
+ aChgData.aOffsets = aOffsets;
+ aChanges.push_back( aChgData );
}
-#endif
- // Change text without loosing the attributes
- USHORT nCharsAfterTransliteration =
- sal::static_int_cast< USHORT >(aOffsets.getLength());
- const sal_Int32* pOffsets = aOffsets.getConstArray();
- short nDiffs = 0;
- for ( USHORT n = 0; n < nCharsAfterTransliteration; n++ )
+ i18n::Boundary aFirstWordBndry;
+ aFirstWordBndry = _xBI->nextWord(
+ *pNode, nCurrentEnd,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ),
+ nWordType);
+ nCurrentStart = aFirstWordBndry.startPos;
+ nCurrentEnd = _xBI->endOfSentence(
+ *pNode, nCurrentStart,
+ SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) );
+ }
+ DBG_ASSERT( nCurrentEnd >= nLastEnd, "failed to reach end of transliteration" );
+ }
+ else
+ {
+ do
+ {
+ if ( bConsiderLanguage )
{
- USHORT nCurrentPos = nCurrentStart+n;
- sal_Int32 nDiff = (nCurrentPos-nDiffs) - pOffsets[n];
+ nLanguage = GetLanguage( EditPaM( pNode, nCurrentStart+1 ), &nCurrentEnd );
+ if ( nCurrentEnd > nEndPos )
+ nCurrentEnd = nEndPos;
+ }
- if ( !nDiff )
- {
- DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" );
- pNode->SetChar( nCurrentPos, aNewText.GetChar(n) );
- }
- else if ( nDiff < 0 )
- {
- // Replace first char, delete the rest...
- DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" );
- pNode->SetChar( nCurrentPos, aNewText.GetChar(n) );
+ xub_StrLen nLen = nCurrentEnd - nCurrentStart;
- DBG_ASSERT( (nCurrentPos+1) < pNode->Len(), "TransliterateText - String smaller than expected!" );
- GetEditDoc().RemoveChars( EditPaM( pNode, nCurrentPos+1 ), sal::static_int_cast< USHORT >(-nDiff) );
- }
- else
- {
- DBG_ASSERT( nDiff == 1, "TransliterateText - Diff other than expected! But should work..." );
- GetEditDoc().InsertText( EditPaM( pNode, nCurrentPos ), aNewText.GetChar(n) );
+ Sequence< sal_Int32 > aOffsets;
+ String aNewText( aTranslitarationWrapper.transliterate( *pNode, nLanguage, nCurrentStart, nLen, &aOffsets ) );
- }
- nDiffs = sal::static_int_cast< short >(nDiffs + nDiff);
+ if (!pNode->Equals( aNewText, nCurrentStart, nLen ))
+ {
+ aChgData.nStart = nCurrentStart;
+ aChgData.nLen = nLen;
+ aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) );
+ aChgData.aNewText = aNewText;
+ aChgData.aOffsets = aOffsets;
+ aChanges.push_back( aChgData );
}
- if ( nNode == nEndNode )
- aNewSel.Max().GetIndex() =
- aNewSel.Max().GetIndex() + nDiffs;
+ nCurrentStart = nCurrentEnd;
+ } while( nCurrentEnd < nEndPos );
+ }
+
+ if (aChanges.size() > 0)
+ {
+#ifndef SVX_LIGHT
+ // Create a single UndoAction on Demand for all the changes ...
+ if ( !pUndo && IsUndoEnabled() && !IsInUndo() )
+ {
+ // adjust selection to include all changes
+ for (size_t i = 0; i < aChanges.size(); ++i)
+ {
+ const EditSelection &rSel = aChanges[i].aSelection;
+ if (aSel.Min().GetNode() == rSel.Min().GetNode() &&
+ aSel.Min().GetIndex() > rSel.Min().GetIndex())
+ aSel.Min().SetIndex( rSel.Min().GetIndex() );
+ if (aSel.Max().GetNode() == rSel.Max().GetNode() &&
+ aSel.Max().GetIndex() < rSel.Max().GetIndex())
+ aSel.Max().SetIndex( rSel.Max().GetIndex() );
+ }
+ aNewSel = aSel;
+
+ ESelection aESel( CreateESel( aSel ) );
+ pUndo = new EditUndoTransliteration( this, aESel, nTransliterationMode );
+
+ const bool bSingleNode = aSel.Min().GetNode()== aSel.Max().GetNode();
+ const bool bHasAttribs = aSel.Min().GetNode()->GetCharAttribs().HasAttrib( aSel.Min().GetIndex(), aSel.Max().GetIndex() );
+ if (bSingleNode && !bHasAttribs)
+ pUndo->SetText( aSel.Min().GetNode()->Copy( aSel.Min().GetIndex(), aSel.Max().GetIndex()-aSel.Min().GetIndex() ) );
+ else
+ pUndo->SetText( CreateBinTextObject( aSel, NULL ) );
+ }
+#endif
+
+ // now apply the changes from end to start so that the offsets of the
+ // yet unchanged text parts remain the same.
+ for (size_t i = 0; i < aChanges.size(); ++i)
+ {
+ const TransliterationChgData &rData = aChanges[ aChanges.size() - 1 - i ];
- ParaPortion* pParaPortion = GetParaPortions()[nNode];
- pParaPortion->MarkSelectionInvalid( nCurrentStart, std::max< USHORT >( nCurrentStart+nLen, nCurrentStart+aNewText.Len() ) );
+ bChanges = TRUE;
+ if (rData.nLen != rData.aNewText.Len())
+ bLenChanged = TRUE;
+ // Change text without losing the attributes
+ USHORT nDiffs = ReplaceTextOnly( rData.aSelection.Min().GetNode(),
+ rData.nStart, rData.nLen, rData.aNewText, rData.aOffsets );
+
+ // adjust selection in end node to possibly changed size
+ if (aSel.Max().GetNode() == rData.aSelection.Max().GetNode())
+ aNewSel.Max().GetIndex() = aNewSel.Max().GetIndex() + nDiffs;
+
+ USHORT nSelNode = aEditDoc.GetPos( rData.aSelection.Min().GetNode() );
+ ParaPortion* pParaPortion = GetParaPortions()[nSelNode];
+ pParaPortion->MarkSelectionInvalid( rData.nStart,
+ std::max< USHORT >( rData.nStart + rData.nLen,
+ rData.nStart + rData.aNewText.Len() ) );
}
- nCurrentStart = nCurrentEnd;
- } while( nCurrentEnd < nEndPos );
+ } // if (aChanges.size() > 0)
}
#ifndef SVX_LIGHT
@@ -2932,6 +3135,52 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection,
return aNewSel;
}
+
+short ImpEditEngine::ReplaceTextOnly( // returns the accumulated per-character difference (negative when chars were removed)
+ ContentNode* pNode, // node whose text is modified in place
+ USHORT nCurrentStart, xub_StrLen nLen, // start index of the replacement in pNode; nLen is currently unused
+ const String& rNewText, // replacement text, consumed one character per loop iteration
+ const uno::Sequence< sal_Int32 >& rOffsets ) // one entry per new character; presumably the source index it maps to - TODO confirm
+{
+ (void) nLen; // silence the unused-parameter warning
+ // The node text is edited character by character instead of being replaced as a whole.
+ // Change text without losing the attributes
+ USHORT nCharsAfterTransliteration =
+ sal::static_int_cast< USHORT >(rOffsets.getLength());
+ const sal_Int32* pOffsets = rOffsets.getConstArray();
+ short nDiffs = 0; // running sum of all nDiff values seen so far
+ for ( USHORT n = 0; n < nCharsAfterTransliteration; n++ )
+ {
+ USHORT nCurrentPos = nCurrentStart+n; // position of the n-th new character in the (already partly edited) node
+ sal_Int32 nDiff = (nCurrentPos-nDiffs) - pOffsets[n]; // position corrected by earlier edits, compared with the reported offset
+
+ if ( !nDiff ) // offsets agree: plain 1:1 character replacement
+ {
+ DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" );
+ pNode->SetChar( nCurrentPos, rNewText.GetChar(n) );
+ }
+ else if ( nDiff < 0 ) // reported offset is ahead: several old characters collapse into this one
+ {
+ // Replace first char, delete the rest...
+ DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" );
+ pNode->SetChar( nCurrentPos, rNewText.GetChar(n) );
+
+ DBG_ASSERT( (nCurrentPos+1) < pNode->Len(), "TransliterateText - String smaller than expected!" );
+ GetEditDoc().RemoveChars( EditPaM( pNode, nCurrentPos+1 ), sal::static_int_cast< USHORT >(-nDiff) );
+ }
+ else // reported offset is behind: an additional character has to be inserted
+ {
+ DBG_ASSERT( nDiff == 1, "TransliterateText - Diff other than expected! But should work..." );
+ GetEditDoc().InsertText( EditPaM( pNode, nCurrentPos ), rNewText.GetChar(n) );
+
+ }
+ nDiffs = sal::static_int_cast< short >(nDiffs + nDiff); // carry the shift forward to the following positions
+ }
+
+ return nDiffs;
+}
+
+
void ImpEditEngine::SetAsianCompressionMode( USHORT n )
{
if ( n != nAsianCompressionMode )