diff options
author | Thomas Lange [tl] <tl@openoffice.org> | 2010-08-17 16:46:36 +0200 |
---|---|---|
committer | Thomas Lange [tl] <tl@openoffice.org> | 2010-08-17 16:46:36 +0200 |
commit | afb2f41ca982b7e0cbc8a5b4952ea3edfbb2a883 (patch) | |
tree | 18c12cb36d61768453728529f4c10faf4c3b4642 | |
parent | f47d90f206047b44993671316cfad21eb471a846 (diff) |
cws sw33bf08: #i113584#, #i113587# transliteration fixed
-rwxr-xr-x | editeng/source/editeng/impedit.hxx | 3 | ||||
-rwxr-xr-x | editeng/source/editeng/impedit4.cxx | 369 |
2 files changed, 312 insertions, 60 deletions
diff --git a/editeng/source/editeng/impedit.hxx b/editeng/source/editeng/impedit.hxx index ede5acacc698..c8f5e2948665 100755 --- a/editeng/source/editeng/impedit.hxx +++ b/editeng/source/editeng/impedit.hxx @@ -51,6 +51,7 @@ #include <com/sun/star/i18n/CharacterIteratorMode.hpp> #include <com/sun/star/i18n/WordType.hpp> #include <com/sun/star/i18n/XExtendedInputSequenceChecker.hpp> +#include <com/sun/star/uno/Sequence.hxx> #include <i18npool/lang.h> #include <vos/ref.hxx> @@ -1002,6 +1003,8 @@ public: void SetAutoCompleteText( const String& rStr, sal_Bool bUpdateTipWindow ); EditSelection TransliterateText( const EditSelection& rSelection, sal_Int32 nTransliterationMode ); + short ReplaceTextOnly( ContentNode* pNode, USHORT nCurrentStart, xub_StrLen nLen, const String& rText, const ::com::sun::star::uno::Sequence< sal_Int32 >& rOffsets ); + void SetAsianCompressionMode( USHORT n ); USHORT GetAsianCompressionMode() const { return nAsianCompressionMode; } diff --git a/editeng/source/editeng/impedit4.cxx b/editeng/source/editeng/impedit4.cxx index 482cd6d71338..6f216f7b22ca 100755 --- a/editeng/source/editeng/impedit4.cxx +++ b/editeng/source/editeng/impedit4.cxx @@ -76,6 +76,9 @@ #include <com/sun/star/linguistic2/XThesaurus.hpp> #include <com/sun/star/linguistic2/XMeaning.hpp> #include <com/sun/star/i18n/ScriptType.hpp> +#include <com/sun/star/i18n/WordType.hpp> +#include <com/sun/star/i18n/TransliterationModules.hpp> +#include <com/sun/star/i18n/TransliterationModulesExtra.hpp> #include <unotools/transliterationwrapper.hxx> #include <unotools/textsearch.hxx> #include <comphelper/processfactory.hxx> @@ -83,6 +86,8 @@ #include <svtools/rtfkeywd.hxx> #include <editeng/edtdlg.hxx> +#include <vector> + using namespace ::com::sun::star; using namespace ::com::sun::star::uno; using namespace ::com::sun::star::beans; @@ -2798,8 +2803,23 @@ void ImpEditEngine::SetAutoCompleteText( const String& rStr, sal_Bool bClearTipW #endif // !SVX_LIGHT } + +struct 
TransliterationChgData +{ + USHORT nStart; + xub_StrLen nLen; + EditSelection aSelection; + String aNewText; + uno::Sequence< sal_Int32 > aOffsets; +}; + + EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection, sal_Int32 nTransliterationMode ) { + uno::Reference < i18n::XBreakIterator > _xBI( ImplGetBreakIterator() ); + if (!_xBI.is()) + return rSelection; + EditSelection aSel( rSelection ); aSel.Adjust( aEditDoc ); @@ -2808,8 +2828,8 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection, EditSelection aNewSel( aSel ); - USHORT nStartNode = aEditDoc.GetPos( aSel.Min().GetNode() ); - USHORT nEndNode = aEditDoc.GetPos( aSel.Max().GetNode() ); + const USHORT nStartNode = aEditDoc.GetPos( aSel.Min().GetNode() ); + const USHORT nEndNode = aEditDoc.GetPos( aSel.Max().GetNode() ); BOOL bChanges = FALSE; BOOL bLenChanged = FALSE; @@ -2832,83 +2852,266 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection, USHORT nCurrentEnd = nEndPos; sal_uInt16 nLanguage = LANGUAGE_SYSTEM; - do - { - if ( bConsiderLanguage ) + // since we don't use Hiragana/Katakana or half-width/full-width transliterations here + // it is fine to use ANYWORD_IGNOREWHITESPACES. (ANY_WORD btw is broken and will + // occasionaly miss words in consecutive sentences). Also with ANYWORD_IGNOREWHITESPACES + // text like 'just-in-time' will be converted to 'Just-In-Time' which seems to be the + // proper thing to do. + const sal_Int16 nWordType = i18n::WordType::ANYWORD_IGNOREWHITESPACES; + + //! In order to have less trouble with changing text size, e.g. because + //! of ligatures or ß (German small sz) being resolved, we need to process + //! the text replacements from end to start. + //! This way the offsets for the yet to be changed words will be + //! left unchanged by the already replaced text. + //! 
For this we temporarily save the changes to be done in this vector + std::vector< TransliterationChgData > aChanges; + TransliterationChgData aChgData; + + if (nTransliterationMode == i18n::TransliterationModulesExtra::TITLE_CASE) + { + // for 'capitalize every word' we need to iterate over each word + + i18n::Boundary aSttBndry; + i18n::Boundary aEndBndry; + aSttBndry = _xBI->getWordBoundary( + *pNode, nStartPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nStartPos + 1 ) ) ), + nWordType, TRUE /*prefer forward direction*/); + aEndBndry = _xBI->getWordBoundary( + *pNode, nEndPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nEndPos + 1 ) ) ), + nWordType, FALSE /*prefer backward direction*/); + + // prevent backtracking to the previous word if selection is at word boundary + if (aSttBndry.endPos <= nStartPos) + { + aSttBndry = _xBI->nextWord( + *pNode, aSttBndry.endPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, aSttBndry.endPos + 1 ) ) ), + nWordType); + } + // prevent advancing to the next word if selection is at word boundary + if (aEndBndry.startPos >= nEndPos) { - nLanguage = GetLanguage( EditPaM( pNode, nCurrentStart+1 ), &nCurrentEnd ); - if ( nCurrentEnd > nEndPos ) - nCurrentEnd = nEndPos; + aEndBndry = _xBI->previousWord( + *pNode, aEndBndry.startPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, aEndBndry.startPos + 1 ) ) ), + nWordType); } - xub_StrLen nLen = nCurrentEnd - nCurrentStart; + i18n::Boundary aCurWordBndry( aSttBndry ); + while (aCurWordBndry.startPos <= aEndBndry.startPos) + { + nCurrentStart = (xub_StrLen)aCurWordBndry.startPos; + nCurrentEnd = (xub_StrLen)aCurWordBndry.endPos; + sal_Int32 nLen = nCurrentEnd - nCurrentStart; + DBG_ASSERT( nLen > 0, "invalid word length of 0" ); +#if OSL_DEBUG_LEVEL > 1 + String aText( pNode->Copy( nCurrentStart, nLen ) ); +#endif + + Sequence< sal_Int32 > aOffsets; + String aNewText( aTranslitarationWrapper.transliterate( *pNode, + GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ), + 
nCurrentStart, nLen, &aOffsets )); + + if (!pNode->Equals( aNewText, nCurrentStart, nLen )) + { + aChgData.nStart = nCurrentStart; + aChgData.nLen = nLen; + aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) ); + aChgData.aNewText = aNewText; + aChgData.aOffsets = aOffsets; + aChanges.push_back( aChgData ); + } +#if OSL_DEBUG_LEVEL > 1 + String aSelTxt ( GetSelected( aChgData.aSelection ) ); + (void) aSelTxt; +#endif + + aCurWordBndry = _xBI->nextWord( *pNode, nCurrentEnd, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ), + nWordType); + } + DBG_ASSERT( nCurrentEnd >= aEndBndry.endPos, "failed to reach end of transliteration" ); + } + else if (nTransliterationMode == i18n::TransliterationModulesExtra::SENTENCE_CASE) + { + // for 'sentence case' we need to iterate sentence by sentence + + sal_Int32 nLastStart = _xBI->beginOfSentence( + *pNode, nEndPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nEndPos + 1 ) ) ) ); + sal_Int32 nLastEnd = _xBI->endOfSentence( + *pNode, nLastStart, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nLastStart + 1 ) ) ) ); - Sequence <sal_Int32> aOffsets; - String aNewText( aTranslitarationWrapper.transliterate( *pNode, nLanguage, nCurrentStart, nLen, &aOffsets ) ); + // extend nCurrentStart, nCurrentEnd to the current sentence boundaries + nCurrentStart = _xBI->beginOfSentence( + *pNode, nStartPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nStartPos + 1 ) ) ) ); + nCurrentEnd = _xBI->endOfSentence( + *pNode, nCurrentStart, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) ); - if( ( nLen != aNewText.Len() ) || !pNode->Equals( aNewText, nCurrentStart, nLen ) ) + // prevent backtracking to the previous sentence if selection starts at end of a sentence + if (nCurrentEnd <= nStartPos) { - bChanges = TRUE; - if ( nLen != aNewText.Len() ) - bLenChanged = TRUE; + // now nCurrentStart is probably located on a non-letter word. 
(unless we + // are in Asian text with no spaces...) + // Thus to get the real sentence start we should locate the next real word, + // that is one found by DICTIONARY_WORD + i18n::Boundary aBndry = _xBI->nextWord( *pNode, nCurrentEnd, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ), + i18n::WordType::DICTIONARY_WORD); + + // now get new current sentence boundaries + nCurrentStart = _xBI->beginOfSentence( + *pNode, aBndry.startPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, aBndry.startPos + 1 ) ) ) ); + nCurrentEnd = _xBI->endOfSentence( + *pNode, nCurrentStart, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) ); + } + // prevent advancing to the next sentence if selection ends at start of a sentence + if (nLastStart >= nEndPos) + { + // now nCurrentStart is probably located on a non-letter word. (unless we + // are in Asian text with no spaces...) + // Thus to get the real sentence start we should locate the previous real word, + // that is one found by DICTIONARY_WORD + i18n::Boundary aBndry = _xBI->previousWord( *pNode, nLastStart, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nLastStart + 1 ) ) ), + i18n::WordType::DICTIONARY_WORD); + nLastEnd = _xBI->endOfSentence( + *pNode, aBndry.startPos, + SvxCreateLocale( GetLanguage( EditPaM( pNode, aBndry.startPos + 1 ) ) ) ); + if (nCurrentEnd > nLastEnd) + nCurrentEnd = nLastEnd; + } -#ifndef SVX_LIGHT - // Create UndoAction on Demand.... 
- if ( !pUndo && IsUndoEnabled() && !IsInUndo() ) - { - ESelection aESel( CreateESel( aSel ) ); - pUndo = new EditUndoTransliteration( this, aESel, nTransliterationMode ); + while (nCurrentStart < nLastEnd) + { + sal_Int32 nLen = nCurrentEnd - nCurrentStart; + DBG_ASSERT( nLen > 0, "invalid word length of 0" ); +#if OSL_DEBUG_LEVEL > 1 + String aText( pNode->Copy( nCurrentStart, nLen ) ); +#endif - if ( ( nStartNode == nEndNode ) && !aSel.Min().GetNode()->GetCharAttribs().HasAttrib( aSel.Min().GetIndex(), aSel.Max().GetIndex() ) ) - pUndo->SetText( aSel.Min().GetNode()->Copy( aSel.Min().GetIndex(), aSel.Max().GetIndex()-aSel.Min().GetIndex() ) ); - else - pUndo->SetText( CreateBinTextObject( aSel, NULL ) ); + Sequence< sal_Int32 > aOffsets; + String aNewText( aTranslitarationWrapper.transliterate( *pNode, + GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ), + nCurrentStart, nLen, &aOffsets )); + + if (!pNode->Equals( aNewText, nCurrentStart, nLen )) + { + aChgData.nStart = nCurrentStart; + aChgData.nLen = nLen; + aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) ); + aChgData.aNewText = aNewText; + aChgData.aOffsets = aOffsets; + aChanges.push_back( aChgData ); } -#endif - // Change text without loosing the attributes - USHORT nCharsAfterTransliteration = - sal::static_int_cast< USHORT >(aOffsets.getLength()); - const sal_Int32* pOffsets = aOffsets.getConstArray(); - short nDiffs = 0; - for ( USHORT n = 0; n < nCharsAfterTransliteration; n++ ) + i18n::Boundary aFirstWordBndry; + aFirstWordBndry = _xBI->nextWord( + *pNode, nCurrentEnd, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentEnd + 1 ) ) ), + nWordType); + nCurrentStart = aFirstWordBndry.startPos; + nCurrentEnd = _xBI->endOfSentence( + *pNode, nCurrentStart, + SvxCreateLocale( GetLanguage( EditPaM( pNode, nCurrentStart + 1 ) ) ) ); + } + DBG_ASSERT( nCurrentEnd >= nLastEnd, "failed to reach end of transliteration" ); + } + else + { + do + { + if ( 
bConsiderLanguage ) { - USHORT nCurrentPos = nCurrentStart+n; - sal_Int32 nDiff = (nCurrentPos-nDiffs) - pOffsets[n]; + nLanguage = GetLanguage( EditPaM( pNode, nCurrentStart+1 ), &nCurrentEnd ); + if ( nCurrentEnd > nEndPos ) + nCurrentEnd = nEndPos; + } - if ( !nDiff ) - { - DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" ); - pNode->SetChar( nCurrentPos, aNewText.GetChar(n) ); - } - else if ( nDiff < 0 ) - { - // Replace first char, delete the rest... - DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" ); - pNode->SetChar( nCurrentPos, aNewText.GetChar(n) ); + xub_StrLen nLen = nCurrentEnd - nCurrentStart; - DBG_ASSERT( (nCurrentPos+1) < pNode->Len(), "TransliterateText - String smaller than expected!" ); - GetEditDoc().RemoveChars( EditPaM( pNode, nCurrentPos+1 ), sal::static_int_cast< USHORT >(-nDiff) ); - } - else - { - DBG_ASSERT( nDiff == 1, "TransliterateText - Diff other than expected! But should work..." ); - GetEditDoc().InsertText( EditPaM( pNode, nCurrentPos ), aNewText.GetChar(n) ); + Sequence< sal_Int32 > aOffsets; + String aNewText( aTranslitarationWrapper.transliterate( *pNode, nLanguage, nCurrentStart, nLen, &aOffsets ) ); - } - nDiffs = sal::static_int_cast< short >(nDiffs + nDiff); + if (!pNode->Equals( aNewText, nCurrentStart, nLen )) + { + aChgData.nStart = nCurrentStart; + aChgData.nLen = nLen; + aChgData.aSelection = EditSelection( EditPaM( pNode, nCurrentStart ), EditPaM( pNode, nCurrentEnd ) ); + aChgData.aNewText = aNewText; + aChgData.aOffsets = aOffsets; + aChanges.push_back( aChgData ); } - if ( nNode == nEndNode ) - aNewSel.Max().GetIndex() = - aNewSel.Max().GetIndex() + nDiffs; + nCurrentStart = nCurrentEnd; + } while( nCurrentEnd < nEndPos ); + } + + if (aChanges.size() > 0) + { +#ifndef SVX_LIGHT + // Create a single UndoAction on Demand for all the changes ... 
+ if ( !pUndo && IsUndoEnabled() && !IsInUndo() ) + { + // adjust selection to include all changes + for (size_t i = 0; i < aChanges.size(); ++i) + { + const EditSelection &rSel = aChanges[i].aSelection; + if (aSel.Min().GetNode() == rSel.Min().GetNode() && + aSel.Min().GetIndex() > rSel.Min().GetIndex()) + aSel.Min().SetIndex( rSel.Min().GetIndex() ); + if (aSel.Max().GetNode() == rSel.Max().GetNode() && + aSel.Max().GetIndex() < rSel.Max().GetIndex()) + aSel.Max().SetIndex( rSel.Max().GetIndex() ); + } + aNewSel = aSel; + + ESelection aESel( CreateESel( aSel ) ); + pUndo = new EditUndoTransliteration( this, aESel, nTransliterationMode ); + + const bool bSingleNode = aSel.Min().GetNode()== aSel.Max().GetNode(); + const bool bHasAttribs = aSel.Min().GetNode()->GetCharAttribs().HasAttrib( aSel.Min().GetIndex(), aSel.Max().GetIndex() ); + if (bSingleNode && !bHasAttribs) + pUndo->SetText( aSel.Min().GetNode()->Copy( aSel.Min().GetIndex(), aSel.Max().GetIndex()-aSel.Min().GetIndex() ) ); + else + pUndo->SetText( CreateBinTextObject( aSel, NULL ) ); + } +#endif + + // now apply the changes from end to start to leave the offsets of the + // yet unchanged text parts remain the same. 
+ for (size_t i = 0; i < aChanges.size(); ++i) + { + const TransliterationChgData &rData = aChanges[ aChanges.size() - 1 - i ]; - ParaPortion* pParaPortion = GetParaPortions()[nNode]; - pParaPortion->MarkSelectionInvalid( nCurrentStart, std::max< USHORT >( nCurrentStart+nLen, nCurrentStart+aNewText.Len() ) ); + bChanges = TRUE; + if (rData.nLen != rData.aNewText.Len()) + bLenChanged = TRUE; + // Change text without loosing the attributes + USHORT nDiffs = ReplaceTextOnly( rData.aSelection.Min().GetNode(), + rData.nStart, rData.nLen, rData.aNewText, rData.aOffsets ); + + // adjust selection in end node to possibly changed size + if (aSel.Max().GetNode() == rData.aSelection.Max().GetNode()) + aNewSel.Max().GetIndex() = aNewSel.Max().GetIndex() + nDiffs; + + USHORT nSelNode = aEditDoc.GetPos( rData.aSelection.Min().GetNode() ); + ParaPortion* pParaPortion = GetParaPortions()[nSelNode]; + pParaPortion->MarkSelectionInvalid( rData.nStart, + std::max< USHORT >( rData.nStart + rData.nLen, + rData.nStart + rData.aNewText.Len() ) ); } - nCurrentStart = nCurrentEnd; - } while( nCurrentEnd < nEndPos ); + } // if (aChanges.size() > 0) } #ifndef SVX_LIGHT @@ -2932,6 +3135,52 @@ EditSelection ImpEditEngine::TransliterateText( const EditSelection& rSelection, return aNewSel; } + +short ImpEditEngine::ReplaceTextOnly( + ContentNode* pNode, + USHORT nCurrentStart, xub_StrLen nLen, + const String& rNewText, + const uno::Sequence< sal_Int32 >& rOffsets ) +{ + (void) nLen; + + // Change text without loosing the attributes + USHORT nCharsAfterTransliteration = + sal::static_int_cast< USHORT >(rOffsets.getLength()); + const sal_Int32* pOffsets = rOffsets.getConstArray(); + short nDiffs = 0; + for ( USHORT n = 0; n < nCharsAfterTransliteration; n++ ) + { + USHORT nCurrentPos = nCurrentStart+n; + sal_Int32 nDiff = (nCurrentPos-nDiffs) - pOffsets[n]; + + if ( !nDiff ) + { + DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" 
); + pNode->SetChar( nCurrentPos, rNewText.GetChar(n) ); + } + else if ( nDiff < 0 ) + { + // Replace first char, delete the rest... + DBG_ASSERT( nCurrentPos < pNode->Len(), "TransliterateText - String smaller than expected!" ); + pNode->SetChar( nCurrentPos, rNewText.GetChar(n) ); + + DBG_ASSERT( (nCurrentPos+1) < pNode->Len(), "TransliterateText - String smaller than expected!" ); + GetEditDoc().RemoveChars( EditPaM( pNode, nCurrentPos+1 ), sal::static_int_cast< USHORT >(-nDiff) ); + } + else + { + DBG_ASSERT( nDiff == 1, "TransliterateText - Diff other than expected! But should work..." ); + GetEditDoc().InsertText( EditPaM( pNode, nCurrentPos ), rNewText.GetChar(n) ); + + } + nDiffs = sal::static_int_cast< short >(nDiffs + nDiff); + } + + return nDiffs; +} + + void ImpEditEngine::SetAsianCompressionMode( USHORT n ) { if ( n != nAsianCompressionMode ) |