From 616c4c6bcb5ddeb932cf1e154f0a1a79c6a8bd45 Mon Sep 17 00:00:00 2001 From: Kevin Suo Date: Sat, 3 Dec 2022 09:52:25 +0800 Subject: tdf#152083: sdext Pre-reverse the text for RTL in Writer pdfimport as well Follow up to: commit 3a2f0e4772e7b4646dd518b33aeafb6fd7025179 Author: Kevin Suo Date: Sun Nov 20 00:10:14 2022 +0800 Resolves tdf#152083: Ligatures are incorrectly reversed in Draw pdf import The unit test is adjusted accordingly to reflect this change. Change-Id: I051a76ed38f76ff92197e1dc87cbfa7151b8ac35 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/143606 Tested-by: Jenkins Reviewed-by: Noel Grandin --- sdext/source/pdfimport/tree/writertreevisiting.cxx | 62 +++++++++++++++++++++- sdext/source/pdfimport/tree/writertreevisiting.hxx | 3 ++ 2 files changed, 63 insertions(+), 2 deletions(-) (limited to 'sdext/source') diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx index de44dcf711b4..f6c9df0805d5 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.cxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx @@ -43,6 +43,15 @@ using namespace ::com::sun::star::uno; namespace pdfi { +const Reference& WriterXmlOptimizer::GetBreakIterator() +{ + if (!mxBreakIter.is()) + { + mxBreakIter = BreakIterator::create(m_rProcessor.m_xContext); + } + return mxBreakIter; +} + const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification() { if ( !mxCharClass.is() ) @@ -815,6 +824,11 @@ void WriterXmlOptimizer::optimizeTextElements(Element& rParent) if( pCur ) { TextElement* pNext = dynamic_cast(next->get()); + OUString str; + bool bPara = strspn("ParagraphElement", typeid(rParent).name()); + ParagraphElement* pPara = dynamic_cast(&rParent); + if (bPara && pPara && isComplex(GetBreakIterator(), pCur)) + pPara->bRtl = true; if( pNext ) { const GraphicsContext& rCurGC = m_rProcessor.getGraphicsContext( pCur->GCId ); @@ -872,8 +886,52 @@ void 
WriterXmlOptimizer::optimizeTextElements(Element& rParent) ) { pCur->updateGeometryWith( pNext ); - // append text to current element - pCur->Text.append( pNext->Text ); + if (pPara && pPara->bRtl) + { + // tdf#152083: If RTL, reverse the text in pNext so that its correct order is + // restored when the combined text is reversed in WriterXmlEmitter::visit. + OUString tempStr; + bool bNeedReverse=false; + str = pNext->Text.toString(); + for (sal_Int32 i=0; i < str.getLength(); i++) + { + if (str[i] == u' ') + { // Space char (e.g. the space as in " م") needs special treatment. + // First, append the space char to pCur. + pCur->Text.append(OUStringChar(str[i])); + // Then, check whether tempStr needs reversing; if so, reverse and append it. + if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + tempStr = u""; + } + bNeedReverse = false; + } + else + { + tempStr += OUStringChar(str[i]); + bNeedReverse = true; + } + } + // Do the last append + if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + } + else + { + pCur->Text.append(tempStr); + } + } + else + { + // append text to current element directly without reversing + pCur->Text.append(pNext->Text); + } + if (bPara && pPara && isComplex(GetBreakIterator(), pCur)) + pPara->bRtl = true; // append eventual children to current element // and clear children (else the children just // appended to pCur would be destroyed) diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx index e473c27372e6..b0644dd48154 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.hxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx @@ -24,6 +24,7 @@ #include +#include #include namespace pdfi @@ -34,10 +35,12 @@ namespace pdfi { private: PDFIProcessor& m_rProcessor; + css::uno::Reference mxBreakIter; void optimizeTextElements(Element& rParent); 
void checkHeaderAndFooter( PageElement& rElem ); public: + const css::uno::Reference& GetBreakIterator(); explicit WriterXmlOptimizer(PDFIProcessor& rProcessor) : m_rProcessor(rProcessor) {} -- cgit