diff options
Diffstat (limited to 'sdext')
-rw-r--r-- | sdext/source/pdfimport/test/tests.cxx | 30 |
-rw-r--r-- | sdext/source/pdfimport/tree/writertreevisiting.cxx | 46 |
-rw-r--r-- | sdext/source/pdfimport/tree/writertreevisiting.hxx | 4 |
3 files changed, 70 insertions, 10 deletions
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx index 25c12a23901c..7cff15a36d0f 100644 --- a/sdext/source/pdfimport/test/tests.cxx +++ b/sdext/source/pdfimport/test/tests.cxx @@ -799,36 +799,54 @@ namespace new OutputWrapString(aOutput), nullptr)); - // std::cout << aOutput << std::endl; xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr()))); // Test for امُ عَلَيْكَ // TODO: How to get the "عَلَيْكَ" in xpath, as shown after the <text:s> tag? OString xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span"; OUString sContent = getXPathContent(pXmlDoc, xpath); // u"\nا\nُ\nم\n" - CPPUNIT_ASSERT_EQUAL(OUString(u"اُم"), sContent.replaceAll("\n", "")); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم"), sContent.replaceAll("\n", "")); // Test for ٱلَّسَل . It appears in the 3rd frame, i.e. after the امُ عَلَيْكَ which is in the 2nd frame (from left to right) // thus these two frames together appear as ٱلَّسَل امُ عَلَيْكَ in Draw. xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span"; sContent = getXPathContent(pXmlDoc, xpath); - CPPUNIT_ASSERT_EQUAL(OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", "")); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", "")); // Test for "LibreOffice LTR" // TODO: How to get the "LTR" as shown after the <text:s> tag? 
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span"; sContent = getXPathContent(pXmlDoc, xpath); - CPPUNIT_ASSERT_EQUAL(OUString(u"LibreOffice"), sContent.replaceAll("\n", "")); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"LibreOffice"), sContent.replaceAll("\n", "")); /* Test for Chinese characters */ // Use last() instead of matrix below, because the matrix may be different on different OS due to fallback of Chinese fonts. xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span"; sContent = getXPathContent(pXmlDoc, xpath); - CPPUNIT_ASSERT_EQUAL(OUString(u"中文测试,中文"), sContent.replaceAll("\n", "")); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", "")); + + // Test pdf text run in the Writer PDF import filter + xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory()); + OString aOutput2; + xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/tdf104597_textrun.pdf"), + new OutputWrapString(aOutput2), + nullptr); + // FIXME: the same draw:frame is duplicated in the xml output, + // e.g. there are two draw:frame with draw:z-index="3" with the same content. 
+ xmlDocUniquePtr pXmlDoc2(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput2.getStr()))); + xpath = "//draw:frame[@draw:z-index='3'][1]/draw:text-box/text:p/text:span"; + sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", ""); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"ٱلَّسَل"), sContent); + xpath = "//draw:frame[@draw:z-index='2'][1]/draw:text-box/text:p/text:span"; + sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", ""); + // need to use اُم rather than اُم َعَلْيَك here, because this node may be different on different systems + CPPUNIT_ASSERT_EQUAL(true, sContent.match(u"اُم")); + xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span"; + sContent = getXPathContent(pXmlDoc2, xpath); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", "")); #endif } - CPPUNIT_TEST_SUITE(PDFITest); CPPUNIT_TEST(testXPDFParser); CPPUNIT_TEST(testOdfWriterExport); diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx index 3e21932eb6c9..2ece5307bd53 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.cxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx @@ -31,12 +31,28 @@ #include <basegfx/polygon/b2dpolypolygontools.hxx> #include <osl/diagnose.h> +#include <com/sun/star/i18n/CharacterClassification.hpp> +#include <com/sun/star/i18n/DirectionProperty.hpp> +#include <comphelper/string.hxx> using namespace ::com::sun::star; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::uno; namespace pdfi { +const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification() +{ + if ( !mxCharClass.is() ) + { + Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW ); + mxCharClass = CharacterClassification::create(xContext); + } + return mxCharClass; +} + void WriterXmlEmitter::visit( HyperlinkElement& 
elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) { if( elem.Children.empty() ) @@ -72,8 +88,31 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt m_rEmitContext.rStyles.getStyleName( elem.StyleId ); } + OUString str(elem.Text.toString()); + + // Check for RTL + bool isRTL = false; + Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() ); + if( xCC.is() ) + { + for(int i=1; i< elem.Text.getLength(); i++) + { + i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i )); + if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE + ) + isRTL = true; + } + } + + if (isRTL) // If so, reverse string + str = ::comphelper::string::reverseString(str); + m_rEmitContext.rEmitter.beginTag( "text:span", aProps ); - m_rEmitContext.rEmitter.write( elem.Text.makeStringAndClear() ); + // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( TextElement& elem...) 
+ m_rEmitContext.rEmitter.write(str); auto this_it = elem.Children.begin(); while( this_it != elem.Children.end() && this_it->get() != &elem ) { @@ -797,13 +836,12 @@ void WriterXmlOptimizer::optimizeTextElements(Element& rParent) } } // concatenate consecutive text elements unless there is a - // font or text color or matrix change, leave a new span in that case + // font or text color change, leave a new span in that case if( pCur->FontId == pNext->FontId && rCurGC.FillColor.Red == rNextGC.FillColor.Red && rCurGC.FillColor.Green == rNextGC.FillColor.Green && rCurGC.FillColor.Blue == rNextGC.FillColor.Blue && - rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha && - rCurGC.Transformation == rNextGC.Transformation + rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha ) { pCur->updateGeometryWith( pNext ); diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx index 1c1507f13349..e473c27372e6 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.hxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx @@ -24,6 +24,8 @@ #include <pdfihelper.hxx> +#include <com/sun/star/i18n/XCharacterClassification.hpp> + namespace pdfi { struct DrawElement; @@ -80,12 +82,14 @@ namespace pdfi class WriterXmlEmitter : public ElementTreeVisitor { private: + css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass; EmitContext& m_rEmitContext ; static void fillFrameProps( DrawElement& rElem, PropertyMap& rProps, const EmitContext& rEmitContext ); public: + const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification(); explicit WriterXmlEmitter(EmitContext& rEmitContext) : m_rEmitContext(rEmitContext) {} |