summaryrefslogtreecommitdiff
path: root/sdext/source/pdfimport
diff options
context:
space:
mode:
author: Kevin Suo <suokunlong@126.com> 2022-10-15 19:43:54 +0800
committer: Thorsten Behrens <thorsten.behrens@allotropia.de> 2022-10-19 21:34:13 +0200
commit: f6004e1c457ddab5e0c91e6159875d25130b108a (patch)
tree: b0da4a24c1bc8edd5b14c447d76eaf39b1b3a845 /sdext/source/pdfimport
parent: ef9d461e420ca1869f88fa0d7ea749581819b360 (diff)
tdf#151546: RTL text is reversed (Writer pdfimport)
This is a followup to commit 69e9925ded584113e52f84ef0ed7c224079fa061 for the fix of tdf#104597. The Writer pdf import filter code is similar to the Draw part. However, many fixes made to the Draw part were historically not applied to the Writer part. This patch ports the text run fix from the Draw part to the Writer part. There is a todo related to the continuous spaces issue, which should be fixed separately. Also use CPPUNIT_ASSERT_EQUAL_MESSAGE to output the xml content in case of unit test failure, instead of using std::cout. Change-Id: Id013700524750e6e5283d85eeab72d8075f16f1b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141420 Tested-by: Thorsten Behrens <thorsten.behrens@allotropia.de> Reviewed-by: Thorsten Behrens <thorsten.behrens@allotropia.de>
Diffstat (limited to 'sdext/source/pdfimport')
-rw-r--r-- sdext/source/pdfimport/test/tests.cxx 30
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.cxx 46
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.hxx 4
3 files changed, 70 insertions, 10 deletions
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx
index 25c12a23901c..7cff15a36d0f 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -799,36 +799,54 @@ namespace
new OutputWrapString(aOutput),
nullptr));
- // std::cout << aOutput << std::endl;
xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr())));
// Test for امُ عَلَيْكَ
// TODO: How to get the "عَلَيْكَ" in xpath, as shown after the <text:s> tag?
OString xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span";
OUString sContent = getXPathContent(pXmlDoc, xpath); // u"\nا\nُ\nم\n"
- CPPUNIT_ASSERT_EQUAL(OUString(u"اُم"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم"), sContent.replaceAll("\n", ""));
// Test for ٱلَّسَل‬ . It appears in the 3rd frame, i.e. after the امُ عَلَيْكَ which is in the 2nd frame (from left to right)
// thus these two frames together appear as ٱلَّسَل امُ عَلَيْكَ in Draw‬.
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
// Test for "LibreOffice LTR"
// TODO: How to get the "LTR" as shown after the <text:s> tag?
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
/* Test for Chinese characters */
// Use last() instead of matrix below, because the matrix may be different on different OS due to fallback of Chinese fonts.
xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
+
+ // Test pdf text run in the Writer PDF import filter
+ xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
+ OString aOutput2;
+ xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/tdf104597_textrun.pdf"),
+ new OutputWrapString(aOutput2),
+ nullptr);
+ // FIXME: the same draw:frame is duplicated in the xml output,
+ // e.g. there are two draw:frame with draw:z-index="3" with the same content.
+ xmlDocUniquePtr pXmlDoc2(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput2.getStr())));
+ xpath = "//draw:frame[@draw:z-index='3'][1]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"ٱلَّسَل"), sContent);
+ xpath = "//draw:frame[@draw:z-index='2'][1]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+ // need to use اُم rather than اُم َعَلْيَك here, because this node may be different on different systems
+ CPPUNIT_ASSERT_EQUAL(true, sContent.match(u"اُم"));
+ xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
#endif
}
-
CPPUNIT_TEST_SUITE(PDFITest);
CPPUNIT_TEST(testXPDFParser);
CPPUNIT_TEST(testOdfWriterExport);
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index 3e21932eb6c9..2ece5307bd53 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -31,12 +31,28 @@
#include <basegfx/polygon/b2dpolypolygontools.hxx>
#include <osl/diagnose.h>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/DirectionProperty.hpp>
+#include <comphelper/string.hxx>
using namespace ::com::sun::star;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::uno;
namespace pdfi
{
+const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification()
+{
+ if ( !mxCharClass.is() )
+ {
+ Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW );
+ mxCharClass = CharacterClassification::create(xContext);
+ }
+ return mxCharClass;
+}
+
void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
{
if( elem.Children.empty() )
@@ -72,8 +88,31 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt
m_rEmitContext.rStyles.getStyleName( elem.StyleId );
}
+ OUString str(elem.Text.toString());
+
+ // Check for RTL
+ bool isRTL = false;
+ Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() );
+ if( xCC.is() )
+ {
+ for(int i=1; i< elem.Text.getLength(); i++)
+ {
+ i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
+ if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
+ )
+ isRTL = true;
+ }
+ }
+
+ if (isRTL) // If so, reverse string
+ str = ::comphelper::string::reverseString(str);
+
m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
- m_rEmitContext.rEmitter.write( elem.Text.makeStringAndClear() );
+ // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( TextElement& elem...)
+ m_rEmitContext.rEmitter.write(str);
auto this_it = elem.Children.begin();
while( this_it != elem.Children.end() && this_it->get() != &elem )
{
@@ -797,13 +836,12 @@ void WriterXmlOptimizer::optimizeTextElements(Element& rParent)
}
}
// concatenate consecutive text elements unless there is a
- // font or text color or matrix change, leave a new span in that case
+ // font or text color change, leave a new span in that case
if( pCur->FontId == pNext->FontId &&
rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
- rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha &&
- rCurGC.Transformation == rNextGC.Transformation
+ rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
)
{
pCur->updateGeometryWith( pNext );
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx
index 1c1507f13349..e473c27372e6 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.hxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx
@@ -24,6 +24,8 @@
#include <pdfihelper.hxx>
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+
namespace pdfi
{
struct DrawElement;
@@ -80,12 +82,14 @@ namespace pdfi
class WriterXmlEmitter : public ElementTreeVisitor
{
private:
+ css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
EmitContext& m_rEmitContext ;
static void fillFrameProps( DrawElement& rElem,
PropertyMap& rProps,
const EmitContext& rEmitContext );
public:
+ const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification();
explicit WriterXmlEmitter(EmitContext& rEmitContext) :
m_rEmitContext(rEmitContext)
{}