summary | refs | log | tree | commit | diff
path: root/sdext
diff options
context:
space:
mode:
Diffstat (limited to 'sdext')
-rw-r--r-- sdext/source/pdfimport/test/tests.cxx 30
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.cxx 46
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.hxx 4
3 files changed, 70 insertions, 10 deletions
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx
index 25c12a23901c..7cff15a36d0f 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -799,36 +799,54 @@ namespace
new OutputWrapString(aOutput),
nullptr));
- // std::cout << aOutput << std::endl;
xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr())));
// Test for امُ عَلَيْكَ
// TODO: How to get the "عَلَيْكَ" in xpath, as shown after the <text:s> tag?
OString xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span";
OUString sContent = getXPathContent(pXmlDoc, xpath); // u"\nا\nُ\nم\n"
- CPPUNIT_ASSERT_EQUAL(OUString(u"اُم"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم"), sContent.replaceAll("\n", ""));
// Test for ٱلَّسَل‬ . It appears in the 3rd frame, i.e. after the امُ عَلَيْكَ which is in the 2nd frame (from left to right)
// thus these two frames together appear as ٱلَّسَل امُ عَلَيْكَ in Draw‬.
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
// Test for "LibreOffice LTR"
// TODO: How to get the "LTR" as shown after the <text:s> tag?
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
/* Test for Chinese characters */
// Use last() instead of matrix below, because the matrix may be different on different OS due to fallback of Chinese fonts.
xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
- CPPUNIT_ASSERT_EQUAL(OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
+
+ // Test pdf text run in the Writer PDF import filter
+ xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
+ OString aOutput2;
+ xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/tdf104597_textrun.pdf"),
+ new OutputWrapString(aOutput2),
+ nullptr);
+ // FIXME: the same draw:frame is duplicated in the xml output,
+ // e.g. there are two draw:frame with draw:z-index="3" with the same content.
+ xmlDocUniquePtr pXmlDoc2(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput2.getStr())));
+ xpath = "//draw:frame[@draw:z-index='3'][1]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"ٱلَّسَل"), sContent);
+ xpath = "//draw:frame[@draw:z-index='2'][1]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+ // need to use اُم rather than اُم َعَلْيَك here, because this node may be different on different systems
+ CPPUNIT_ASSERT_EQUAL(true, sContent.match(u"اُم"));
+ xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
+ sContent = getXPathContent(pXmlDoc2, xpath);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
#endif
}
-
CPPUNIT_TEST_SUITE(PDFITest);
CPPUNIT_TEST(testXPDFParser);
CPPUNIT_TEST(testOdfWriterExport);
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index 3e21932eb6c9..2ece5307bd53 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -31,12 +31,28 @@
#include <basegfx/polygon/b2dpolypolygontools.hxx>
#include <osl/diagnose.h>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/DirectionProperty.hpp>
+#include <comphelper/string.hxx>
using namespace ::com::sun::star;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::uno;
namespace pdfi
{
+const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification() // Returns the character-classification service held in mxCharClass, creating it on first use.
+{
+ if ( !mxCharClass.is() ) // no service cached yet: create one now
+ {
+ Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW ); // UNO_SET_THROW: an unset component context is reported by an exception here (NOTE(review): confirm exception type against the UNO Reference docs)
+ mxCharClass = CharacterClassification::create(xContext); // store the instance in the member so later calls skip this branch
+ }
+ return mxCharClass; // reference to the member, not a copy
+}
+
void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
{
if( elem.Children.empty() )
@@ -72,8 +88,31 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt
m_rEmitContext.rStyles.getStyleName( elem.StyleId );
}
+ OUString str(elem.Text.toString());
+
+ // Check for RTL
+ bool isRTL = false;
+ Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() );
+ if( xCC.is() )
+ {
+ for(int i=1; i< elem.Text.getLength(); i++)
+ {
+ i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
+ if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
+ nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
+ )
+ isRTL = true;
+ }
+ }
+
+ if (isRTL) // If so, reverse string
+ str = ::comphelper::string::reverseString(str);
+
m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
- m_rEmitContext.rEmitter.write( elem.Text.makeStringAndClear() );
+ // TODO: preserve consecutive spaces, see DrawXmlEmitter::visit( TextElement& elem...)
+ m_rEmitContext.rEmitter.write(str);
auto this_it = elem.Children.begin();
while( this_it != elem.Children.end() && this_it->get() != &elem )
{
@@ -797,13 +836,12 @@ void WriterXmlOptimizer::optimizeTextElements(Element& rParent)
}
}
// concatenate consecutive text elements unless there is a
- // font or text color or matrix change, leave a new span in that case
+ // font or text color change, leave a new span in that case
if( pCur->FontId == pNext->FontId &&
rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
- rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha &&
- rCurGC.Transformation == rNextGC.Transformation
+ rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
)
{
pCur->updateGeometryWith( pNext );
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx
index 1c1507f13349..e473c27372e6 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.hxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx
@@ -24,6 +24,8 @@
#include <pdfihelper.hxx>
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+
namespace pdfi
{
struct DrawElement;
@@ -80,12 +82,14 @@ namespace pdfi
class WriterXmlEmitter : public ElementTreeVisitor
{
private:
+ css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
EmitContext& m_rEmitContext ;
static void fillFrameProps( DrawElement& rElem,
PropertyMap& rProps,
const EmitContext& rEmitContext );
public:
+ const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification();
explicit WriterXmlEmitter(EmitContext& rEmitContext) :
m_rEmitContext(rEmitContext)
{}