summaryrefslogtreecommitdiff
path: root/sdext
diff options
context:
space:
mode:
authorKevin Suo <suokunlong@126.com>2022-10-19 19:08:27 +0800
committerNoel Grandin <noel.grandin@collabora.co.uk>2022-11-08 08:40:29 +0100
commitc2e2997f452b93b400d541c2d0b2ee396a889007 (patch)
tree219e1d6204ae03441b4a0abfd717fb016ed49744 /sdext
parent34c7d169a248578d953661217144f2dda9831cb4 (diff)
sdext.pdfimport - Writer: add handling for continuous space characters
This was already done for Draw in sdext/source/pdfimport/tree/drawtreevisiting.cxx, but not for Writer. Without this, consecutive spaces in a PDF are collapsed into a single space when the file is imported with the Writer pdfimport filter. Change-Id: I2279d9b1750e07f5743aeba80a3fd553bc037d13 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141527 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Diffstat (limited to 'sdext')
-rw-r--r--sdext/source/pdfimport/test/testdocs/testSpace.pdfbin0 -> 8140 bytes
-rw-r--r--sdext/source/pdfimport/test/tests.cxx38
-rw-r--r--sdext/source/pdfimport/tree/writertreevisiting.cxx28
3 files changed, 63 insertions, 3 deletions
diff --git a/sdext/source/pdfimport/test/testdocs/testSpace.pdf b/sdext/source/pdfimport/test/testdocs/testSpace.pdf
new file mode 100644
index 000000000000..3c94f31ea15b
--- /dev/null
+++ b/sdext/source/pdfimport/test/testdocs/testSpace.pdf
Binary files differ
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx
index 71661ae9e6d5..786815941445 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -841,6 +841,43 @@ namespace
#endif
}
+ void testSpaces() // Converts testSpace.pdf through the Writer tree visitor and checks how spaces, tabs and non-breaking spaces are emitted.
+ {
+#if HAVE_FEATURE_POPPLER
+ rtl::Reference<pdfi::PDFIRawAdaptor> xAdaptor(new pdfi::PDFIRawAdaptor(OUString(), getComponentContext()));
+ xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
+
+ OString aOutput;
+ xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/testSpace.pdf"),
+ new OutputWrapString(aOutput),
+ nullptr);
+ xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr())));
+
+ // Space test: the text contains 10 spaces, and each one is expected to be emitted as its own
+ // <text:s text:c="1" ...> element, so the 10th text:s must exist and its "text:c" attribute must be "1".
+ OString xpath = "//draw:frame[@draw:z-index='1'][1]/draw:text-box/text:p/text:span/text:s[10]";
+ OUString sContent = getXPath(pXmlDoc, xpath, "c");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), sContent); // the converted output doubles as the failure message
+
+ // Tab test: the text contains 10 tabs. The text before and the text after the tabs end up in separate draw frames.
+ // With the Liberation Serif font, the horizontal position of the first frame is 20.03mm and that of the
+ // second frame is 94.12mm; both frames share the vertical position 25.05mm.
+ xpath = "//draw:frame[@draw:z-index='2'][1]";
+ sContent = getXPath(pXmlDoc, xpath, "transform");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("translate( 20.03mm 25.05mm )"), sContent);
+ xpath = "//draw:frame[@draw:z-index='3'][1]";
+ sContent = getXPath(pXmlDoc, xpath, "transform");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("translate( 94.12mm 25.05mm )"), sContent);
+
+ // Non-breaking space test: the text contains 10 NBSPs, which are treated the same as normal spaces in PDF,
+ // so each one is expected to be emitted as a <text:s text:c="1" ...> element.
+ // The 10th text:s must exist and its "text:c" attribute must be "1".
+ xpath = "//draw:frame[@draw:z-index='4'][1]/draw:text-box/text:p/text:span/text:s[10]";
+ sContent = getXPath(pXmlDoc, xpath, "c");
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), sContent);
+#endif // HAVE_FEATURE_POPPLER
+ }
+
CPPUNIT_TEST_SUITE(PDFITest);
CPPUNIT_TEST(testXPDFParser);
CPPUNIT_TEST(testOdfWriterExport);
@@ -853,6 +890,7 @@ namespace
CPPUNIT_TEST(testTdf78427_FontWeight_MyraidProSemibold);
CPPUNIT_TEST(testTdf143959_nameFromFontFile);
CPPUNIT_TEST(testTdf104597_textrun);
+ CPPUNIT_TEST(testSpaces);
CPPUNIT_TEST_SUITE_END();
};
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index deabf365088b..510689be1588 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -81,7 +81,11 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt
if( elem.Text.isEmpty() )
return;
- PropertyMap aProps;
+ PropertyMap aProps = {};
+ const sal_Unicode strSpace = 0x0020;
+ const sal_Unicode strNbSpace = 0x00A0;
+ const sal_Unicode tabSpace = 0x0009;
+
if( elem.StyleId != -1 )
{
aProps[ OUString( "text:style-name" ) ] =
@@ -111,8 +115,26 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt
str = ::comphelper::string::reverseString(str);
m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
- // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( TextElement& elem...)
- m_rEmitContext.rEmitter.write(str);
+
+ sal_Unicode strToken;
+ for (int i = 0; i < elem.Text.getLength(); i++)
+ {
+ strToken = str[i];
+ if (strToken == strSpace || strToken == strNbSpace)
+ {
+ aProps["text:c"] = "1";
+ m_rEmitContext.rEmitter.beginTag("text:s", aProps);
+ m_rEmitContext.rEmitter.endTag("text:s");
+ }
+ else if (strToken == tabSpace)
+ {
+ m_rEmitContext.rEmitter.beginTag("text:tab", aProps);
+ m_rEmitContext.rEmitter.endTag("text:tab");
+ }
+ else
+ m_rEmitContext.rEmitter.write(OUString(strToken));
+ }
+
auto this_it = elem.Children.begin();
while( this_it != elem.Children.end() && this_it->get() != &elem )
{