From a1759769804a7f3b9895b481229d497a9eb4c70a Mon Sep 17 00:00:00 2001 From: Kevin Suo Date: Sat, 21 Aug 2021 17:37:52 +0800 Subject: tdf#143959 sdext.pdfimport: fix font name with subtag as returned by the font descriptor when reading the font file. Change-Id: I376b887e6356e765f669b41c43776f78f94c3623 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/120815 Tested-by: Jenkins Reviewed-by: Noel Grandin --- .../pdfimport/test/testdocs/testTdf143959.pdf | Bin 0 -> 63999 bytes sdext/source/pdfimport/test/tests.cxx | 34 +++++++++++++++++++++ sdext/source/pdfimport/wrapper/wrapper.cxx | 9 ++++++ 3 files changed, 43 insertions(+) create mode 100644 sdext/source/pdfimport/test/testdocs/testTdf143959.pdf diff --git a/sdext/source/pdfimport/test/testdocs/testTdf143959.pdf b/sdext/source/pdfimport/test/testdocs/testTdf143959.pdf new file mode 100644 index 000000000000..594e734a5629 Binary files /dev/null and b/sdext/source/pdfimport/test/testdocs/testTdf143959.pdf differ diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx index cb8416d71b21..f47fa459a03c 100644 --- a/sdext/source/pdfimport/test/tests.cxx +++ b/sdext/source/pdfimport/test/tests.cxx @@ -710,6 +710,39 @@ namespace assertXPath(pXmlDoc, xpath, "text-outline", "true"); } + void testTdf143959_nameFromFontFile() + { + rtl::Reference<pdfi::PDFIRawAdaptor> xAdaptor(new pdfi::PDFIRawAdaptor(OUString(), getComponentContext())); + xAdaptor->setTreeVisitorFactory(createDrawTreeVisitorFactory()); + + OString aOutput; + CPPUNIT_ASSERT_MESSAGE("Converting PDF to ODF XML", + xAdaptor->odfConvert( m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/testTdf143959.pdf"), + new OutputWrapString(aOutput), + nullptr )); + + //std::cout << aOutput << std::endl; + xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr()))); + + /* Test for the 1st text paragraph */ + OUString styleName = getXPath(pXmlDoc, "//draw:frame[2]//text:span[1]", "style-name"); + OString xpath = 
"//office:automatic-styles/style:style[@style:name=\"" + + OUStringToOString(styleName, RTL_TEXTENCODING_UTF8) + + "\"]/style:text-properties"; + CPPUNIT_ASSERT_EQUAL(OUString("TimesNewRoman"), + getXPath(pXmlDoc, xpath, "font-family").replaceAll(u" ", u"")); + + /* Test for the "TOTAL ESTA HOJA USD" paragraph */ + styleName = getXPath(pXmlDoc, "//draw:frame[last()-1]//text:span[1]", "style-name"); + xpath = "//office:automatic-styles/style:style[@style:name=\"" + + OUStringToOString(styleName, RTL_TEXTENCODING_UTF8) + + "\"]/style:text-properties"; + CPPUNIT_ASSERT_EQUAL(OUString("TimesNewRoman"), + getXPath(pXmlDoc, xpath, "font-family").replaceAll(u" ", u"")); + CPPUNIT_ASSERT_EQUAL(OUString("bold"), + getXPath(pXmlDoc, xpath, "font-weight")); + } + CPPUNIT_TEST_SUITE(PDFITest); CPPUNIT_TEST(testXPDFParser); CPPUNIT_TEST(testOdfWriterExport); @@ -719,6 +752,7 @@ namespace CPPUNIT_TEST(testTdf105536); CPPUNIT_TEST(testTdf141709); CPPUNIT_TEST(testFontFeatures); + CPPUNIT_TEST(testTdf143959_nameFromFontFile); CPPUNIT_TEST_SUITE_END(); }; diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx b/sdext/source/pdfimport/wrapper/wrapper.cxx index 7cf30241c5ee..2efdab6f8553 100644 --- a/sdext/source/pdfimport/wrapper/wrapper.cxx +++ b/sdext/source/pdfimport/wrapper/wrapper.cxx @@ -584,6 +584,15 @@ void LineParser::readFont() if (!aFontDescriptor.Name.isEmpty()) { aResult.familyName = aFontDescriptor.Name; + // tdf#143959: there are cases when the family name returned by font descriptor + // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name + // determined by parseFontFamilyName instead, but still determine the font + // attributes (bold italic etc) from the font descriptor. 
+ if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6) + { + aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7); + parseFontFamilyName(aResult); + } aResult.isBold = (aFontDescriptor.Weight > 100.0); aResult.isItalic = (aFontDescriptor.Slant == awt::FontSlant_OBLIQUE || aFontDescriptor.Slant == awt::FontSlant_ITALIC); -- cgit