From ee73747ab58fbbd5039823767693431223c347d3 Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Fri, 7 Apr 2017 18:19:41 +0200 Subject: tdf#107018 PDF export of PDF images: handle references in nested dictionaries Also get rid of the GetKeyOffset() and GetKeyValueLength() calls when copying dictionaries: the reference already knows its offset and length, so no need to call them. This makes the dictionary and the array handling more similar. Change-Id: I65936acfaf857636a8d83da3a4cec69289eb89d8 Reviewed-on: https://gerrit.libreoffice.org/36282 Reviewed-by: Miklos Vajna Tested-by: Jenkins --- vcl/qa/cppunit/pdfexport/data/tdf107018.odt | Bin 0 -> 15787 bytes vcl/qa/cppunit/pdfexport/pdfexport.cxx | 50 ++++++++++++++++++++++++++++ vcl/source/filter/ipdf/pdfdocument.cxx | 27 ++++++--------- vcl/source/gdi/pdfwriter_impl.cxx | 10 +++--- 4 files changed, 65 insertions(+), 22 deletions(-) create mode 100644 vcl/qa/cppunit/pdfexport/data/tdf107018.odt (limited to 'vcl') diff --git a/vcl/qa/cppunit/pdfexport/data/tdf107018.odt b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt new file mode 100644 index 000000000000..3bfc7b2d73cb Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt differ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index 31d0dfb384f2..aacf36b2796b 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -53,6 +53,7 @@ public: void testTdf106972(); void testTdf106972Pdf17(); void testTdf107013(); + void testTdf107018(); #endif CPPUNIT_TEST_SUITE(PdfExportTest); @@ -65,6 +66,7 @@ public: CPPUNIT_TEST(testTdf106972); CPPUNIT_TEST(testTdf106972Pdf17); CPPUNIT_TEST(testTdf107013); + CPPUNIT_TEST(testTdf107018); #endif CPPUNIT_TEST_SUITE_END(); }; @@ -402,6 +404,54 @@ void PdfExportTest::testTdf107013() // This failed, the reference to the image was created, but not the image. 
CPPUNIT_ASSERT(pXObject); } + +void PdfExportTest::testTdf107018() +{ + vcl::filter::PDFDocument aDocument; + load("tdf107018.odt", aDocument); + + // Get access to the only image on the only page. + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); + CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); + vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources"); + CPPUNIT_ASSERT(pResources); + auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject")); + CPPUNIT_ASSERT(pXObjects); + CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size()); + vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first); + CPPUNIT_ASSERT(pXObject); + + // Get access to the form object inside the image. + auto pXObjectResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources")); + CPPUNIT_ASSERT(pXObjectResources); + auto pXObjectForms = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObjectResources->LookupElement("XObject")); + CPPUNIT_ASSERT(pXObjectForms); + vcl::filter::PDFObjectElement* pForm = pXObjectForms->LookupObject(pXObjectForms->GetItems().begin()->first); + CPPUNIT_ASSERT(pForm); + + // Get access to Resources -> Font -> F1 of the form. + auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pForm->Lookup("Resources")); + CPPUNIT_ASSERT(pFormResources); + auto pFonts = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("Font")); + CPPUNIT_ASSERT(pFonts); + auto pF1Ref = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFonts->LookupElement("F1")); + CPPUNIT_ASSERT(pF1Ref); + vcl::filter::PDFObjectElement* pF1 = pF1Ref->LookupObject(); + CPPUNIT_ASSERT(pF1); + + // Check that Foo -> Bar of the font is of type Pages. 
+ auto pFontFoo = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pF1->Lookup("Foo")); + CPPUNIT_ASSERT(pFontFoo); + auto pBar = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFontFoo->LookupElement("Bar")); + CPPUNIT_ASSERT(pBar); + vcl::filter::PDFObjectElement* pObject = pBar->LookupObject(); + CPPUNIT_ASSERT(pObject); + auto pName = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type")); + CPPUNIT_ASSERT(pName); + // This was "XObject", reference in a nested dictionary wasn't updated when + // copying the page stream of a PDF image. + CPPUNIT_ASSERT_EQUAL(OString("Pages"), pName->GetValue()); +} #endif CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest); diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx index 43d4248cc8ad..b0bb8be6c93e 100644 --- a/vcl/source/filter/ipdf/pdfdocument.cxx +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -1071,10 +1071,14 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s } else { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber))); + auto pReference = new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber); + rElements.push_back(std::unique_ptr<PDFElement>(pReference)); if (pArray) // Reference is part of a direct (non-dictionary) array, inform the array. pArray->PushBack(rElements.back().get()); + if (bInObject && nDictionaryDepth > 0 && pObject) + // Inform the object about a new in-dictionary reference. 
+ pObject->AddDictionaryReference(pReference); } if (!rElements.back()->Read(rStream)) { @@ -2512,23 +2516,14 @@ PDFNumberElement* PDFObjectElement::GetNumberElement() const return m_pNumberElement; } -std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset() +const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const { - std::vector< std::pair<OString, PDFElement*> > aRet; - - for (const auto& rItem : m_aDictionary) - aRet.push_back(rItem); - - PDFDictionaryElement* pDictionary = GetDictionary(); - if (!pDictionary) - return aRet; - - std::sort(aRet.begin(), aRet.end(), [pDictionary](const std::pair<OString, PDFElement*>& a, const std::pair<OString, PDFElement*>& b) -> bool - { - return pDictionary->GetKeyOffset(a.first) < pDictionary->GetKeyOffset(b.first); - }); + return m_aDictionaryReferences; +} - return aRet; +void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference) +{ + m_aDictionaryReferences.push_back(pReference); } const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems() diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 8445377a1a5a..d5c1f6e8e7ee 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -10899,17 +10899,15 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter OStringBuffer aLine; aLine.append(nObject); aLine.append(" 0 obj\n"); - if (filter::PDFDictionaryElement* pDictionary = rObject.GetDictionary()) + if (rObject.GetDictionary()) { aLine.append("<<"); // Complex case: can't copy the dictionary byte array as is, as it may contain references. 
bool bDone = false; - std::vector< std::pair<OString, filter::PDFElement*> > aItems = rObject.GetDictionaryItemsByOffset(); sal_uInt64 nCopyStart = 0; - for (const auto& rItem : aItems) + for (auto pReference : rObject.GetDictionaryReferences()) { - auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second); if (pReference) { filter::PDFObjectElement* pReferenced = pReference->LookupObject(); @@ -10918,8 +10916,8 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter // Copy the referenced object. sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources); - sal_uInt64 nReferenceStart = pDictionary->GetKeyOffset(rItem.first) + rItem.first.getLength(); - sal_uInt64 nReferenceEnd = pDictionary->GetKeyOffset(rItem.first) + pDictionary->GetKeyValueLength(rItem.first); + sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation(); + sal_uInt64 nReferenceEnd = pReference->GetOffset(); sal_uInt64 nOffset = 0; if (nCopyStart == 0) // Dict start -> reference start. -- cgit