diff options
author | Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> | 2021-03-02 18:57:46 +0900 |
---|---|---|
committer | Tomaž Vajngerl <quikee@gmail.com> | 2021-03-13 03:49:41 +0100 |
commit | 90267d3dee8c0afe937c9aecb8561f6f158977f9 (patch) | |
tree | c3b23f6f2527e250f930f44fa53afae214573a6b | |
parent | 2225d8530b4a7fa29e551f9354d679da78be6435 (diff) |
tdf#140606 make PDF parsing more lenient and prevent a crash
If the external document can't be opened, it tried to continue
with the export anyway, which eventually lead to a crash. This
is fixed by handling this situation and prevent a crash, however
the part of the document in this case isn't exported.
The document couldn't be opened because of a parsing error - there
was a unexpected null character instead of a whitespace, which
made the parser panic. Fix this by making the parser more lenient
in such a situation when there is an unexpected null and try to
continue parsing.
Bug document seems to be created with a buggy PDF writer, but other
PDF readers don't complain when parsing the document so it looks to
be a valid. qpdf --check doesn't complain either.
Added a test that checks a document with a null parses.
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111820
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
(cherry picked from commit 2c1ed5a5dad827cde032f27a4348e81be15889bc)
Change-Id: I61eb281e821ccd195ef006d778556e25d1c7f5e3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/112418
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r-- | vcl/inc/pdf/ExternalPDFStreams.hxx | 12 | ||||
-rw-r--r-- | vcl/qa/cppunit/PDFDocumentTest.cxx | 12 | ||||
-rw-r--r-- | vcl/qa/cppunit/data/DocumentWithNull.pdf | bin | 0 -> 1080 bytes | |||
-rw-r--r-- | vcl/source/filter/ipdf/pdfdocument.cxx | 12 | ||||
-rw-r--r-- | vcl/source/gdi/pdfwriter_impl.cxx | 12 |
5 files changed, 38 insertions, 10 deletions
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx index 3bd59478c212..de0819539e76 100644 --- a/vcl/inc/pdf/ExternalPDFStreams.hxx +++ b/vcl/inc/pdf/ExternalPDFStreams.hxx @@ -29,21 +29,25 @@ struct VCL_DLLPUBLIC ExternalPDFStream std::map<sal_Int32, sal_Int32>& getCopiedResources() { return maCopiedResources; } - filter::PDFDocument& getPDFDocument() + std::shared_ptr<filter::PDFDocument>& getPDFDocument() { if (!mpPDFDocument) { SvMemoryStream aPDFStream; aPDFStream.WriteBytes(maData.data(), maData.size()); aPDFStream.Seek(0); - mpPDFDocument = std::make_unique<filter::PDFDocument>(); - if (!mpPDFDocument->Read(aPDFStream)) + auto pPDFDocument = std::make_shared<filter::PDFDocument>(); + if (!pPDFDocument->Read(aPDFStream)) { SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed"); } + else + { + mpPDFDocument = pPDFDocument; + } } - return *mpPDFDocument; + return mpPDFDocument; } }; diff --git a/vcl/qa/cppunit/PDFDocumentTest.cxx b/vcl/qa/cppunit/PDFDocumentTest.cxx index 66de7dfc77d4..9b0374f00341 100644 --- a/vcl/qa/cppunit/PDFDocumentTest.cxx +++ b/vcl/qa/cppunit/PDFDocumentTest.cxx @@ -156,6 +156,18 @@ CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseBasicPDF) } } +CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseDocumentWithNullAsWhitespace) +{ + // tdf#140606 + // Bug document contained a null, which cause the parser to panic, + // but other PDF readers can handle the file well. + + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "DocumentWithNull.pdf"; + vcl::filter::PDFDocument aDocument; + SvFileStream aStream(aURL, StreamMode::READ); + CPPUNIT_ASSERT(aDocument.Read(aStream)); +} + namespace { vcl::filter::PDFObjectElement* diff --git a/vcl/qa/cppunit/data/DocumentWithNull.pdf b/vcl/qa/cppunit/data/DocumentWithNull.pdf Binary files differnew file mode 100644 index 000000000000..f6d926957366 --- /dev/null +++ b/vcl/qa/cppunit/data/DocumentWithNull.pdf diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx index dd1355d924a8..64cf9dc4ef90 100644 --- a/vcl/source/filter/ipdf/pdfdocument.cxx +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -1327,12 +1327,18 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, } else { - if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch))) + auto uChar = static_cast<unsigned char>(ch); + // Be more lenient and allow unexpected null char + if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0) { - SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: " - << ch << " at byte position " << rStream.Tell()); + SAL_WARN("vcl.filter", + "PDFDocument::Tokenize: unexpected character with code " + << sal_Int32(ch) << " at byte position " << rStream.Tell()); return false; } + SAL_WARN_IF(uChar == 0, "vcl.filter", + "PDFDocument::Tokenize: unexpected null character at " + << rStream.Tell() << " - ignoring"); } break; } diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 2aa973e567c5..cfc8ffeffedc 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -8686,10 +8686,16 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit) // object. if (rEmit.m_nExternalPDFDataIndex < 0) return; - auto & rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex); - auto & rPDFDocument = rExternalPDFStream.getPDFDocument(); + auto& rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex); + auto& pPDFDocument = rExternalPDFStream.getPDFDocument(); + if (!pPDFDocument) + { + // Couldn't parse the document and can't continue + SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: failed to parse the document"); + return; + } - std::vector<filter::PDFObjectElement*> aPages = rPDFDocument.GetPages(); + std::vector<filter::PDFObjectElement*> aPages = pPDFDocument->GetPages(); if (aPages.empty()) { SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: no pages"); |