summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2021-03-02 18:57:46 +0900
committerTomaž Vajngerl <quikee@gmail.com>2021-03-13 03:49:41 +0100
commit90267d3dee8c0afe937c9aecb8561f6f158977f9 (patch)
treec3b23f6f2527e250f930f44fa53afae214573a6b
parent2225d8530b4a7fa29e551f9354d679da78be6435 (diff)
tdf#140606 make PDF parsing more lenient and prevent a crash
If the external document can't be opened, it tried to continue with the export anyway, which eventually lead to a crash. This is fixed by handling this situation and prevent a crash, however the part of the document in this case isn't exported. The document couldn't be opened because of a parsing error - there was a unexpected null character instead of a whitespace, which made the parser panic. Fix this by making the parser more lenient in such a situation when there is an unexpected null and try to continue parsing. Bug document seems to be created with a buggy PDF writer, but other PDF readers don't complain when parsing the document so it looks to be a valid. qpdf --check doesn't complain either. Added a test that checks a document with a null parses. Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111820 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl <quikee@gmail.com> (cherry picked from commit 2c1ed5a5dad827cde032f27a4348e81be15889bc) Change-Id: I61eb281e821ccd195ef006d778556e25d1c7f5e3 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/112418 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r--vcl/inc/pdf/ExternalPDFStreams.hxx12
-rw-r--r--vcl/qa/cppunit/PDFDocumentTest.cxx12
-rw-r--r--vcl/qa/cppunit/data/DocumentWithNull.pdfbin0 -> 1080 bytes
-rw-r--r--vcl/source/filter/ipdf/pdfdocument.cxx12
-rw-r--r--vcl/source/gdi/pdfwriter_impl.cxx12
5 files changed, 38 insertions, 10 deletions
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
index 3bd59478c212..de0819539e76 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -29,21 +29,25 @@ struct VCL_DLLPUBLIC ExternalPDFStream
std::map<sal_Int32, sal_Int32>& getCopiedResources() { return maCopiedResources; }
- filter::PDFDocument& getPDFDocument()
+ std::shared_ptr<filter::PDFDocument>& getPDFDocument()
{
if (!mpPDFDocument)
{
SvMemoryStream aPDFStream;
aPDFStream.WriteBytes(maData.data(), maData.size());
aPDFStream.Seek(0);
- mpPDFDocument = std::make_unique<filter::PDFDocument>();
- if (!mpPDFDocument->Read(aPDFStream))
+ auto pPDFDocument = std::make_shared<filter::PDFDocument>();
+ if (!pPDFDocument->Read(aPDFStream))
{
SAL_WARN("vcl.pdfwriter",
"PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
}
+ else
+ {
+ mpPDFDocument = pPDFDocument;
+ }
}
- return *mpPDFDocument;
+ return mpPDFDocument;
}
};
diff --git a/vcl/qa/cppunit/PDFDocumentTest.cxx b/vcl/qa/cppunit/PDFDocumentTest.cxx
index 66de7dfc77d4..9b0374f00341 100644
--- a/vcl/qa/cppunit/PDFDocumentTest.cxx
+++ b/vcl/qa/cppunit/PDFDocumentTest.cxx
@@ -156,6 +156,18 @@ CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseBasicPDF)
}
}
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseDocumentWithNullAsWhitespace)
+{
+ // tdf#140606
+ // Bug document contained a null, which cause the parser to panic,
+ // but other PDF readers can handle the file well.
+
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "DocumentWithNull.pdf";
+ vcl::filter::PDFDocument aDocument;
+ SvFileStream aStream(aURL, StreamMode::READ);
+ CPPUNIT_ASSERT(aDocument.Read(aStream));
+}
+
namespace
{
vcl::filter::PDFObjectElement*
diff --git a/vcl/qa/cppunit/data/DocumentWithNull.pdf b/vcl/qa/cppunit/data/DocumentWithNull.pdf
new file mode 100644
index 000000000000..f6d926957366
--- /dev/null
+++ b/vcl/qa/cppunit/data/DocumentWithNull.pdf
Binary files differ
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index dd1355d924a8..64cf9dc4ef90 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -1327,12 +1327,18 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
}
else
{
- if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
+ auto uChar = static_cast<unsigned char>(ch);
+ // Be more lenient and allow unexpected null char
+ if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
{
- SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
- << ch << " at byte position " << rStream.Tell());
+ SAL_WARN("vcl.filter",
+ "PDFDocument::Tokenize: unexpected character with code "
+ << sal_Int32(ch) << " at byte position " << rStream.Tell());
return false;
}
+ SAL_WARN_IF(uChar == 0, "vcl.filter",
+ "PDFDocument::Tokenize: unexpected null character at "
+ << rStream.Tell() << " - ignoring");
}
break;
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 2aa973e567c5..cfc8ffeffedc 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8686,10 +8686,16 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
// object.
if (rEmit.m_nExternalPDFDataIndex < 0)
return;
- auto & rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
- auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
+ auto& rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
+ auto& pPDFDocument = rExternalPDFStream.getPDFDocument();
+ if (!pPDFDocument)
+ {
+ // Couldn't parse the document and can't continue
+ SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: failed to parse the document");
+ return;
+ }
- std::vector<filter::PDFObjectElement*> aPages = rPDFDocument.GetPages();
+ std::vector<filter::PDFObjectElement*> aPages = pPDFDocument->GetPages();
if (aPages.empty())
{
SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: no pages");