xmlsecurity PDF signing: only write incremental xref in an incremental update

We used to just dump all the object offsets in the xref of the incremental update, but Adobe Acrobat doesn't like that, and considers that a second signature invalidates the first. If we properly only mention new and changed objects in the xref, then this doesn't happen. This requires actually parsing incremental updates, the previous code depended on LO writing not-really-incremental xrefs at the end of incremental updates. Change-Id: Icdd73fe0a3eab16f8c5a62f1355edbb49f6e73de Reviewed-on: https://gerrit.libreoffice.org/30288 Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk> Tested-by: Jenkins <ci@libreoffice.org>
author: Miklos Vajna <vmiklos@collabora.co.uk> 2016-10-26 11:59:39 +0200
committer: Miklos Vajna <vmiklos@collabora.co.uk> 2016-10-26 12:20:26 +0000
commit: a324099538916eae7f7239d32fd98ec8018cbb72 (patch)
tree: f1576aeb5c2275aebf78833d4bf0a600e00432e1
parent: da5d20562479174504c3795aa9e7ca0856883e81 (diff)
2 files changed, 147 insertions, 87 deletions
diff --git a/xmlsecurity/inc/pdfio/pdfdocument.hxx b/xmlsecurity/inc/pdfio/pdfdocument.hxx
index 3886eab8e73a..59c7f5dd6079 100644
--- a/xmlsecurity/inc/pdfio/pdfdocument.hxx
+++ b/xmlsecurity/inc/pdfio/pdfdocument.hxx
@@ -11,6 +11,7 @@
 #ifndef INCLUDED_XMLSECURITY_INC_PDFIO_PDFDOCUMENT_HXX
 #define INCLUDED_XMLSECURITY_INC_PDFIO_PDFDOCUMENT_HXX
 
+#include <map>
 #include <vector>
 
 #include <com/sun/star/security/XCertificate.hpp>
@@ -48,8 +49,10 @@ class XMLSECURITY_DLLPUBLIC PDFDocument
 {
     /// This vector owns all elements.
     std::vector< std::unique_ptr<PDFElement> > m_aElements;
-    // List of object offsets we know.
-    std::vector<size_t> m_aXRef;
+    /// Object ID <-> object offset map.
+    std::map<size_t, size_t> m_aXRef;
+    /// Object ID <-> "are changed as part of an incremental update?" map.
+    std::map<size_t, bool> m_aXRefDirty;
     /// List of xref offsets we know.
     std::vector<size_t> m_aStartXRefs;
     /// List of EOF offsets we know.
@@ -61,6 +64,8 @@ class XMLSECURITY_DLLPUBLIC PDFDocument
     static int AsHex(char ch);
     /// Decode a hex dump.
     static std::vector<unsigned char> DecodeHexString(PDFHexStringElement* pElement);
+    /// Tokenize elements from current offset, optionally only till the next EOF.
+    bool Tokenize(SvStream& rStream, bool bPartial);
 
 public:
     PDFDocument();
@@ -76,6 +81,7 @@ public:
     /// Remember the end location of an EOF token.
     void PushBackEOF(size_t nOffset);
 
+    /// Read elements from the start of the stream till its end.
     bool Read(SvStream& rStream);
     /// Sign the read document with xCertificate in the edit buffer.
     bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate, const OUString& rDescription);
diff --git a/xmlsecurity/source/pdfio/pdfdocument.cxx b/xmlsecurity/source/pdfio/pdfdocument.cxx
index fc942c1c5ecb..7b8cea042568 100644
--- a/xmlsecurity/source/pdfio/pdfdocument.cxx
+++ b/xmlsecurity/source/pdfio/pdfdocument.cxx
@@ -306,7 +306,8 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
     // Write signature object.
     sal_Int32 nSignatureId = m_aXRef.size();
     sal_uInt64 nSignatureOffset = m_aEditBuffer.Tell();
-    m_aXRef.push_back(nSignatureOffset);
+    m_aXRef[nSignatureId] = nSignatureOffset;
+    m_aXRefDirty[nSignatureId] = true;
     OStringBuffer aSigBuffer;
     aSigBuffer.append(nSignatureId);
     aSigBuffer.append(" 0 obj\n");
@@ -347,7 +348,8 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
 
     // Write appearance object.
     sal_Int32 nAppearanceId = m_aXRef.size();
-    m_aXRef.push_back(m_aEditBuffer.Tell());
+    m_aXRef[nAppearanceId] = m_aEditBuffer.Tell();
+    m_aXRefDirty[nAppearanceId] = true;
     m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
     m_aEditBuffer.WriteCharPtr(" 0 obj\n");
     m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
@@ -356,7 +358,8 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
 
     // Write the Annot object, references nSignatureId and nAppearanceId.
     sal_Int32 nAnnotId = m_aXRef.size();
-    m_aXRef.push_back(m_aEditBuffer.Tell());
+    m_aXRef[nAnnotId] = m_aEditBuffer.Tell();
+    m_aXRefDirty[nAnnotId] = true;
     m_aEditBuffer.WriteUInt32AsString(nAnnotId);
     m_aEditBuffer.WriteCharPtr(" 0 obj\n");
     m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
@@ -394,6 +397,7 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
         return false;
     }
     m_aXRef[nFirstPageId] = m_aEditBuffer.Tell();
+    m_aXRefDirty[nFirstPageId] = true;
     m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
     m_aEditBuffer.WriteCharPtr(" 0 obj\n");
     m_aEditBuffer.WriteCharPtr("<<");
@@ -446,6 +450,7 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
         return false;
     }
     m_aXRef[nCatalogId] = m_aEditBuffer.Tell();
+    m_aXRefDirty[nCatalogId] = true;
     m_aEditBuffer.WriteUInt32AsString(nCatalogId);
     m_aEditBuffer.WriteCharPtr(" 0 obj\n");
     m_aEditBuffer.WriteCharPtr("<<");
@@ -491,13 +496,18 @@ bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificat
 
     // Write the xref table.
     sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
-    m_aEditBuffer.WriteCharPtr("xref\n0 ");
-    m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
-    m_aEditBuffer.WriteCharPtr("\n");
-    for (size_t nObject = 0; nObject < m_aXRef.size(); ++nObject)
+    m_aEditBuffer.WriteCharPtr("xref\n");
+    for (const auto& rXRef : m_aXRef)
     {
+        size_t nObject = rXRef.first;
+        size_t nOffset = rXRef.second;
+        if (!m_aXRefDirty[nObject])
+            continue;
+
+        m_aEditBuffer.WriteUInt32AsString(nObject);
+        m_aEditBuffer.WriteCharPtr(" 1\n");
         OStringBuffer aBuffer;
-        aBuffer.append(static_cast<sal_Int32>(m_aXRef[nObject]));
+        aBuffer.append(static_cast<sal_Int32>(nOffset));
         while (aBuffer.getLength() < 10)
             aBuffer.insert(0, "0");
         if (nObject == 0)
@@ -612,41 +622,14 @@ bool PDFDocument::Write(SvStream& rStream)
     return rStream.good();
 }
 
-bool PDFDocument::Read(SvStream& rStream)
+bool PDFDocument::Tokenize(SvStream& rStream, bool bPartial)
 {
-    // Check file magic.
-    std::vector<sal_Int8> aHeader(5);
-    rStream.Seek(0);
-    rStream.ReadBytes(aHeader.data(), aHeader.size());
-    if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F' || aHeader[4] != '-')
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: header mismatch");
-        return false;
-    }
-
-    // Allow later editing of the contents in-memory.
-    rStream.Seek(0);
-    m_aEditBuffer.WriteStream(rStream);
-
-    // Look up the offset of the xref table.
-    size_t nStartXRef = FindStartXRef(rStream);
-    SAL_INFO("xmlsecurity.pdfio", "PDFDocument::Read: nStartXRef is " << nStartXRef);
-    if (nStartXRef == 0)
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: found no xref statrt offset");
-        return false;
-    }
-    rStream.Seek(nStartXRef);
-    char ch;
-    ReadXRef(rStream);
-
-    // Then we can tokenize the stream.
-    rStream.Seek(0);
     bool bInXRef = false;
     // The next number will be an xref offset.
     bool bInStartXRef = false;
     while (true)
     {
+        char ch;
         rStream.ReadChar(ch);
         if (rStream.IsEof())
             break;
@@ -655,10 +638,16 @@ bool PDFDocument::Read(SvStream& rStream)
         {
         case '%':
         {
-            m_aElements.push_back(std::unique_ptr<PDFElement>(new PDFCommentElement(*this)));
+            auto pComment = new PDFCommentElement(*this);
+            m_aElements.push_back(std::unique_ptr<PDFElement>(pComment));
             rStream.SeekRel(-1);
             if (!m_aElements.back()->Read(rStream))
                 return false;
+            if (bPartial && !m_aEOFs.empty() && m_aEOFs.back() == rStream.Tell())
+            {
+                // Found EOF and partial parsing requested, we're done.
+                return true;
+            }
             break;
         }
         case '<':
@@ -742,7 +731,7 @@ bool PDFDocument::Read(SvStream& rStream)
                     size_t nElements = m_aElements.size();
                     if (nElements < 2)
                     {
-                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: expected at least two tokens before 'obj' or 'R' keyword");
+                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: expected at least two tokens before 'obj' or 'R' keyword");
                         return false;
                     }
 
@@ -750,7 +739,7 @@ bool PDFDocument::Read(SvStream& rStream)
                     auto pGenerationNumber = dynamic_cast<PDFNumberElement*>(m_aElements[nElements - 1].get());
                     if (!pObjectNumber || !pGenerationNumber)
                     {
-                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: missing object or generation number before 'obj' or 'R' keyword");
+                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: missing object or generation number before 'obj' or 'R' keyword");
                         return false;
                     }
 
@@ -791,7 +780,7 @@ bool PDFDocument::Read(SvStream& rStream)
                             break;
                         }
 
-                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: found no Length key for stream keyword");
+                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: found no Length key for stream keyword");
                         return false;
                     }
 
@@ -833,7 +822,7 @@ bool PDFDocument::Read(SvStream& rStream)
                 }
                 else
                 {
-                    SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: unexpected '" << aKeyword << "' keyword");
+                    SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: unexpected '" << aKeyword << "' keyword");
                     return false;
                 }
             }
@@ -841,7 +830,7 @@ bool PDFDocument::Read(SvStream& rStream)
             {
                 if (!isspace(ch))
                 {
-                    SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: unexpected character: " << ch);
+                    SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: unexpected character: " << ch);
                     return false;
                 }
             }
@@ -853,6 +842,57 @@ bool PDFDocument::Read(SvStream& rStream)
     return true;
 }
 
+bool PDFDocument::Read(SvStream& rStream)
+{
+    // Check file magic.
+    std::vector<sal_Int8> aHeader(5);
+    rStream.Seek(0);
+    rStream.ReadBytes(aHeader.data(), aHeader.size());
+    if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F' || aHeader[4] != '-')
+    {
+        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: header mismatch");
+        return false;
+    }
+
+    // Allow later editing of the contents in-memory.
+    rStream.Seek(0);
+    m_aEditBuffer.WriteStream(rStream);
+
+    // Look up the offset of the xref table.
+    size_t nStartXRef = FindStartXRef(rStream);
+    SAL_INFO("xmlsecurity.pdfio", "PDFDocument::Read: nStartXRef is " << nStartXRef);
+    if (nStartXRef == 0)
+    {
+        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: found no xref statrt offset");
+        return false;
+    }
+    while (true)
+    {
+        rStream.Seek(nStartXRef);
+        ReadXRef(rStream);
+        if (!Tokenize(rStream, /*bPartial=*/true))
+        {
+            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: failed to tokenizer trailer after xref");
+            return false;
+        }
+        auto pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
+        if (pPrev)
+            nStartXRef = pPrev->GetValue();
+
+        // Reset state, except object offsets and the edit buffer.
+        m_aElements.clear();
+        m_aStartXRefs.clear();
+        m_aEOFs.clear();
+        m_pTrailer = nullptr;
+        if (!pPrev)
+            break;
+    }
+
+    // Then we can tokenize the stream.
+    rStream.Seek(0);
+    return Tokenize(rStream, /*bPartial=*/false);
+}
+
 OString PDFDocument::ReadKeyword(SvStream& rStream)
 {
     OStringBuffer aBuf;
@@ -911,60 +951,68 @@ void PDFDocument::ReadXRef(SvStream& rStream)
     }
 
     PDFDocument::SkipWhitespace(rStream);
-    PDFNumberElement aFirstObject;
-    if (!aFirstObject.Read(rStream))
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read first object number");
-        return;
-    }
-
-    if (aFirstObject.GetValue() != 0)
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected first object number == 0");
-        return;
-    }
-
-    PDFDocument::SkipWhitespace(rStream);
-    PDFNumberElement aNumberOfEntries;
-    if (!aNumberOfEntries.Read(rStream))
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read number of entries");
-        return;
-    }
 
-    if (aNumberOfEntries.GetValue() <= 0)
+    while (true)
     {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected one or more entries");
-        return;
-    }
+        PDFNumberElement aFirstObject;
+        if (!aFirstObject.Read(rStream))
+        {
+            // Next token is not a number, it'll be the trailer.
+            return;
+        }
 
-    size_t nSize = aNumberOfEntries.GetValue();
-    for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
-    {
-        PDFDocument::SkipWhitespace(rStream);
-        PDFNumberElement aOffset;
-        if (!aOffset.Read(rStream))
+        if (aFirstObject.GetValue() < 0)
         {
-            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read offset");
+            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected first object number >= 0");
             return;
         }
 
         PDFDocument::SkipWhitespace(rStream);
-        PDFNumberElement aGenerationNumber;
-        if (!aGenerationNumber.Read(rStream))
+        PDFNumberElement aNumberOfEntries;
+        if (!aNumberOfEntries.Read(rStream))
         {
-            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read generation number");
+            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read number of entries");
             return;
         }
 
-        PDFDocument::SkipWhitespace(rStream);
-        OString aKeyword = ReadKeyword(rStream);
-        if (aKeyword != "f" && aKeyword != "n")
+        if (aNumberOfEntries.GetValue() <= 0)
         {
-            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: unexpected keyword");
+            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected one or more entries");
             return;
         }
-        m_aXRef.push_back(aOffset.GetValue());
+
+        size_t nSize = aNumberOfEntries.GetValue();
+        for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
+        {
+            size_t nIndex = aFirstObject.GetValue() + nEntry;
+            PDFDocument::SkipWhitespace(rStream);
+            PDFNumberElement aOffset;
+            if (!aOffset.Read(rStream))
+            {
+                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read offset");
+                return;
+            }
+
+            PDFDocument::SkipWhitespace(rStream);
+            PDFNumberElement aGenerationNumber;
+            if (!aGenerationNumber.Read(rStream))
+            {
+                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read generation number");
+                return;
+            }
+
+            PDFDocument::SkipWhitespace(rStream);
+            OString aKeyword = ReadKeyword(rStream);
+            if (aKeyword != "f" && aKeyword != "n")
+            {
+                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: unexpected keyword");
+                return;
+            }
+            m_aXRef[nIndex] = aOffset.GetValue();
+            // Initially only the first entry is dirty.
+            m_aXRefDirty[nIndex] = nIndex == 0;
+            PDFDocument::SkipWhitespace(rStream);
+        }
     }
 }
 
@@ -988,13 +1036,14 @@ void PDFDocument::SkipWhitespace(SvStream& rStream)
 
 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
 {
-    if (nIndex >= m_aXRef.size())
+    auto it = m_aXRef.find(nIndex);
+    if (it == m_aXRef.end())
     {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex);
+        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex << ", but failed");
         return 0;
     }
 
-    return m_aXRef[nIndex];
+    return it->second;
 }
 
 const std::vector< std::unique_ptr<PDFElement> >& PDFDocument::GetElements()
@@ -1461,6 +1510,11 @@ bool PDFNumberElement::Read(SvStream& rStream)
     m_nOffset = rStream.Tell();
     char ch;
     rStream.ReadChar(ch);
+    if (!isdigit(ch) && ch != '-')
+    {
+        rStream.SeekRel(-1);
+        return false;
+    }
     while (!rStream.IsEof())
     {
         if (!isdigit(ch) && ch != '-')
author	Miklos Vajna <vmiklos@collabora.co.uk>	2016-10-26 11:59:39 +0200
committer	Miklos Vajna <vmiklos@collabora.co.uk>	2016-10-26 12:20:26 +0000
commit	a324099538916eae7f7239d32fd98ec8018cbb72 (patch)
tree	f1576aeb5c2275aebf78833d4bf0a600e00432e1
parent	da5d20562479174504c3795aa9e7ca0856883e81 (diff)