diff options
-rw-r--r-- | include/vcl/filter/pdfdocument.hxx | 355 | ||||
-rw-r--r-- | include/xmlsecurity/pdfio/pdfdocument.hxx | 294 | ||||
-rw-r--r-- | vcl/CppunitTest_vcl_pdfexport.mk | 2 | ||||
-rw-r--r-- | vcl/Library_vcl.mk | 1 | ||||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/pdfexport.cxx | 48 | ||||
-rw-r--r-- | vcl/source/filter/ipdf/pdfdocument.cxx | 2899 | ||||
-rw-r--r-- | xmlsecurity/CppunitTest_xmlsecurity_pdfsigning.mk | 1 | ||||
-rw-r--r-- | xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx | 18 | ||||
-rw-r--r-- | xmlsecurity/source/helper/pdfsignaturehelper.cxx | 8 | ||||
-rw-r--r-- | xmlsecurity/source/pdfio/pdfdocument.cxx | 2978 | ||||
-rw-r--r-- | xmlsecurity/workben/pdfverify.cxx | 6 |
11 files changed, 3343 insertions, 3267 deletions
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx new file mode 100644 index 000000000000..8d362e7e339c --- /dev/null +++ b/include/vcl/filter/pdfdocument.hxx @@ -0,0 +1,355 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX +#define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX + +#include <map> +#include <vector> + +#include <com/sun/star/security/XCertificate.hpp> + +#include <tools/stream.hxx> + +#include <vcl/dllapi.h> + +namespace vcl +{ +namespace filter +{ + +class PDFTrailerElement; +class PDFHexStringElement; +class PDFReferenceElement; +class PDFDocument; +class PDFDictionaryElement; +class PDFArrayElement; +class PDFStreamElement; + +/// A byte range in a PDF file. +class VCL_DLLPUBLIC PDFElement +{ +public: + virtual bool Read(SvStream& rStream) = 0; + virtual ~PDFElement() { } +}; + +/// Indirect object: something with a unique ID. +class VCL_DLLPUBLIC PDFObjectElement : public PDFElement +{ + PDFDocument& m_rDoc; + double m_fObjectValue; + double m_fGenerationValue; + std::map<OString, PDFElement*> m_aDictionary; + /// Position after the '<<' token. + sal_uInt64 m_nDictionaryOffset; + /// Length of the dictionary buffer till (before) the '<<' token. + sal_uInt64 m_nDictionaryLength; + PDFDictionaryElement* m_pDictionaryElement; + /// The contained direct array, if any. + PDFArrayElement* m_pArrayElement; + /// The stream of this object, used when this is an object stream. + PDFStreamElement* m_pStreamElement; + /// Objects of an object stream. + std::vector< std::unique_ptr<PDFObjectElement> > m_aStoredElements; + /// Elements of an object in an object stream. 
+ std::vector< std::unique_ptr<PDFElement> > m_aElements; + /// Uncompressed buffer of an object in an object stream. + std::unique_ptr<SvMemoryStream> m_pStreamBuffer; + +public: + PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue); + bool Read(SvStream& rStream) override; + PDFElement* Lookup(const OString& rDictionaryKey); + PDFObjectElement* LookupObject(const OString& rDictionaryKey); + double GetObjectValue() const; + void SetDictionaryOffset(sal_uInt64 nDictionaryOffset); + sal_uInt64 GetDictionaryOffset(); + void SetDictionaryLength(sal_uInt64 nDictionaryLength); + sal_uInt64 GetDictionaryLength(); + PDFDictionaryElement* GetDictionary() const; + void SetDictionary(PDFDictionaryElement* pDictionaryElement); + void SetArray(PDFArrayElement* pArrayElement); + void SetStream(PDFStreamElement* pStreamElement); + /// Access to the stream of the object, if it has any. + PDFStreamElement* GetStream() const; + PDFArrayElement* GetArray() const; + /// Parse objects stored in this object stream. + void ParseStoredObjects(); + std::vector< std::unique_ptr<PDFElement> >& GetStoredElements(); + SvMemoryStream* GetStreamBuffer() const; + void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer); +}; + +/// Array object: a list. +class VCL_DLLPUBLIC PDFArrayElement : public PDFElement +{ + /// Location after the '[' token. + sal_uInt64 m_nOffset = 0; + std::vector<PDFElement*> m_aElements; +public: + PDFArrayElement(); + bool Read(SvStream& rStream) override; + void PushBack(PDFElement* pElement); + const std::vector<PDFElement*>& GetElements(); +}; + +/// Reference object: something with a unique ID. 
+class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement +{ + PDFDocument& m_rDoc; + int m_fObjectValue; + int m_fGenerationValue; + +public: + PDFReferenceElement(PDFDocument& rDoc, int fObjectValue, int fGenerationValue); + bool Read(SvStream& rStream) override; + /// Assuming the reference points to a number object, return its value. + double LookupNumber(SvStream& rStream) const; + /// Lookup referenced object, without assuming anything about its contents. + PDFObjectElement* LookupObject(); + int GetObjectValue() const; + int GetGenerationValue() const; +}; + +/// Stream object: a byte array with a known length. +class VCL_DLLPUBLIC PDFStreamElement : public PDFElement +{ + size_t m_nLength; + sal_uInt64 m_nOffset; + /// The byte array itself. + SvMemoryStream m_aMemory; + +public: + explicit PDFStreamElement(size_t nLength); + bool Read(SvStream& rStream) override; + sal_uInt64 GetOffset() const; + SvMemoryStream& GetMemory(); +}; + +/// Name object: a key string. +class VCL_DLLPUBLIC PDFNameElement : public PDFElement +{ + OString m_aValue; + /// Offset after the '/' token. + sal_uInt64 m_nLocation; + /// Length till the next token start. + sal_uInt64 m_nLength; +public: + PDFNameElement(); + bool Read(SvStream& rStream) override; + const OString& GetValue() const; + sal_uInt64 GetLocation() const; + sal_uInt64 GetLength() const; +}; + +/// Dictionary object: a set of key-value pairs. +class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement +{ + /// Key-value pairs when the dictionary is a nested value. + std::map<OString, PDFElement*> m_aItems; + /// Offset after the '<<' token. + sal_uInt64 m_nLocation = 0; + /// Position after the '/' token. + std::map<OString, sal_uInt64> m_aDictionaryKeyOffset; + /// Length of the dictionary key and value, till (before) the next token. 
+ std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength; + +public: + PDFDictionaryElement(); + bool Read(SvStream& rStream) override; + + static size_t Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary); + static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey); + void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset); + sal_uInt64 GetKeyOffset(const OString& rKey) const; + void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength); + sal_uInt64 GetKeyValueLength(const OString& rKey) const; + const std::map<OString, PDFElement*>& GetItems() const; + /// Looks up an object which is only referenced in this dictionary. + PDFObjectElement* LookupObject(const OString& rDictionaryKey); + /// Looks up an element which is contained in this dictionary. + PDFElement* LookupElement(const OString& rDictionaryKey); +}; + +enum class TokenizeMode +{ + /// Full file. + END_OF_STREAM, + /// Till the first %%EOF token. + EOF_TOKEN, + /// Till the end of the current object. + END_OF_OBJECT, + /// Same as END_OF_OBJECT, but for object streams (no endobj keyword). + STORED_OBJECT +}; + +/// The type column of an entry in a cross-reference stream. +enum class XRefEntryType +{ + /// xref "f" or xref stream "0". + FREE, + /// xref "n" or xref stream "1". + NOT_COMPRESSED, + /// xref stream "2". + COMPRESSED +}; + +/// An entry in a cross-reference stream. +struct XRefEntry +{ + XRefEntryType m_eType; + /** + * Non-compressed: The byte offset of the object, starting from the + * beginning of the file. + * Compressed: The object number of the object stream in which this object is + * stored. + */ + sal_uInt64 m_nOffset; + /** + * Non-compressed: The generation number of the object. + * Compressed: The index of this object within the object stream. + */ + sal_uInt64 m_nGenerationNumber; + /// Is this entry changed as part of an incremental update? 
+ bool m_bDirty; + + XRefEntry(); +}; + +/// Hex string: in <AABB> form. +class VCL_DLLPUBLIC PDFHexStringElement : public PDFElement +{ + OString m_aValue; +public: + bool Read(SvStream& rStream) override; + const OString& GetValue() const; +}; + +/// Literal string: in (asdf) form. +class VCL_DLLPUBLIC PDFLiteralStringElement : public PDFElement +{ + OString m_aValue; +public: + bool Read(SvStream& rStream) override; + const OString& GetValue() const; +}; + +/// Numbering object: an integer or a real. +class VCL_DLLPUBLIC PDFNumberElement : public PDFElement +{ + /// Input file start location. + sal_uInt64 m_nOffset = 0; + /// Input file token length. + sal_uInt64 m_nLength = 0; + double m_fValue = 0; + +public: + PDFNumberElement(); + bool Read(SvStream& rStream) override; + double GetValue() const; + sal_uInt64 GetLocation() const; + sal_uInt64 GetLength() const; +}; + +/** + * In-memory representation of an on-disk PDF document. + * + * The PDF element list is not meant to be saved back to disk, but some + * elements remember their source offset / length, and based on that it's + * possible to modify the input file. + */ +class VCL_DLLPUBLIC PDFDocument +{ + /// This vector owns all elements. + std::vector< std::unique_ptr<PDFElement> > m_aElements; + /// Object ID <-> object offset map. + std::map<size_t, XRefEntry> m_aXRef; + /// Object offset <-> Object pointer map. + std::map<size_t, PDFObjectElement*> m_aOffsetObjects; + /// Object ID <-> Object pointer map. + std::map<size_t, PDFObjectElement*> m_aIDObjects; + /// List of xref offsets we know. + std::vector<size_t> m_aStartXRefs; + /// List of EOF offsets we know. + std::vector<size_t> m_aEOFs; + PDFTrailerElement* m_pTrailer; + /// When m_pTrailer is nullptr, this can still have a dictionary. + PDFObjectElement* m_pXRefStream; + /// All editing takes place in this buffer, if it happens. 
+ SvMemoryStream m_aEditBuffer; + + static int AsHex(char ch); + /// Suggest a minimal, yet free signature ID to use for the next signature. + sal_uInt32 GetNextSignature(); + /// Write the signature object as part of signing. + sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rSignatureContentOffset); + /// Write the appearance object as part of signing. + sal_Int32 WriteAppearanceObject(); + /// Write the annot object as part of signing. + sal_Int32 WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId); + /// Write the updated Page object as part of signing. + bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId); + /// Write the updated Catalog object as part of signing. + bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot); + /// Write the updated cross-references as part of signing. + void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot); + +public: + PDFDocument(); + PDFDocument& operator=(const PDFDocument&) = delete; + PDFDocument(const PDFDocument&) = delete; + /// @name Low-level functions, to be used by PDFElement subclasses. + //@{ + /// Decode a hex dump. + static std::vector<unsigned char> DecodeHexString(PDFHexStringElement* pElement); + static OString ReadKeyword(SvStream& rStream); + static size_t FindStartXRef(SvStream& rStream); + void ReadXRef(SvStream& rStream); + void ReadXRefStream(SvStream& rStream); + static void SkipWhitespace(SvStream& rStream); + /// Instead of all whitespace, just skip CR and NL characters. + static void SkipLineBreaks(SvStream& rStream); + size_t GetObjectOffset(size_t nIndex) const; + const std::vector< std::unique_ptr<PDFElement> >& GetElements(); + std::vector<PDFObjectElement*> GetPages(); + /// Remember the end location of an EOF token. + void PushBackEOF(size_t nOffset); + /// Look up object based on object number, possibly by parsing object streams. 
+ PDFObjectElement* LookupObject(size_t nObjectNumber); + /// Access to the input document, even after the input stream is gone. + SvMemoryStream& GetEditBuffer(); + /// Tokenize elements from current offset. + bool Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObject); + /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID. + void SetIDObject(size_t nID, PDFObjectElement* pObject); + //@} + + /// @name High-level functions, to be used by others. + //@{ + /// Read elements from the start of the stream till its end. + bool Read(SvStream& rStream); + /// Sign the read document with xCertificate in the edit buffer. + bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES); + /// Serializes the contents of the edit buffer. + bool Write(SvStream& rStream); + /// Get a list of signatures embedded into this document. + std::vector<PDFObjectElement*> GetSignatureWidgets(); + /// Remove the nth signature from read document in the edit buffer. 
+ bool RemoveSignature(size_t nPosition); + //@} +}; + +} // namespace filter +} // namespace vcl + +#endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/xmlsecurity/pdfio/pdfdocument.hxx b/include/xmlsecurity/pdfio/pdfdocument.hxx index f5179fd4af6d..6f3b0f263add 100644 --- a/include/xmlsecurity/pdfio/pdfdocument.hxx +++ b/include/xmlsecurity/pdfio/pdfdocument.hxx @@ -11,10 +11,7 @@ #ifndef INCLUDED_XMLSECURITY_PDFIO_PDFDOCUMENT_HXX #define INCLUDED_XMLSECURITY_PDFIO_PDFDOCUMENT_HXX -#include <map> -#include <vector> - -#include <com/sun/star/security/XCertificate.hpp> +#include <vcl/filter/pdfdocument.hxx> #include <tools/stream.hxx> @@ -27,299 +24,12 @@ namespace xmlsecurity namespace pdfio { -class PDFTrailerElement; -class PDFHexStringElement; -class PDFReferenceElement; -class PDFDocument; -class PDFDictionaryElement; -class PDFArrayElement; -class PDFStreamElement; - -/// A byte range in a PDF file. -class XMLSECURITY_DLLPUBLIC PDFElement -{ -public: - virtual bool Read(SvStream& rStream) = 0; - virtual ~PDFElement() { } -}; - -/// Indirect object: something with a unique ID. -class XMLSECURITY_DLLPUBLIC PDFObjectElement : public PDFElement -{ - PDFDocument& m_rDoc; - double m_fObjectValue; - double m_fGenerationValue; - std::map<OString, PDFElement*> m_aDictionary; - /// Position after the '<<' token. - sal_uInt64 m_nDictionaryOffset; - /// Length of the dictionary buffer till (before) the '<<' token. - sal_uInt64 m_nDictionaryLength; - PDFDictionaryElement* m_pDictionaryElement; - /// The contained direct array, if any. - PDFArrayElement* m_pArrayElement; - /// The stream of this object, used when this is an object stream. - PDFStreamElement* m_pStreamElement; - /// Objects of an object stream. - std::vector< std::unique_ptr<PDFObjectElement> > m_aStoredElements; - /// Elements of an object in an object stream. 
- std::vector< std::unique_ptr<PDFElement> > m_aElements; - /// Uncompressed buffer of an object in an object stream. - std::unique_ptr<SvMemoryStream> m_pStreamBuffer; - -public: - PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue); - bool Read(SvStream& rStream) override; - PDFElement* Lookup(const OString& rDictionaryKey); - PDFObjectElement* LookupObject(const OString& rDictionaryKey); - double GetObjectValue() const; - void SetDictionaryOffset(sal_uInt64 nDictionaryOffset); - sal_uInt64 GetDictionaryOffset(); - void SetDictionaryLength(sal_uInt64 nDictionaryLength); - sal_uInt64 GetDictionaryLength(); - PDFDictionaryElement* GetDictionary() const; - void SetDictionary(PDFDictionaryElement* pDictionaryElement); - void SetArray(PDFArrayElement* pArrayElement); - void SetStream(PDFStreamElement* pStreamElement); - /// Access to the stream of the object, if it has any. - PDFStreamElement* GetStream() const; - PDFArrayElement* GetArray() const; - /// Parse objects stored in this object stream. - void ParseStoredObjects(); - std::vector< std::unique_ptr<PDFElement> >& GetStoredElements(); - SvMemoryStream* GetStreamBuffer() const; - void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer); -}; - -/// Array object: a list. -class XMLSECURITY_DLLPUBLIC PDFArrayElement : public PDFElement -{ - /// Location after the '[' token. - sal_uInt64 m_nOffset = 0; - std::vector<PDFElement*> m_aElements; -public: - PDFArrayElement(); - bool Read(SvStream& rStream) override; - void PushBack(PDFElement* pElement); - const std::vector<PDFElement*>& GetElements(); -}; - -/// Reference object: something with a unique ID. 
-class XMLSECURITY_DLLPUBLIC PDFReferenceElement : public PDFElement -{ - PDFDocument& m_rDoc; - int m_fObjectValue; - int m_fGenerationValue; - -public: - PDFReferenceElement(PDFDocument& rDoc, int fObjectValue, int fGenerationValue); - bool Read(SvStream& rStream) override; - /// Assuming the reference points to a number object, return its value. - double LookupNumber(SvStream& rStream) const; - /// Lookup referenced object, without assuming anything about its contents. - PDFObjectElement* LookupObject(); - int GetObjectValue() const; - int GetGenerationValue() const; -}; - -/// Stream object: a byte array with a known length. -class XMLSECURITY_DLLPUBLIC PDFStreamElement : public PDFElement -{ - size_t m_nLength; - sal_uInt64 m_nOffset; - /// The byte array itself. - SvMemoryStream m_aMemory; - -public: - explicit PDFStreamElement(size_t nLength); - bool Read(SvStream& rStream) override; - sal_uInt64 GetOffset() const; - SvMemoryStream& GetMemory(); -}; - -/// Name object: a key string. -class XMLSECURITY_DLLPUBLIC PDFNameElement : public PDFElement -{ - OString m_aValue; - /// Offset after the '/' token. - sal_uInt64 m_nLocation; - /// Length till the next token start. - sal_uInt64 m_nLength; -public: - PDFNameElement(); - bool Read(SvStream& rStream) override; - const OString& GetValue() const; - sal_uInt64 GetLocation() const; - sal_uInt64 GetLength() const; -}; - -/// Dictionary object: a set key-value pairs. -class XMLSECURITY_DLLPUBLIC PDFDictionaryElement : public PDFElement -{ - /// Key-value pairs when the dictionary is a nested value. - std::map<OString, PDFElement*> m_aItems; - /// Offset after the '<<' token. - sal_uInt64 m_nLocation = 0; - /// Position after the '/' token. - std::map<OString, sal_uInt64> m_aDictionaryKeyOffset; - /// Length of the dictionary key and value, till (before) the next token. 
- std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength; - -public: - PDFDictionaryElement(); - bool Read(SvStream& rStream) override; - - static size_t Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary); - static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey); - void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset); - sal_uInt64 GetKeyOffset(const OString& rKey) const; - void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength); - sal_uInt64 GetKeyValueLength(const OString& rKey) const; - const std::map<OString, PDFElement*>& GetItems() const; - /// Looks up an object which is only referenced in this dictionary. - PDFObjectElement* LookupObject(const OString& rDictionaryKey); - /// Looks up an element which is contained in this dictionary. - PDFElement* LookupElement(const OString& rDictionaryKey); -}; - -enum class TokenizeMode -{ - /// Full file. - END_OF_STREAM, - /// Till the first %%EOF token. - EOF_TOKEN, - /// Till the end of the current object. - END_OF_OBJECT, - /// Same as END_OF_OBJECT, but for object streams (no endobj keyword). - STORED_OBJECT -}; - -/// The type column of an entry in a cross-reference stream. -enum class XRefEntryType -{ - /// xref "f" or xref stream "0". - FREE, - /// xref "n" or xref stream "1". - NOT_COMPRESSED, - /// xref stream "2. - COMPRESSED -}; - -/// An entry in a cross-reference stream. -struct XRefEntry -{ - XRefEntryType m_eType; - /** - * Non-compressed: The byte offset of the object, starting from the - * beginning of the file. - * Compressed: The object number of the object stream in which this object is - * stored. - */ - sal_uInt64 m_nOffset; - /** - * Non-compressed: The generation number of the object. - * Compressed: The index of this object within the object stream. - */ - sal_uInt64 m_nGenerationNumber; - /// Are changed as part of an incremental update?. 
- bool m_bDirty; - - XRefEntry(); -}; - -/** - * In-memory representation of an on-disk PDF document. - * - * The PDF element list is not meant to be saved back to disk, but some - * elements remember their source offset / length, and based on that it's - * possible to modify the input file. - */ -class XMLSECURITY_DLLPUBLIC PDFDocument -{ - /// This vector owns all elements. - std::vector< std::unique_ptr<PDFElement> > m_aElements; - /// Object ID <-> object offset map. - std::map<size_t, XRefEntry> m_aXRef; - /// Object offset <-> Object pointer map. - std::map<size_t, PDFObjectElement*> m_aOffsetObjects; - /// Object ID <-> Object pointer map. - std::map<size_t, PDFObjectElement*> m_aIDObjects; - /// List of xref offsets we know. - std::vector<size_t> m_aStartXRefs; - /// List of EOF offsets we know. - std::vector<size_t> m_aEOFs; - PDFTrailerElement* m_pTrailer; - /// When m_pTrailer is nullptr, this can still have a dictionary. - PDFObjectElement* m_pXRefStream; - /// All editing takes place in this buffer, if it happens. - SvMemoryStream m_aEditBuffer; - - static int AsHex(char ch); - /// Suggest a minimal, yet free signature ID to use for the next signature. - sal_uInt32 GetNextSignature(); - /// Write the signature object as part of signing. - sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rSignatureContentOffset); - /// Write the appearance object as part of signing. - sal_Int32 WriteAppearanceObject(); - /// Write the annot object as part of signing. - sal_Int32 WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId); - /// Write the updated Page object as part of signing. - bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId); - /// Write the updated Catalog object as part of signing. - bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot); - /// Write the updated cross-references as part of signing. 
- void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot); - -public: - PDFDocument(); - PDFDocument& operator=(const PDFDocument&) = delete; - PDFDocument(const PDFDocument&) = delete; - /// @name Low-level functions, to be used by PDFElement subclasses. - //@{ - /// Decode a hex dump. - static std::vector<unsigned char> DecodeHexString(PDFHexStringElement* pElement); - static OString ReadKeyword(SvStream& rStream); - static size_t FindStartXRef(SvStream& rStream); - void ReadXRef(SvStream& rStream); - void ReadXRefStream(SvStream& rStream); - static void SkipWhitespace(SvStream& rStream); - /// Instead of all whitespace, just skip CR and NL characters. - static void SkipLineBreaks(SvStream& rStream); - size_t GetObjectOffset(size_t nIndex) const; - const std::vector< std::unique_ptr<PDFElement> >& GetElements(); - std::vector<PDFObjectElement*> GetPages(); - /// Remember the end location of an EOF token. - void PushBackEOF(size_t nOffset); - /// Look up object based on object number, possibly by parsing object streams. - PDFObjectElement* LookupObject(size_t nObjectNumber); - /// Access to the input document, even after the input stream is gone. - SvMemoryStream& GetEditBuffer(); - /// Tokenize elements from current offset. - bool Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObject); - /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID. - void SetIDObject(size_t nID, PDFObjectElement* pObject); - //@} - - /// @name High-level functions, to be used by others. - //@{ - /// Read elements from the start of the stream till its end. - bool Read(SvStream& rStream); - /// Sign the read document with xCertificate in the edit buffer. - bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES); - /// Serializes the contents of the edit buffer. 
- bool Write(SvStream& rStream); - /// Get a list of signatures embedded into this document. - std::vector<PDFObjectElement*> GetSignatureWidgets(); - /// Remove the nth signature from read document in the edit buffer. - bool RemoveSignature(size_t nPosition); - //@} -}; - /** * @param rInformation The actual result. * @param bLast If this is the last signature in the file, so it covers the whole file physically. * @return If we can determinate a result. */ -XMLSECURITY_DLLPUBLIC bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, SignatureInformation& rInformation, bool bLast); +XMLSECURITY_DLLPUBLIC bool ValidateSignature(SvStream& rStream, vcl::filter::PDFObjectElement* pSignature, SignatureInformation& rInformation, bool bLast); } // namespace pdfio } // namespace xmlsecurity diff --git a/vcl/CppunitTest_vcl_pdfexport.mk b/vcl/CppunitTest_vcl_pdfexport.mk index 8f0cbcdf47f9..1e56d24f3c84 100644 --- a/vcl/CppunitTest_vcl_pdfexport.mk +++ b/vcl/CppunitTest_vcl_pdfexport.mk @@ -24,7 +24,7 @@ $(eval $(call gb_CppunitTest_use_libraries,vcl_pdfexport, \ unotest \ utl \ tl \ - xmlsecurity \ + vcl \ $(gb_UWINAPI) \ )) diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk index cd76fdffcb6f..af61feff9165 100644 --- a/vcl/Library_vcl.mk +++ b/vcl/Library_vcl.mk @@ -366,6 +366,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\ vcl/source/filter/igif/decode \ vcl/source/filter/igif/gifread \ vcl/source/filter/ipdf/pdfread \ + vcl/source/filter/ipdf/pdfdocument \ vcl/source/filter/ixbm/xbmread \ vcl/source/filter/ixpm/xpmread \ vcl/source/filter/jpeg/Exif \ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index edd63e6fc351..4e80f08d9e45 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -18,7 +18,7 @@ #include <unotest/macros_test.hxx> #include <unotools/mediadescriptor.hxx> #include <unotools/tempfile.hxx> -#include <xmlsecurity/pdfio/pdfdocument.hxx> 
+#include <vcl/filter/pdfdocument.hxx> #include <tools/zcodec.hxx> using namespace ::com::sun::star; @@ -90,21 +90,21 @@ void PdfExportTest::testTdf106059() xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); // Parse the export result. - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); // Assert that the XObject in the page resources dictionary is a reference XObject. - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aPages = aDocument.GetPages(); + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); // The document has one page. CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); - xmlsecurity::pdfio::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources"); + vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources"); CPPUNIT_ASSERT(pResources); - auto pXObjects = dynamic_cast<xmlsecurity::pdfio::PDFDictionaryElement*>(pResources->Lookup("XObject")); + auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject")); CPPUNIT_ASSERT(pXObjects); // The page has one image. CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size()); - xmlsecurity::pdfio::PDFObjectElement* pReferenceXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first); + vcl::filter::PDFObjectElement* pReferenceXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first); CPPUNIT_ASSERT(pReferenceXObject); // The image is a reference XObject. // This dictionary key was missing, so the XObject wasn't a reference one. @@ -126,18 +126,18 @@ void PdfExportTest::testTdf105461() xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); // Parse the export result. 
- xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); // The document has one page. - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aPages = aDocument.GetPages(); + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); // The page has a stream. - xmlsecurity::pdfio::PDFObjectElement* pContents = aPages[0]->LookupObject("Contents"); + vcl::filter::PDFObjectElement* pContents = aPages[0]->LookupObject("Contents"); CPPUNIT_ASSERT(pContents); - xmlsecurity::pdfio::PDFStreamElement* pStream = pContents->GetStream(); + vcl::filter::PDFStreamElement* pStream = pContents->GetStream(); CPPUNIT_ASSERT(pStream); SvMemoryStream& rObjectStream = pStream->GetMemory(); // Uncompress it. @@ -172,32 +172,32 @@ void PdfExportTest::testTdf105093() xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); // Parse the export result. - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); // The document has one page. - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aPages = aDocument.GetPages(); + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); // Get page annotations. 
- auto pAnnots = dynamic_cast<xmlsecurity::pdfio::PDFArrayElement*>(aPages[0]->Lookup("Annots")); + auto pAnnots = dynamic_cast<vcl::filter::PDFArrayElement*>(aPages[0]->Lookup("Annots")); CPPUNIT_ASSERT(pAnnots); CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pAnnots->GetElements().size()); - auto pAnnotReference = dynamic_cast<xmlsecurity::pdfio::PDFReferenceElement*>(pAnnots->GetElements()[0]); + auto pAnnotReference = dynamic_cast<vcl::filter::PDFReferenceElement*>(pAnnots->GetElements()[0]); CPPUNIT_ASSERT(pAnnotReference); - xmlsecurity::pdfio::PDFObjectElement* pAnnot = pAnnotReference->LookupObject(); + vcl::filter::PDFObjectElement* pAnnot = pAnnotReference->LookupObject(); CPPUNIT_ASSERT(pAnnot); - CPPUNIT_ASSERT_EQUAL(OString("Annot"), static_cast<xmlsecurity::pdfio::PDFNameElement*>(pAnnot->Lookup("Type"))->GetValue()); + CPPUNIT_ASSERT_EQUAL(OString("Annot"), static_cast<vcl::filter::PDFNameElement*>(pAnnot->Lookup("Type"))->GetValue()); // Get the Action -> Rendition -> MediaClip -> FileSpec. - auto pAction = dynamic_cast<xmlsecurity::pdfio::PDFDictionaryElement*>(pAnnot->Lookup("A")); + auto pAction = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pAnnot->Lookup("A")); CPPUNIT_ASSERT(pAction); - auto pRendition = dynamic_cast<xmlsecurity::pdfio::PDFDictionaryElement*>(pAction->LookupElement("R")); + auto pRendition = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pAction->LookupElement("R")); CPPUNIT_ASSERT(pRendition); - auto pMediaClip = dynamic_cast<xmlsecurity::pdfio::PDFDictionaryElement*>(pRendition->LookupElement("C")); + auto pMediaClip = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pRendition->LookupElement("C")); CPPUNIT_ASSERT(pMediaClip); - auto pFileSpec = dynamic_cast<xmlsecurity::pdfio::PDFDictionaryElement*>(pMediaClip->LookupElement("D")); + auto pFileSpec = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pMediaClip->LookupElement("D")); CPPUNIT_ASSERT(pFileSpec); // Make sure the filespec refers to an embedded file. 
// This key was missing, the embedded video was handled as a linked one. @@ -219,18 +219,18 @@ void PdfExportTest::testTdf106206() xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); // Parse the export result. - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); // The document has one page. - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aPages = aDocument.GetPages(); + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); // The page has a stream. - xmlsecurity::pdfio::PDFObjectElement* pContents = aPages[0]->LookupObject("Contents"); + vcl::filter::PDFObjectElement* pContents = aPages[0]->LookupObject("Contents"); CPPUNIT_ASSERT(pContents); - xmlsecurity::pdfio::PDFStreamElement* pStream = pContents->GetStream(); + vcl::filter::PDFStreamElement* pStream = pContents->GetStream(); CPPUNIT_ASSERT(pStream); SvMemoryStream& rObjectStream = pStream->GetMemory(); // Uncompress it. diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx new file mode 100644 index 000000000000..0e458e053a24 --- /dev/null +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -0,0 +1,2899 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <vcl/filter/pdfdocument.hxx> + +#include <map> +#include <memory> +#include <vector> + +#include <com/sun/star/uno/Sequence.hxx> + +#include <comphelper/processfactory.hxx> +#include <comphelper/scopeguard.hxx> +#include <comphelper/string.hxx> +#include <filter/msfilter/mscodec.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/string.hxx> +#include <sal/log.hxx> +#include <sal/types.h> +#include <sax/tools/converter.hxx> +#include <tools/zcodec.hxx> +#include <unotools/calendarwrapper.hxx> +#include <unotools/datetime.hxx> +#include <vcl/pdfwriter.hxx> +#include <xmloff/xmluconv.hxx> +#include <o3tl/make_unique.hxx> + +using namespace com::sun::star; + +namespace vcl +{ +namespace filter +{ + +const int MAX_SIGNATURE_CONTENT_LENGTH = 50000; + +class PDFTrailerElement; + +/// A one-liner comment. +class PDFCommentElement : public PDFElement +{ + PDFDocument& m_rDoc; + OString m_aComment; + +public: + explicit PDFCommentElement(PDFDocument& rDoc); + bool Read(SvStream& rStream) override; +}; + +class PDFReferenceElement; + +/// End of a dictionary: '>>'. +class PDFEndDictionaryElement : public PDFElement +{ + /// Offset before the '>>' token. + sal_uInt64 m_nLocation = 0; +public: + PDFEndDictionaryElement(); + bool Read(SvStream& rStream) override; + sal_uInt64 GetLocation() const; +}; + +/// End of a stream: 'endstream' keyword. +class PDFEndStreamElement : public PDFElement +{ +public: + bool Read(SvStream& rStream) override; +}; + +/// End of a object: 'endobj' keyword. +class PDFEndObjectElement : public PDFElement +{ +public: + bool Read(SvStream& rStream) override; +}; + +/// End of an array: ']'. +class PDFEndArrayElement : public PDFElement +{ + /// Location before the ']' token. + sal_uInt64 m_nOffset = 0; +public: + PDFEndArrayElement(); + bool Read(SvStream& rStream) override; + sal_uInt64 GetOffset() const; +}; + +/// Boolean object: a 'true' or a 'false'. 
+class PDFBooleanElement : public PDFElement +{ +public: + explicit PDFBooleanElement(bool bValue); + bool Read(SvStream& rStream) override; +}; + +/// Null object: the 'null' singleton. +class PDFNullElement : public PDFElement +{ +public: + bool Read(SvStream& rStream) override; +}; + +/// The trailer singleton is at the end of the doc. +class PDFTrailerElement : public PDFElement +{ + PDFDocument& m_rDoc; + std::map<OString, PDFElement*> m_aDictionary; + +public: + explicit PDFTrailerElement(PDFDocument& rDoc); + bool Read(SvStream& rStream) override; + PDFElement* Lookup(const OString& rDictionaryKey); +}; + +XRefEntry::XRefEntry() + : m_eType(XRefEntryType::NOT_COMPRESSED), + m_nOffset(0), + m_nGenerationNumber(0), + m_bDirty(false) +{ +} + +PDFDocument::PDFDocument() + : m_pTrailer(nullptr), + m_pXRefStream(nullptr) +{ +} + +bool PDFDocument::RemoveSignature(size_t nPosition) +{ + std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets(); + if (nPosition >= aSignatures.size()) + { + SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition"); + return false; + } + + if (aSignatures.size() != m_aEOFs.size() - 1) + { + SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures and incremental updates"); + return false; + } + + // The EOF offset is the end of the original file, without the signature at + // nPosition. + m_aEditBuffer.Seek(m_aEOFs[nPosition]); + // Drop all bytes after the current position. 
+ m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1); + + return m_aEditBuffer.good(); +} + +sal_uInt32 PDFDocument::GetNextSignature() +{ + sal_uInt32 nRet = 0; + for (const auto& pSignature : GetSignatureWidgets()) + { + auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T")); + if (!pT) + continue; + + const OString& rValue = pT->GetValue(); + const OString aPrefix = "Signature"; + if (!rValue.startsWith(aPrefix)) + continue; + + nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32()); + } + + return nRet + 1; +} + +sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset) +{ + // Write signature object. + sal_Int32 nSignatureId = m_aXRef.size(); + XRefEntry aSignatureEntry; + aSignatureEntry.m_nOffset = m_aEditBuffer.Tell(); + aSignatureEntry.m_bDirty = true; + m_aXRef[nSignatureId] = aSignatureEntry; + OStringBuffer aSigBuffer; + aSigBuffer.append(nSignatureId); + aSigBuffer.append(" 0 obj\n"); + aSigBuffer.append("<</Contents <"); + rContentOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength(); + // Reserve space for the PKCS#7 object. + OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH); + comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0'); + aSigBuffer.append(aContentFiller.makeStringAndClear()); + aSigBuffer.append(">\n/Type/Sig/SubFilter"); + if (bAdES) + aSigBuffer.append("/ETSI.CAdES.detached"); + else + aSigBuffer.append("/adbe.pkcs7.detached"); + + // Time of signing. + aSigBuffer.append(" /M ("); + aSigBuffer.append(vcl::PDFWriter::GetDateTime()); + aSigBuffer.append(")"); + + // Byte range: we can write offset1-length1 and offset2 right now, will + // write length2 later. + aSigBuffer.append(" /ByteRange [ 0 "); + // -1 and +1 is the leading "<" and the trailing ">" around the hex string. 
+ aSigBuffer.append(rContentOffset - 1); + aSigBuffer.append(" "); + aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); + aSigBuffer.append(" "); + rLastByteRangeOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength(); + // We don't know how many bytes we need for the last ByteRange value, this + // should be enough. + OStringBuffer aByteRangeFiller; + comphelper::string::padToLength(aByteRangeFiller, 100, ' '); + aSigBuffer.append(aByteRangeFiller.makeStringAndClear()); + // Finish the Sig obj. + aSigBuffer.append(" /Filter/Adobe.PPKMS"); + + if (!rDescription.isEmpty()) + { + aSigBuffer.append("/Reason<"); + vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer); + aSigBuffer.append(">"); + } + + aSigBuffer.append(" >>\nendobj\n\n"); + m_aEditBuffer.WriteOString(aSigBuffer.toString()); + + return nSignatureId; +} + +sal_Int32 PDFDocument::WriteAppearanceObject() +{ + // Write appearance object. + sal_Int32 nAppearanceId = m_aXRef.size(); + XRefEntry aAppearanceEntry; + aAppearanceEntry.m_nOffset = m_aEditBuffer.Tell(); + aAppearanceEntry.m_bDirty = true; + m_aXRef[nAppearanceId] = aAppearanceEntry; + m_aEditBuffer.WriteUInt32AsString(nAppearanceId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n"); + m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n"); + m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n"); + m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n"); + + return nAppearanceId; +} + +sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId) +{ + // Decide what identifier to use for the new signature. + sal_uInt32 nNextSignature = GetNextSignature(); + + // Write the Annot object, references nSignatureId and nAppearanceId. 
+ sal_Int32 nAnnotId = m_aXRef.size(); + XRefEntry aAnnotEntry; + aAnnotEntry.m_nOffset = m_aEditBuffer.Tell(); + aAnnotEntry.m_bDirty = true; + m_aXRef[nAnnotId] = aAnnotEntry; + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n"); + m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n"); + m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n"); + m_aEditBuffer.WriteCharPtr("/FT/Sig\n"); + m_aEditBuffer.WriteCharPtr("/P "); + m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" 0 R\n"); + m_aEditBuffer.WriteCharPtr("/T(Signature"); + m_aEditBuffer.WriteUInt32AsString(nNextSignature); + m_aEditBuffer.WriteCharPtr(")\n"); + m_aEditBuffer.WriteCharPtr("/V "); + m_aEditBuffer.WriteUInt32AsString(nSignatureId); + m_aEditBuffer.WriteCharPtr(" 0 R\n"); + m_aEditBuffer.WriteCharPtr("/DV "); + m_aEditBuffer.WriteUInt32AsString(nSignatureId); + m_aEditBuffer.WriteCharPtr(" 0 R\n"); + m_aEditBuffer.WriteCharPtr("/AP<<\n/N "); + m_aEditBuffer.WriteUInt32AsString(nAppearanceId); + m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n"); + m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n"); + + return nAnnotId; +} + +bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId) +{ + PDFElement* pAnnots = rFirstPage.Lookup("Annots"); + auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots); + if (pAnnotsReference) + { + // Write the updated Annots key of the Page object. 
+ PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject(); + if (!pAnnotsObject) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference"); + return false; + } + + sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue(); + m_aXRef[nAnnotsId].m_eType = XRefEntryType::NOT_COMPRESSED; + m_aXRef[nAnnotsId].m_nOffset = m_aEditBuffer.Tell(); + m_aXRef[nAnnotsId].m_nGenerationNumber = 0; + m_aXRef[nAnnotsId].m_bDirty = true; + m_aEditBuffer.WriteUInt32AsString(nAnnotsId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n["); + + // Write existing references. + PDFArrayElement* pArray = pAnnotsObject->GetArray(); + if (!pArray) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array"); + return false; + } + + for (size_t i = 0; i < pArray->GetElements().size(); ++i) + { + auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]); + if (!pReference) + continue; + + if (i) + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" 0 R"); + } + // Write our reference. + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R"); + + m_aEditBuffer.WriteCharPtr("]\nendobj\n\n"); + } + else + { + // Write the updated first page object, references nAnnotId. + sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue(); + if (nFirstPageId >= m_aXRef.size()) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id"); + return false; + } + m_aXRef[nFirstPageId].m_nOffset = m_aEditBuffer.Tell(); + m_aXRef[nFirstPageId].m_bDirty = true; + m_aEditBuffer.WriteUInt32AsString(nFirstPageId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n"); + m_aEditBuffer.WriteCharPtr("<<"); + auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots); + if (!pAnnotsArray) + { + // No Annots key, just write the key with a single reference. 
+ m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), rFirstPage.GetDictionaryLength()); + m_aEditBuffer.WriteCharPtr("/Annots["); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R]"); + } + else + { + // Annots key is already there, insert our reference at the end. + PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary(); + + // Offset right before the end of the Annots array. + sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots") + pDictionary->GetKeyValueLength("Annots") - 1; + // Length of beginning of the dictionary -> Annots end. + sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset(); + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), nAnnotsBeforeEndLength); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R"); + // Length of Annots end -> end of the dictionary. 
+ sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset() + rFirstPage.GetDictionaryLength() - nAnnotsEndOffset; + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nAnnotsEndOffset, nAnnotsAfterEndLength); + } + m_aEditBuffer.WriteCharPtr(">>"); + m_aEditBuffer.WriteCharPtr("\nendobj\n\n"); + } + + return true; +} + +bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot) +{ + if (m_pXRefStream) + pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root")); + else + { + if (!m_pTrailer) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer"); + return false; + } + pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root")); + } + if (!pRoot) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference"); + return false; + } + PDFObjectElement* pCatalog = pRoot->LookupObject(); + if (!pCatalog) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference"); + return false; + } + sal_uInt32 nCatalogId = pCatalog->GetObjectValue(); + if (nCatalogId >= m_aXRef.size()) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id"); + return false; + } + PDFElement* pAcroForm = pCatalog->Lookup("AcroForm"); + auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm); + if (pAcroFormReference) + { + // Write the updated AcroForm key of the Catalog object. 
+ PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject(); + if (!pAcroFormObject) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference"); + return false; + } + + sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue(); + m_aXRef[nAcroFormId].m_eType = XRefEntryType::NOT_COMPRESSED; + m_aXRef[nAcroFormId].m_nOffset = m_aEditBuffer.Tell(); + m_aXRef[nAcroFormId].m_nGenerationNumber = 0; + m_aXRef[nAcroFormId].m_bDirty = true; + m_aEditBuffer.WriteUInt32AsString(nAcroFormId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n"); + + // If this is nullptr, then the AcroForm object is not in an object stream. + SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer(); + + if (!pAcroFormObject->Lookup("Fields")) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object without required Fields key"); + return false; + } + + PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary(); + if (!pAcroFormDictionary) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary"); + return false; + } + + // Offset right before the end of the Fields array. + sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - strlen("]"); + // Length of beginning of the object dictionary -> Fields end. + sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset; + if (pStreamBuffer) + m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength); + else + { + nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset(); + m_aEditBuffer.WriteCharPtr("<<"); + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pAcroFormObject->GetDictionaryOffset(), nFieldsBeforeEndLength); + } + + // Append our reference at the end of the Fields array. 
+ m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R"); + + // Length of Fields end -> end of the object dictionary. + if (pStreamBuffer) + { + sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset; + m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); + } + else + { + sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset() + pAcroFormObject->GetDictionaryLength() - nFieldsEndOffset; + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); + m_aEditBuffer.WriteCharPtr(">>"); + } + + m_aEditBuffer.WriteCharPtr("\nendobj\n\n"); + } + else + { + // Write the updated Catalog object, references nAnnotId. + auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm); + m_aXRef[nCatalogId].m_nOffset = m_aEditBuffer.Tell(); + m_aXRef[nCatalogId].m_bDirty = true; + m_aEditBuffer.WriteUInt32AsString(nCatalogId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n"); + m_aEditBuffer.WriteCharPtr("<<"); + if (!pAcroFormDictionary) + { + // No AcroForm key, assume no signatures. + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), pCatalog->GetDictionaryLength()); + m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n"); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n"); + } + else + { + // AcroForm key is already there, insert our reference at the Fields end. 
+ auto it = pAcroFormDictionary->GetItems().find("Fields"); + if (it == pAcroFormDictionary->GetItems().end()) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key"); + return false; + } + + auto pFields = dynamic_cast<PDFArrayElement*>(it->second); + if (!pFields) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array"); + return false; + } + + // Offset right before the end of the Fields array. + sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - 1; + // Length of beginning of the Catalog dictionary -> Fields end. + sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset(); + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), nFieldsBeforeEndLength); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(nAnnotId); + m_aEditBuffer.WriteCharPtr(" 0 R"); + // Length of Fields end -> end of the Catalog dictionary. + sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset() + pCatalog->GetDictionaryLength() - nFieldsEndOffset; + m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); + } + m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n"); + } + + return true; +} + +void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot) +{ + if (m_pXRefStream) + { + // Write the xref stream. + // This is a bit meta: the xref stream stores its own offset. + sal_Int32 nXRefStreamId = m_aXRef.size(); + XRefEntry aXRefStreamEntry; + aXRefStreamEntry.m_nOffset = nXRefOffset; + aXRefStreamEntry.m_bDirty = true; + m_aXRef[nXRefStreamId] = aXRefStreamEntry; + + // Write stream data. + SvMemoryStream aXRefStream; + const size_t nOffsetLen = 3; + // 3 additional bytes: predictor, the first and the third field. 
+ const size_t nLineLength = nOffsetLen + 3; + // This is the line as it appears before tweaking according to the predictor. + std::vector<unsigned char> aOrigLine(nLineLength); + // This is the previous line. + std::vector<unsigned char> aPrevLine(nLineLength); + // This is the line as written to the stream. + std::vector<unsigned char> aFilteredLine(nLineLength); + for (const auto& rXRef : m_aXRef) + { + const XRefEntry& rEntry = rXRef.second; + + if (!rEntry.m_bDirty) + continue; + + // Predictor. + size_t nPos = 0; + // PNG prediction: up (on all rows). + aOrigLine[nPos++] = 2; + + // First field. + unsigned char nType = 0; + switch (rEntry.m_eType) + { + case XRefEntryType::FREE: + nType = 0; + break; + case XRefEntryType::NOT_COMPRESSED: + nType = 1; + break; + case XRefEntryType::COMPRESSED: + nType = 2; + break; + } + aOrigLine[nPos++] = nType; + + // Second field. + for (size_t i = 0; i < nOffsetLen; ++i) + { + size_t nByte = nOffsetLen - i - 1; + // Fields requiring more than one byte are stored with the + // high-order byte first. + unsigned char nCh = (rEntry.m_nOffset & (0xff << (nByte * 8))) >> (nByte * 8); + aOrigLine[nPos++] = nCh; + } + + // Third field. + aOrigLine[nPos++] = 0; + + // Now apply the predictor. + aFilteredLine[0] = aOrigLine[0]; + for (size_t i = 1; i < nLineLength; ++i) + { + // Count the delta vs the previous line. + aFilteredLine[i] = aOrigLine[i] - aPrevLine[i]; + // Remember the new reference. + aPrevLine[i] = aOrigLine[i]; + } + + aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size()); + } + + m_aEditBuffer.WriteUInt32AsString(nXRefStreamId); + m_aEditBuffer.WriteCharPtr(" 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode"); + + // ID. 
+ auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID")); + if (pID) + { + const std::vector<PDFElement*>& rElements = pID->GetElements(); + m_aEditBuffer.WriteCharPtr("/ID [ <"); + for (size_t i = 0; i < rElements.size(); ++i) + { + auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]); + if (!pIDString) + continue; + + m_aEditBuffer.WriteOString(pIDString->GetValue()); + if ((i + 1) < rElements.size()) + m_aEditBuffer.WriteCharPtr("> <"); + } + m_aEditBuffer.WriteCharPtr("> ] "); + } + + // Index. + m_aEditBuffer.WriteCharPtr("/Index [ "); + for (const auto& rXRef : m_aXRef) + { + if (!rXRef.second.m_bDirty) + continue; + + m_aEditBuffer.WriteUInt32AsString(rXRef.first); + m_aEditBuffer.WriteCharPtr(" 1 "); + } + m_aEditBuffer.WriteCharPtr("] "); + + // Info. + auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info")); + if (pInfo) + { + m_aEditBuffer.WriteCharPtr("/Info "); + m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue()); + m_aEditBuffer.WriteCharPtr(" R "); + } + + // Length. + m_aEditBuffer.WriteCharPtr("/Length "); + { + ZCodec aZCodec; + aZCodec.BeginCompression(); + aXRefStream.Seek(0); + SvMemoryStream aStream; + aZCodec.Compress(aXRefStream, aStream); + aZCodec.EndCompression(); + aXRefStream.Seek(0); + aXRefStream.SetStreamSize(0); + aStream.Seek(0); + aXRefStream.WriteStream(aStream); + } + m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize()); + + if (!m_aStartXRefs.empty()) + { + // Write location of the previous cross-reference section. + m_aEditBuffer.WriteCharPtr("/Prev "); + m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back()); + } + + // Root. 
+ m_aEditBuffer.WriteCharPtr("/Root "); + m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue()); + m_aEditBuffer.WriteCharPtr(" R "); + + // Size. + m_aEditBuffer.WriteCharPtr("/Size "); + m_aEditBuffer.WriteUInt32AsString(m_aXRef.size()); + + m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n"); + aXRefStream.Seek(0); + m_aEditBuffer.WriteStream(aXRefStream); + m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n"); + } + else + { + // Write the xref table. + m_aEditBuffer.WriteCharPtr("xref\n"); + for (const auto& rXRef : m_aXRef) + { + size_t nObject = rXRef.first; + size_t nOffset = rXRef.second.m_nOffset; + if (!rXRef.second.m_bDirty) + continue; + + m_aEditBuffer.WriteUInt32AsString(nObject); + m_aEditBuffer.WriteCharPtr(" 1\n"); + OStringBuffer aBuffer; + aBuffer.append(static_cast<sal_Int32>(nOffset)); + while (aBuffer.getLength() < 10) + aBuffer.insert(0, "0"); + if (nObject == 0) + aBuffer.append(" 65535 f \n"); + else + aBuffer.append(" 00000 n \n"); + m_aEditBuffer.WriteOString(aBuffer.toString()); + } + + // Write the trailer. 
+ m_aEditBuffer.WriteCharPtr("trailer\n<</Size "); + m_aEditBuffer.WriteUInt32AsString(m_aXRef.size()); + m_aEditBuffer.WriteCharPtr("/Root "); + m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue()); + m_aEditBuffer.WriteCharPtr(" R\n"); + auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info")); + if (pInfo) + { + m_aEditBuffer.WriteCharPtr("/Info "); + m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue()); + m_aEditBuffer.WriteCharPtr(" "); + m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue()); + m_aEditBuffer.WriteCharPtr(" R\n"); + } + auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID")); + if (pID) + { + const std::vector<PDFElement*>& rElements = pID->GetElements(); + m_aEditBuffer.WriteCharPtr("/ID [ <"); + for (size_t i = 0; i < rElements.size(); ++i) + { + auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]); + if (!pIDString) + continue; + + m_aEditBuffer.WriteOString(pIDString->GetValue()); + if ((i + 1) < rElements.size()) + m_aEditBuffer.WriteCharPtr(">\n<"); + } + m_aEditBuffer.WriteCharPtr("> ]\n"); + } + + if (!m_aStartXRefs.empty()) + { + // Write location of the previous cross-reference section. 
+ m_aEditBuffer.WriteCharPtr("/Prev "); + m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back()); + } + + m_aEditBuffer.WriteCharPtr(">>\n"); + } +} + +bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES) +{ + m_aEditBuffer.Seek(STREAM_SEEK_TO_END); + m_aEditBuffer.WriteCharPtr("\n"); + + sal_uInt64 nSignatureLastByteRangeOffset = 0; + sal_Int64 nSignatureContentOffset = 0; + sal_Int32 nSignatureId = WriteSignatureObject(rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset); + + sal_Int32 nAppearanceId = WriteAppearanceObject(); + + std::vector<PDFObjectElement*> aPages = GetPages(); + if (aPages.empty() || !aPages[0]) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages"); + return false; + } + + PDFObjectElement& rFirstPage = *aPages[0]; + sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId); + + if (!WritePageObject(rFirstPage, nAnnotId)) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object"); + return false; + } + + PDFReferenceElement* pRoot = nullptr; + if (!WriteCatalogObject(nAnnotId, pRoot)) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object"); + return false; + } + + sal_uInt64 nXRefOffset = m_aEditBuffer.Tell(); + WriteXRef(nXRefOffset, pRoot); + + // Write startxref. + m_aEditBuffer.WriteCharPtr("startxref\n"); + m_aEditBuffer.WriteUInt32AsString(nXRefOffset); + m_aEditBuffer.WriteCharPtr("\n%%EOF\n"); + + // Finalize the signature, now that we know the total file size. + // Calculate the length of the last byte range. + sal_uInt64 nFileEnd = m_aEditBuffer.Tell(); + sal_Int64 nLastByteRangeLength = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); + // Write the length to the buffer. 
+ m_aEditBuffer.Seek(nSignatureLastByteRangeOffset); + OStringBuffer aByteRangeBuffer; + aByteRangeBuffer.append(nLastByteRangeLength); + aByteRangeBuffer.append(" ]"); + m_aEditBuffer.WriteOString(aByteRangeBuffer.toString()); + + // Create the PKCS#7 object. + css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded(); + if (!aDerEncoded.hasElements()) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate"); + return false; + } + + m_aEditBuffer.Seek(0); + sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1; + std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]); + m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1); + + m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); + sal_uInt64 nBufferSize2 = nLastByteRangeLength; + std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]); + m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2); + + OStringBuffer aCMSHexBuffer; + vcl::PDFWriter::PDFSignContext aSignContext(aCMSHexBuffer); + aSignContext.m_pDerEncoded = aDerEncoded.getArray(); + aSignContext.m_nDerEncoded = aDerEncoded.getLength(); + aSignContext.m_pByteRange1 = aBuffer1.get(); + aSignContext.m_nByteRange1 = nBufferSize1; + aSignContext.m_pByteRange2 = aBuffer2.get(); + aSignContext.m_nByteRange2 = nBufferSize2; + if (!vcl::PDFWriter::Sign(aSignContext)) + { + SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed"); + return false; + } + + assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH); + + m_aEditBuffer.Seek(nSignatureContentOffset); + m_aEditBuffer.WriteOString(aCMSHexBuffer.toString()); + + return true; +} + +bool PDFDocument::Write(SvStream& rStream) +{ + m_aEditBuffer.Seek(0); + rStream.WriteStream(m_aEditBuffer); + return rStream.good(); +} + +bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObjectElement) +{ + // Last seen object token. 
+ PDFObjectElement* pObject = pObjectElement; + PDFNameElement* pObjectKey = nullptr; + PDFObjectElement* pObjectStream = nullptr; + bool bInXRef = false; + // The next number will be an xref offset. + bool bInStartXRef = false; + // Dictionary depth, so we know when we're outside any dictionaries. + int nDictionaryDepth = 0; + // Last seen array token that's outside any dictionaries. + PDFArrayElement* pArray = nullptr; + while (true) + { + char ch; + rStream.ReadChar(ch); + if (rStream.IsEof()) + break; + + switch (ch) + { + case '%': + { + auto pComment = new PDFCommentElement(*this); + rElements.push_back(std::unique_ptr<PDFElement>(pComment)); + rStream.SeekRel(-1); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFCommentElement::Read() failed"); + return false; + } + if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty() && m_aEOFs.back() == rStream.Tell()) + { + // Found EOF and partial parsing requested, we're done. + return true; + } + break; + } + case '<': + { + // Dictionary or hex string. 
+ rStream.ReadChar(ch); + rStream.SeekRel(-2); + if (ch == '<') + { + rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement())); + ++nDictionaryDepth; + } + else + rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement)); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed"); + return false; + } + break; + } + case '>': + { + rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement())); + --nDictionaryDepth; + rStream.SeekRel(-1); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed"); + return false; + } + break; + } + case '[': + { + auto pArr = new PDFArrayElement(); + rElements.push_back(std::unique_ptr<PDFElement>(pArr)); + if (nDictionaryDepth == 0) + { + // The array is attached directly, inform the object. + pArray = pArr; + if (pObject) + pObject->SetArray(pArray); + } + rStream.SeekRel(-1); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed"); + return false; + } + break; + } + case ']': + { + rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement())); + pArray = nullptr; + rStream.SeekRel(-1); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed"); + return false; + } + break; + } + case '/': + { + auto pNameElement = new PDFNameElement(); + rElements.push_back(std::unique_ptr<PDFElement>(pNameElement)); + rStream.SeekRel(-1); + if (!pNameElement->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed"); + return false; + } + if (pObject && pObjectKey && pObjectKey->GetValue() == "Type" && pNameElement->GetValue() == "ObjStm") + pObjectStream = pObject; + else + pObjectKey = pNameElement; + break; + } + case '(': + { + 
rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement)); + rStream.SeekRel(-1); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed"); + return false; + } + break; + } + default: + { + if (isdigit(ch) || ch == '-') + { + // Numbering object: an integer or a real. + auto pNumberElement = new PDFNumberElement(); + rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement)); + rStream.SeekRel(-1); + if (!pNumberElement->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNumberElement::Read() failed"); + return false; + } + if (bInStartXRef) + { + bInStartXRef = false; + m_aStartXRefs.push_back(pNumberElement->GetValue()); + + auto it = m_aOffsetObjects.find(pNumberElement->GetValue()); + if (it != m_aOffsetObjects.end()) + m_pXRefStream = it->second; + } + } + else if (isalpha(ch)) + { + // Possible keyword, like "obj". + rStream.SeekRel(-1); + OString aKeyword = ReadKeyword(rStream); + + bool bObj = aKeyword == "obj"; + if (bObj || aKeyword == "R") + { + size_t nElements = rElements.size(); + if (nElements < 2) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two tokens before 'obj' or 'R' keyword"); + return false; + } + + auto pObjectNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get()); + auto pGenerationNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get()); + if (!pObjectNumber || !pGenerationNumber) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or generation number before 'obj' or 'R' keyword"); + return false; + } + + if (bObj) + { + pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue()); + rElements.push_back(std::unique_ptr<PDFElement>(pObject)); + m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject; + m_aIDObjects[pObjectNumber->GetValue()] = pObject; + } + else + { + 
rElements.push_back(std::unique_ptr<PDFElement>(new PDFReferenceElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue()))); + if (pArray) + // Reference is part of a direct (non-dictionary) array, inform the array. + pArray->PushBack(rElements.back().get()); + } + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFElement::Read() failed"); + return false; + } + } + else if (aKeyword == "stream") + { + // Look up the length of the stream from the parent object's dictionary. + size_t nLength = 0; + for (size_t nElement = 0; nElement < rElements.size(); ++nElement) + { + // Iterate in reverse order. + size_t nIndex = rElements.size() - nElement - 1; + PDFElement* pElement = rElements[nIndex].get(); + auto pObj = dynamic_cast<PDFObjectElement*>(pElement); + if (!pObj) + continue; + + PDFElement* pLookup = pObj->Lookup("Length"); + auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup); + if (pReference) + { + // Length is provided as a reference. + nLength = pReference->LookupNumber(rStream); + break; + } + + auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup); + if (pNumber) + { + // Length is provided directly. 
+ nLength = pNumber->GetValue(); + break; + } + + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: found no Length key for stream keyword"); + return false; + } + + PDFDocument::SkipLineBreaks(rStream); + auto pStreamElement = new PDFStreamElement(nLength); + if (pObject) + pObject->SetStream(pStreamElement); + rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement)); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFStreamElement::Read() failed"); + return false; + } + } + else if (aKeyword == "endstream") + { + rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement)); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed"); + return false; + } + } + else if (aKeyword == "endobj") + { + rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement)); + if (!rElements.back()->Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed"); + return false; + } + if (eMode == TokenizeMode::END_OF_OBJECT) + { + // Found endobj and only object parsing was requested, we're done. + return true; + } + + if (pObjectStream) + { + // We're at the end of an object stream, parse the stored objects. + pObjectStream->ParseStoredObjects(); + pObjectStream = nullptr; + pObjectKey = nullptr; + } + } + else if (aKeyword == "true" || aKeyword == "false") + rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean()))); + else if (aKeyword == "null") + rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement)); + else if (aKeyword == "xref") + // Allow 'f' and 'n' keywords. + bInXRef = true; + else if (bInXRef && (aKeyword == "f" || aKeyword == "n")) + { + } + else if (aKeyword == "trailer") + { + auto pTrailer = new PDFTrailerElement(*this); + // When reading till the first EOF token only, remember + // just the first trailer token. 
+ if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer) + m_pTrailer = pTrailer; + rElements.push_back(std::unique_ptr<PDFElement>(pTrailer)); + } + else if (aKeyword == "startxref") + { + bInStartXRef = true; + } + else + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '" << aKeyword << "' keyword at byte position " << rStream.Tell()); + return false; + } + } + else + { + if (!isspace(ch)) + { + SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: " << ch << " at byte position " << rStream.Tell()); + return false; + } + } + break; + } + } + } + + return true; +} + +void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject) +{ + m_aIDObjects[nID] = pObject; +} + +bool PDFDocument::Read(SvStream& rStream) +{ + // Check file magic. + std::vector<sal_Int8> aHeader(5); + rStream.Seek(0); + rStream.ReadBytes(aHeader.data(), aHeader.size()); + if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F' || aHeader[4] != '-') + { + SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch"); + return false; + } + + // Allow later editing of the contents in-memory. + rStream.Seek(0); + m_aEditBuffer.WriteStream(rStream); + + // Look up the offset of the xref table. 
+ size_t nStartXRef = FindStartXRef(rStream); + SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef); + if (nStartXRef == 0) + { + SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset"); + return false; + } + while (true) + { + rStream.Seek(nStartXRef); + OString aKeyword = ReadKeyword(rStream); + if (aKeyword.isEmpty()) + ReadXRefStream(rStream); + + else + { + if (aKeyword != "xref") + { + SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword"); + return false; + } + ReadXRef(rStream); + if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr)) + { + SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref"); + return false; + } + } + + PDFNumberElement* pPrev = nullptr; + if (m_pTrailer) + pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev")); + else if (m_pXRefStream) + pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev")); + if (pPrev) + nStartXRef = pPrev->GetValue(); + + // Reset state, except object offsets and the edit buffer. + m_aElements.clear(); + m_aStartXRefs.clear(); + m_aEOFs.clear(); + m_pTrailer = nullptr; + m_pXRefStream = nullptr; + if (!pPrev) + break; + } + + // Then we can tokenize the stream. + rStream.Seek(0); + return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr); +} + +OString PDFDocument::ReadKeyword(SvStream& rStream) +{ + OStringBuffer aBuf; + char ch; + rStream.ReadChar(ch); + if (rStream.IsEof()) + return OString(); + while (isalpha(ch)) + { + aBuf.append(ch); + rStream.ReadChar(ch); + if (rStream.IsEof()) + return aBuf.toString(); + } + rStream.SeekRel(-1); + return aBuf.toString(); +} + +size_t PDFDocument::FindStartXRef(SvStream& rStream) +{ + // Find the "startxref" token, somewhere near the end of the document. 
+ std::vector<char> aBuf(1024); + rStream.Seek(STREAM_SEEK_TO_END); + if (rStream.Tell() > aBuf.size()) + rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size()); + else + // The document is really short, then just read it from the start. + rStream.Seek(0); + size_t nBeforePeek = rStream.Tell(); + size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size()); + rStream.Seek(nBeforePeek); + if (nSize != aBuf.size()) + aBuf.resize(nSize); + OString aPrefix("startxref"); + // Find the last startxref at the end of the document. + auto itLastValid = aBuf.end(); + auto it = aBuf.begin(); + while (true) + { + it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength()); + if (it == aBuf.end()) + break; + else + { + itLastValid = it; + ++it; + } + } + if (itLastValid == aBuf.end()) + { + SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref"); + return 0; + } + + rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength()); + if (rStream.IsEof()) + { + SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: unexpected end of stream after startxref"); + return 0; + } + + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aNumber; + if (!aNumber.Read(rStream)) + return 0; + return aNumber.GetValue(); +} + +void PDFDocument::ReadXRefStream(SvStream& rStream) +{ + // Look up the stream length in the object dictionary. 
+ if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr)) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object"); + return; + } + + if (m_aElements.empty()) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found"); + return; + } + + PDFObjectElement* pObject = nullptr; + for (const auto& pElement : m_aElements) + { + if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get())) + { + pObject = pObj; + break; + } + } + if (!pObject) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found"); + return; + } + + // So that the Prev key can be looked up later. + m_pXRefStream = pObject; + + PDFElement* pLookup = pObject->Lookup("Length"); + auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup); + if (!pNumber) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided"); + return; + } + sal_uInt64 nLength = pNumber->GetValue(); + + // Look up the stream offset. + PDFStreamElement* pStream = nullptr; + for (const auto& pElement : m_aElements) + { + if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get())) + { + pStream = pS; + break; + } + } + if (!pStream) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found"); + return; + } + + // Read and decompress it. 
+ rStream.Seek(pStream->GetOffset()); + std::vector<char> aBuf(nLength); + rStream.ReadBytes(aBuf.data(), aBuf.size()); + + auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter")); + if (!pFilter) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found"); + return; + } + + if (pFilter->GetValue() != "FlateDecode") + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue()); + return; + } + + int nColumns = 1; + int nPredictor = 1; + if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms"))) + { + const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems(); + auto it = rItems.find("Columns"); + if (it != rItems.end()) + if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second)) + nColumns = pColumns->GetValue(); + it = rItems.find("Predictor"); + if (it != rItems.end()) + if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second)) + nPredictor = pPredictor->GetValue(); + } + + SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ); + SvMemoryStream aStream; + ZCodec aZCodec; + aZCodec.BeginCompression(); + aZCodec.Decompress(aSource, aStream); + if (!aZCodec.EndCompression()) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed"); + return; + } + + // Look up the first and the last entry we need to read. 
+ auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index")); + std::vector<size_t> aFirstObjects; + std::vector<size_t> aNumberOfObjects; + if (!pIndex) + { + auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size")); + if (pSize) + { + aFirstObjects.push_back(0); + aNumberOfObjects.push_back(pSize->GetValue()); + } + else + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found"); + return; + } + } + else + { + const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements(); + size_t nFirstObject = 0; + for (size_t i = 0; i < rIndexElements.size(); ++i) + { + if (i % 2 == 0) + { + auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]); + if (!pFirstObject) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no first object"); + return; + } + nFirstObject = pFirstObject->GetValue(); + continue; + } + + auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]); + if (!pNumberOfObjects) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no number of objects"); + return; + } + aFirstObjects.push_back(nFirstObject); + aNumberOfObjects.push_back(pNumberOfObjects->GetValue()); + } + } + + // Look up the format of a single entry. + const int nWSize = 3; + auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W")); + if (!pW || pW->GetElements().size() < nWSize) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements"); + return; + } + int aW[nWSize]; + // First character is the (kind of) repeated predictor. 
+ int nLineLength = 1; + for (size_t i = 0; i < nWSize; ++i) + { + auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]); + if (!pI) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number"); + return; + } + aW[i] = pI->GetValue(); + nLineLength += aW[i]; + } + + if (nPredictor > 1 && nLineLength - 1 != nColumns) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W"); + return; + } + + aStream.Seek(0); + for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection) + { + size_t nFirstObject = aFirstObjects[nSubSection]; + size_t nNumberOfObjects = aNumberOfObjects[nSubSection]; + + // This is the line as read from the stream. + std::vector<unsigned char> aOrigLine(nLineLength); + // This is the line as it appears after tweaking according to nPredictor. + std::vector<unsigned char> aFilteredLine(nLineLength); + for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry) + { + size_t nIndex = nFirstObject + nEntry; + + aStream.ReadBytes(aOrigLine.data(), aOrigLine.size()); + if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is inconsistent with /DecodeParms/Predictor for object #" << nIndex); + return; + } + + for (int i = 0; i < nLineLength; ++i) + { + switch (nPredictor) + { + case 1: + // No prediction. + break; + case 12: + // PNG prediction: up (on all rows). + aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i]; + break; + default: + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: " << nPredictor); + return; + break; + } + } + + // First character is already handled above. + int nPos = 1; + size_t nType = 0; + // Start of the current field in the stream data. + int nOffset = nPos; + for (; nPos < nOffset + aW[0]; ++nPos) + { + unsigned char nCh = aFilteredLine[nPos]; + nType = (nType << 8) + nCh; + } + + // Start of the object in the file stream. 
+ size_t nStreamOffset = 0; + nOffset = nPos; + for (; nPos < nOffset + aW[1]; ++nPos) + { + unsigned char nCh = aFilteredLine[nPos]; + nStreamOffset = (nStreamOffset << 8) + nCh; + } + + // Generation number of the object. + size_t nGenerationNumber = 0; + nOffset = nPos; + for (; nPos < nOffset + aW[2]; ++nPos) + { + unsigned char nCh = aFilteredLine[nPos]; + nGenerationNumber = (nGenerationNumber << 8) + nCh; + } + + // Ignore invalid nType. + if (nType <= 2) + { + if (m_aXRef.find(nIndex) == m_aXRef.end()) + { + XRefEntry aEntry; + switch (nType) + { + case 0: + aEntry.m_eType = XRefEntryType::FREE; + break; + case 1: + aEntry.m_eType = XRefEntryType::NOT_COMPRESSED; + break; + case 2: + aEntry.m_eType = XRefEntryType::COMPRESSED; + break; + } + aEntry.m_nOffset = nStreamOffset; + aEntry.m_nGenerationNumber = nGenerationNumber; + m_aXRef[nIndex] = aEntry; + } + } + } + } +} + +void PDFDocument::ReadXRef(SvStream& rStream) +{ + PDFDocument::SkipWhitespace(rStream); + + while (true) + { + PDFNumberElement aFirstObject; + if (!aFirstObject.Read(rStream)) + { + // Next token is not a number, it'll be the trailer. 
+ return; + } + + if (aFirstObject.GetValue() < 0) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0"); + return; + } + + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aNumberOfEntries; + if (!aNumberOfEntries.Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries"); + return; + } + + if (aNumberOfEntries.GetValue() < 0) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries"); + return; + } + + size_t nSize = aNumberOfEntries.GetValue(); + for (size_t nEntry = 0; nEntry < nSize; ++nEntry) + { + size_t nIndex = aFirstObject.GetValue() + nEntry; + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aOffset; + if (!aOffset.Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset"); + return; + } + + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aGenerationNumber; + if (!aGenerationNumber.Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number"); + return; + } + + PDFDocument::SkipWhitespace(rStream); + OString aKeyword = ReadKeyword(rStream); + if (aKeyword != "f" && aKeyword != "n") + { + SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword"); + return; + } + // xrefs are read in reverse order, so never update an existing + // offset with an older one. + if (m_aXRef.find(nIndex) == m_aXRef.end()) + { + XRefEntry aEntry; + aEntry.m_nOffset = aOffset.GetValue(); + aEntry.m_nGenerationNumber = aGenerationNumber.GetValue(); + // Initially only the first entry is dirty. 
+ if (nIndex == 0) + aEntry.m_bDirty = true; + m_aXRef[nIndex] = aEntry; + } + PDFDocument::SkipWhitespace(rStream); + } + } +} + +void PDFDocument::SkipWhitespace(SvStream& rStream) +{ + char ch = 0; + + while (true) + { + rStream.ReadChar(ch); + if (rStream.IsEof()) + break; + + if (!isspace(ch)) + { + rStream.SeekRel(-1); + return; + } + } +} + +void PDFDocument::SkipLineBreaks(SvStream& rStream) +{ + char ch = 0; + + while (true) + { + rStream.ReadChar(ch); + if (rStream.IsEof()) + break; + + if (ch != '\n' && ch != '\r') + { + rStream.SeekRel(-1); + return; + } + } +} + +size_t PDFDocument::GetObjectOffset(size_t nIndex) const +{ + auto it = m_aXRef.find(nIndex); + if (it == m_aXRef.end() || it->second.m_eType == XRefEntryType::COMPRESSED) + { + SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex << ", but failed"); + return 0; + } + + return it->second.m_nOffset; +} + +const std::vector< std::unique_ptr<PDFElement> >& PDFDocument::GetElements() +{ + return m_aElements; +} + +std::vector<PDFObjectElement*> PDFDocument::GetPages() +{ + std::vector<PDFObjectElement*> aRet; + + PDFReferenceElement* pRoot = nullptr; + if (m_pTrailer) + pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root")); + else if (m_pXRefStream) + pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root")); + + if (!pRoot) + { + SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key"); + return aRet; + } + + PDFObjectElement* pCatalog = pRoot->LookupObject(); + if (!pCatalog) + { + SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog"); + return aRet; + } + + PDFObjectElement* pPages = pCatalog->LookupObject("Pages"); + if (!pPages) + { + SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue() << ") has no pages"); + return aRet; + } + + auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids")); + if (!pKids) + { + SAL_WARN("vcl.filter", 
"PDFDocument::GetPages: pages has no kids"); + return aRet; + } + + for (const auto& pKid : pKids->GetElements()) + { + auto pReference = dynamic_cast<PDFReferenceElement*>(pKid); + if (!pReference) + continue; + + aRet.push_back(pReference->LookupObject()); + } + + return aRet; +} + +void PDFDocument::PushBackEOF(size_t nOffset) +{ + m_aEOFs.push_back(nOffset); +} + +std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets() +{ + std::vector<PDFObjectElement*> aRet; + + std::vector<PDFObjectElement*> aPages = GetPages(); + + for (const auto& pPage : aPages) + { + if (!pPage) + continue; + + PDFElement* pAnnotsElement = pPage->Lookup("Annots"); + auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement); + if (!pAnnots) + { + // Annots is not an array, see if it's a reference to an object + // with a direct array. + auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement); + if (pAnnotsRef) + { + if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject()) + { + pAnnots = pAnnotsObject->GetArray(); + } + } + } + + if (!pAnnots) + continue; + + for (const auto& pAnnot : pAnnots->GetElements()) + { + auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot); + if (!pReference) + continue; + + PDFObjectElement* pAnnotObject = pReference->LookupObject(); + if (!pAnnotObject) + continue; + + auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT")); + if (!pFT || pFT->GetValue() != "Sig") + continue; + + aRet.push_back(pAnnotObject); + } + } + + return aRet; +} + +int PDFDocument::AsHex(char ch) +{ + int nRet = 0; + if (isdigit(ch)) + nRet = ch - '0'; + else + { + if (ch >= 'a' && ch <= 'f') + nRet = ch - 'a'; + else if (ch >= 'A' && ch <= 'F') + nRet = ch - 'A'; + else + return -1; + nRet += 10; + } + return nRet; +} + +std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement* pElement) +{ + std::vector<unsigned char> aRet; + const OString& rHex = pElement->GetValue(); + size_t nHexLen = rHex.getLength(); + { 
+ int nByte = 0; + int nCount = 2; + for (size_t i = 0; i < nHexLen; ++i) + { + nByte = nByte << 4; + sal_Int8 nParsed = AsHex(rHex[i]); + if (nParsed == -1) + { + SAL_WARN("vcl.filter", "PDFDocument::DecodeHexString: invalid hex value"); + return aRet; + } + nByte += nParsed; + --nCount; + if (!nCount) + { + aRet.push_back(nByte); + nCount = 2; + nByte = 0; + } + } + } + + return aRet; +} + +PDFCommentElement::PDFCommentElement(PDFDocument& rDoc) + : m_rDoc(rDoc) +{ +} + +bool PDFCommentElement::Read(SvStream& rStream) +{ + // Read from (including) the % char till (excluding) the end of the line/stream. + OStringBuffer aBuf; + char ch; + rStream.ReadChar(ch); + while (true) + { + if (ch == '\n' || ch == '\r' || rStream.IsEof()) + { + m_aComment = aBuf.makeStringAndClear(); + + if (m_aComment.startsWith("%%EOF")) + m_rDoc.PushBackEOF(rStream.Tell()); + + SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'"); + return true; + } + aBuf.append(ch); + rStream.ReadChar(ch); + } + + return false; +} + +PDFNumberElement::PDFNumberElement() = default; + +bool PDFNumberElement::Read(SvStream& rStream) +{ + OStringBuffer aBuf; + m_nOffset = rStream.Tell(); + char ch; + rStream.ReadChar(ch); + if (!isdigit(ch) && ch != '-' && ch != '.') + { + rStream.SeekRel(-1); + return false; + } + while (!rStream.IsEof()) + { + if (!isdigit(ch) && ch != '-' && ch != '.') + { + rStream.SeekRel(-1); + m_nLength = rStream.Tell() - m_nOffset; + m_fValue = aBuf.makeStringAndClear().toDouble(); + SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'"); + return true; + } + aBuf.append(ch); + rStream.ReadChar(ch); + } + + return false; +} + +sal_uInt64 PDFNumberElement::GetLocation() const +{ + return m_nOffset; +} + +sal_uInt64 PDFNumberElement::GetLength() const +{ + return m_nLength; +} + +PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) +{ +} + +bool PDFBooleanElement::Read(SvStream& /*rStream*/) +{ + return true; +} + +bool 
PDFNullElement::Read(SvStream& /*rStream*/) +{ + return true; +} + +bool PDFHexStringElement::Read(SvStream& rStream) +{ + char ch; + rStream.ReadChar(ch); + if (ch != '<') + { + SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character"); + return false; + } + rStream.ReadChar(ch); + + OStringBuffer aBuf; + while (!rStream.IsEof()) + { + if (ch == '>') + { + m_aValue = aBuf.makeStringAndClear(); + SAL_INFO("vcl.filter", "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength()); + return true; + } + aBuf.append(ch); + rStream.ReadChar(ch); + } + + return false; +} + +const OString& PDFHexStringElement::GetValue() const +{ + return m_aValue; +} + +bool PDFLiteralStringElement::Read(SvStream& rStream) +{ + char nPrevCh = 0; + char ch = 0; + rStream.ReadChar(ch); + if (ch != '(') + { + SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character"); + return false; + } + nPrevCh = ch; + rStream.ReadChar(ch); + + OStringBuffer aBuf; + while (!rStream.IsEof()) + { + if (ch == ')' && nPrevCh != '\\') + { + m_aValue = aBuf.makeStringAndClear(); + SAL_INFO("vcl.filter", "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'"); + return true; + } + aBuf.append(ch); + nPrevCh = ch; + rStream.ReadChar(ch); + } + + return false; +} + +const OString& PDFLiteralStringElement::GetValue() const +{ + return m_aValue; +} + +PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc) + : m_rDoc(rDoc) +{ +} + +bool PDFTrailerElement::Read(SvStream& /*rStream*/) +{ + return true; +} + +PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey) +{ + if (m_aDictionary.empty()) + PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); + + return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey); +} + + +double PDFNumberElement::GetValue() const +{ + return m_fValue; +} + +PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue) + : 
/**
 * Builds the key -> value map of a dictionary from a flat token list.
 *
 * Scanning starts at pThis inside rElements (or at the first token if pThis
 * is not part of rElements) and stops at the '>>' that closes the first
 * dictionary met, at an endobj token, or at the end of rElements.
 *
 * @param rElements   the tokens to scan.
 * @param pThis       the object, nested dictionary or trailer whose
 *                    dictionary is wanted.
 * @param rDictionary receives the items; nothing is done if it already has
 *                    content.
 * @return the index of the closing '>>' token, or 0 if rDictionary was not
 *         empty or no closing token was reached.
 */
size_t PDFDictionaryElement::Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
{
    // The index of last parsed element, in case of nested dictionaries.
    size_t nRet = 0;

    if (!rDictionary.empty())
        return nRet;

    auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
    // This is set to non-nullptr here for nested dictionaries only.
    auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);

    // Find out where the dictionary for this object starts.
    size_t nIndex = 0;
    for (size_t i = 0; i < rElements.size(); ++i)
    {
        if (rElements[i].get() == pThis)
        {
            nIndex = i;
            break;
        }
    }

    // The pending key: set when a name is seen, cleared once its value is stored.
    OString aName;
    sal_uInt64 nNameOffset = 0;
    // Numbers seen since the last stored value; whether they are a plain
    // value, array items or part of a reference is only known later.
    std::vector<PDFNumberElement*> aNumbers;
    // The array value we're in -- if any.
    PDFArrayElement* pArray = nullptr;
    sal_uInt64 nDictionaryOffset = 0;
    int nDictionaryDepth = 0;
    for (size_t i = nIndex; i < rElements.size(); ++i)
    {
        // Dictionary tokens can be nested, track enter/leave.
        if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
        {
            if (++nDictionaryDepth == 1)
            {
                // First dictionary start, track start offset.
                nDictionaryOffset = pDictionary->m_nLocation;
                if (pThisObject)
                {
                    pThisObject->SetDictionary(pDictionary);
                    pThisDictionary = pDictionary;
                    pThisObject->SetDictionaryOffset(nDictionaryOffset);
                }
            }
            else
            {
                // Nested dictionary.
                // The recursive call hands back the index of the nested '>>';
                // the end-dictionary check right below then sees that token
                // and undoes the depth increment from above.
                // NOTE(review): if the nested dictionary was parsed earlier,
                // the call returns 0 and i is reset to the first token --
                // confirm that this cannot happen.
                i = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
                rDictionary[aName] = pDictionary;
                aName.clear();
            }
        }

        if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
        {
            if (--nDictionaryDepth == 0)
            {
                // Last dictionary end, track length and stop parsing.
                if (pThisObject)
                    pThisObject->SetDictionaryLength(pEndDictionary->GetLocation() - nDictionaryOffset);
                nRet = i;
                break;
            }
        }

        auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
        if (pName)
        {
            if (!aNumbers.empty())
            {
                // A new name follows pending numbers: the last number was the
                // value of the previous key.
                PDFNumberElement* pNumber = aNumbers.back();
                rDictionary[aName] = pNumber;
                if (pThisDictionary)
                {
                    pThisDictionary->SetKeyOffset(aName, nNameOffset);
                    pThisDictionary->SetKeyValueLength(aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
                }
                aName.clear();
                aNumbers.clear();
            }

            if (aName.isEmpty())
            {
                // Remember key.
                aName = pName->GetValue();
                nNameOffset = pName->GetLocation();
            }
            else
            {
                // A key is already pending, so this name is a value.
                if (pArray)
                    pArray->PushBack(pName);
                else
                {
                    // Name-name key-value.
                    rDictionary[aName] = pName;
                    if (pThisDictionary)
                    {
                        pThisDictionary->SetKeyOffset(aName, nNameOffset);
                        pThisDictionary->SetKeyValueLength(aName, pName->GetLocation() + pName->GetLength() - nNameOffset);
                    }
                    aName.clear();
                }
            }
            continue;
        }

        auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
        if (pArr)
        {
            pArray = pArr;
            continue;
        }

        auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
        if (pArray && pEndArr)
        {
            if (!aNumbers.empty())
            {
                // Numbers still pending at ']' are items of the array.
                for (auto& pNumber : aNumbers)
                    pArray->PushBack(pNumber);
                aNumbers.clear();
            }
            rDictionary[aName] = pArray;
            if (pThisDictionary)
            {
                pThisDictionary->SetKeyOffset(aName, nNameOffset);
                // Include the ending ']' in the length of the key - (array)value pair length.
                pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
            }
            aName.clear();
            pArray = nullptr;
            continue;
        }

        auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
        if (pReference)
        {
            if (!pArray)
            {
                rDictionary[aName] = pReference;
                if (pThisDictionary)
                    pThisDictionary->SetKeyOffset(aName, nNameOffset);
                aName.clear();
            }
            else
            {
                pArray->PushBack(pReference);
            }
            // The pending numbers were the object and generation number in
            // front of the 'R' keyword, not values of their own.
            aNumbers.clear();
            continue;
        }

        auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
        if (pLiteralString)
        {
            rDictionary[aName] = pLiteralString;
            if (pThisDictionary)
                pThisDictionary->SetKeyOffset(aName, nNameOffset);
            aName.clear();
            continue;
        }

        auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
        if (pBoolean)
        {
            rDictionary[aName] = pBoolean;
            if (pThisDictionary)
                pThisDictionary->SetKeyOffset(aName, nNameOffset);
            aName.clear();
            continue;
        }

        auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
        if (pHexString)
        {
            if (!pArray)
            {
                rDictionary[aName] = pHexString;
                if (pThisDictionary)
                    pThisDictionary->SetKeyOffset(aName, nNameOffset);
                aName.clear();
            }
            else
            {
                pArray->PushBack(pHexString);
            }
            continue;
        }

        // Never read past the end of the object.
        if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
            break;

        // Just remember this, so that in case it's not a reference parameter,
        // we can handle it later.
        auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
        if (pNumber)
            aNumbers.push_back(pNumber);
    }

    // A number left over when the scan ends is the value of the pending key.
    if (!aNumbers.empty())
    {
        rDictionary[aName] = aNumbers.back();
        if (pThisDictionary)
            pThisDictionary->SetKeyOffset(aName, nNameOffset);
        aName.clear();
        aNumbers.clear();
    }

    return nRet;
}
+ PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); + } + + return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey); +} + +PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey) +{ + auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey)); + if (!pKey) + { + SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: " << rDictionaryKey); + return nullptr; + } + + return pKey->LookupObject(); +} + +double PDFObjectElement::GetObjectValue() const +{ + return m_fObjectValue; +} + +void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset) +{ + m_nDictionaryOffset = nDictionaryOffset; +} + +sal_uInt64 PDFObjectElement::GetDictionaryOffset() +{ + if (m_aDictionary.empty()) + PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); + + return m_nDictionaryOffset; +} + +void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset) +{ + m_aDictionaryKeyOffset[rKey] = nOffset; +} + +void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength) +{ + m_aDictionaryKeyValueLength[rKey] = nLength; +} + +sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const +{ + auto it = m_aDictionaryKeyOffset.find(rKey); + if (it == m_aDictionaryKeyOffset.end()) + return 0; + + return it->second; +} + +sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const +{ + auto it = m_aDictionaryKeyValueLength.find(rKey); + if (it == m_aDictionaryKeyValueLength.end()) + return 0; + + return it->second; +} + +const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const +{ + return m_aItems; +} + +void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength) +{ + m_nDictionaryLength = nDictionaryLength; +} + +sal_uInt64 PDFObjectElement::GetDictionaryLength() +{ + if (m_aDictionary.empty()) + PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); + + 
return m_nDictionaryLength; +} + +PDFDictionaryElement* PDFObjectElement::GetDictionary() const +{ + return m_pDictionaryElement; +} + +void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement) +{ + m_pDictionaryElement = pDictionaryElement; +} + +void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) +{ + m_pArrayElement = pArrayElement; +} + +void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement) +{ + m_pStreamElement = pStreamElement; +} + +PDFStreamElement* PDFObjectElement::GetStream() const +{ + return m_pStreamElement; +} + +PDFArrayElement* PDFObjectElement::GetArray() const +{ + return m_pArrayElement; +} + +void PDFObjectElement::ParseStoredObjects() +{ + if (!m_pStreamElement) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream"); + return; + } + + auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type")); + if (!pType || pType->GetValue() != "ObjStm") + { + if (!pType) + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type"); + else + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue()); + return; + } + + auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter")); + if (!pFilter || pFilter->GetValue() != "FlateDecode") + { + if (!pFilter) + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter"); + else + SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue()); + return; + } + + auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First")); + if (!pFirst) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First"); + return; + } + + auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N")); + if (!pN) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N"); + return; + } + size_t nN = pN->GetValue(); + + auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length")); + if (!pLength) + { + SAL_WARN("vcl.filter", 
"PDFObjectElement::ParseStoredObjects: no length"); + return; + } + size_t nLength = pLength->GetValue(); + + // Read and decompress it. + SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer(); + rEditBuffer.Seek(m_pStreamElement->GetOffset()); + std::vector<char> aBuf(nLength); + rEditBuffer.ReadBytes(aBuf.data(), aBuf.size()); + SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ); + SvMemoryStream aStream; + ZCodec aZCodec; + aZCodec.BeginCompression(); + aZCodec.Decompress(aSource, aStream); + if (!aZCodec.EndCompression()) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed"); + return; + } + + aStream.Seek(STREAM_SEEK_TO_END); + nLength = aStream.Tell(); + aStream.Seek(0); + std::vector<size_t> aObjNums; + std::vector<size_t> aOffsets; + std::vector<size_t> aLengths; + // First iterate over and find out the lengths. + for (size_t nObject = 0; nObject < nN; ++nObject) + { + PDFNumberElement aObjNum; + if (!aObjNum.Read(aStream)) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read object number"); + return; + } + aObjNums.push_back(aObjNum.GetValue()); + + PDFDocument::SkipWhitespace(aStream); + + PDFNumberElement aByteOffset; + if (!aByteOffset.Read(aStream)) + { + SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read byte offset"); + return; + } + aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue()); + + if (aOffsets.size() > 1) + aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]); + if (nObject + 1 == nN) + aLengths.push_back(nLength - aOffsets.back()); + + PDFDocument::SkipWhitespace(aStream); + } + + // Now create streams with the proper length and tokenize the data. 
+ for (size_t nObject = 0; nObject < nN; ++nObject) + { + size_t nObjNum = aObjNums[nObject]; + size_t nOffset = aOffsets[nObject]; + size_t nLen = aLengths[nObject]; + + aStream.Seek(nOffset); + m_aStoredElements.push_back(o3tl::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0)); + PDFObjectElement* pStored = m_aStoredElements.back().get(); + + aBuf.clear(); + aBuf.resize(nLen); + aStream.ReadBytes(aBuf.data(), aBuf.size()); + SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ); + + m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(), pStored); + // This is how references know the object is stored inside this object stream. + m_rDoc.SetIDObject(nObjNum, pStored); + + // Store the stream of the object in the object stream for later use. + std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream()); + aStoredStream.Seek(0); + pStreamBuffer->WriteStream(aStoredStream); + pStored->SetStreamBuffer(pStreamBuffer); + } +} + +std::vector< std::unique_ptr<PDFElement> >& PDFObjectElement::GetStoredElements() +{ + return m_aElements; +} + +SvMemoryStream* PDFObjectElement::GetStreamBuffer() const +{ + return m_pStreamBuffer.get(); +} + +void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer) +{ + m_pStreamBuffer = std::move(pStreamBuffer); +} + +PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, int fObjectValue, int fGenerationValue) + : m_rDoc(rDoc), + m_fObjectValue(fObjectValue), + m_fGenerationValue(fGenerationValue) +{ +} + +bool PDFReferenceElement::Read(SvStream& /*rStream*/) +{ + SAL_INFO("vcl.filter", "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R"); + return true; +} + +double PDFReferenceElement::LookupNumber(SvStream& rStream) const +{ + size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue); + if (nOffset == 0) + { + SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #" << 
m_fObjectValue); + return 0; + } + + sal_uInt64 nOrigPos = rStream.Tell(); + comphelper::ScopeGuard g([&]() + { + rStream.Seek(nOrigPos); + }); + + rStream.Seek(nOffset); + { + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aNumber; + bool bRet = aNumber.Read(rStream); + if (!bRet || aNumber.GetValue() != m_fObjectValue) + { + SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching object"); + return 0; + } + } + + { + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aNumber; + bool bRet = aNumber.Read(rStream); + if (!bRet || aNumber.GetValue() != m_fGenerationValue) + { + SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching generation"); + return 0; + } + } + + { + PDFDocument::SkipWhitespace(rStream); + OString aKeyword = PDFDocument::ReadKeyword(rStream); + if (aKeyword != "obj") + { + SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword"); + return 0; + } + } + + PDFDocument::SkipWhitespace(rStream); + PDFNumberElement aNumber; + if (!aNumber.Read(rStream)) + { + SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: failed to read referenced number"); + return 0; + } + + return aNumber.GetValue(); +} + +PDFObjectElement* PDFReferenceElement::LookupObject() +{ + return m_rDoc.LookupObject(m_fObjectValue); +} + +PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber) +{ + auto itIDObjects = m_aIDObjects.find(nObjectNumber); + + if (itIDObjects != m_aIDObjects.end()) + return itIDObjects->second; + + SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber); + return nullptr; +} + +SvMemoryStream& PDFDocument::GetEditBuffer() +{ + return m_aEditBuffer; +} + +int PDFReferenceElement::GetObjectValue() const +{ + return m_fObjectValue; +} + +int PDFReferenceElement::GetGenerationValue() const +{ + return m_fGenerationValue; +} + +bool PDFDictionaryElement::Read(SvStream& rStream) +{ + char ch; 
+ rStream.ReadChar(ch); + if (ch != '<') + { + SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch); + return false; + } + + if (rStream.IsEof()) + { + SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file"); + return false; + } + + rStream.ReadChar(ch); + if (ch != '<') + { + SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch); + return false; + } + + m_nLocation = rStream.Tell(); + + SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'"); + + return true; +} + +PDFEndDictionaryElement::PDFEndDictionaryElement() = default; + +sal_uInt64 PDFEndDictionaryElement::GetLocation() const +{ + return m_nLocation; +} + +bool PDFEndDictionaryElement::Read(SvStream& rStream) +{ + m_nLocation = rStream.Tell(); + char ch; + rStream.ReadChar(ch); + if (ch != '>') + { + SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch); + return false; + } + + if (rStream.IsEof()) + { + SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file"); + return false; + } + + rStream.ReadChar(ch); + if (ch != '>') + { + SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch); + return false; + } + + SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'"); + + return true; +} + +PDFNameElement::PDFNameElement() + : m_nLocation(0), + m_nLength(0) +{ +} + +bool PDFNameElement::Read(SvStream& rStream) +{ + char ch; + rStream.ReadChar(ch); + if (ch != '/') + { + SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch); + return false; + } + m_nLocation = rStream.Tell(); + + if (rStream.IsEof()) + { + SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file"); + return false; + } + + // Read till the first white-space. 
+ OStringBuffer aBuf; + rStream.ReadChar(ch); + while (!rStream.IsEof()) + { + if (isspace(ch) || ch == '/' || ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '(') + { + rStream.SeekRel(-1); + m_aValue = aBuf.makeStringAndClear(); + SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'"); + return true; + } + aBuf.append(ch); + rStream.ReadChar(ch); + } + + return false; +} + +const OString& PDFNameElement::GetValue() const +{ + return m_aValue; +} + +sal_uInt64 PDFNameElement::GetLocation() const +{ + return m_nLocation; +} + +sal_uInt64 PDFNameElement::GetLength() const +{ + return m_nLength; +} + +PDFStreamElement::PDFStreamElement(size_t nLength) + : m_nLength(nLength), + m_nOffset(0) +{ +} + +bool PDFStreamElement::Read(SvStream& rStream) +{ + SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength); + m_nOffset = rStream.Tell(); + std::vector<unsigned char> aBytes(m_nLength); + rStream.ReadBytes(aBytes.data(), aBytes.size()); + m_aMemory.WriteBytes(aBytes.data(), aBytes.size()); + + return rStream.good(); +} + +SvMemoryStream& PDFStreamElement::GetMemory() +{ + return m_aMemory; +} + +sal_uInt64 PDFStreamElement::GetOffset() const +{ + return m_nOffset; +} + +bool PDFEndStreamElement::Read(SvStream& /*rStream*/) +{ + return true; +} + +bool PDFEndObjectElement::Read(SvStream& /*rStream*/) +{ + return true; +} + +PDFArrayElement::PDFArrayElement() = default; + +bool PDFArrayElement::Read(SvStream& rStream) +{ + char ch; + rStream.ReadChar(ch); + if (ch != '[') + { + SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch); + return false; + } + m_nOffset = rStream.Tell(); + + SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['"); + + return true; +} + +void PDFArrayElement::PushBack(PDFElement* pElement) +{ + m_aElements.push_back(pElement); +} + +const std::vector<PDFElement*>& PDFArrayElement::GetElements() +{ + return m_aElements; +} + +PDFEndArrayElement::PDFEndArrayElement() = 
default; + +bool PDFEndArrayElement::Read(SvStream& rStream) +{ + m_nOffset = rStream.Tell(); + char ch; + rStream.ReadChar(ch); + if (ch != ']') + { + SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch); + return false; + } + + SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'"); + + return true; +} + +sal_uInt64 PDFEndArrayElement::GetOffset() const +{ + return m_nOffset; +} + +} // namespace filter +} // namespace vcl + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/xmlsecurity/CppunitTest_xmlsecurity_pdfsigning.mk b/xmlsecurity/CppunitTest_xmlsecurity_pdfsigning.mk index be4828e6ade0..a21a1e37a7d4 100644 --- a/xmlsecurity/CppunitTest_xmlsecurity_pdfsigning.mk +++ b/xmlsecurity/CppunitTest_xmlsecurity_pdfsigning.mk @@ -27,6 +27,7 @@ $(eval $(call gb_CppunitTest_use_libraries,xmlsecurity_pdfsigning, \ unotest \ utl \ xmlsecurity \ + vcl \ $(gb_UWINAPI) \ )) diff --git a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx index 3b18a332b4a0..69916d677a98 100644 --- a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx +++ b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx @@ -123,9 +123,9 @@ std::vector<SignatureInformation> PDFSigningTest::verify(const OUString& rURL, s std::vector<SignatureInformation> aRet; SvFileStream aStream(rURL, StreamMode::READ); - xmlsecurity::pdfio::PDFDocument aVerifyDocument; + vcl::filter::PDFDocument aVerifyDocument; CPPUNIT_ASSERT(aVerifyDocument.Read(aStream)); - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aVerifyDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aVerifyDocument.GetSignatureWidgets(); CPPUNIT_ASSERT_EQUAL(nCount, aSignatures.size()); for (size_t i = 0; i < aSignatures.size(); ++i) { @@ -136,9 +136,9 @@ std::vector<SignatureInformation> PDFSigningTest::verify(const OUString& rURL, s if (!rExpectedSubFilter.isEmpty()) { - xmlsecurity::pdfio::PDFObjectElement* pValue = 
aSignatures[i]->LookupObject("V"); + vcl::filter::PDFObjectElement* pValue = aSignatures[i]->LookupObject("V"); CPPUNIT_ASSERT(pValue); - auto pSubFilter = dynamic_cast<xmlsecurity::pdfio::PDFNameElement*>(pValue->Lookup("SubFilter")); + auto pSubFilter = dynamic_cast<vcl::filter::PDFNameElement*>(pValue->Lookup("SubFilter")); CPPUNIT_ASSERT(pSubFilter); CPPUNIT_ASSERT_EQUAL(rExpectedSubFilter, pSubFilter->GetValue()); } @@ -152,11 +152,11 @@ bool PDFSigningTest::sign(const OUString& rInURL, const OUString& rOutURL, size_ // Make sure that input has nOriginalSignatureCount signatures. uno::Reference<xml::crypto::XSEInitializer> xSEInitializer = xml::crypto::SEInitializer::create(mxComponentContext); uno::Reference<xml::crypto::XXMLSecurityContext> xSecurityContext = xSEInitializer->createSecurityContext(OUString()); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; { SvFileStream aStream(rInURL, StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); CPPUNIT_ASSERT_EQUAL(nOriginalSignatureCount, aSignatures.size()); } @@ -224,13 +224,13 @@ void PDFSigningTest::testPDFRemove() // Make sure that good.pdf has 1 valid signature. 
uno::Reference<xml::crypto::XSEInitializer> xSEInitializer = xml::crypto::SEInitializer::create(mxComponentContext); uno::Reference<xml::crypto::XXMLSecurityContext> xSecurityContext = xSEInitializer->createSecurityContext(OUString()); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; { OUString aSourceDir = m_directories.getURLFromSrc(DATA_DIRECTORY); OUString aInURL = aSourceDir + "good.pdf"; SvFileStream aStream(aInURL, StreamMode::READ); CPPUNIT_ASSERT(aDocument.Read(aStream)); - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aSignatures.size()); SignatureInformation aInfo(0); CPPUNIT_ASSERT(xmlsecurity::pdfio::ValidateSignature(aStream, aSignatures[0], aInfo, /*bLast=*/true)); @@ -400,7 +400,7 @@ void PDFSigningTest::testTokenize() for (const auto& rName : aNames) { SvFileStream aStream(m_directories.getURLFromSrc(DATA_DIRECTORY) + rName, StreamMode::READ); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; // Just make sure the tokenizer finishes without an error, don't look at the signature. 
CPPUNIT_ASSERT(aDocument.Read(aStream)); } diff --git a/xmlsecurity/source/helper/pdfsignaturehelper.cxx b/xmlsecurity/source/helper/pdfsignaturehelper.cxx index 7b0616155722..6c9d07125011 100644 --- a/xmlsecurity/source/helper/pdfsignaturehelper.cxx +++ b/xmlsecurity/source/helper/pdfsignaturehelper.cxx @@ -37,14 +37,14 @@ bool PDFSignatureHelper::ReadAndVerifySignature(const uno::Reference<io::XInputS } std::unique_ptr<SvStream> pStream(utl::UcbStreamHelper::CreateStream(xInputStream, true)); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; if (!aDocument.Read(*pStream)) { SAL_WARN("xmlsecurity.helper", "failed to read the document"); return false; } - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); if (aSignatures.empty()) return true; @@ -119,7 +119,7 @@ void PDFSignatureHelper::SetDescription(const OUString& rDescription) bool PDFSignatureHelper::Sign(const uno::Reference<io::XInputStream>& xInputStream, bool bAdES) { std::unique_ptr<SvStream> pStream(utl::UcbStreamHelper::CreateStream(xInputStream, true)); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; if (!aDocument.Read(*pStream)) { SAL_WARN("xmlsecurity.helper", "failed to read the document"); @@ -146,7 +146,7 @@ bool PDFSignatureHelper::Sign(const uno::Reference<io::XInputStream>& xInputStre bool PDFSignatureHelper::RemoveSignature(const uno::Reference<io::XInputStream>& xInputStream, sal_uInt16 nPosition) { std::unique_ptr<SvStream> pStream(utl::UcbStreamHelper::CreateStream(xInputStream, true)); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; if (!aDocument.Read(*pStream)) { SAL_WARN("xmlsecurity.helper", "failed to read the document"); diff --git a/xmlsecurity/source/pdfio/pdfdocument.cxx b/xmlsecurity/source/pdfio/pdfdocument.cxx index 
209b6df3dbe2..78d41c94e097 100644 --- a/xmlsecurity/source/pdfio/pdfdocument.cxx +++ b/xmlsecurity/source/pdfio/pdfdocument.cxx @@ -55,1882 +55,6 @@ namespace xmlsecurity namespace pdfio { -const int MAX_SIGNATURE_CONTENT_LENGTH = 50000; - -class PDFTrailerElement; - -/// A one-liner comment. -class PDFCommentElement : public PDFElement -{ - PDFDocument& m_rDoc; - OString m_aComment; - -public: - explicit PDFCommentElement(PDFDocument& rDoc); - bool Read(SvStream& rStream) override; -}; - -/// Numbering object: an integer or a real. -class PDFNumberElement : public PDFElement -{ - /// Input file start location. - sal_uInt64 m_nOffset = 0; - /// Input file token length. - sal_uInt64 m_nLength = 0; - double m_fValue = 0; - -public: - PDFNumberElement(); - bool Read(SvStream& rStream) override; - double GetValue() const; - sal_uInt64 GetLocation() const; - sal_uInt64 GetLength() const; -}; - -class PDFReferenceElement; - -/// End of a dictionary: '>>'. -class PDFEndDictionaryElement : public PDFElement -{ - /// Offset before the '>>' token. - sal_uInt64 m_nLocation = 0; -public: - PDFEndDictionaryElement(); - bool Read(SvStream& rStream) override; - sal_uInt64 GetLocation() const; -}; - -/// End of a stream: 'endstream' keyword. -class PDFEndStreamElement : public PDFElement -{ -public: - bool Read(SvStream& rStream) override; -}; - -/// End of a object: 'endobj' keyword. -class PDFEndObjectElement : public PDFElement -{ -public: - bool Read(SvStream& rStream) override; -}; - -/// End of an array: ']'. -class PDFEndArrayElement : public PDFElement -{ - /// Location before the ']' token. - sal_uInt64 m_nOffset = 0; -public: - PDFEndArrayElement(); - bool Read(SvStream& rStream) override; - sal_uInt64 GetOffset() const; -}; - -/// Boolean object: a 'true' or a 'false'. -class PDFBooleanElement : public PDFElement -{ -public: - explicit PDFBooleanElement(bool bValue); - bool Read(SvStream& rStream) override; -}; - -/// Null object: the 'null' singleton. 
-class PDFNullElement : public PDFElement -{ -public: - bool Read(SvStream& rStream) override; -}; - -/// Hex string: in <AABB> form. -class PDFHexStringElement : public PDFElement -{ - OString m_aValue; -public: - bool Read(SvStream& rStream) override; - const OString& GetValue() const; -}; - -/// Literal string: in (asdf) form. -class PDFLiteralStringElement : public PDFElement -{ - OString m_aValue; -public: - bool Read(SvStream& rStream) override; - const OString& GetValue() const; -}; - -/// The trailer singleton is at the end of the doc. -class PDFTrailerElement : public PDFElement -{ - PDFDocument& m_rDoc; - std::map<OString, PDFElement*> m_aDictionary; - -public: - explicit PDFTrailerElement(PDFDocument& rDoc); - bool Read(SvStream& rStream) override; - PDFElement* Lookup(const OString& rDictionaryKey); -}; - -XRefEntry::XRefEntry() - : m_eType(XRefEntryType::NOT_COMPRESSED), - m_nOffset(0), - m_nGenerationNumber(0), - m_bDirty(false) -{ -} - -PDFDocument::PDFDocument() - : m_pTrailer(nullptr), - m_pXRefStream(nullptr) -{ -} - -bool PDFDocument::RemoveSignature(size_t nPosition) -{ - std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets(); - if (nPosition >= aSignatures.size()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::RemoveSignature: invalid nPosition"); - return false; - } - - if (aSignatures.size() != m_aEOFs.size() - 1) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures and incremental updates"); - return false; - } - - // The EOF offset is the end of the original file, without the signature at - // nPosition. - m_aEditBuffer.Seek(m_aEOFs[nPosition]); - // Drop all bytes after the current position. 
- m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1); - - return m_aEditBuffer.good(); -} - -sal_uInt32 PDFDocument::GetNextSignature() -{ - sal_uInt32 nRet = 0; - for (const auto& pSignature : GetSignatureWidgets()) - { - auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T")); - if (!pT) - continue; - - const OString& rValue = pT->GetValue(); - const OString aPrefix = "Signature"; - if (!rValue.startsWith(aPrefix)) - continue; - - nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32()); - } - - return nRet + 1; -} - -sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset) -{ - // Write signature object. - sal_Int32 nSignatureId = m_aXRef.size(); - XRefEntry aSignatureEntry; - aSignatureEntry.m_nOffset = m_aEditBuffer.Tell(); - aSignatureEntry.m_bDirty = true; - m_aXRef[nSignatureId] = aSignatureEntry; - OStringBuffer aSigBuffer; - aSigBuffer.append(nSignatureId); - aSigBuffer.append(" 0 obj\n"); - aSigBuffer.append("<</Contents <"); - rContentOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength(); - // Reserve space for the PKCS#7 object. - OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH); - comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0'); - aSigBuffer.append(aContentFiller.makeStringAndClear()); - aSigBuffer.append(">\n/Type/Sig/SubFilter"); - if (bAdES) - aSigBuffer.append("/ETSI.CAdES.detached"); - else - aSigBuffer.append("/adbe.pkcs7.detached"); - - // Time of signing. - aSigBuffer.append(" /M ("); - aSigBuffer.append(vcl::PDFWriter::GetDateTime()); - aSigBuffer.append(")"); - - // Byte range: we can write offset1-length1 and offset2 right now, will - // write length2 later. - aSigBuffer.append(" /ByteRange [ 0 "); - // -1 and +1 is the leading "<" and the trailing ">" around the hex string. 
- aSigBuffer.append(rContentOffset - 1); - aSigBuffer.append(" "); - aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); - aSigBuffer.append(" "); - rLastByteRangeOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength(); - // We don't know how many bytes we need for the last ByteRange value, this - // should be enough. - OStringBuffer aByteRangeFiller; - comphelper::string::padToLength(aByteRangeFiller, 100, ' '); - aSigBuffer.append(aByteRangeFiller.makeStringAndClear()); - // Finish the Sig obj. - aSigBuffer.append(" /Filter/Adobe.PPKMS"); - - if (!rDescription.isEmpty()) - { - aSigBuffer.append("/Reason<"); - vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer); - aSigBuffer.append(">"); - } - - aSigBuffer.append(" >>\nendobj\n\n"); - m_aEditBuffer.WriteOString(aSigBuffer.toString()); - - return nSignatureId; -} - -sal_Int32 PDFDocument::WriteAppearanceObject() -{ - // Write appearance object. - sal_Int32 nAppearanceId = m_aXRef.size(); - XRefEntry aAppearanceEntry; - aAppearanceEntry.m_nOffset = m_aEditBuffer.Tell(); - aAppearanceEntry.m_bDirty = true; - m_aXRef[nAppearanceId] = aAppearanceEntry; - m_aEditBuffer.WriteUInt32AsString(nAppearanceId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n"); - m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n"); - m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n"); - m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n"); - - return nAppearanceId; -} - -sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId) -{ - // Decide what identifier to use for the new signature. - sal_uInt32 nNextSignature = GetNextSignature(); - - // Write the Annot object, references nSignatureId and nAppearanceId. 
- sal_Int32 nAnnotId = m_aXRef.size(); - XRefEntry aAnnotEntry; - aAnnotEntry.m_nOffset = m_aEditBuffer.Tell(); - aAnnotEntry.m_bDirty = true; - m_aXRef[nAnnotId] = aAnnotEntry; - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n"); - m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n"); - m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n"); - m_aEditBuffer.WriteCharPtr("/FT/Sig\n"); - m_aEditBuffer.WriteCharPtr("/P "); - m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" 0 R\n"); - m_aEditBuffer.WriteCharPtr("/T(Signature"); - m_aEditBuffer.WriteUInt32AsString(nNextSignature); - m_aEditBuffer.WriteCharPtr(")\n"); - m_aEditBuffer.WriteCharPtr("/V "); - m_aEditBuffer.WriteUInt32AsString(nSignatureId); - m_aEditBuffer.WriteCharPtr(" 0 R\n"); - m_aEditBuffer.WriteCharPtr("/DV "); - m_aEditBuffer.WriteUInt32AsString(nSignatureId); - m_aEditBuffer.WriteCharPtr(" 0 R\n"); - m_aEditBuffer.WriteCharPtr("/AP<<\n/N "); - m_aEditBuffer.WriteUInt32AsString(nAppearanceId); - m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n"); - m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n"); - - return nAnnotId; -} - -bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId) -{ - PDFElement* pAnnots = rFirstPage.Lookup("Annots"); - auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots); - if (pAnnotsReference) - { - // Write the updated Annots key of the Page object. 
- PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject(); - if (!pAnnotsObject) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: invalid Annots reference"); - return false; - } - - sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue(); - m_aXRef[nAnnotsId].m_eType = XRefEntryType::NOT_COMPRESSED; - m_aXRef[nAnnotsId].m_nOffset = m_aEditBuffer.Tell(); - m_aXRef[nAnnotsId].m_nGenerationNumber = 0; - m_aXRef[nAnnotsId].m_bDirty = true; - m_aEditBuffer.WriteUInt32AsString(nAnnotsId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n["); - - // Write existing references. - PDFArrayElement* pArray = pAnnotsObject->GetArray(); - if (!pArray) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: Page Annots is a reference to a non-array"); - return false; - } - - for (size_t i = 0; i < pArray->GetElements().size(); ++i) - { - auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]); - if (!pReference) - continue; - - if (i) - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" 0 R"); - } - // Write our reference. - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R"); - - m_aEditBuffer.WriteCharPtr("]\nendobj\n\n"); - } - else - { - // Write the updated first page object, references nAnnotId. - sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue(); - if (nFirstPageId >= m_aXRef.size()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: invalid first page obj id"); - return false; - } - m_aXRef[nFirstPageId].m_nOffset = m_aEditBuffer.Tell(); - m_aXRef[nFirstPageId].m_bDirty = true; - m_aEditBuffer.WriteUInt32AsString(nFirstPageId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n"); - m_aEditBuffer.WriteCharPtr("<<"); - auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots); - if (!pAnnotsArray) - { - // No Annots key, just write the key with a single reference. 
- m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), rFirstPage.GetDictionaryLength()); - m_aEditBuffer.WriteCharPtr("/Annots["); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R]"); - } - else - { - // Annots key is already there, insert our reference at the end. - PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary(); - - // Offset right before the end of the Annots array. - sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots") + pDictionary->GetKeyValueLength("Annots") - 1; - // Length of beginning of the dictionary -> Annots end. - sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset(); - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), nAnnotsBeforeEndLength); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R"); - // Length of Annots end -> end of the dictionary. 
- sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset() + rFirstPage.GetDictionaryLength() - nAnnotsEndOffset; - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nAnnotsEndOffset, nAnnotsAfterEndLength); - } - m_aEditBuffer.WriteCharPtr(">>"); - m_aEditBuffer.WriteCharPtr("\nendobj\n\n"); - } - - return true; -} - -bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot) -{ - if (m_pXRefStream) - pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root")); - else - { - if (!m_pTrailer) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: found no trailer"); - return false; - } - pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root")); - } - if (!pRoot) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: trailer has no root reference"); - return false; - } - PDFObjectElement* pCatalog = pRoot->LookupObject(); - if (!pCatalog) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: invalid catalog reference"); - return false; - } - sal_uInt32 nCatalogId = pCatalog->GetObjectValue(); - if (nCatalogId >= m_aXRef.size()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: invalid catalog obj id"); - return false; - } - PDFElement* pAcroForm = pCatalog->Lookup("AcroForm"); - auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm); - if (pAcroFormReference) - { - // Write the updated AcroForm key of the Catalog object. 
- PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject(); - if (!pAcroFormObject) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: invalid AcroForm reference"); - return false; - } - - sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue(); - m_aXRef[nAcroFormId].m_eType = XRefEntryType::NOT_COMPRESSED; - m_aXRef[nAcroFormId].m_nOffset = m_aEditBuffer.Tell(); - m_aXRef[nAcroFormId].m_nGenerationNumber = 0; - m_aXRef[nAcroFormId].m_bDirty = true; - m_aEditBuffer.WriteUInt32AsString(nAcroFormId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n"); - - // If this is nullptr, then the AcroForm object is not in an object stream. - SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer(); - - if (!pAcroFormObject->Lookup("Fields")) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: AcroForm object without required Fields key"); - return false; - } - - PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary(); - if (!pAcroFormDictionary) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: AcroForm object has no dictionary"); - return false; - } - - // Offset right before the end of the Fields array. - sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - strlen("]"); - // Length of beginning of the object dictionary -> Fields end. - sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset; - if (pStreamBuffer) - m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength); - else - { - nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset(); - m_aEditBuffer.WriteCharPtr("<<"); - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pAcroFormObject->GetDictionaryOffset(), nFieldsBeforeEndLength); - } - - // Append our reference at the end of the Fields array. 
- m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R"); - - // Length of Fields end -> end of the object dictionary. - if (pStreamBuffer) - { - sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset; - m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); - } - else - { - sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset() + pAcroFormObject->GetDictionaryLength() - nFieldsEndOffset; - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); - m_aEditBuffer.WriteCharPtr(">>"); - } - - m_aEditBuffer.WriteCharPtr("\nendobj\n\n"); - } - else - { - // Write the updated Catalog object, references nAnnotId. - auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm); - m_aXRef[nCatalogId].m_nOffset = m_aEditBuffer.Tell(); - m_aXRef[nCatalogId].m_bDirty = true; - m_aEditBuffer.WriteUInt32AsString(nCatalogId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n"); - m_aEditBuffer.WriteCharPtr("<<"); - if (!pAcroFormDictionary) - { - // No AcroForm key, assume no signatures. - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), pCatalog->GetDictionaryLength()); - m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n"); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n"); - } - else - { - // AcroForm key is already there, insert our reference at the Fields end. 
- auto it = pAcroFormDictionary->GetItems().find("Fields"); - if (it == pAcroFormDictionary->GetItems().end()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: AcroForm without required Fields key"); - return false; - } - - auto pFields = dynamic_cast<PDFArrayElement*>(it->second); - if (!pFields) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: AcroForm Fields is not an array"); - return false; - } - - // Offset right before the end of the Fields array. - sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - 1; - // Length of beginning of the Catalog dictionary -> Fields end. - sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset(); - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), nFieldsBeforeEndLength); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(nAnnotId); - m_aEditBuffer.WriteCharPtr(" 0 R"); - // Length of Fields end -> end of the Catalog dictionary. - sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset() + pCatalog->GetDictionaryLength() - nFieldsEndOffset; - m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength); - } - m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n"); - } - - return true; -} - -void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot) -{ - if (m_pXRefStream) - { - // Write the xref stream. - // This is a bit meta: the xref stream stores its own offset. - sal_Int32 nXRefStreamId = m_aXRef.size(); - XRefEntry aXRefStreamEntry; - aXRefStreamEntry.m_nOffset = nXRefOffset; - aXRefStreamEntry.m_bDirty = true; - m_aXRef[nXRefStreamId] = aXRefStreamEntry; - - // Write stream data. - SvMemoryStream aXRefStream; - const size_t nOffsetLen = 3; - // 3 additional bytes: predictor, the first and the third field. 
- const size_t nLineLength = nOffsetLen + 3; - // This is the line as it appears before tweaking according to the predictor. - std::vector<unsigned char> aOrigLine(nLineLength); - // This is the previous line. - std::vector<unsigned char> aPrevLine(nLineLength); - // This is the line as written to the stream. - std::vector<unsigned char> aFilteredLine(nLineLength); - for (const auto& rXRef : m_aXRef) - { - const XRefEntry& rEntry = rXRef.second; - - if (!rEntry.m_bDirty) - continue; - - // Predictor. - size_t nPos = 0; - // PNG prediction: up (on all rows). - aOrigLine[nPos++] = 2; - - // First field. - unsigned char nType = 0; - switch (rEntry.m_eType) - { - case XRefEntryType::FREE: - nType = 0; - break; - case XRefEntryType::NOT_COMPRESSED: - nType = 1; - break; - case XRefEntryType::COMPRESSED: - nType = 2; - break; - } - aOrigLine[nPos++] = nType; - - // Second field. - for (size_t i = 0; i < nOffsetLen; ++i) - { - size_t nByte = nOffsetLen - i - 1; - // Fields requiring more than one byte are stored with the - // high-order byte first. - unsigned char nCh = (rEntry.m_nOffset & (0xff << (nByte * 8))) >> (nByte * 8); - aOrigLine[nPos++] = nCh; - } - - // Third field. - aOrigLine[nPos++] = 0; - - // Now apply the predictor. - aFilteredLine[0] = aOrigLine[0]; - for (size_t i = 1; i < nLineLength; ++i) - { - // Count the delta vs the previous line. - aFilteredLine[i] = aOrigLine[i] - aPrevLine[i]; - // Remember the new reference. - aPrevLine[i] = aOrigLine[i]; - } - - aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size()); - } - - m_aEditBuffer.WriteUInt32AsString(nXRefStreamId); - m_aEditBuffer.WriteCharPtr(" 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode"); - - // ID. 
- auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID")); - if (pID) - { - const std::vector<PDFElement*>& rElements = pID->GetElements(); - m_aEditBuffer.WriteCharPtr("/ID [ <"); - for (size_t i = 0; i < rElements.size(); ++i) - { - auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]); - if (!pIDString) - continue; - - m_aEditBuffer.WriteOString(pIDString->GetValue()); - if ((i + 1) < rElements.size()) - m_aEditBuffer.WriteCharPtr("> <"); - } - m_aEditBuffer.WriteCharPtr("> ] "); - } - - // Index. - m_aEditBuffer.WriteCharPtr("/Index [ "); - for (const auto& rXRef : m_aXRef) - { - if (!rXRef.second.m_bDirty) - continue; - - m_aEditBuffer.WriteUInt32AsString(rXRef.first); - m_aEditBuffer.WriteCharPtr(" 1 "); - } - m_aEditBuffer.WriteCharPtr("] "); - - // Info. - auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info")); - if (pInfo) - { - m_aEditBuffer.WriteCharPtr("/Info "); - m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue()); - m_aEditBuffer.WriteCharPtr(" R "); - } - - // Length. - m_aEditBuffer.WriteCharPtr("/Length "); - { - ZCodec aZCodec; - aZCodec.BeginCompression(); - aXRefStream.Seek(0); - SvMemoryStream aStream; - aZCodec.Compress(aXRefStream, aStream); - aZCodec.EndCompression(); - aXRefStream.Seek(0); - aXRefStream.SetStreamSize(0); - aStream.Seek(0); - aXRefStream.WriteStream(aStream); - } - m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize()); - - if (!m_aStartXRefs.empty()) - { - // Write location of the previous cross-reference section. - m_aEditBuffer.WriteCharPtr("/Prev "); - m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back()); - } - - // Root. 
- m_aEditBuffer.WriteCharPtr("/Root "); - m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue()); - m_aEditBuffer.WriteCharPtr(" R "); - - // Size. - m_aEditBuffer.WriteCharPtr("/Size "); - m_aEditBuffer.WriteUInt32AsString(m_aXRef.size()); - - m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n"); - aXRefStream.Seek(0); - m_aEditBuffer.WriteStream(aXRefStream); - m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n"); - } - else - { - // Write the xref table. - m_aEditBuffer.WriteCharPtr("xref\n"); - for (const auto& rXRef : m_aXRef) - { - size_t nObject = rXRef.first; - size_t nOffset = rXRef.second.m_nOffset; - if (!rXRef.second.m_bDirty) - continue; - - m_aEditBuffer.WriteUInt32AsString(nObject); - m_aEditBuffer.WriteCharPtr(" 1\n"); - OStringBuffer aBuffer; - aBuffer.append(static_cast<sal_Int32>(nOffset)); - while (aBuffer.getLength() < 10) - aBuffer.insert(0, "0"); - if (nObject == 0) - aBuffer.append(" 65535 f \n"); - else - aBuffer.append(" 00000 n \n"); - m_aEditBuffer.WriteOString(aBuffer.toString()); - } - - // Write the trailer. 
- m_aEditBuffer.WriteCharPtr("trailer\n<</Size "); - m_aEditBuffer.WriteUInt32AsString(m_aXRef.size()); - m_aEditBuffer.WriteCharPtr("/Root "); - m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue()); - m_aEditBuffer.WriteCharPtr(" R\n"); - auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info")); - if (pInfo) - { - m_aEditBuffer.WriteCharPtr("/Info "); - m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue()); - m_aEditBuffer.WriteCharPtr(" "); - m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue()); - m_aEditBuffer.WriteCharPtr(" R\n"); - } - auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID")); - if (pID) - { - const std::vector<PDFElement*>& rElements = pID->GetElements(); - m_aEditBuffer.WriteCharPtr("/ID [ <"); - for (size_t i = 0; i < rElements.size(); ++i) - { - auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]); - if (!pIDString) - continue; - - m_aEditBuffer.WriteOString(pIDString->GetValue()); - if ((i + 1) < rElements.size()) - m_aEditBuffer.WriteCharPtr(">\n<"); - } - m_aEditBuffer.WriteCharPtr("> ]\n"); - } - - if (!m_aStartXRefs.empty()) - { - // Write location of the previous cross-reference section. 
- m_aEditBuffer.WriteCharPtr("/Prev "); - m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back()); - } - - m_aEditBuffer.WriteCharPtr(">>\n"); - } -} - -bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES) -{ - m_aEditBuffer.Seek(STREAM_SEEK_TO_END); - m_aEditBuffer.WriteCharPtr("\n"); - - sal_uInt64 nSignatureLastByteRangeOffset = 0; - sal_Int64 nSignatureContentOffset = 0; - sal_Int32 nSignatureId = WriteSignatureObject(rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset); - - sal_Int32 nAppearanceId = WriteAppearanceObject(); - - std::vector<PDFObjectElement*> aPages = GetPages(); - if (aPages.empty() || !aPages[0]) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: found no pages"); - return false; - } - - PDFObjectElement& rFirstPage = *aPages[0]; - sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId); - - if (!WritePageObject(rFirstPage, nAnnotId)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: failed to write the updated Page object"); - return false; - } - - PDFReferenceElement* pRoot = nullptr; - if (!WriteCatalogObject(nAnnotId, pRoot)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: failed to write the updated Catalog object"); - return false; - } - - sal_uInt64 nXRefOffset = m_aEditBuffer.Tell(); - WriteXRef(nXRefOffset, pRoot); - - // Write startxref. - m_aEditBuffer.WriteCharPtr("startxref\n"); - m_aEditBuffer.WriteUInt32AsString(nXRefOffset); - m_aEditBuffer.WriteCharPtr("\n%%EOF\n"); - - // Finalize the signature, now that we know the total file size. - // Calculate the length of the last byte range. - sal_uInt64 nFileEnd = m_aEditBuffer.Tell(); - sal_Int64 nLastByteRangeLength = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); - // Write the length to the buffer. 
- m_aEditBuffer.Seek(nSignatureLastByteRangeOffset); - OStringBuffer aByteRangeBuffer; - aByteRangeBuffer.append(nLastByteRangeLength); - aByteRangeBuffer.append(" ]"); - m_aEditBuffer.WriteOString(aByteRangeBuffer.toString()); - - // Create the PKCS#7 object. - css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded(); - if (!aDerEncoded.hasElements()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: empty certificate"); - return false; - } - - m_aEditBuffer.Seek(0); - sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1; - std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]); - m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1); - - m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1); - sal_uInt64 nBufferSize2 = nLastByteRangeLength; - std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]); - m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2); - - OStringBuffer aCMSHexBuffer; - vcl::PDFWriter::PDFSignContext aSignContext(aCMSHexBuffer); - aSignContext.m_pDerEncoded = aDerEncoded.getArray(); - aSignContext.m_nDerEncoded = aDerEncoded.getLength(); - aSignContext.m_pByteRange1 = aBuffer1.get(); - aSignContext.m_nByteRange1 = nBufferSize1; - aSignContext.m_pByteRange2 = aBuffer2.get(); - aSignContext.m_nByteRange2 = nBufferSize2; - if (!vcl::PDFWriter::Sign(aSignContext)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Sign: PDFWriter::Sign() failed"); - return false; - } - - assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH); - - m_aEditBuffer.Seek(nSignatureContentOffset); - m_aEditBuffer.WriteOString(aCMSHexBuffer.toString()); - - return true; -} - -bool PDFDocument::Write(SvStream& rStream) -{ - m_aEditBuffer.Seek(0); - rStream.WriteStream(m_aEditBuffer); - return rStream.good(); -} - -bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObjectElement) -{ - // Last seen object token. 
- PDFObjectElement* pObject = pObjectElement; - PDFNameElement* pObjectKey = nullptr; - PDFObjectElement* pObjectStream = nullptr; - bool bInXRef = false; - // The next number will be an xref offset. - bool bInStartXRef = false; - // Dictionary depth, so we know when we're outside any dictionaries. - int nDictionaryDepth = 0; - // Last seen array token that's outside any dictionaries. - PDFArrayElement* pArray = nullptr; - while (true) - { - char ch; - rStream.ReadChar(ch); - if (rStream.IsEof()) - break; - - switch (ch) - { - case '%': - { - auto pComment = new PDFCommentElement(*this); - rElements.push_back(std::unique_ptr<PDFElement>(pComment)); - rStream.SeekRel(-1); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFCommentElement::Read() failed"); - return false; - } - if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty() && m_aEOFs.back() == rStream.Tell()) - { - // Found EOF and partial parsing requested, we're done. - return true; - } - break; - } - case '<': - { - // Dictionary or hex string. 
- rStream.ReadChar(ch); - rStream.SeekRel(-2); - if (ch == '<') - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement())); - ++nDictionaryDepth; - } - else - rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement)); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed"); - return false; - } - break; - } - case '>': - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement())); - --nDictionaryDepth; - rStream.SeekRel(-1); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed"); - return false; - } - break; - } - case '[': - { - auto pArr = new PDFArrayElement(); - rElements.push_back(std::unique_ptr<PDFElement>(pArr)); - if (nDictionaryDepth == 0) - { - // The array is attached directly, inform the object. - pArray = pArr; - if (pObject) - pObject->SetArray(pArray); - } - rStream.SeekRel(-1); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFArrayElement::Read() failed"); - return false; - } - break; - } - case ']': - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement())); - pArray = nullptr; - rStream.SeekRel(-1); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed"); - return false; - } - break; - } - case '/': - { - auto pNameElement = new PDFNameElement(); - rElements.push_back(std::unique_ptr<PDFElement>(pNameElement)); - rStream.SeekRel(-1); - if (!pNameElement->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFNameElement::Read() failed"); - return false; - } - if (pObject && pObjectKey && pObjectKey->GetValue() == "Type" && pNameElement->GetValue() == "ObjStm") - pObjectStream = pObject; - else - pObjectKey = pNameElement; - break; - } - case 
'(': - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement)); - rStream.SeekRel(-1); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed"); - return false; - } - break; - } - default: - { - if (isdigit(ch) || ch == '-') - { - // Numbering object: an integer or a real. - auto pNumberElement = new PDFNumberElement(); - rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement)); - rStream.SeekRel(-1); - if (!pNumberElement->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFNumberElement::Read() failed"); - return false; - } - if (bInStartXRef) - { - bInStartXRef = false; - m_aStartXRefs.push_back(pNumberElement->GetValue()); - - auto it = m_aOffsetObjects.find(pNumberElement->GetValue()); - if (it != m_aOffsetObjects.end()) - m_pXRefStream = it->second; - } - } - else if (isalpha(ch)) - { - // Possible keyword, like "obj". - rStream.SeekRel(-1); - OString aKeyword = ReadKeyword(rStream); - - bool bObj = aKeyword == "obj"; - if (bObj || aKeyword == "R") - { - size_t nElements = rElements.size(); - if (nElements < 2) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: expected at least two tokens before 'obj' or 'R' keyword"); - return false; - } - - auto pObjectNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get()); - auto pGenerationNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get()); - if (!pObjectNumber || !pGenerationNumber) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: missing object or generation number before 'obj' or 'R' keyword"); - return false; - } - - if (bObj) - { - pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue()); - rElements.push_back(std::unique_ptr<PDFElement>(pObject)); - m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject; - m_aIDObjects[pObjectNumber->GetValue()] = pObject; - } - else - { - 
rElements.push_back(std::unique_ptr<PDFElement>(new PDFReferenceElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue()))); - if (pArray) - // Reference is part of a direct (non-dictionary) array, inform the array. - pArray->PushBack(rElements.back().get()); - } - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFElement::Read() failed"); - return false; - } - } - else if (aKeyword == "stream") - { - // Look up the length of the stream from the parent object's dictionary. - size_t nLength = 0; - for (size_t nElement = 0; nElement < rElements.size(); ++nElement) - { - // Iterate in reverse order. - size_t nIndex = rElements.size() - nElement - 1; - PDFElement* pElement = rElements[nIndex].get(); - auto pObj = dynamic_cast<PDFObjectElement*>(pElement); - if (!pObj) - continue; - - PDFElement* pLookup = pObj->Lookup("Length"); - auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup); - if (pReference) - { - // Length is provided as a reference. - nLength = pReference->LookupNumber(rStream); - break; - } - - auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup); - if (pNumber) - { - // Length is provided directly. 
- nLength = pNumber->GetValue(); - break; - } - - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: found no Length key for stream keyword"); - return false; - } - - PDFDocument::SkipLineBreaks(rStream); - auto pStreamElement = new PDFStreamElement(nLength); - if (pObject) - pObject->SetStream(pStreamElement); - rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement)); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFStreamElement::Read() failed"); - return false; - } - } - else if (aKeyword == "endstream") - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement)); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed"); - return false; - } - } - else if (aKeyword == "endobj") - { - rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement)); - if (!rElements.back()->Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed"); - return false; - } - if (eMode == TokenizeMode::END_OF_OBJECT) - { - // Found endobj and only object parsing was requested, we're done. - return true; - } - - if (pObjectStream) - { - // We're at the end of an object stream, parse the stored objects. - pObjectStream->ParseStoredObjects(); - pObjectStream = nullptr; - pObjectKey = nullptr; - } - } - else if (aKeyword == "true" || aKeyword == "false") - rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean()))); - else if (aKeyword == "null") - rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement)); - else if (aKeyword == "xref") - // Allow 'f' and 'n' keywords. 
- bInXRef = true; - else if (bInXRef && (aKeyword == "f" || aKeyword == "n")) - { - } - else if (aKeyword == "trailer") - { - auto pTrailer = new PDFTrailerElement(*this); - // When reading till the first EOF token only, remember - // just the first trailer token. - if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer) - m_pTrailer = pTrailer; - rElements.push_back(std::unique_ptr<PDFElement>(pTrailer)); - } - else if (aKeyword == "startxref") - { - bInStartXRef = true; - } - else - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: unexpected '" << aKeyword << "' keyword at byte position " << rStream.Tell()); - return false; - } - } - else - { - if (!isspace(ch)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: unexpected character: " << ch << " at byte position " << rStream.Tell()); - return false; - } - } - break; - } - } - } - - return true; -} - -void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject) -{ - m_aIDObjects[nID] = pObject; -} - -bool PDFDocument::Read(SvStream& rStream) -{ - // Check file magic. - std::vector<sal_Int8> aHeader(5); - rStream.Seek(0); - rStream.ReadBytes(aHeader.data(), aHeader.size()); - if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F' || aHeader[4] != '-') - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: header mismatch"); - return false; - } - - // Allow later editing of the contents in-memory. - rStream.Seek(0); - m_aEditBuffer.WriteStream(rStream); - - // Look up the offset of the xref table. 
- size_t nStartXRef = FindStartXRef(rStream); - SAL_INFO("xmlsecurity.pdfio", "PDFDocument::Read: nStartXRef is " << nStartXRef); - if (nStartXRef == 0) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: found no xref start offset"); - return false; - } - while (true) - { - rStream.Seek(nStartXRef); - OString aKeyword = ReadKeyword(rStream); - if (aKeyword.isEmpty()) - ReadXRefStream(rStream); - - else - { - if (aKeyword != "xref") - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: xref is not the first keyword"); - return false; - } - ReadXRef(rStream); - if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: failed to tokenizer trailer after xref"); - return false; - } - } - - PDFNumberElement* pPrev = nullptr; - if (m_pTrailer) - pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev")); - else if (m_pXRefStream) - pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev")); - if (pPrev) - nStartXRef = pPrev->GetValue(); - - // Reset state, except object offsets and the edit buffer. - m_aElements.clear(); - m_aStartXRefs.clear(); - m_aEOFs.clear(); - m_pTrailer = nullptr; - m_pXRefStream = nullptr; - if (!pPrev) - break; - } - - // Then we can tokenize the stream. - rStream.Seek(0); - return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr); -} - -OString PDFDocument::ReadKeyword(SvStream& rStream) -{ - OStringBuffer aBuf; - char ch; - rStream.ReadChar(ch); - if (rStream.IsEof()) - return OString(); - while (isalpha(ch)) - { - aBuf.append(ch); - rStream.ReadChar(ch); - if (rStream.IsEof()) - return aBuf.toString(); - } - rStream.SeekRel(-1); - return aBuf.toString(); -} - -size_t PDFDocument::FindStartXRef(SvStream& rStream) -{ - // Find the "startxref" token, somewhere near the end of the document. 
- std::vector<char> aBuf(1024); - rStream.Seek(STREAM_SEEK_TO_END); - if (rStream.Tell() > aBuf.size()) - rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size()); - else - // The document is really short, then just read it from the start. - rStream.Seek(0); - size_t nBeforePeek = rStream.Tell(); - size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size()); - rStream.Seek(nBeforePeek); - if (nSize != aBuf.size()) - aBuf.resize(nSize); - OString aPrefix("startxref"); - // Find the last startxref at the end of the document. - auto itLastValid = aBuf.end(); - auto it = aBuf.begin(); - while (true) - { - it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength()); - if (it == aBuf.end()) - break; - else - { - itLastValid = it; - ++it; - } - } - if (itLastValid == aBuf.end()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::FindStartXRef: found no startxref"); - return 0; - } - - rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength()); - if (rStream.IsEof()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::FindStartXRef: unexpected end of stream after startxref"); - return 0; - } - - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aNumber; - if (!aNumber.Read(rStream)) - return 0; - return aNumber.GetValue(); -} - -void PDFDocument::ReadXRefStream(SvStream& rStream) -{ - // Look up the stream length in the object dictionary. 
- if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: failed to read object"); - return; - } - - if (m_aElements.empty()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: no tokens found"); - return; - } - - PDFObjectElement* pObject = nullptr; - for (const auto& pElement : m_aElements) - { - if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get())) - { - pObject = pObj; - break; - } - } - if (!pObject) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: no object token found"); - return; - } - - // So that the Prev key can be looked up later. - m_pXRefStream = pObject; - - PDFElement* pLookup = pObject->Lookup("Length"); - auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup); - if (!pNumber) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: stream length is not provided"); - return; - } - sal_uInt64 nLength = pNumber->GetValue(); - - // Look up the stream offset. - PDFStreamElement* pStream = nullptr; - for (const auto& pElement : m_aElements) - { - if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get())) - { - pStream = pS; - break; - } - } - if (!pStream) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: no stream token found"); - return; - } - - // Read and decompress it. 
- rStream.Seek(pStream->GetOffset()); - std::vector<char> aBuf(nLength); - rStream.ReadBytes(aBuf.data(), aBuf.size()); - - auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter")); - if (!pFilter) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: no Filter found"); - return; - } - - if (pFilter->GetValue() != "FlateDecode") - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue()); - return; - } - - int nColumns = 1; - int nPredictor = 1; - if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms"))) - { - const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems(); - auto it = rItems.find("Columns"); - if (it != rItems.end()) - if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second)) - nColumns = pColumns->GetValue(); - it = rItems.find("Predictor"); - if (it != rItems.end()) - if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second)) - nPredictor = pPredictor->GetValue(); - } - - SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ); - SvMemoryStream aStream; - ZCodec aZCodec; - aZCodec.BeginCompression(); - aZCodec.Decompress(aSource, aStream); - if (!aZCodec.EndCompression()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: decompression failed"); - return; - } - - // Look up the first and the last entry we need to read. 
- auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index")); - std::vector<size_t> aFirstObjects; - std::vector<size_t> aNumberOfObjects; - if (!pIndex) - { - auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size")); - if (pSize) - { - aFirstObjects.push_back(0); - aNumberOfObjects.push_back(pSize->GetValue()); - } - else - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: Index and Size not found"); - return; - } - } - else - { - const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements(); - size_t nFirstObject = 0; - for (size_t i = 0; i < rIndexElements.size(); ++i) - { - if (i % 2 == 0) - { - auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]); - if (!pFirstObject) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: Index has no first object"); - return; - } - nFirstObject = pFirstObject->GetValue(); - continue; - } - - auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]); - if (!pNumberOfObjects) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: Index has no number of objects"); - return; - } - aFirstObjects.push_back(nFirstObject); - aNumberOfObjects.push_back(pNumberOfObjects->GetValue()); - } - } - - // Look up the format of a single entry. - const int nWSize = 3; - auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W")); - if (!pW || pW->GetElements().size() < nWSize) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: W not found or has < 3 elements"); - return; - } - int aW[nWSize]; - // First character is the (kind of) repeated predictor. 
- int nLineLength = 1; - for (size_t i = 0; i < nWSize; ++i) - { - auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]); - if (!pI) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: W contains non-number"); - return; - } - aW[i] = pI->GetValue(); - nLineLength += aW[i]; - } - - if (nPredictor > 1 && nLineLength - 1 != nColumns) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W"); - return; - } - - aStream.Seek(0); - for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection) - { - size_t nFirstObject = aFirstObjects[nSubSection]; - size_t nNumberOfObjects = aNumberOfObjects[nSubSection]; - - // This is the line as read from the stream. - std::vector<unsigned char> aOrigLine(nLineLength); - // This is the line as it appears after tweaking according to nPredictor. - std::vector<unsigned char> aFilteredLine(nLineLength); - for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry) - { - size_t nIndex = nFirstObject + nEntry; - - aStream.ReadBytes(aOrigLine.data(), aOrigLine.size()); - if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: in-stream predictor is inconsistent with /DecodeParms/Predictor for object #" << nIndex); - return; - } - - for (int i = 0; i < nLineLength; ++i) - { - switch (nPredictor) - { - case 1: - // No prediction. - break; - case 12: - // PNG prediction: up (on all rows). - aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i]; - break; - default: - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: unexpected predictor: " << nPredictor); - return; - break; - } - } - - // First character is already handled above. - int nPos = 1; - size_t nType = 0; - // Start of the current field in the stream data. 
- int nOffset = nPos; - for (; nPos < nOffset + aW[0]; ++nPos) - { - unsigned char nCh = aFilteredLine[nPos]; - nType = (nType << 8) + nCh; - } - - // Start of the object in the file stream. - size_t nStreamOffset = 0; - nOffset = nPos; - for (; nPos < nOffset + aW[1]; ++nPos) - { - unsigned char nCh = aFilteredLine[nPos]; - nStreamOffset = (nStreamOffset << 8) + nCh; - } - - // Generation number of the object. - size_t nGenerationNumber = 0; - nOffset = nPos; - for (; nPos < nOffset + aW[2]; ++nPos) - { - unsigned char nCh = aFilteredLine[nPos]; - nGenerationNumber = (nGenerationNumber << 8) + nCh; - } - - // Ignore invalid nType. - if (nType <= 2) - { - if (m_aXRef.find(nIndex) == m_aXRef.end()) - { - XRefEntry aEntry; - switch (nType) - { - case 0: - aEntry.m_eType = XRefEntryType::FREE; - break; - case 1: - aEntry.m_eType = XRefEntryType::NOT_COMPRESSED; - break; - case 2: - aEntry.m_eType = XRefEntryType::COMPRESSED; - break; - } - aEntry.m_nOffset = nStreamOffset; - aEntry.m_nGenerationNumber = nGenerationNumber; - m_aXRef[nIndex] = aEntry; - } - } - } - } -} - -void PDFDocument::ReadXRef(SvStream& rStream) -{ - PDFDocument::SkipWhitespace(rStream); - - while (true) - { - PDFNumberElement aFirstObject; - if (!aFirstObject.Read(rStream)) - { - // Next token is not a number, it'll be the trailer. 
- return; - } - - if (aFirstObject.GetValue() < 0) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected first object number >= 0"); - return; - } - - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aNumberOfEntries; - if (!aNumberOfEntries.Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read number of entries"); - return; - } - - if (aNumberOfEntries.GetValue() < 0) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: expected zero or more entries"); - return; - } - - size_t nSize = aNumberOfEntries.GetValue(); - for (size_t nEntry = 0; nEntry < nSize; ++nEntry) - { - size_t nIndex = aFirstObject.GetValue() + nEntry; - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aOffset; - if (!aOffset.Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read offset"); - return; - } - - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aGenerationNumber; - if (!aGenerationNumber.Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: failed to read generation number"); - return; - } - - PDFDocument::SkipWhitespace(rStream); - OString aKeyword = ReadKeyword(rStream); - if (aKeyword != "f" && aKeyword != "n") - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRef: unexpected keyword"); - return; - } - // xrefs are read in reverse order, so never update an existing - // offset with an older one. - if (m_aXRef.find(nIndex) == m_aXRef.end()) - { - XRefEntry aEntry; - aEntry.m_nOffset = aOffset.GetValue(); - aEntry.m_nGenerationNumber = aGenerationNumber.GetValue(); - // Initially only the first entry is dirty. 
- if (nIndex == 0) - aEntry.m_bDirty = true; - m_aXRef[nIndex] = aEntry; - } - PDFDocument::SkipWhitespace(rStream); - } - } -} - -void PDFDocument::SkipWhitespace(SvStream& rStream) -{ - char ch = 0; - - while (true) - { - rStream.ReadChar(ch); - if (rStream.IsEof()) - break; - - if (!isspace(ch)) - { - rStream.SeekRel(-1); - return; - } - } -} - -void PDFDocument::SkipLineBreaks(SvStream& rStream) -{ - char ch = 0; - - while (true) - { - rStream.ReadChar(ch); - if (rStream.IsEof()) - break; - - if (ch != '\n' && ch != '\r') - { - rStream.SeekRel(-1); - return; - } - } -} - -size_t PDFDocument::GetObjectOffset(size_t nIndex) const -{ - auto it = m_aXRef.find(nIndex); - if (it == m_aXRef.end() || it->second.m_eType == XRefEntryType::COMPRESSED) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex << ", but failed"); - return 0; - } - - return it->second.m_nOffset; -} - -const std::vector< std::unique_ptr<PDFElement> >& PDFDocument::GetElements() -{ - return m_aElements; -} - -std::vector<PDFObjectElement*> PDFDocument::GetPages() -{ - std::vector<PDFObjectElement*> aRet; - - PDFReferenceElement* pRoot = nullptr; - if (m_pTrailer) - pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root")); - else if (m_pXRefStream) - pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root")); - - if (!pRoot) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: trailer has no Root key"); - return aRet; - } - - PDFObjectElement* pCatalog = pRoot->LookupObject(); - if (!pCatalog) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: trailer has no catalog"); - return aRet; - } - - PDFObjectElement* pPages = pCatalog->LookupObject("Pages"); - if (!pPages) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue() << ") has no pages"); - return aRet; - } - - auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids")); - if (!pKids) - { - 
SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: pages has no kids"); - return aRet; - } - - for (const auto& pKid : pKids->GetElements()) - { - auto pReference = dynamic_cast<PDFReferenceElement*>(pKid); - if (!pReference) - continue; - - aRet.push_back(pReference->LookupObject()); - } - - return aRet; -} - -void PDFDocument::PushBackEOF(size_t nOffset) -{ - m_aEOFs.push_back(nOffset); -} - -std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets() -{ - std::vector<PDFObjectElement*> aRet; - - std::vector<PDFObjectElement*> aPages = GetPages(); - - for (const auto& pPage : aPages) - { - if (!pPage) - continue; - - PDFElement* pAnnotsElement = pPage->Lookup("Annots"); - auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement); - if (!pAnnots) - { - // Annots is not an array, see if it's a reference to an object - // with a direct array. - auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement); - if (pAnnotsRef) - { - if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject()) - { - pAnnots = pAnnotsObject->GetArray(); - } - } - } - - if (!pAnnots) - continue; - - for (const auto& pAnnot : pAnnots->GetElements()) - { - auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot); - if (!pReference) - continue; - - PDFObjectElement* pAnnotObject = pReference->LookupObject(); - if (!pAnnotObject) - continue; - - auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT")); - if (!pFT || pFT->GetValue() != "Sig") - continue; - - aRet.push_back(pAnnotObject); - } - } - - return aRet; -} - -int PDFDocument::AsHex(char ch) -{ - int nRet = 0; - if (isdigit(ch)) - nRet = ch - '0'; - else - { - if (ch >= 'a' && ch <= 'f') - nRet = ch - 'a'; - else if (ch >= 'A' && ch <= 'F') - nRet = ch - 'A'; - else - return -1; - nRet += 10; - } - return nRet; -} - -std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement* pElement) -{ - std::vector<unsigned char> aRet; - const OString& rHex = pElement->GetValue(); - size_t 
nHexLen = rHex.getLength(); - { - int nByte = 0; - int nCount = 2; - for (size_t i = 0; i < nHexLen; ++i) - { - nByte = nByte << 4; - sal_Int8 nParsed = AsHex(rHex[i]); - if (nParsed == -1) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::DecodeHexString: invalid hex value"); - return aRet; - } - nByte += nParsed; - --nCount; - if (!nCount) - { - aRet.push_back(nByte); - nCount = 2; - nByte = 0; - } - } - } - - return aRet; -} - namespace { #ifdef XMLSEC_CRYPTO_NSS @@ -2157,46 +281,46 @@ bool VerifyNonDetachedSignature(SvStream& rStream, std::vector<std::pair<size_t, #endif } -bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, SignatureInformation& rInformation, bool bLast) +bool ValidateSignature(SvStream& rStream, vcl::filter::PDFObjectElement* pSignature, SignatureInformation& rInformation, bool bLast) { - PDFObjectElement* pValue = pSignature->LookupObject("V"); + vcl::filter::PDFObjectElement* pValue = pSignature->LookupObject("V"); if (!pValue) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: no value"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: no value"); return false; } - auto pContents = dynamic_cast<PDFHexStringElement*>(pValue->Lookup("Contents")); + auto pContents = dynamic_cast<vcl::filter::PDFHexStringElement*>(pValue->Lookup("Contents")); if (!pContents) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: no contents"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: no contents"); return false; } - auto pByteRange = dynamic_cast<PDFArrayElement*>(pValue->Lookup("ByteRange")); + auto pByteRange = dynamic_cast<vcl::filter::PDFArrayElement*>(pValue->Lookup("ByteRange")); if (!pByteRange || pByteRange->GetElements().size() < 2) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: no byte range or too few elements"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: no byte range or too few elements"); return false; } - auto pSubFilter = 
dynamic_cast<PDFNameElement*>(pValue->Lookup("SubFilter")); + auto pSubFilter = dynamic_cast<vcl::filter::PDFNameElement*>(pValue->Lookup("SubFilter")); bool bNonDetached = pSubFilter && pSubFilter->GetValue() == "adbe.pkcs7.sha1"; if (!pSubFilter || (pSubFilter->GetValue() != "adbe.pkcs7.detached" && !bNonDetached && pSubFilter->GetValue() != "ETSI.CAdES.detached")) { if (!pSubFilter) - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: missing sub-filter"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: missing sub-filter"); else - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: unsupported sub-filter: '"<<pSubFilter->GetValue()<<"'"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: unsupported sub-filter: '"<<pSubFilter->GetValue()<<"'"); return false; } // Reason / comment / description is optional. - auto pReason = dynamic_cast<PDFHexStringElement*>(pValue->Lookup("Reason")); + auto pReason = dynamic_cast<vcl::filter::PDFHexStringElement*>(pValue->Lookup("Reason")); if (pReason) { // See appendUnicodeTextString() for the export equivalent of this. - std::vector<unsigned char> aReason = PDFDocument::DecodeHexString(pReason); + std::vector<unsigned char> aReason = vcl::filter::PDFDocument::DecodeHexString(pReason); OUStringBuffer aBuffer; sal_uInt16 nByte = 0; for (size_t i = 0; i < aReason.size(); ++i) @@ -2218,7 +342,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur // Date: used only when the time of signing is not available in the // signature. - auto pM = dynamic_cast<PDFLiteralStringElement*>(pValue->Lookup("M")); + auto pM = dynamic_cast<vcl::filter::PDFLiteralStringElement*>(pValue->Lookup("M")); if (pM) { // Example: "D:20161027100104". @@ -2237,13 +361,13 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur // Build a list of offset-length pairs, representing the signed bytes. 
std::vector<std::pair<size_t, size_t>> aByteRanges; size_t nByteRangeOffset = 0; - const std::vector<PDFElement*>& rByteRangeElements = pByteRange->GetElements(); + const std::vector<vcl::filter::PDFElement*>& rByteRangeElements = pByteRange->GetElements(); for (size_t i = 0; i < rByteRangeElements.size(); ++i) { - auto pNumber = dynamic_cast<PDFNumberElement*>(rByteRangeElements[i]); + auto pNumber = dynamic_cast<vcl::filter::PDFNumberElement*>(rByteRangeElements[i]); if (!pNumber) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: signature offset and length has to be a number"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: signature offset and length has to be a number"); return false; } @@ -2259,19 +383,19 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur // Detect if the byte ranges don't cover everything, but the signature itself. if (aByteRanges.size() < 2) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: expected 2 byte ranges"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: expected 2 byte ranges"); return false; } if (aByteRanges[0].first != 0) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: first range start is not 0"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: first range start is not 0"); return false; } // 2 is the leading "<" and the trailing ">" around the hex string. 
size_t nSignatureLength = static_cast<size_t>(pContents->GetValue().getLength()) + 2; if (aByteRanges[1].first != (aByteRanges[0].second + nSignatureLength)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: second range start is not the end of the signature"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: second range start is not the end of the signature"); return false; } rStream.Seek(STREAM_SEEK_TO_END); @@ -2282,10 +406,10 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur // At this point there is no obviously missing info to validate the // signature. - std::vector<unsigned char> aSignature = PDFDocument::DecodeHexString(pContents); + std::vector<unsigned char> aSignature = vcl::filter::PDFDocument::DecodeHexString(pContents); if (aSignature.empty()) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: empty contents"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: empty contents"); return false; } @@ -2305,21 +429,21 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur /*decrypt_key_cb_arg=*/nullptr); if (!NSS_CMSMessage_IsSigned(pCMSMessage)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: message is not signed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: message is not signed"); return false; } NSSCMSContentInfo* pCMSContentInfo = NSS_CMSMessage_ContentLevel(pCMSMessage, 0); if (!pCMSContentInfo) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: NSS_CMSMessage_ContentLevel() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: NSS_CMSMessage_ContentLevel() failed"); return false; } auto pCMSSignedData = static_cast<NSSCMSSignedData*>(NSS_CMSContentInfo_GetContent(pCMSContentInfo)); if (!pCMSSignedData) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: NSS_CMSContentInfo_GetContent() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: NSS_CMSContentInfo_GetContent() 
failed"); return false; } @@ -2333,7 +457,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur NSSCMSSignerInfo* pCMSSignerInfo = NSS_CMSSignedData_GetSignerInfo(pCMSSignedData, 0); if (!pCMSSignerInfo) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: NSS_CMSSignedData_GetSignerInfo() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: NSS_CMSSignedData_GetSignerInfo() failed"); return false; } @@ -2361,7 +485,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur HASHContext* pHASHContext = HASH_Create(eHashType); if (!pHASHContext) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: HASH_Create() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: HASH_Create() failed"); return false; } @@ -2407,7 +531,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur nMaxResultLen = msfilter::SHA512_HASH_LENGTH; break; default: - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: unrecognized algorithm"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: unrecognized algorithm"); return false; } @@ -2418,7 +542,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur CERTCertificate* pCertificate = NSS_CMSSignerInfo_GetSigningCertificate(pCMSSignerInfo, CERT_GetDefaultCertDB()); if (!pCertificate) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: NSS_CMSSignerInfo_GetSigningCertificate() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: NSS_CMSSignerInfo_GetSigningCertificate() failed"); return false; } else @@ -2450,7 +574,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur } catch (const std::length_error&) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: failed to parse signature date string"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: failed to parse signature date string"); return false; } 
DateTime aDateTime(aUNODateTime); @@ -2514,14 +638,14 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur nullptr); if (!hMsg) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgOpenToDecode() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgOpenToDecode() failed"); return false; } // Update the message with the encoded header blob. if (!CryptMsgUpdate(hMsg, aSignature.data(), aSignature.size(), TRUE)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature, CryptMsgUpdate() for the header failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature, CryptMsgUpdate() for the header failed: " << WindowsErrorString(GetLastError())); return false; } @@ -2540,7 +664,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur rStream.ReadBytes(aBuffer.data(), nRemainingSize); if (!CryptMsgUpdate(hMsg, aBuffer.data(), nRemainingSize, FALSE)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature, CryptMsgUpdate() for the content failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature, CryptMsgUpdate() for the content failed: " << WindowsErrorString(GetLastError())); return false; } nByte = rByteRange.second; @@ -2550,7 +674,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur rStream.ReadBytes(aBuffer.data(), nChunkLen); if (!CryptMsgUpdate(hMsg, aBuffer.data(), nChunkLen, FALSE)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature, CryptMsgUpdate() for the content failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature, CryptMsgUpdate() for the content failed: " << WindowsErrorString(GetLastError())); return false; } nByte += nChunkLen; @@ -2559,7 +683,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur } if (!CryptMsgUpdate(hMsg, nullptr, 
0, TRUE)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature, CryptMsgUpdate() for the last content failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature, CryptMsgUpdate() for the last content failed: " << WindowsErrorString(GetLastError())); return false; } @@ -2567,13 +691,13 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur DWORD nDigestID = 0; if (!CryptMsgGetParam(hMsg, CMSG_SIGNER_HASH_ALGORITHM_PARAM, 0, nullptr, &nDigestID)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed: " << WindowsErrorString(GetLastError())); return false; } std::unique_ptr<BYTE[]> pDigestBytes(new BYTE[nDigestID]); if (!CryptMsgGetParam(hMsg, CMSG_SIGNER_HASH_ALGORITHM_PARAM, 0, pDigestBytes.get(), &nDigestID)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed: " << WindowsErrorString(GetLastError())); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed: " << WindowsErrorString(GetLastError())); return false; } auto pDigestID = reinterpret_cast<CRYPT_ALGORITHM_IDENTIFIER*>(pDigestBytes.get()); @@ -2583,19 +707,19 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur rInformation.nDigestID = xml::crypto::DigestID::SHA1; else // Don't error out here, we can still verify the message digest correctly, just the digest ID won't be set. - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: unhandled algorithm identifier '"<<pDigestID->pszObjId<<"'"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: unhandled algorithm identifier '"<<pDigestID->pszObjId<<"'"); // Get the signer CERT_INFO from the message. 
DWORD nSignerCertInfo = 0; if (!CryptMsgGetParam(hMsg, CMSG_SIGNER_CERT_INFO_PARAM, 0, nullptr, &nSignerCertInfo)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed"); return false; } std::unique_ptr<BYTE[]> pSignerCertInfoBuf(new BYTE[nSignerCertInfo]); if (!CryptMsgGetParam(hMsg, CMSG_SIGNER_CERT_INFO_PARAM, 0, pSignerCertInfoBuf.get(), &nSignerCertInfo)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed"); return false; } PCERT_INFO pSignerCertInfo = reinterpret_cast<PCERT_INFO>(pSignerCertInfoBuf.get()); @@ -2609,7 +733,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur hMsg); if (!hStoreHandle) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CertOpenStore() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CertOpenStore() failed"); return false; } @@ -2619,7 +743,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur pSignerCertInfo); if (!pSignerCertContext) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CertGetSubjectCertificateFromStore() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CertGetSubjectCertificateFromStore() failed"); return false; } else @@ -2639,14 +763,14 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur DWORD nContentParam = 0; if (!CryptMsgGetParam(hMsg, CMSG_CONTENT_PARAM, 0, nullptr, &nContentParam)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed"); return false; } std::vector<BYTE> aContentParam(nContentParam); if (!CryptMsgGetParam(hMsg, CMSG_CONTENT_PARAM, 0, aContentParam.data(), &nContentParam)) { - 
SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed"); return false; } @@ -2668,7 +792,7 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur std::unique_ptr<BYTE[]> pSignedAttributesBuf(new BYTE[nSignedAttributes]); if (!CryptMsgGetParam(hMsg, CMSG_SIGNER_AUTH_ATTR_PARAM, 0, pSignedAttributesBuf.get(), &nSignedAttributes)) { - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: CryptMsgGetParam() failed"); + SAL_WARN("xmlsecurity.pdfio", "ValidateSignature: CryptMsgGetParam() failed"); return false; } auto pSignedAttributes = reinterpret_cast<PCRYPT_ATTRIBUTES>(pSignedAttributesBuf.get()); @@ -2701,1020 +825,6 @@ bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, Signatur #endif } -PDFCommentElement::PDFCommentElement(PDFDocument& rDoc) - : m_rDoc(rDoc) -{ -} - -bool PDFCommentElement::Read(SvStream& rStream) -{ - // Read from (including) the % char till (excluding) the end of the line/stream. 
- OStringBuffer aBuf; - char ch; - rStream.ReadChar(ch); - while (true) - { - if (ch == '\n' || ch == '\r' || rStream.IsEof()) - { - m_aComment = aBuf.makeStringAndClear(); - - if (m_aComment.startsWith("%%EOF")) - m_rDoc.PushBackEOF(rStream.Tell()); - - SAL_INFO("xmlsecurity.pdfio", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'"); - return true; - } - aBuf.append(ch); - rStream.ReadChar(ch); - } - - return false; -} - -PDFNumberElement::PDFNumberElement() = default; - -bool PDFNumberElement::Read(SvStream& rStream) -{ - OStringBuffer aBuf; - m_nOffset = rStream.Tell(); - char ch; - rStream.ReadChar(ch); - if (!isdigit(ch) && ch != '-' && ch != '.') - { - rStream.SeekRel(-1); - return false; - } - while (!rStream.IsEof()) - { - if (!isdigit(ch) && ch != '-' && ch != '.') - { - rStream.SeekRel(-1); - m_nLength = rStream.Tell() - m_nOffset; - m_fValue = aBuf.makeStringAndClear().toDouble(); - SAL_INFO("xmlsecurity.pdfio", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'"); - return true; - } - aBuf.append(ch); - rStream.ReadChar(ch); - } - - return false; -} - -sal_uInt64 PDFNumberElement::GetLocation() const -{ - return m_nOffset; -} - -sal_uInt64 PDFNumberElement::GetLength() const -{ - return m_nLength; -} - -PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) -{ -} - -bool PDFBooleanElement::Read(SvStream& /*rStream*/) -{ - return true; -} - -bool PDFNullElement::Read(SvStream& /*rStream*/) -{ - return true; -} - -bool PDFHexStringElement::Read(SvStream& rStream) -{ - char ch; - rStream.ReadChar(ch); - if (ch != '<') - { - SAL_INFO("xmlsecurity.pdfio", "PDFHexStringElement::Read: expected '<' as first character"); - return false; - } - rStream.ReadChar(ch); - - OStringBuffer aBuf; - while (!rStream.IsEof()) - { - if (ch == '>') - { - m_aValue = aBuf.makeStringAndClear(); - SAL_INFO("xmlsecurity.pdfio", "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength()); - return true; - } - aBuf.append(ch); - 
rStream.ReadChar(ch); - } - - return false; -} - -const OString& PDFHexStringElement::GetValue() const -{ - return m_aValue; -} - -bool PDFLiteralStringElement::Read(SvStream& rStream) -{ - char nPrevCh = 0; - char ch = 0; - rStream.ReadChar(ch); - if (ch != '(') - { - SAL_INFO("xmlsecurity.pdfio", "PDFHexStringElement::Read: expected '(' as first character"); - return false; - } - nPrevCh = ch; - rStream.ReadChar(ch); - - OStringBuffer aBuf; - while (!rStream.IsEof()) - { - if (ch == ')' && nPrevCh != '\\') - { - m_aValue = aBuf.makeStringAndClear(); - SAL_INFO("xmlsecurity.pdfio", "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'"); - return true; - } - aBuf.append(ch); - nPrevCh = ch; - rStream.ReadChar(ch); - } - - return false; -} - -const OString& PDFLiteralStringElement::GetValue() const -{ - return m_aValue; -} - -PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc) - : m_rDoc(rDoc) -{ -} - -bool PDFTrailerElement::Read(SvStream& /*rStream*/) -{ - return true; -} - -PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey) -{ - if (m_aDictionary.empty()) - PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); - - return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey); -} - - -double PDFNumberElement::GetValue() const -{ - return m_fValue; -} - -PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue) - : m_rDoc(rDoc), - m_fObjectValue(fObjectValue), - m_fGenerationValue(fGenerationValue), - m_nDictionaryOffset(0), - m_nDictionaryLength(0), - m_pDictionaryElement(nullptr), - m_pArrayElement(nullptr), - m_pStreamElement(nullptr) -{ -} - -bool PDFObjectElement::Read(SvStream& /*rStream*/) -{ - SAL_INFO("xmlsecurity.pdfio", "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj"); - return true; -} - -PDFDictionaryElement::PDFDictionaryElement() = default; - -size_t PDFDictionaryElement::Parse(const std::vector< 
std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary) -{ - // The index of last parsed element, in case of nested dictionaries. - size_t nRet = 0; - - if (!rDictionary.empty()) - return nRet; - - auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis); - // This is set to non-nullptr here for nested dictionaries only. - auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis); - - // Find out where the dictionary for this object starts. - size_t nIndex = 0; - for (size_t i = 0; i < rElements.size(); ++i) - { - if (rElements[i].get() == pThis) - { - nIndex = i; - break; - } - } - - OString aName; - sal_uInt64 nNameOffset = 0; - std::vector<PDFNumberElement*> aNumbers; - // The array value we're in -- if any. - PDFArrayElement* pArray = nullptr; - sal_uInt64 nDictionaryOffset = 0; - int nDictionaryDepth = 0; - for (size_t i = nIndex; i < rElements.size(); ++i) - { - // Dictionary tokens can be nested, track enter/leave. - if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get())) - { - if (++nDictionaryDepth == 1) - { - // First dictionary start, track start offset. - nDictionaryOffset = pDictionary->m_nLocation; - if (pThisObject) - { - pThisObject->SetDictionary(pDictionary); - pThisDictionary = pDictionary; - pThisObject->SetDictionaryOffset(nDictionaryOffset); - } - } - else - { - // Nested dictionary. - i = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems); - rDictionary[aName] = pDictionary; - aName.clear(); - } - } - - if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get())) - { - if (--nDictionaryDepth == 0) - { - // Last dictionary end, track length and stop parsing. 
- if (pThisObject) - pThisObject->SetDictionaryLength(pEndDictionary->GetLocation() - nDictionaryOffset); - nRet = i; - break; - } - } - - auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get()); - if (pName) - { - if (!aNumbers.empty()) - { - PDFNumberElement* pNumber = aNumbers.back(); - rDictionary[aName] = pNumber; - if (pThisDictionary) - { - pThisDictionary->SetKeyOffset(aName, nNameOffset); - pThisDictionary->SetKeyValueLength(aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset); - } - aName.clear(); - aNumbers.clear(); - } - - if (aName.isEmpty()) - { - // Remember key. - aName = pName->GetValue(); - nNameOffset = pName->GetLocation(); - } - else - { - if (pArray) - pArray->PushBack(pName); - else - { - // Name-name key-value. - rDictionary[aName] = pName; - if (pThisDictionary) - { - pThisDictionary->SetKeyOffset(aName, nNameOffset); - pThisDictionary->SetKeyValueLength(aName, pName->GetLocation() + pName->GetLength() - nNameOffset); - } - aName.clear(); - } - } - continue; - } - - auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get()); - if (pArr) - { - pArray = pArr; - continue; - } - - auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get()); - if (pArray && pEndArr) - { - if (!aNumbers.empty()) - { - for (auto& pNumber : aNumbers) - pArray->PushBack(pNumber); - aNumbers.clear(); - } - rDictionary[aName] = pArray; - if (pThisDictionary) - { - pThisDictionary->SetKeyOffset(aName, nNameOffset); - // Include the ending ']' in the length of the key - (array)value pair length. 
- pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1); - } - aName.clear(); - pArray = nullptr; - continue; - } - - auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get()); - if (pReference) - { - if (!pArray) - { - rDictionary[aName] = pReference; - if (pThisDictionary) - pThisDictionary->SetKeyOffset(aName, nNameOffset); - aName.clear(); - } - else - { - pArray->PushBack(pReference); - } - aNumbers.clear(); - continue; - } - - auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get()); - if (pLiteralString) - { - rDictionary[aName] = pLiteralString; - if (pThisDictionary) - pThisDictionary->SetKeyOffset(aName, nNameOffset); - aName.clear(); - continue; - } - - auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get()); - if (pBoolean) - { - rDictionary[aName] = pBoolean; - if (pThisDictionary) - pThisDictionary->SetKeyOffset(aName, nNameOffset); - aName.clear(); - continue; - } - - auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get()); - if (pHexString) - { - if (!pArray) - { - rDictionary[aName] = pHexString; - if (pThisDictionary) - pThisDictionary->SetKeyOffset(aName, nNameOffset); - aName.clear(); - } - else - { - pArray->PushBack(pHexString); - } - continue; - } - - if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get())) - break; - - // Just remember this, so that in case it's not a reference parameter, - // we can handle it later. 
- auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get()); - if (pNumber) - aNumbers.push_back(pNumber); - } - - if (!aNumbers.empty()) - { - rDictionary[aName] = aNumbers.back(); - if (pThisDictionary) - pThisDictionary->SetKeyOffset(aName, nNameOffset); - aName.clear(); - aNumbers.clear(); - } - - return nRet; -} - -PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey) -{ - auto it = rDictionary.find(rKey); - if (it == rDictionary.end()) - return nullptr; - - return it->second; -} - -PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey) -{ - auto pKey = dynamic_cast<PDFReferenceElement*>(PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey)); - if (!pKey) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDictionaryElement::LookupObject: no such key with reference value: " << rDictionaryKey); - return nullptr; - } - - return pKey->LookupObject(); -} - -PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey) -{ - return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey); -} - -PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey) -{ - if (m_aDictionary.empty()) - { - if (!m_aElements.empty()) - // This is a stored object in an object stream. - PDFDictionaryElement::Parse(m_aElements, this, m_aDictionary); - else - // Normal object: elements are stored as members of the document itself. 
- PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); - } - - return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey); -} - -PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey) -{ - auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey)); - if (!pKey) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::LookupObject: no such key with reference value: " << rDictionaryKey); - return nullptr; - } - - return pKey->LookupObject(); -} - -double PDFObjectElement::GetObjectValue() const -{ - return m_fObjectValue; -} - -void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset) -{ - m_nDictionaryOffset = nDictionaryOffset; -} - -sal_uInt64 PDFObjectElement::GetDictionaryOffset() -{ - if (m_aDictionary.empty()) - PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary); - - return m_nDictionaryOffset; -} - -void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset) -{ - m_aDictionaryKeyOffset[rKey] = nOffset; -} - -void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength) -{ - m_aDictionaryKeyValueLength[rKey] = nLength; -} - -sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const -{ - auto it = m_aDictionaryKeyOffset.find(rKey); - if (it == m_aDictionaryKeyOffset.end()) - return 0; - - return it->second; -} - -sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const -{ - auto it = m_aDictionaryKeyValueLength.find(rKey); - if (it == m_aDictionaryKeyValueLength.end()) - return 0; - - return it->second; -} - -const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const -{ - return m_aItems; -} - -void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength) -{ - m_nDictionaryLength = nDictionaryLength; -} - -sal_uInt64 PDFObjectElement::GetDictionaryLength() -{ - if (m_aDictionary.empty()) - PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, 
m_aDictionary); - - return m_nDictionaryLength; -} - -PDFDictionaryElement* PDFObjectElement::GetDictionary() const -{ - return m_pDictionaryElement; -} - -void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement) -{ - m_pDictionaryElement = pDictionaryElement; -} - -void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) -{ - m_pArrayElement = pArrayElement; -} - -void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement) -{ - m_pStreamElement = pStreamElement; -} - -PDFStreamElement* PDFObjectElement::GetStream() const -{ - return m_pStreamElement; -} - -PDFArrayElement* PDFObjectElement::GetArray() const -{ - return m_pArrayElement; -} - -void PDFObjectElement::ParseStoredObjects() -{ - if (!m_pStreamElement) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: no stream"); - return; - } - - auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type")); - if (!pType || pType->GetValue() != "ObjStm") - { - if (!pType) - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: missing unexpected type"); - else - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue()); - return; - } - - auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter")); - if (!pFilter || pFilter->GetValue() != "FlateDecode") - { - if (!pFilter) - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: missing filter"); - else - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue()); - return; - } - - auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First")); - if (!pFirst) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: no First"); - return; - } - - auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N")); - if (!pN) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: no N"); - return; - } - size_t nN = pN->GetValue(); - - auto pLength = 
dynamic_cast<PDFNumberElement*>(Lookup("Length")); - if (!pLength) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: no length"); - return; - } - size_t nLength = pLength->GetValue(); - - // Read and decompress it. - SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer(); - rEditBuffer.Seek(m_pStreamElement->GetOffset()); - std::vector<char> aBuf(nLength); - rEditBuffer.ReadBytes(aBuf.data(), aBuf.size()); - SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ); - SvMemoryStream aStream; - ZCodec aZCodec; - aZCodec.BeginCompression(); - aZCodec.Decompress(aSource, aStream); - if (!aZCodec.EndCompression()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: decompression failed"); - return; - } - - aStream.Seek(STREAM_SEEK_TO_END); - nLength = aStream.Tell(); - aStream.Seek(0); - std::vector<size_t> aObjNums; - std::vector<size_t> aOffsets; - std::vector<size_t> aLengths; - // First iterate over and find out the lengths. - for (size_t nObject = 0; nObject < nN; ++nObject) - { - PDFNumberElement aObjNum; - if (!aObjNum.Read(aStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: failed to read object number"); - return; - } - aObjNums.push_back(aObjNum.GetValue()); - - PDFDocument::SkipWhitespace(aStream); - - PDFNumberElement aByteOffset; - if (!aByteOffset.Read(aStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFObjectElement::ParseStoredObjects: failed to read byte offset"); - return; - } - aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue()); - - if (aOffsets.size() > 1) - aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]); - if (nObject + 1 == nN) - aLengths.push_back(nLength - aOffsets.back()); - - PDFDocument::SkipWhitespace(aStream); - } - - // Now create streams with the proper length and tokenize the data. 
- for (size_t nObject = 0; nObject < nN; ++nObject) - { - size_t nObjNum = aObjNums[nObject]; - size_t nOffset = aOffsets[nObject]; - size_t nLen = aLengths[nObject]; - - aStream.Seek(nOffset); - m_aStoredElements.push_back(o3tl::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0)); - PDFObjectElement* pStored = m_aStoredElements.back().get(); - - aBuf.clear(); - aBuf.resize(nLen); - aStream.ReadBytes(aBuf.data(), aBuf.size()); - SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ); - - m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(), pStored); - // This is how references know the object is stored inside this object stream. - m_rDoc.SetIDObject(nObjNum, pStored); - - // Store the stream of the object in the object stream for later use. - std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream()); - aStoredStream.Seek(0); - pStreamBuffer->WriteStream(aStoredStream); - pStored->SetStreamBuffer(pStreamBuffer); - } -} - -std::vector< std::unique_ptr<PDFElement> >& PDFObjectElement::GetStoredElements() -{ - return m_aElements; -} - -SvMemoryStream* PDFObjectElement::GetStreamBuffer() const -{ - return m_pStreamBuffer.get(); -} - -void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer) -{ - m_pStreamBuffer = std::move(pStreamBuffer); -} - -PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, int fObjectValue, int fGenerationValue) - : m_rDoc(rDoc), - m_fObjectValue(fObjectValue), - m_fGenerationValue(fGenerationValue) -{ -} - -bool PDFReferenceElement::Read(SvStream& /*rStream*/) -{ - SAL_INFO("xmlsecurity.pdfio", "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R"); - return true; -} - -double PDFReferenceElement::LookupNumber(SvStream& rStream) const -{ - size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue); - if (nOffset == 0) - { - SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupNumber: found no offset for object #" 
<< m_fObjectValue); - return 0; - } - - sal_uInt64 nOrigPos = rStream.Tell(); - comphelper::ScopeGuard g([&]() - { - rStream.Seek(nOrigPos); - }); - - rStream.Seek(nOffset); - { - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aNumber; - bool bRet = aNumber.Read(rStream); - if (!bRet || aNumber.GetValue() != m_fObjectValue) - { - SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupNumber: offset points to not matching object"); - return 0; - } - } - - { - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aNumber; - bool bRet = aNumber.Read(rStream); - if (!bRet || aNumber.GetValue() != m_fGenerationValue) - { - SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupNumber: offset points to not matching generation"); - return 0; - } - } - - { - PDFDocument::SkipWhitespace(rStream); - OString aKeyword = PDFDocument::ReadKeyword(rStream); - if (aKeyword != "obj") - { - SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword"); - return 0; - } - } - - PDFDocument::SkipWhitespace(rStream); - PDFNumberElement aNumber; - if (!aNumber.Read(rStream)) - { - SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupNumber: failed to read referenced number"); - return 0; - } - - return aNumber.GetValue(); -} - -PDFObjectElement* PDFReferenceElement::LookupObject() -{ - return m_rDoc.LookupObject(m_fObjectValue); -} - -PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber) -{ - auto itIDObjects = m_aIDObjects.find(nObjectNumber); - - if (itIDObjects != m_aIDObjects.end()) - return itIDObjects->second; - - SAL_WARN("xmlsecurity.pdfio", "PDFDocument::LookupObject: can't find obj " << nObjectNumber); - return nullptr; -} - -SvMemoryStream& PDFDocument::GetEditBuffer() -{ - return m_aEditBuffer; -} - -int PDFReferenceElement::GetObjectValue() const -{ - return m_fObjectValue; -} - -int PDFReferenceElement::GetGenerationValue() const -{ - return m_fGenerationValue; -} - -bool 
PDFDictionaryElement::Read(SvStream& rStream) -{ - char ch; - rStream.ReadChar(ch); - if (ch != '<') - { - SAL_WARN("xmlsecurity.pdfio", "PDFDictionaryElement::Read: unexpected character: " << ch); - return false; - } - - if (rStream.IsEof()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFDictionaryElement::Read: unexpected end of file"); - return false; - } - - rStream.ReadChar(ch); - if (ch != '<') - { - SAL_WARN("xmlsecurity.pdfio", "PDFDictionaryElement::Read: unexpected character: " << ch); - return false; - } - - m_nLocation = rStream.Tell(); - - SAL_INFO("xmlsecurity.pdfio", "PDFDictionaryElement::Read: '<<'"); - - return true; -} - -PDFEndDictionaryElement::PDFEndDictionaryElement() = default; - -sal_uInt64 PDFEndDictionaryElement::GetLocation() const -{ - return m_nLocation; -} - -bool PDFEndDictionaryElement::Read(SvStream& rStream) -{ - m_nLocation = rStream.Tell(); - char ch; - rStream.ReadChar(ch); - if (ch != '>') - { - SAL_WARN("xmlsecurity.pdfio", "PDFEndDictionaryElement::Read: unexpected character: " << ch); - return false; - } - - if (rStream.IsEof()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFEndDictionaryElement::Read: unexpected end of file"); - return false; - } - - rStream.ReadChar(ch); - if (ch != '>') - { - SAL_WARN("xmlsecurity.pdfio", "PDFEndDictionaryElement::Read: unexpected character: " << ch); - return false; - } - - SAL_INFO("xmlsecurity.pdfio", "PDFEndDictionaryElement::Read: '>>'"); - - return true; -} - -PDFNameElement::PDFNameElement() - : m_nLocation(0), - m_nLength(0) -{ -} - -bool PDFNameElement::Read(SvStream& rStream) -{ - char ch; - rStream.ReadChar(ch); - if (ch != '/') - { - SAL_WARN("xmlsecurity.pdfio", "PDFNameElement::Read: unexpected character: " << ch); - return false; - } - m_nLocation = rStream.Tell(); - - if (rStream.IsEof()) - { - SAL_WARN("xmlsecurity.pdfio", "PDFNameElement::Read: unexpected end of file"); - return false; - } - - // Read till the first white-space. 
- OStringBuffer aBuf; - rStream.ReadChar(ch); - while (!rStream.IsEof()) - { - if (isspace(ch) || ch == '/' || ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '(') - { - rStream.SeekRel(-1); - m_aValue = aBuf.makeStringAndClear(); - SAL_INFO("xmlsecurity.pdfio", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'"); - return true; - } - aBuf.append(ch); - rStream.ReadChar(ch); - } - - return false; -} - -const OString& PDFNameElement::GetValue() const -{ - return m_aValue; -} - -sal_uInt64 PDFNameElement::GetLocation() const -{ - return m_nLocation; -} - -sal_uInt64 PDFNameElement::GetLength() const -{ - return m_nLength; -} - -PDFStreamElement::PDFStreamElement(size_t nLength) - : m_nLength(nLength), - m_nOffset(0) -{ -} - -bool PDFStreamElement::Read(SvStream& rStream) -{ - SAL_INFO("xmlsecurity.pdfio", "PDFStreamElement::Read: length is " << m_nLength); - m_nOffset = rStream.Tell(); - std::vector<unsigned char> aBytes(m_nLength); - rStream.ReadBytes(aBytes.data(), aBytes.size()); - m_aMemory.WriteBytes(aBytes.data(), aBytes.size()); - - return rStream.good(); -} - -SvMemoryStream& PDFStreamElement::GetMemory() -{ - return m_aMemory; -} - -sal_uInt64 PDFStreamElement::GetOffset() const -{ - return m_nOffset; -} - -bool PDFEndStreamElement::Read(SvStream& /*rStream*/) -{ - return true; -} - -bool PDFEndObjectElement::Read(SvStream& /*rStream*/) -{ - return true; -} - -PDFArrayElement::PDFArrayElement() = default; - -bool PDFArrayElement::Read(SvStream& rStream) -{ - char ch; - rStream.ReadChar(ch); - if (ch != '[') - { - SAL_WARN("xmlsecurity.pdfio", "PDFArrayElement::Read: unexpected character: " << ch); - return false; - } - m_nOffset = rStream.Tell(); - - SAL_INFO("xmlsecurity.pdfio", "PDFArrayElement::Read: '['"); - - return true; -} - -void PDFArrayElement::PushBack(PDFElement* pElement) -{ - m_aElements.push_back(pElement); -} - -const std::vector<PDFElement*>& PDFArrayElement::GetElements() -{ - return m_aElements; -} - 
-PDFEndArrayElement::PDFEndArrayElement() = default; - -bool PDFEndArrayElement::Read(SvStream& rStream) -{ - m_nOffset = rStream.Tell(); - char ch; - rStream.ReadChar(ch); - if (ch != ']') - { - SAL_WARN("xmlsecurity.pdfio", "PDFEndArrayElement::Read: unexpected character: " << ch); - return false; - } - - SAL_INFO("xmlsecurity.pdfio", "PDFEndArrayElement::Read: ']'"); - - return true; -} - -sal_uInt64 PDFEndArrayElement::GetOffset() const -{ - return m_nOffset; -} - } // namespace pdfio } // namespace xmlsecurity diff --git a/xmlsecurity/workben/pdfverify.cxx b/xmlsecurity/workben/pdfverify.cxx index 2b43c1ef3137..7746a6e7b716 100644 --- a/xmlsecurity/workben/pdfverify.cxx +++ b/xmlsecurity/workben/pdfverify.cxx @@ -177,7 +177,7 @@ int pdfVerify(int nArgc, char** pArgv) bRemoveSignature = true; SvFileStream aStream(aInURL, StreamMode::READ); - xmlsecurity::pdfio::PDFDocument aDocument; + vcl::filter::PDFDocument aDocument; if (!aDocument.Read(aStream)) { SAL_WARN("xmlsecurity.pdfio", "failed to read the document"); @@ -187,7 +187,7 @@ int pdfVerify(int nArgc, char** pArgv) if (bRemoveSignature) { std::cerr << "removing the last signature" << std::endl; - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); if (aSignatures.empty()) { std::cerr << "found no signatures" << std::endl; @@ -214,7 +214,7 @@ int pdfVerify(int nArgc, char** pArgv) if (aOutURL.isEmpty()) { std::cerr << "verifying signatures" << std::endl; - std::vector<xmlsecurity::pdfio::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); + std::vector<vcl::filter::PDFObjectElement*> aSignatures = aDocument.GetSignatureWidgets(); if (aSignatures.empty()) std::cerr << "found no signatures" << std::endl; else |