/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * */ #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX #include #include #include #include #include #include namespace com { namespace sun { namespace star { namespace security { class XCertificate; } } } } namespace vcl { namespace filter { class PDFTrailerElement; class PDFHexStringElement; class PDFReferenceElement; class PDFDocument; class PDFDictionaryElement; class PDFArrayElement; class PDFStreamElement; class PDFNumberElement; /// A byte range in a PDF file. class VCL_DLLPUBLIC PDFElement { bool m_bVisiting; bool m_bParsing; public: PDFElement() : m_bVisiting(false) , m_bParsing(false) { } virtual bool Read(SvStream& rStream) = 0; virtual ~PDFElement() = default; void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; } bool alreadyVisiting() const { return m_bVisiting; } void setParsing(bool bParsing) { m_bParsing = bParsing; } bool alreadyParsing() const { return m_bParsing; } }; /// Indirect object: something with a unique ID. class VCL_DLLPUBLIC PDFObjectElement : public PDFElement { /// The document owning this element. PDFDocument& m_rDoc; double m_fObjectValue; double m_fGenerationValue; std::map m_aDictionary; /// If set, the object contains this number element (outside any dictionary/array). PDFNumberElement* m_pNumberElement; /// Position after the '<<' token. sal_uInt64 m_nDictionaryOffset; /// Length of the dictionary buffer till (before) the '>>' token. sal_uInt64 m_nDictionaryLength; PDFDictionaryElement* m_pDictionaryElement; /// Position after the '[' token, if m_pArrayElement is set. sal_uInt64 m_nArrayOffset; /// Length of the array buffer till (before) the ']' token. sal_uInt64 m_nArrayLength; /// The contained direct array, if any. PDFArrayElement* m_pArrayElement; /// The stream of this object, used when this is an object stream. PDFStreamElement* m_pStreamElement; /// Objects of an object stream. std::vector> m_aStoredElements; /// Elements of an object in an object stream. std::vector> m_aElements; /// Uncompressed buffer of an object in an object stream. std::unique_ptr m_pStreamBuffer; /// List of all reference elements inside this object's dictionary and /// nested dictionaries. std::vector m_aDictionaryReferences; public: PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue); bool Read(SvStream& rStream) override; PDFElement* Lookup(const OString& rDictionaryKey); PDFObjectElement* LookupObject(const OString& rDictionaryKey); double GetObjectValue() const; void SetDictionaryOffset(sal_uInt64 nDictionaryOffset); sal_uInt64 GetDictionaryOffset(); void SetDictionaryLength(sal_uInt64 nDictionaryLength); sal_uInt64 GetDictionaryLength(); PDFDictionaryElement* GetDictionary(); void SetDictionary(PDFDictionaryElement* pDictionaryElement); void SetNumberElement(PDFNumberElement* pNumberElement); PDFNumberElement* GetNumberElement() const; /// Get access to the parsed key-value items from the object dictionary. const std::map& GetDictionaryItems(); const std::vector& GetDictionaryReferences() const; void AddDictionaryReference(PDFReferenceElement* pReference); void SetArray(PDFArrayElement* pArrayElement); void SetStream(PDFStreamElement* pStreamElement); /// Access to the stream of the object, if it has any. PDFStreamElement* GetStream() const; void SetArrayOffset(sal_uInt64 nArrayOffset); sal_uInt64 GetArrayOffset(); void SetArrayLength(sal_uInt64 nArrayLength); sal_uInt64 GetArrayLength(); PDFArrayElement* GetArray() const; /// Parse objects stored in this object stream. void ParseStoredObjects(); std::vector>& GetStoredElements(); SvMemoryStream* GetStreamBuffer() const; void SetStreamBuffer(std::unique_ptr& pStreamBuffer); PDFDocument& GetDocument(); }; /// Array object: a list. class VCL_DLLPUBLIC PDFArrayElement : public PDFElement { std::vector m_aElements; /// The object that contains this array. PDFObjectElement* m_pObject; public: PDFArrayElement(PDFObjectElement* pObject); bool Read(SvStream& rStream) override; void PushBack(PDFElement* pElement); const std::vector& GetElements(); }; /// Reference object: something with a unique ID. class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement { PDFDocument& m_rDoc; int m_fObjectValue; int m_fGenerationValue; /// Location after the 'R' token. sal_uInt64 m_nOffset = 0; /// The element providing the object number. PDFNumberElement& m_rObject; public: PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject, PDFNumberElement const& rGeneration); bool Read(SvStream& rStream) override; /// Assuming the reference points to a number object, return its value. double LookupNumber(SvStream& rStream) const; /// Lookup referenced object, without assuming anything about its contents. PDFObjectElement* LookupObject(); int GetObjectValue() const; int GetGenerationValue() const; sal_uInt64 GetOffset() const; PDFNumberElement& GetObjectElement() const; }; /// Stream object: a byte array with a known length. class VCL_DLLPUBLIC PDFStreamElement : public PDFElement { size_t m_nLength; sal_uInt64 m_nOffset; /// The byte array itself. SvMemoryStream m_aMemory; public: explicit PDFStreamElement(size_t nLength); bool Read(SvStream& rStream) override; sal_uInt64 GetOffset() const; SvMemoryStream& GetMemory(); }; /// Name object: a key string. class VCL_DLLPUBLIC PDFNameElement : public PDFElement { OString m_aValue; /// Offset after the '/' token. sal_uInt64 m_nLocation = 0; /// Length till the next token start. sal_uInt64 m_nLength = 0; public: PDFNameElement(); bool Read(SvStream& rStream) override; const OString& GetValue() const; sal_uInt64 GetLocation() const; sal_uInt64 GetLength() const; }; /// Dictionary object: a set key-value pairs. class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement { /// Key-value pairs when the dictionary is a nested value. std::map m_aItems; /// Offset after the '<<' token. sal_uInt64 m_nLocation = 0; /// Position after the '/' token. std::map m_aDictionaryKeyOffset; /// Length of the dictionary key and value, till (before) the next token. std::map m_aDictionaryKeyValueLength; public: PDFDictionaryElement(); bool Read(SvStream& rStream) override; static size_t Parse(const std::vector>& rElements, PDFElement* pThis, std::map& rDictionary); static PDFElement* Lookup(const std::map& rDictionary, const OString& rKey); void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset); sal_uInt64 GetKeyOffset(const OString& rKey) const; void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength); sal_uInt64 GetKeyValueLength(const OString& rKey) const; const std::map& GetItems() const; /// Looks up an object which is only referenced in this dictionary. PDFObjectElement* LookupObject(const OString& rDictionaryKey); /// Looks up an element which is contained in this dictionary. PDFElement* LookupElement(const OString& rDictionaryKey); }; enum class TokenizeMode { /// Full file. END_OF_STREAM, /// Till the first %%EOF token. EOF_TOKEN, /// Till the end of the current object. END_OF_OBJECT, /// Same as END_OF_OBJECT, but for object streams (no endobj keyword). STORED_OBJECT }; /// The type column of an entry in a cross-reference stream. enum class XRefEntryType { /// xref "f" or xref stream "0". FREE, /// xref "n" or xref stream "1". NOT_COMPRESSED, /// xref stream "2". COMPRESSED }; /// An entry in a cross-reference stream. struct XRefEntry { XRefEntryType m_eType = XRefEntryType::NOT_COMPRESSED; /** * Non-compressed: The byte offset of the object, starting from the * beginning of the file. * Compressed: The object number of the object stream in which this object is * stored. */ sal_uInt64 m_nOffset = 0; /// Are changed as part of an incremental update?. bool m_bDirty = false; XRefEntry(); }; /// Hex string: in form. class VCL_DLLPUBLIC PDFHexStringElement : public PDFElement { OString m_aValue; public: bool Read(SvStream& rStream) override; const OString& GetValue() const; }; /// Literal string: in (asdf) form. class VCL_DLLPUBLIC PDFLiteralStringElement : public PDFElement { OString m_aValue; public: bool Read(SvStream& rStream) override; const OString& GetValue() const; }; /// Numbering object: an integer or a real. class VCL_DLLPUBLIC PDFNumberElement : public PDFElement { /// Input file start location. sal_uInt64 m_nOffset = 0; /// Input file token length. sal_uInt64 m_nLength = 0; double m_fValue = 0; public: PDFNumberElement(); bool Read(SvStream& rStream) override; double GetValue() const; sal_uInt64 GetLocation() const; sal_uInt64 GetLength() const; }; /** * In-memory representation of an on-disk PDF document. * * The PDF element list is not meant to be saved back to disk, but some * elements remember their source offset / length, and based on that it's * possible to modify the input file. */ class VCL_DLLPUBLIC PDFDocument { /// This vector owns all elements. std::vector> m_aElements; /// Object ID <-> object offset map. std::map m_aXRef; /// Object offset <-> Object pointer map. std::map m_aOffsetObjects; /// Object ID <-> Object pointer map. std::map m_aIDObjects; /// List of xref offsets we know. std::vector m_aStartXRefs; /// Offsets of trailers, from latest to oldest. std::vector m_aTrailerOffsets; /// Trailer offset <-> Trailer pointer map. std::map m_aOffsetTrailers; /// List of EOF offsets we know. std::vector m_aEOFs; PDFTrailerElement* m_pTrailer = nullptr; /// When m_pTrailer is nullptr, this can still have a dictionary. PDFObjectElement* m_pXRefStream = nullptr; /// All editing takes place in this buffer, if it happens. SvMemoryStream m_aEditBuffer; /// Suggest a minimal, yet free signature ID to use for the next signature. sal_uInt32 GetNextSignature(); /// Write the signature object as part of signing. sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset); /// Write the appearance object as part of signing. sal_Int32 WriteAppearanceObject(); /// Write the annot object as part of signing. sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId); /// Write the updated Page object as part of signing. bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId); /// Write the updated Catalog object as part of signing. bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot); /// Write the updated cross-references as part of signing. void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot); public: PDFDocument(); PDFDocument& operator=(const PDFDocument&) = delete; PDFDocument(const PDFDocument&) = delete; /// @name Low-level functions, to be used by PDFElement subclasses. //@{ /// Decode a hex dump. static std::vector DecodeHexString(PDFHexStringElement const* pElement); static OString ReadKeyword(SvStream& rStream); static size_t FindStartXRef(SvStream& rStream); void ReadXRef(SvStream& rStream); void ReadXRefStream(SvStream& rStream); static void SkipWhitespace(SvStream& rStream); /// Instead of all whitespace, just skip CR and NL characters. static void SkipLineBreaks(SvStream& rStream); size_t GetObjectOffset(size_t nIndex) const; const std::vector>& GetElements(); std::vector GetPages(); /// Remember the end location of an EOF token. void PushBackEOF(size_t nOffset); /// Look up object based on object number, possibly by parsing object streams. PDFObjectElement* LookupObject(size_t nObjectNumber); /// Access to the input document, even after the input stream is gone. SvMemoryStream& GetEditBuffer(); /// Tokenize elements from current offset. bool Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector>& rElements, PDFObjectElement* pObjectElement); /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID. void SetIDObject(size_t nID, PDFObjectElement* pObject); //@} /// @name High-level functions, to be used by others. //@{ /// Read elements from the start of the stream till its end. bool Read(SvStream& rStream); /// Sign the read document with xCertificate in the edit buffer. bool Sign(const css::uno::Reference& xCertificate, const OUString& rDescription, bool bAdES); /// Serializes the contents of the edit buffer. bool Write(SvStream& rStream); /// Get a list of signatures embedded into this document. std::vector GetSignatureWidgets(); /// Remove the nth signature from read document in the edit buffer. bool RemoveSignature(size_t nPosition); //@} }; } // namespace pdfio } // namespace xmlsecurity #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX /* vim:set shiftwidth=4 softtabstop=4 expandtab: */