diff options
Diffstat (limited to 'include/vcl/filter/pdfdocument.hxx')
-rw-r--r-- | include/vcl/filter/pdfdocument.hxx | 355 |
1 files changed, 355 insertions, 0 deletions
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx new file mode 100644 index 000000000000..8d362e7e339c --- /dev/null +++ b/include/vcl/filter/pdfdocument.hxx @@ -0,0 +1,355 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX +#define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX + +#include <map> +#include <vector> + +#include <com/sun/star/security/XCertificate.hpp> + +#include <tools/stream.hxx> + +#include <vcl/dllapi.h> + +namespace vcl +{ +namespace filter +{ + +class PDFTrailerElement; +class PDFHexStringElement; +class PDFReferenceElement; +class PDFDocument; +class PDFDictionaryElement; +class PDFArrayElement; +class PDFStreamElement; + +/// A byte range in a PDF file. +class VCL_DLLPUBLIC PDFElement +{ +public: + virtual bool Read(SvStream& rStream) = 0; + virtual ~PDFElement() { } +}; + +/// Indirect object: something with a unique ID. +class VCL_DLLPUBLIC PDFObjectElement : public PDFElement +{ + PDFDocument& m_rDoc; + double m_fObjectValue; + double m_fGenerationValue; + std::map<OString, PDFElement*> m_aDictionary; + /// Position after the '<<' token. + sal_uInt64 m_nDictionaryOffset; + /// Length of the dictionary buffer till (before) the '<<' token. + sal_uInt64 m_nDictionaryLength; + PDFDictionaryElement* m_pDictionaryElement; + /// The contained direct array, if any. + PDFArrayElement* m_pArrayElement; + /// The stream of this object, used when this is an object stream. + PDFStreamElement* m_pStreamElement; + /// Objects of an object stream. + std::vector< std::unique_ptr<PDFObjectElement> > m_aStoredElements; + /// Elements of an object in an object stream. + std::vector< std::unique_ptr<PDFElement> > m_aElements; + /// Uncompressed buffer of an object in an object stream. + std::unique_ptr<SvMemoryStream> m_pStreamBuffer; + +public: + PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue); + bool Read(SvStream& rStream) override; + PDFElement* Lookup(const OString& rDictionaryKey); + PDFObjectElement* LookupObject(const OString& rDictionaryKey); + double GetObjectValue() const; + void SetDictionaryOffset(sal_uInt64 nDictionaryOffset); + sal_uInt64 GetDictionaryOffset(); + void SetDictionaryLength(sal_uInt64 nDictionaryLength); + sal_uInt64 GetDictionaryLength(); + PDFDictionaryElement* GetDictionary() const; + void SetDictionary(PDFDictionaryElement* pDictionaryElement); + void SetArray(PDFArrayElement* pArrayElement); + void SetStream(PDFStreamElement* pStreamElement); + /// Access to the stream of the object, if it has any. + PDFStreamElement* GetStream() const; + PDFArrayElement* GetArray() const; + /// Parse objects stored in this object stream. + void ParseStoredObjects(); + std::vector< std::unique_ptr<PDFElement> >& GetStoredElements(); + SvMemoryStream* GetStreamBuffer() const; + void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer); +}; + +/// Array object: a list. +class VCL_DLLPUBLIC PDFArrayElement : public PDFElement +{ + /// Location after the '[' token. + sal_uInt64 m_nOffset = 0; + std::vector<PDFElement*> m_aElements; +public: + PDFArrayElement(); + bool Read(SvStream& rStream) override; + void PushBack(PDFElement* pElement); + const std::vector<PDFElement*>& GetElements(); +}; + +/// Reference object: something with a unique ID. +class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement +{ + PDFDocument& m_rDoc; + int m_fObjectValue; + int m_fGenerationValue; + +public: + PDFReferenceElement(PDFDocument& rDoc, int fObjectValue, int fGenerationValue); + bool Read(SvStream& rStream) override; + /// Assuming the reference points to a number object, return its value. + double LookupNumber(SvStream& rStream) const; + /// Lookup referenced object, without assuming anything about its contents. + PDFObjectElement* LookupObject(); + int GetObjectValue() const; + int GetGenerationValue() const; +}; + +/// Stream object: a byte array with a known length. +class VCL_DLLPUBLIC PDFStreamElement : public PDFElement +{ + size_t m_nLength; + sal_uInt64 m_nOffset; + /// The byte array itself. + SvMemoryStream m_aMemory; + +public: + explicit PDFStreamElement(size_t nLength); + bool Read(SvStream& rStream) override; + sal_uInt64 GetOffset() const; + SvMemoryStream& GetMemory(); +}; + +/// Name object: a key string. +class VCL_DLLPUBLIC PDFNameElement : public PDFElement +{ + OString m_aValue; + /// Offset after the '/' token. + sal_uInt64 m_nLocation; + /// Length till the next token start. + sal_uInt64 m_nLength; +public: + PDFNameElement(); + bool Read(SvStream& rStream) override; + const OString& GetValue() const; + sal_uInt64 GetLocation() const; + sal_uInt64 GetLength() const; +}; + +/// Dictionary object: a set key-value pairs. +class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement +{ + /// Key-value pairs when the dictionary is a nested value. + std::map<OString, PDFElement*> m_aItems; + /// Offset after the '<<' token. + sal_uInt64 m_nLocation = 0; + /// Position after the '/' token. + std::map<OString, sal_uInt64> m_aDictionaryKeyOffset; + /// Length of the dictionary key and value, till (before) the next token. + std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength; + +public: + PDFDictionaryElement(); + bool Read(SvStream& rStream) override; + + static size_t Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary); + static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey); + void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset); + sal_uInt64 GetKeyOffset(const OString& rKey) const; + void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength); + sal_uInt64 GetKeyValueLength(const OString& rKey) const; + const std::map<OString, PDFElement*>& GetItems() const; + /// Looks up an object which is only referenced in this dictionary. + PDFObjectElement* LookupObject(const OString& rDictionaryKey); + /// Looks up an element which is contained in this dictionary. + PDFElement* LookupElement(const OString& rDictionaryKey); +}; + +enum class TokenizeMode +{ + /// Full file. + END_OF_STREAM, + /// Till the first %%EOF token. + EOF_TOKEN, + /// Till the end of the current object. + END_OF_OBJECT, + /// Same as END_OF_OBJECT, but for object streams (no endobj keyword). + STORED_OBJECT +}; + +/// The type column of an entry in a cross-reference stream. +enum class XRefEntryType +{ + /// xref "f" or xref stream "0". + FREE, + /// xref "n" or xref stream "1". + NOT_COMPRESSED, + /// xref stream "2. + COMPRESSED +}; + +/// An entry in a cross-reference stream. +struct XRefEntry +{ + XRefEntryType m_eType; + /** + * Non-compressed: The byte offset of the object, starting from the + * beginning of the file. + * Compressed: The object number of the object stream in which this object is + * stored. + */ + sal_uInt64 m_nOffset; + /** + * Non-compressed: The generation number of the object. + * Compressed: The index of this object within the object stream. + */ + sal_uInt64 m_nGenerationNumber; + /// Are changed as part of an incremental update?. + bool m_bDirty; + + XRefEntry(); +}; + +/// Hex string: in <AABB> form. +class VCL_DLLPUBLIC PDFHexStringElement : public PDFElement +{ + OString m_aValue; +public: + bool Read(SvStream& rStream) override; + const OString& GetValue() const; +}; + +/// Literal string: in (asdf) form. +class VCL_DLLPUBLIC PDFLiteralStringElement : public PDFElement +{ + OString m_aValue; +public: + bool Read(SvStream& rStream) override; + const OString& GetValue() const; +}; + +/// Numbering object: an integer or a real. +class VCL_DLLPUBLIC PDFNumberElement : public PDFElement +{ + /// Input file start location. + sal_uInt64 m_nOffset = 0; + /// Input file token length. + sal_uInt64 m_nLength = 0; + double m_fValue = 0; + +public: + PDFNumberElement(); + bool Read(SvStream& rStream) override; + double GetValue() const; + sal_uInt64 GetLocation() const; + sal_uInt64 GetLength() const; +}; + +/** + * In-memory representation of an on-disk PDF document. + * + * The PDF element list is not meant to be saved back to disk, but some + * elements remember their source offset / length, and based on that it's + * possible to modify the input file. + */ +class VCL_DLLPUBLIC PDFDocument +{ + /// This vector owns all elements. + std::vector< std::unique_ptr<PDFElement> > m_aElements; + /// Object ID <-> object offset map. + std::map<size_t, XRefEntry> m_aXRef; + /// Object offset <-> Object pointer map. + std::map<size_t, PDFObjectElement*> m_aOffsetObjects; + /// Object ID <-> Object pointer map. + std::map<size_t, PDFObjectElement*> m_aIDObjects; + /// List of xref offsets we know. + std::vector<size_t> m_aStartXRefs; + /// List of EOF offsets we know. + std::vector<size_t> m_aEOFs; + PDFTrailerElement* m_pTrailer; + /// When m_pTrailer is nullptr, this can still have a dictionary. + PDFObjectElement* m_pXRefStream; + /// All editing takes place in this buffer, if it happens. + SvMemoryStream m_aEditBuffer; + + static int AsHex(char ch); + /// Suggest a minimal, yet free signature ID to use for the next signature. + sal_uInt32 GetNextSignature(); + /// Write the signature object as part of signing. + sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rSignatureContentOffset); + /// Write the appearance object as part of signing. + sal_Int32 WriteAppearanceObject(); + /// Write the annot object as part of signing. + sal_Int32 WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId); + /// Write the updated Page object as part of signing. + bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId); + /// Write the updated Catalog object as part of signing. + bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot); + /// Write the updated cross-references as part of signing. + void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot); + +public: + PDFDocument(); + PDFDocument& operator=(const PDFDocument&) = delete; + PDFDocument(const PDFDocument&) = delete; + /// @name Low-level functions, to be used by PDFElement subclasses. + //@{ + /// Decode a hex dump. + static std::vector<unsigned char> DecodeHexString(PDFHexStringElement* pElement); + static OString ReadKeyword(SvStream& rStream); + static size_t FindStartXRef(SvStream& rStream); + void ReadXRef(SvStream& rStream); + void ReadXRefStream(SvStream& rStream); + static void SkipWhitespace(SvStream& rStream); + /// Instead of all whitespace, just skip CR and NL characters. + static void SkipLineBreaks(SvStream& rStream); + size_t GetObjectOffset(size_t nIndex) const; + const std::vector< std::unique_ptr<PDFElement> >& GetElements(); + std::vector<PDFObjectElement*> GetPages(); + /// Remember the end location of an EOF token. + void PushBackEOF(size_t nOffset); + /// Look up object based on object number, possibly by parsing object streams. + PDFObjectElement* LookupObject(size_t nObjectNumber); + /// Access to the input document, even after the input stream is gone. + SvMemoryStream& GetEditBuffer(); + /// Tokenize elements from current offset. + bool Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObject); + /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID. + void SetIDObject(size_t nID, PDFObjectElement* pObject); + //@} + + /// @name High-level functions, to be used by others. + //@{ + /// Read elements from the start of the stream till its end. + bool Read(SvStream& rStream); + /// Sign the read document with xCertificate in the edit buffer. + bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES); + /// Serializes the contents of the edit buffer. + bool Write(SvStream& rStream); + /// Get a list of signatures embedded into this document. + std::vector<PDFObjectElement*> GetSignatureWidgets(); + /// Remove the nth signature from read document in the edit buffer. + bool RemoveSignature(size_t nPosition); + //@} +}; + +} // namespace pdfio +} // namespace xmlsecurity + +#endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |