/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #ifndef INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX #define INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include class SfxMedium; class SfxViewFrame; class SdrObject; class SvxMacroTableDtor; class SwDoc; class SwPaM; class SwViewShell; class SwStartNode; class SwFormatColl; class SwField; class SwHTMLForm_Impl; class SwApplet_Impl; struct SwHTMLFootEndNote_Impl; class HTMLTableCnts; struct SwPending; class SvxCSS1PropertyInfo; struct ImplSVEvent; constexpr tools::Long HTML_CJK_PARSPACE = o3tl::toTwips(25, o3tl::Length::mm10); // 2.5mm constexpr tools::Long HTML_CTL_PARSPACE = o3tl::toTwips(25, o3tl::Length::mm10); // 2.5mm constexpr tools::Long HTML_DFLT_IMG_WIDTH = o3tl::toTwips(2, o3tl::Length::cm); // 2cm constexpr tools::Long HTML_DFLT_IMG_HEIGHT = o3tl::toTwips(1, o3tl::Length::cm); // 1cm // some things you often need extern HTMLOptionEnum const aHTMLPAlignTable[]; extern HTMLOptionEnum const aHTMLImgHAlignTable[]; extern HTMLOptionEnum const 
aHTMLImgVAlignTable[]; // attribute stack: class HTMLAttr; typedef std::deque HTMLAttrs; // Table of attributes: The order here is important: The attributes in the // beginning of the table will set first in EndAllAttrs. struct HTMLAttrTable { HTMLAttr* pKeep; // frame attributes HTMLAttr* pBox; HTMLAttr* pBrush; HTMLAttr* pBreak; HTMLAttr* pPageDesc; HTMLAttr* pLRSpace; // paragraph attributes HTMLAttr* pULSpace; HTMLAttr* pLineSpacing; HTMLAttr* pAdjust; HTMLAttr* pDropCap; HTMLAttr* pSplit; HTMLAttr* pWidows; HTMLAttr* pOrphans; HTMLAttr* pDirection; HTMLAttr* pCharFormats; // text attributes HTMLAttr* pINetFormat; HTMLAttr* pBold; // character attributes HTMLAttr* pBoldCJK; HTMLAttr* pBoldCTL; HTMLAttr* pItalic; HTMLAttr* pItalicCJK; HTMLAttr* pItalicCTL; HTMLAttr* pStrike; HTMLAttr* pUnderline; HTMLAttr* pBlink; HTMLAttr* pFont; HTMLAttr* pFontCJK; HTMLAttr* pFontCTL; HTMLAttr* pFontHeight; HTMLAttr* pFontHeightCJK; HTMLAttr* pFontHeightCTL; HTMLAttr* pFontColor; HTMLAttr* pEscapement; HTMLAttr* pCaseMap; HTMLAttr* pKerning; // (only for SPACER) HTMLAttr* pCharBrush; // character background HTMLAttr* pLanguage; HTMLAttr* pLanguageCJK; HTMLAttr* pLanguageCTL; HTMLAttr* pCharBox; }; class HTMLAttr { friend class SwHTMLParser; friend class CellSaveStruct; SwNodeIndex m_nStartPara; SwNodeIndex m_nEndPara; sal_Int32 m_nStartContent; sal_Int32 m_nEndContent; bool m_bInsAtStart : 1; bool m_bLikePara : 1; // set attribute above the whole paragraph bool m_bValid : 1; // is the attribute valid? 
std::unique_ptr m_pItem; std::shared_ptr m_xAttrTab; HTMLAttr *m_pNext; // still to close attributes with different values HTMLAttr *m_pPrev; // already closed but not set attributes HTMLAttr **m_ppHead; // list head HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem, HTMLAttr **pHd, const std::shared_ptr& rAttrTab ); HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara, sal_Int32 nEndCnt, HTMLAttr **pHd, const std::shared_ptr& rAttrTab ); public: ~HTMLAttr(); HTMLAttr *Clone( const SwNodeIndex& rEndPara, sal_Int32 nEndCnt ) const; void Reset( const SwNodeIndex& rSttPara, sal_Int32 nSttCnt, HTMLAttr **pHd, const std::shared_ptr& rAttrTab ); inline void SetStart( const SwPosition& rPos ); SwNodeOffset GetStartParagraphIdx() const { return m_nStartPara.GetIndex(); } SwNodeOffset GetEndParagraphIdx() const { return m_nEndPara.GetIndex(); } const SwNodeIndex& GetStartParagraph() const { return m_nStartPara; } const SwNodeIndex& GetEndParagraph() const { return m_nEndPara; } sal_Int32 GetStartContent() const { return m_nStartContent; } sal_Int32 GetEndContent() const { return m_nEndContent; } bool IsLikePara() const { return m_bLikePara; } void SetLikePara() { m_bLikePara = true; } SfxPoolItem& GetItem() { return *m_pItem; } const SfxPoolItem& GetItem() const { return *m_pItem; } HTMLAttr *GetNext() const { return m_pNext; } void InsertNext( HTMLAttr *pNxt ) { m_pNext = pNxt; } HTMLAttr *GetPrev() const { return m_pPrev; } void InsertPrev( HTMLAttr *pPrv ); void ClearPrev() { m_pPrev = nullptr; } void SetHead(HTMLAttr **ppHd, const std::shared_ptr& rAttrTab) { m_ppHead = ppHd; m_xAttrTab = rAttrTab; } // During setting attributes from styles it can happen that these // shouldn't be set anymore. To delete them would be very expensive, because // you don't know all the places where they are linked in. Therefore they're // made invalid and deleted at the next call of SetAttr_(). 
void Invalidate() { m_bValid = false; } }; class HTMLAttrContext_SaveDoc; enum SwHTMLAppendMode { AM_NORMAL, // no paragraph spacing handling AM_NOSPACE, // set spacing hard to 0cm AM_SPACE, // set spacing hard to 0.5cm AM_SOFTNOSPACE, // don't set spacing, but save 0cm AM_NONE // no append }; class HTMLAttrContext { HTMLAttrs m_aAttrs; // the attributes created in the context OUString m_aClass; // context class std::unique_ptr m_pSaveDocContext; std::unique_ptr m_pFrameItemSet; HtmlTokenId m_nToken; // the token of the context sal_uInt16 m_nTextFormatColl; // a style created in the context or zero sal_uInt16 m_nLeftMargin; // a changed left border sal_uInt16 m_nRightMargin; // a changed right border sal_uInt16 m_nFirstLineIndent; // a changed first line indent sal_uInt16 m_nUpperSpace; sal_uInt16 m_nLowerSpace; SwHTMLAppendMode m_eAppend; bool m_bLRSpaceChanged : 1; // left/right border, changed indent? bool m_bULSpaceChanged : 1; // top/bottom border changed? bool m_bDefaultTextFormatColl : 1;// nTextFormatColl is only default bool m_bSpansSection : 1; // the context opens a SwSection bool m_bPopStack : 1; // delete above stack elements bool m_bFinishPREListingXMP : 1; bool m_bRestartPRE : 1; bool m_bRestartXMP : 1; bool m_bRestartListing : 1; bool m_bHeaderOrFooter : 1; bool m_bVisible = true; public: void ClearSaveDocContext(); HTMLAttrContext( HtmlTokenId nTokn, sal_uInt16 nPoolId, const OUString& rClass, bool bDfltColl=false ); explicit HTMLAttrContext( HtmlTokenId nTokn ); ~HTMLAttrContext(); HtmlTokenId GetToken() const { return m_nToken; } sal_uInt16 GetTextFormatColl() const { return m_bDefaultTextFormatColl ? 0 : m_nTextFormatColl; } sal_uInt16 GetDefaultTextFormatColl() const { return m_bDefaultTextFormatColl ? 
m_nTextFormatColl : 0; } const OUString& GetClass() const { return m_aClass; } inline void SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight, short nIndent ); bool IsLRSpaceChanged() const { return m_bLRSpaceChanged; } inline void GetMargins( sal_uInt16& nLeft, sal_uInt16& nRight, short &nIndent ) const; inline void SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower ); bool IsULSpaceChanged() const { return m_bULSpaceChanged; } inline void GetULSpace( sal_uInt16& rUpper, sal_uInt16& rLower ) const; bool HasAttrs() const { return !m_aAttrs.empty(); } const HTMLAttrs& GetAttrs() const { return m_aAttrs; } HTMLAttrs& GetAttrs() { return m_aAttrs; } void SetSpansSection( bool bSet ) { m_bSpansSection = bSet; } bool GetSpansSection() const { return m_bSpansSection; } void SetPopStack( bool bSet ) { m_bPopStack = bSet; } bool GetPopStack() const { return m_bPopStack; } bool HasSaveDocContext() const { return m_pSaveDocContext!=nullptr; } HTMLAttrContext_SaveDoc *GetSaveDocContext( bool bCreate=false ); const SfxItemSet *GetFrameItemSet() const { return m_pFrameItemSet.get(); } SfxItemSet *GetFrameItemSet( SwDoc *pCreateDoc ); void SetFinishPREListingXMP( bool bSet ) { m_bFinishPREListingXMP = bSet; } bool IsFinishPREListingXMP() const { return m_bFinishPREListingXMP; } void SetRestartPRE( bool bSet ) { m_bRestartPRE = bSet; } bool IsRestartPRE() const { return m_bRestartPRE; } void SetRestartXMP( bool bSet ) { m_bRestartXMP = bSet; } bool IsRestartXMP() const { return m_bRestartXMP; } void SetRestartListing( bool bSet ) { m_bRestartListing = bSet; } bool IsRestartListing() const { return m_bRestartListing; } void SetHeaderOrFooter( bool bSet ) { m_bHeaderOrFooter = bSet; } bool IsHeaderOrFooter() const { return m_bHeaderOrFooter; } void SetAppendMode( SwHTMLAppendMode eMode ) { m_eAppend = eMode; } SwHTMLAppendMode GetAppendMode() const { return m_eAppend; } void SetVisible(bool bVisible) { m_bVisible = bVisible; } bool IsVisible() const { return m_bVisible; } }; typedef 
std::vector> HTMLAttrContexts; class HTMLTable; class SwCSS1Parser; class SwHTMLNumRuleInfo; typedef std::vector> ImageMaps; enum class HtmlContextFlags { ProtectStack = 0x0001, StripPara = 0x0002, KeepNumrule = 0x0004, HeaderDist = 0x0008, FooterDist = 0x0010, KeepAttrs = 0x0020, MultiColMask = StripPara | KeepNumrule | KeepAttrs // for headers, footers or footnotes }; namespace o3tl { template<> struct typed_flags : is_typed_flags {}; } enum class HtmlFrameFormatFlags { Box = 0x0001, Background = 0x0002, Padding = 0x0004, Direction = 0x0008, }; namespace o3tl { template<> struct typed_flags : is_typed_flags {}; } class SwHTMLFrameFormatListener : public SvtListener { SwFrameFormat* m_pFrameFormat; public: SwHTMLFrameFormatListener(SwFrameFormat* pFrameFormat); SwFrameFormat* GetFrameFormat() { return m_pFrameFormat; } virtual void Notify(const SfxHint&) override; }; class SwHTMLParser : public SfxHTMLParser, public SvtListener { friend class SectionSaveStruct; friend class CellSaveStruct; friend class CaptionSaveStruct; /* Progress bar */ std::unique_ptr m_xProgress; OUString m_aPathToFile; OUString m_sBaseURL; OUString m_aBasicLib; OUString m_aBasicModule; OUString m_aScriptSource; // content of the current script block OUString m_aScriptType; // type of read script (StarBasic/VB/JAVA) OUString m_aScriptURL; // script URL OUString m_aStyleSource; // content of current style sheet OUString m_aContents; // text of current marquee, field and so OUStringBuffer m_sTitle; OUString m_aUnknownToken; // a started unknown token OUString m_aBulletGrfs[MAXLEVEL]; OUString m_sJmpMark; std::vector m_aBaseFontStack; // stack for // Bit 0-2: font size (1-7) std::vector m_aFontStack; // stack for , , // Bit 0-2: font size (1-7) // Bit 15: font colour was set HTMLAttrs m_aSetAttrTab;// "closed", not set attributes HTMLAttrs m_aParaAttrs; // temporary paragraph attributes std::shared_ptr m_xAttrTab; // "open" attributes HTMLAttrContexts m_aContexts;// the current context of 
attribute/token std::vector> m_aMoveFlyFrames;// Fly-Frames, the anchor is moved std::deque m_aMoveFlyCnts;// and the Content-Positions //stray SwTableBoxes which need to be deleted to avoid leaking, but hold //onto them until parsing is done std::vector> m_aOrphanedTableBoxes; std::unique_ptr m_pAppletImpl; // current applet std::unique_ptr m_pCSS1Parser; // Style-Sheet-Parser std::unique_ptr m_pNumRuleInfo; std::vector m_vPendingStack; rtl::Reference m_xDoc; SwPaM *m_pPam; // SwPosition should be enough, or ?? SwViewShell *m_pActionViewShell; // SwViewShell, where StartAction was called SwNodeIndex *m_pSttNdIdx; std::vector m_aTables; std::shared_ptr m_xTable; // current "outermost" table SwHTMLForm_Impl* m_pFormImpl; // current form SdrObject *m_pMarquee; // current marquee std::unique_ptr m_xField; // current field ImageMap *m_pImageMap; // current image map std::unique_ptr m_pImageMaps; ///< all Image-Maps that have been read std::unique_ptr m_pFootEndNoteImpl; Size m_aHTMLPageSize; // page size of HTML template sal_uInt32 m_aFontHeights[7]; // font heights 1-7 ImplSVEvent * m_nEventId; sal_uInt16 m_nBaseFontStMin; sal_uInt16 m_nFontStMin; sal_uInt16 m_nDefListDeep; sal_uInt16 m_nFontStHeadStart; // elements in font stack at sal_uInt16 m_nSBModuleCnt; // counter for basic modules sal_uInt16 m_nMissingImgMaps; // How many image maps are still missing? size_t m_nParaCnt; size_t m_nContextStMin; // lower limit of PopContext size_t m_nContextStAttrMin; // lower limit of attributes sal_uInt16 m_nSelectEntryCnt; // Number of entries in the actual listbox HtmlTokenId m_nOpenParaToken; // opened paragraph element enum class JumpToMarks { NONE, Mark, Table, Region, Graphic }; JumpToMarks m_eJumpTo; #ifdef DBG_UTIL sal_uInt16 m_nContinue; // depth of Continue calls #endif SvxAdjust m_eParaAdjust; // adjustment of current paragraph HTMLScriptLanguage m_eScriptLang; // current script language bool m_bOldIsHTMLMode : 1; // Was it a HTML document? 
bool m_bDocInitialized : 1; // document resp. shell was initialize // flag to prevent double init via recursion bool m_bViewCreated : 1; // the view was already created (asynchronous) bool m_bSetModEnabled : 1; bool m_bInFloatingFrame : 1; // We are in a floating frame bool m_bInField : 1; bool m_bKeepUnknown : 1; // handle unknown/not supported tokens // 8 bool m_bCallNextToken : 1; // In tables: call NextToken in any case bool m_bIgnoreRawData : 1; // ignore content of script/style bool m_bLBEntrySelected : 1; // Is the current option selected? bool m_bTAIgnoreNewPara : 1; // ignore next LF in text area? bool m_bFixMarqueeWidth : 1; // Change size of marquee? bool m_bUpperSpace : 1; // top paragraph spacing is needed bool m_bNoParSpace : 1; // 16 bool m_bInNoEmbed : 1; // we are in a NOEMBED area bool m_bInTitle : 1; // we are in title bool m_bChkJumpMark : 1; // maybe jump to predetermined mark bool m_bUpdateDocStat : 1; bool m_bFixSelectWidth : 1; // Set new width of select? bool m_bTextArea : 1; // 24 bool m_bSelect : 1; bool m_bInFootEndNoteAnchor : 1; bool m_bInFootEndNoteSymbol : 1; bool m_bIgnoreHTMLComments : 1; bool m_bRemoveHidden : 1; // the filter implementation might set the hidden flag bool m_bBodySeen : 1; bool m_bReadingHeaderOrFooter : 1; bool m_bNotifyMacroEventRead : 1; bool m_bFuzzing; bool m_isInTableStructure; int m_nTableDepth; int m_nFloatingFrames; int m_nListItems; /// the names corresponding to the DOCINFO field subtypes INFO[1-4] OUString m_InfoNames[4]; SfxViewFrame* m_pTempViewFrame; bool m_bXHTML = false; bool m_bReqIF = false; /** * Non-owning pointers to already inserted OLE nodes, matching opened * XHTML elements. */ std::stack m_aEmbeds; std::set m_aAllowedRTFOLEMimeTypes; /// This is the URL of the outer data if it's not OLE2 or an image. 
OUString m_aEmbedURL; void DeleteFormImpl(); void DocumentDetected(); void Show(); void ShowStatline(); SwViewShell *CallStartAction( SwViewShell *pVSh = nullptr, bool bChkPtr = true ); SwViewShell *CallEndAction( bool bChkAction = false, bool bChkPtr = true ); SwViewShell *CheckActionViewShell(); DECL_LINK( AsyncCallback, void*, void ); // set attribute on document void SetAttr_( bool bChkEnd, bool bBeforeTable, std::deque> *pPostIts ); void SetAttr( bool bChkEnd = true, bool bBeforeTable = false, std::deque> *pPostIts = nullptr ) { if( !m_aSetAttrTab.empty() || !m_aMoveFlyFrames.empty() ) SetAttr_( bChkEnd, bBeforeTable, pPostIts ); } HTMLAttr **GetAttrTabEntry( sal_uInt16 nWhich ); // create a new text node on PaM position bool AppendTextNode( SwHTMLAppendMode eMode=AM_NORMAL, bool bUpdateNum=true ); void AddParSpace(); // start/end an attribute // ppDepAttr indicated an attribute table entry, which attribute has to be // set, before the attribute is closed void NewAttr(const std::shared_ptr& rAttrTab, HTMLAttr **ppAttr, const SfxPoolItem& rItem); bool EndAttr( HTMLAttr *pAttr, bool bChkEmpty=true ); void DeleteAttr( HTMLAttr* pAttr ); void EndContextAttrs( HTMLAttrContext *pContext ); void SaveAttrTab(std::shared_ptr const & rNewAttrTab); void SplitAttrTab( const SwPosition& rNewPos ); void SplitAttrTab(std::shared_ptr const & rNewAttrTab, bool bMoveEndBack); void RestoreAttrTab(std::shared_ptr const & rNewAttrTab); void InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart ); void InsertAttrs( std::deque> rAttrs ); bool DoPositioning( SfxItemSet &rItemSet, SvxCSS1PropertyInfo &rPropInfo, HTMLAttrContext *pContext ); bool CreateContainer( std::u16string_view rClass, SfxItemSet &rItemSet, SvxCSS1PropertyInfo &rPropInfo, HTMLAttrContext *pContext ); bool EndSection( bool bLFStripped=false ); void InsertAttrs( SfxItemSet &rItemSet, SvxCSS1PropertyInfo const &rPropInfo, HTMLAttrContext *pContext, bool bCharLvl=false ); void InsertAttr( HTMLAttr **ppAttr, const 
SfxPoolItem & rItem, HTMLAttrContext *pCntxt ); void SplitPREListingXMP( HTMLAttrContext *pCntxt ); void FixHeaderFooterDistance( bool bHeader, const SwPosition *pOldPos ); void EndContext( HTMLAttrContext *pContext ); void ClearContext( HTMLAttrContext *pContext ); const SwFormatColl *GetCurrFormatColl() const; SwTwips GetCurrentBrowseWidth(); SwHTMLNumRuleInfo& GetNumInfo() { return *m_pNumRuleInfo; } // add parameter void SetNodeNum( sal_uInt8 nLevel ); // Manage paragraph styles // set the style resp. its attributes on the stack void SetTextCollAttrs( HTMLAttrContext *pContext = nullptr ); void InsertParaAttrs( const SfxItemSet& rItemSet ); // Manage attribute context // save current context void PushContext(std::unique_ptr& rCntxt) { m_aContexts.push_back(std::move(rCntxt)); } // Fetch top/specified context but not outside the context with token // nLimit. If bRemove set then remove it. std::unique_ptr PopContext(HtmlTokenId nToken = HtmlTokenId::NONE); void GetMarginsFromContext( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent, bool bIgnoreCurrent=false ) const; void GetMarginsFromContextWithNumberBullet( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent ) const; void GetULSpaceFromContext( sal_uInt16 &rUpper, sal_uInt16 &rLower ) const; void MovePageDescAttrs( SwNode *pSrcNd, SwNodeOffset nDestIdx, bool bFormatBreak ); // Handling of tags at paragraph level //

    // <P> and <H1> to <H6>

void NewPara(); void EndPara( bool bReal = false ); void NewHeading( HtmlTokenId nToken ); void EndHeading(); //
    // <ADDRESS>, <BLOCKQUOTE> and <PRE>
    void NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nPoolId );
    void EndTextFormatColl( HtmlTokenId nToken );

    // <DIV> and <CENTER>
void NewDivision( HtmlTokenId nToken ); void EndDivision(); // insert/close Fly-Frames void InsertFlyFrame( const SfxItemSet& rItemSet, HTMLAttrContext *pCntxt, const OUString& rId ); void SaveDocContext( HTMLAttrContext *pCntxt, HtmlContextFlags nFlags, const SwPosition *pNewPos ); void RestoreDocContext( HTMLAttrContext *pCntxt ); // end all opened
areas bool EndSections( bool bLFStripped ); // void NewMultiCol( sal_uInt16 columnsFromCss=0 ); // void NewMarquee( HTMLTable *pCurTable=nullptr ); void EndMarquee(); void InsertMarqueeText(); // Handling of lists // order list
    and unordered list