ofz#20455 sw: HTML import: fix invalid table in footer

The obvious problem was that a bookmark failed to be copied to the correct node: it was created on a SwStartNode, which failed in makeMark() and resulted in a null-pointer dereference.

The target position was off by 1 node because there was a spurious StartNode/EndNode pair directly below the table:

    [ 41] 0x5b13430 TableNode ,
    [ 42] 0x5b1d010 StartNode ,
    [ 43] 0x5b12a50 StartNode ,
    [ 44] 0x5b135f0 TextNode "",

This pair was created by a special case in SwTableNode::MakeCopy() because .GetTabSortBoxes().size() == 1. But the table actually had quite a few more cells in the nodes-array; they just were not yet in the SwTable.

In an exciting twist of events, it turns out the table was copied while it was not yet finished parsing: in the middle of the table, some CSS set some page attributes, and this caused a first-page page style to be created in SwCSS1Parser::ParseStyleSheet() by copying the master page style. Unfortunately the table was in the <div title="footer">, so it was copied in this incomplete and inconsistent state.

It might be possible to get rid of the special case in SwTableNode::MakeCopy() by restricting the special-case skipping of StartNodes at the start in SwNodes::CopyNodes() a bit, so that StartNodes whose EndNodes are copied aren't skipped; at least that's the most reasonable explanation for the special case. But for now just fix the HTML import.

Additionally, only on MacOSX, using libc++, this triggered an assert:

    Assertion failed: (!pImpl->mpStaticDefaults || typeid(rItem) == typeid(GetDefaultItem(nWhich))), function PutImpl, file /Users/tdf/lode/jenkins/workspace/lo_gerrit/Config/macosx_clang_dbgutil/svl/source/items/itempool.cxx, line 611.

Probably because SdrTextAniCountItem is not marked DLLPUBLIC.

Change-Id: Ia167265e7540eea649801eaac2b89f9e18b685cd
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/87859
Tested-by: Jenkins
Reviewed-by: Michael Stahl <michael.stahl@cib.de>
author: Caolán McNamara <caolanm@redhat.com> 2020-02-03 12:06:23 +0000
committer: Michael Stahl <michael.stahl@cib.de> 2020-02-12 13:07:39 +0100
commit: 086e43148059a7ebc6caa416fa82bb60fd2cc92f (patch)
tree: 05eb68b1f7e8a00e66d7675473be54a51abc1948 /sw/source
parent: a9ba8e57a41c5ddf3597272bddab30e51fb3fd38 (diff)
4 files changed, 20 insertions, 7 deletions
diff --git a/sw/source/filter/html/htmlcss1.cxx b/sw/source/filter/html/htmlcss1.cxx
index ca28eac168b5..c90c4e73307a 100644
--- a/sw/source/filter/html/htmlcss1.cxx
+++ b/sw/source/filter/html/htmlcss1.cxx
@@ -87,11 +87,13 @@ void SwCSS1Parser::ChgPageDesc( const SwPageDesc *pPageDesc,
         m_pDoc->ChgPageDesc( pos, rNewPageDesc );
 }
 
-SwCSS1Parser::SwCSS1Parser( SwDoc *pD, const sal_uInt32 aFHeights[7], const OUString& rBaseURL, bool bNewDoc ) :
-    SvxCSS1Parser( pD->GetAttrPool(), rBaseURL,
-                   aItemIds, SAL_N_ELEMENTS(aItemIds)),
-    m_pDoc( pD ),
-    m_nDropCapCnt( 0 ),
+SwCSS1Parser::SwCSS1Parser(SwDoc *const pDoc, SwHTMLParser const& rParser,
+        const sal_uInt32 aFHeights[7], const OUString& rBaseURL, bool const bNewDoc)
+    : SvxCSS1Parser(pDoc->GetAttrPool(), rBaseURL,
+                   aItemIds, SAL_N_ELEMENTS(aItemIds))
+    , m_pDoc( pDoc )
+    , m_rHTMLParser(rParser)
+    , m_nDropCapCnt( 0 ),
     m_bIsNewDoc( bNewDoc ),
     m_bBodyBGColorSet( false ),
     m_bBodyBackgroundSet( false ),
@@ -1341,6 +1343,12 @@ const SwPageDesc *SwCSS1Parser::GetPageDesc( sal_uInt16 nPoolId, bool bCreate )
     const SwPageDesc *pPageDesc = FindPageDesc(m_pDoc, nPoolId);
     if( !pPageDesc && bCreate )
     {
+        if (m_rHTMLParser.IsReadingHeaderOrFooter())
+        {   // (there should be only one definition of header/footer in HTML)
+            SAL_WARN("sw.html", "no creating PageDesc while reading header/footer");
+            return nullptr;
+        }
+
         // The first page is created from the right page, if there is one.
         SwPageDesc *pMasterPageDesc = nullptr;
         if( RES_POOLPAGE_FIRST == nPoolId )
diff --git a/sw/source/filter/html/swcss1.hxx b/sw/source/filter/html/swcss1.hxx
index c6a3e6bdb8fd..54d03edb5892 100644
--- a/sw/source/filter/html/swcss1.hxx
+++ b/sw/source/filter/html/swcss1.hxx
@@ -33,6 +33,7 @@ class SwTextFormatColl;
 class SvxBrushItem;
 class SwFormatDrop;
 class SwPageDesc;
+class SwHTMLParser;
 
 // This header looks harmless, but includes still quite
 // inconspicuous one or the other! On the other hand this class
@@ -41,6 +42,7 @@ class SwPageDesc;
 class SwCSS1Parser : public SvxCSS1Parser
 {
     SwDoc *m_pDoc;
+    SwHTMLParser const& m_rHTMLParser;
 
     sal_uLong m_aFontHeights[7];
 
@@ -75,7 +77,8 @@ protected:
     using CSS1Parser::ParseStyleSheet;
 
 public:
-    SwCSS1Parser( SwDoc *pDoc, sal_uInt32 const aFHeight[7], const OUString& rBaseURL, bool bNewDoc );
+    SwCSS1Parser( SwDoc *pDoc, SwHTMLParser const& rParser,
+        sal_uInt32 const aFHeight[7], const OUString& rBaseURL, bool bNewDoc);
     virtual ~SwCSS1Parser() override;
 
     virtual bool ParseStyleSheet( const OUString& rIn ) override;
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 71377d3a7034..1168844d4673 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -358,7 +358,7 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
     m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
     m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
 
-    m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
+    m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
     m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
 
     if( bReadUTF8 )
diff --git a/sw/source/filter/html/swhtml.hxx b/sw/source/filter/html/swhtml.hxx
index 07f7e87c41e8..d14e431f0d09 100644
--- a/sw/source/filter/html/swhtml.hxx
+++ b/sw/source/filter/html/swhtml.hxx
@@ -924,6 +924,8 @@ public:
 
     bool IsReqIF() const;
 
+    bool IsReadingHeaderOrFooter() const { return m_bReadingHeaderOrFooter; }
+
     void NotifyMacroEventRead();
 
     /// Strips query and fragment from a URL path if base URL is a file:// one.
author	Caolán McNamara <caolanm@redhat.com>	2020-02-03 12:06:23 +0000
committer	Michael Stahl <michael.stahl@cib.de>	2020-02-12 13:07:39 +0100
commit	086e43148059a7ebc6caa416fa82bb60fd2cc92f (patch)
tree	05eb68b1f7e8a00e66d7675473be54a51abc1948 /sw/source
parent	a9ba8e57a41c5ddf3597272bddab30e51fb3fd38 (diff)