tdf#132149 ww8export: respect ginormous paragraphs

This handles the extremely unlikely case where a single paragraph contains multiple soft-page-breaks from spanning more than two pages. But it makes the unit tests I designed look much better, so I am happy. I think it might help to make the code slightly more understandable too, and it convinces me that I am understanding this section as I write multiple fixes against it. [Better evidence than all of this is that Michael Stahl came to the same conclusion from a code read - I just beat him to it.] (P.S. It isn't enough to change CurrentPageDesc, because that is reset from rNode.FindPageDesc on every WriteText. So the effective pageDesc needs to be kept track of.) Change-Id: I5852e90571a74f3df4362caf058f7960f413dad3 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/101545 Tested-by: Jenkins Reviewed-by: Justin Luth <justin_luth@sil.org> Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
author: Justin Luth <justin.luth@collabora.com> 2020-08-28 15:32:11 +0300
committer: Miklos Vajna <vmiklos@collabora.com> 2020-10-28 10:37:15 +0100
commit: 28dddd4f7e255c74c17c0c6b263303f4567b5678 (patch)
tree: b01df5b135d6100df209178ae2af57549ea9ccc3
parent: de1c4863ab993c80d8fea50ce420d568f75bf2e6 (diff)
3 files changed, 32 insertions, 7 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
new file mode 100644
index 000000000000..afa7961cb23f
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 65286eaa08b8..a9618eb20145 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -13,6 +13,7 @@
 #include <tools/color.hxx>
 #include <com/sun/star/beans/XPropertySet.hpp>
 #include <com/sun/star/beans/NamedValue.hpp>
+#include <com/sun/star/style/BreakType.hpp>
 #include <com/sun/star/text/RelOrientation.hpp>
 #include <com/sun/star/text/XTextViewCursorSupplier.hpp>
 #include <com/sun/star/text/XPageCursor.hpp>
@@ -535,11 +536,28 @@ DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak, "tdf132149_pgBreak.odt")
     assertXPath(pDump, "//page[2]/infos/bounds", "width", "5953");  //portrait
     // This two-line 3rd page ought not to exist. DID YOU FIX ME? The real page 3 should be "8391" landscape.
     assertXPath(pDump, "//page[3]/infos/bounds", "width", "5953");
+    // This really ought to be on odd page 3, but now it is on odd page 5.
+    assertXPath(pDump, "//page[5]/infos/bounds", "width", "8391");
+    assertXPath(pDump, "//page[5]/infos/prtBounds", "right", "6122");  //Left page style
 
 
-    //Page break is not lost. This SHOULD be on page 4, but sadly it is not.
-    assertXPathContent(pDump, "//page[5]/header/txt", "First Page Style");
-    CPPUNIT_ASSERT(getXPathContent(pDump, "//page[5]/body/txt").startsWith("Lorem ipsum"));
+    //Page style change here must not be lost. This SHOULD be on page 4, but sadly it is not.
+    assertXPathContent(pDump, "//page[6]/header/txt", "First Page Style");
+    CPPUNIT_ASSERT(getXPath(pDump, "//page[6]/body/txt[1]/Text[1]", "Portion").startsWith("Lorem ipsum"));
+}
+
+DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak2, "tdf132149_pgBreak2.odt")
+{
+    // This 3 page document is designed to visually exaggerate the problems
+    // of emulating LO's followed-by-page-style into MSWord's sections.
+
+    // The only specified page style change should be between page 1 and 2.
+    // When the first paragraph was split into 3, each paragraph specified a page break. The last was unnecessary.
+    uno::Reference<beans::XPropertySet> xParaThree(getParagraph(3), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(uno::Any(), xParaThree->getPropertyValue("PageDescName"));
+    // The ODT is only 2 paragraphs, but a hack to get the right page style breaks para1 into pieces.
+    // This was 4 paragraphs - the unnecessary page break had hacked in another paragraph split.
+    CPPUNIT_ASSERT_LESSEQUAL( 3, getParagraphs() );
 }
 
 DECLARE_OOXMLEXPORT_TEST(testTdf135949_anchoredBeforeBreak, "tdf135949_anchoredBeforeBreak.docx")
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 0c04d094e505..96d12e8afe43 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -2265,6 +2265,7 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
     // Let's decide if we need to split the paragraph because of a section break
     bool bNeedParaSplit = NeedTextNodeSplit( rNode, softBreakList )
                         && !IsInTable();
+    const SwPageDesc* pNextSplitParaPageDesc = m_pCurrentPageDesc;
 
     auto aBreakIt = softBreakList.begin();
     // iterate through portions on different pages
@@ -2273,7 +2274,13 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
         sal_Int32 nCurrentPos = *aBreakIt;
 
         if( softBreakList.size() > 1 ) // not for empty paragraph
-            ++aBreakIt;
+        {
+            // no need to split again if the page style won't change anymore
+            if ( pNextSplitParaPageDesc == pNextSplitParaPageDesc->GetFollow() )
+                aBreakIt = --softBreakList.end();
+            else
+                ++aBreakIt;
+        }
 
         AttrOutput().StartParagraph( pTextNodeInfo );
 
@@ -2718,9 +2725,9 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
         // if paragraph is split, put the section break between the parts
         if( bNeedParaSplit && *aBreakIt != rNode.GetText().getLength() )
         {
-                const SwPageDesc* pNextPageDesc = m_pCurrentPageDesc->GetFollow();
-                assert(pNextPageDesc);
-                PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextPageDesc);
+            pNextSplitParaPageDesc = pNextSplitParaPageDesc->GetFollow();
+            assert(pNextSplitParaPageDesc);
+            PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextSplitParaPageDesc);
         }
         else
         {
author	Justin Luth <justin.luth@collabora.com>	2020-08-28 15:32:11 +0300
committer	Miklos Vajna <vmiklos@collabora.com>	2020-10-28 10:37:15 +0100
commit	28dddd4f7e255c74c17c0c6b263303f4567b5678 (patch)
tree	b01df5b135d6100df209178ae2af57549ea9ccc3
parent	de1c4863ab993c80d8fea50ce420d568f75bf2e6 (diff)