summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Luth <justin.luth@collabora.com>2020-08-28 15:32:11 +0300
committerMiklos Vajna <vmiklos@collabora.com>2020-10-28 10:37:15 +0100
commit28dddd4f7e255c74c17c0c6b263303f4567b5678 (patch)
treeb01df5b135d6100df209178ae2af57549ea9ccc3
parentde1c4863ab993c80d8fea50ce420d568f75bf2e6 (diff)
tdf#132149 ww8export: respect ginormous paragraphs
This handles the extremely unlikely case where a single paragraph contains multiple soft-page-breaks from spanning more than two pages. But it makes the unit tests I designed look much better, so I am happy. I think it might help to make the code slightly more understandable too, and it convinces me that I am understanding this section as I write multiple fixes against it. [Better evidence than all of this is that Michael Stahl came to the same conclusion from a code read - I just beat him to it.] (P.S. It isn't enough to change CurrentPageDesc, because that is reset from rNode.FindPageDesc on every WriteText. So the effective pageDesc needs to be kept track of.) Change-Id: I5852e90571a74f3df4362caf058f7960f413dad3 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/101545 Tested-by: Jenkins Reviewed-by: Justin Luth <justin_luth@sil.org> Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
-rw-r--r--sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odtbin0 -> 17550 bytes
-rw-r--r--sw/qa/extras/ooxmlexport/ooxmlexport15.cxx24
-rw-r--r--sw/source/filter/ww8/wrtw8nds.cxx15
3 files changed, 32 insertions, 7 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
new file mode 100644
index 000000000000..afa7961cb23f
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 65286eaa08b8..a9618eb20145 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -13,6 +13,7 @@
#include <tools/color.hxx>
#include <com/sun/star/beans/XPropertySet.hpp>
#include <com/sun/star/beans/NamedValue.hpp>
+#include <com/sun/star/style/BreakType.hpp>
#include <com/sun/star/text/RelOrientation.hpp>
#include <com/sun/star/text/XTextViewCursorSupplier.hpp>
#include <com/sun/star/text/XPageCursor.hpp>
@@ -535,11 +536,28 @@ DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak, "tdf132149_pgBreak.odt")
assertXPath(pDump, "//page[2]/infos/bounds", "width", "5953"); //portrait
// This two-line 3rd page ought not to exist. DID YOU FIX ME? The real page 3 should be "8391" landscape.
assertXPath(pDump, "//page[3]/infos/bounds", "width", "5953");
+ // This really ought to be on odd page 3, but now it is on odd page 5.
+ assertXPath(pDump, "//page[5]/infos/bounds", "width", "8391");
+ assertXPath(pDump, "//page[5]/infos/prtBounds", "right", "6122"); //Left page style
- //Page break is not lost. This SHOULD be on page 4, but sadly it is not.
- assertXPathContent(pDump, "//page[5]/header/txt", "First Page Style");
- CPPUNIT_ASSERT(getXPathContent(pDump, "//page[5]/body/txt").startsWith("Lorem ipsum"));
+ //Page style change here must not be lost. This SHOULD be on page 4, but sadly it is not.
+ assertXPathContent(pDump, "//page[6]/header/txt", "First Page Style");
+ CPPUNIT_ASSERT(getXPath(pDump, "//page[6]/body/txt[1]/Text[1]", "Portion").startsWith("Lorem ipsum"));
+}
+
+DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak2, "tdf132149_pgBreak2.odt")
+{
+ // This 3 page document is designed to visually exaggerate the problems
+ // of emulating LO's followed-by-page-style into MSWord's sections.
+
+ // The only specified page style change should be between page 1 and 2.
+ // When the first paragraph was split into 3, each paragraph specified a page break. The last was unnecessary.
+ uno::Reference<beans::XPropertySet> xParaThree(getParagraph(3), uno::UNO_QUERY_THROW);
+ CPPUNIT_ASSERT_EQUAL(uno::Any(), xParaThree->getPropertyValue("PageDescName"));
+ // The ODT is only 2 paragraphs, but a hack to get the right page style breaks para1 into pieces.
+ // This was 4 paragraphs - the unnecessary page break had hacked in another paragraph split.
+ CPPUNIT_ASSERT_LESSEQUAL( 3, getParagraphs() );
}
DECLARE_OOXMLEXPORT_TEST(testTdf135949_anchoredBeforeBreak, "tdf135949_anchoredBeforeBreak.docx")
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 0c04d094e505..96d12e8afe43 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -2265,6 +2265,7 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
// Let's decide if we need to split the paragraph because of a section break
bool bNeedParaSplit = NeedTextNodeSplit( rNode, softBreakList )
&& !IsInTable();
+ const SwPageDesc* pNextSplitParaPageDesc = m_pCurrentPageDesc;
auto aBreakIt = softBreakList.begin();
// iterate through portions on different pages
@@ -2273,7 +2274,13 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
sal_Int32 nCurrentPos = *aBreakIt;
if( softBreakList.size() > 1 ) // not for empty paragraph
- ++aBreakIt;
+ {
+ // no need to split again if the page style won't change anymore
+ if ( pNextSplitParaPageDesc == pNextSplitParaPageDesc->GetFollow() )
+ aBreakIt = --softBreakList.end();
+ else
+ ++aBreakIt;
+ }
AttrOutput().StartParagraph( pTextNodeInfo );
@@ -2718,9 +2725,9 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
// if paragraph is split, put the section break between the parts
if( bNeedParaSplit && *aBreakIt != rNode.GetText().getLength() )
{
- const SwPageDesc* pNextPageDesc = m_pCurrentPageDesc->GetFollow();
- assert(pNextPageDesc);
- PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextPageDesc);
+ pNextSplitParaPageDesc = pNextSplitParaPageDesc->GetFollow();
+ assert(pNextSplitParaPageDesc);
+ PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextSplitParaPageDesc);
}
else
{