summaryrefslogtreecommitdiff
path: root/sw
diff options
context:
space:
mode:
authorMike Kaganski <mike.kaganski@collabora.com>2017-06-23 14:48:03 +0300
committerMike Kaganski <mike.kaganski@collabora.com>2017-06-27 15:43:25 +0200
commita4a1467bc47b81ad68ecad0d5e2e163670582919 (patch)
treec045a2c70fa9441321da29323ed1bcdc82804ba2 /sw
parentd5b19100ca4d3670d1b5367e8000739af60a6892 (diff)
tdf#108714: allow <w:br> as direct child of <w:body>
LibreOffice doesn't accept the <w:br> element as a child of <w:body>. ECMA-376-1:2016 17.3.3.1 describes br as an element of run content, and points to CT_Br in §A.1. CT_Br may appear only as part of EG_RunInnerContent. In turn, EG_RunInnerContent may appear only inside CT_R. So, using <w:br> outside of <w:r> produces ill-formed OOXML. Open XML SDK 2.5 Productivity Tool for Microsoft Office confirms that, showing an OpenXmlUnknownElement error. However, Word accepts it as a direct child of <w:body>. It behaves as if the <w:br> were used as the first element in the first run of the following <w:p> (thus creating a page break after the next paragraph). This is another Word bug that provokes third parties to create ill-formed documents, and requires LibreOffice to be bug-to-bug compatible. This commit makes the following changes: 1. Registers a dedicated complex type CT_Br_OutOfOrder to handle those unusual breaks, with a corresponding handler function. 2. In the handler function, saves the gathered property set to the parser state for later use in the next paragraph group handler. This reproduces Word behaviour. Change-Id: I5df6927e2de9266b58f87807319ad1c4977e45a7 Reviewed-on: https://gerrit.libreoffice.org/39168 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sw')
-rw-r--r--sw/qa/extras/ooxmlimport/data/tdf108714.docxbin0 -> 1310 bytes
-rw-r--r--sw/qa/extras/ooxmlimport/ooxmlimport.cxx47
2 files changed, 47 insertions, 0 deletions
diff --git a/sw/qa/extras/ooxmlimport/data/tdf108714.docx b/sw/qa/extras/ooxmlimport/data/tdf108714.docx
new file mode 100644
index 000000000000..e564d44a648b
--- /dev/null
+++ b/sw/qa/extras/ooxmlimport/data/tdf108714.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index a5f7cf21cd96..97dd9128b9ea 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -27,6 +27,7 @@
#include <com/sun/star/drawing/GraphicExportFilter.hpp>
#include <com/sun/star/drawing/EnhancedCustomShapeAdjustmentValue.hpp>
#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/style/BreakType.hpp>
#include <com/sun/star/style/XStyleFamiliesSupplier.hpp>
#include <com/sun/star/text/HoriOrientation.hpp>
#include <com/sun/star/text/RelOrientation.hpp>
@@ -1312,6 +1313,52 @@ DECLARE_OOXMLIMPORT_TEST(testVmlAdjustments, "vml-adjustments.docx")
CPPUNIT_ASSERT_EQUAL(sal_Int32(17639), aAdjustmentValue.Value.get<sal_Int32>());
}
+DECLARE_OOXMLIMPORT_TEST(testTdf108714, "tdf108714.docx")
+{
+ CPPUNIT_ASSERT_EQUAL(4, getParagraphs());
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Page break is absent - we lost bug-to-bug compatibility with Word", 3, getPages());
+
+ // The second (empty) paragraph must be on the first page, even though the <w:br> element precedes it.
+ // That's because Word treats such a break as the first element in the first run of the following paragraph:
+ //
+ // <w:br w:type="page"/>
+ // <w:p>
+ // <w:r>
+ // <w:t/>
+ // </w:r>
+ // </w:p>
+ //
+ // is equivalent to
+ //
+ // <w:p>
+ // <w:r>
+ // <w:br w:type="page"/>
+ // </w:r>
+ // </w:p>
+ //
+ // which emits a page break after that empty paragraph.
+
+ uno::Reference< text::XTextRange > paragraph = getParagraph(1);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 1"), paragraph->getString());
+ style::BreakType breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType);
+
+ paragraph = getParagraph(2);
+ CPPUNIT_ASSERT_EQUAL(OUString(), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType); // the break must not land before the empty paragraph
+
+ paragraph = getParagraph(3);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 2"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType); // the break takes effect after the empty paragraph
+
+ paragraph = getParagraph(4);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 3"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType);
+}
+
// tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT
CPPUNIT_PLUGIN_IMPLEMENT();