summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Miklos Vajna <vmiklos@collabora.com> 2022-09-05 16:15:21 +0200
committer Miklos Vajna <vmiklos@collabora.com> 2022-09-05 21:00:44 +0200
commit 16ed6110313cae310799a82294fc566ce75855a4 (patch)
tree c2e5b2e40ffc288becef6a662579b868dc8708eb
parent ac9ae6fed0c237b27861b718686c2cc23fd1ba5e (diff)
sw XHTML import: fix lost empty paragraphs
Plain HTML import ignores empty paragraphs, because browsers ignore such paragraphs as well. This has the benefit of layout compatibility, but it breaks the semantics of documents when roundtripping them from Writer's document model to XHTML and back. Fix the problem by disabling this tweak for XHTML: the idea is that when it comes to paragraph / line breaks, XHTML is meant to preserve the semantics of the original document model, even if that results in slight differences in HTML rendering. So in case Writer/ODT doesn't collapse multiple line breaks and browsers/HTML do that, we apply workarounds in the HTML case, but not in the XHTML case. Change-Id: I55de8880503ee2d48fbd7a6af3891f2754f0d172 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139439 Reviewed-by: Miklos Vajna <vmiklos@collabora.com> Tested-by: Jenkins
-rw-r--r-- sw/CppunitTest_sw_filter_html.mk 75
-rw-r--r-- sw/Module_sw.mk 1
-rw-r--r-- sw/qa/filter/html/data/empty-paragraph.xhtml 3
-rw-r--r-- sw/qa/filter/html/html.cxx 54
-rw-r--r-- sw/source/filter/html/swhtml.cxx 5
5 files changed, 136 insertions, 2 deletions
diff --git a/sw/CppunitTest_sw_filter_html.mk b/sw/CppunitTest_sw_filter_html.mk
new file mode 100644
index 000000000000..130afe370dc7
--- /dev/null
+++ b/sw/CppunitTest_sw_filter_html.mk
@@ -0,0 +1,75 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#*************************************************************************
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+#*************************************************************************
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_filter_html, \
+ sw/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_filter_html, \
+ comphelper \
+ cppu \
+ cppuhelper \
+ editeng \
+ sal \
+ sfx \
+ svl \
+ svx \
+ svxcore \
+ sw \
+ swqahelper \
+ test \
+ unotest \
+ utl \
+ vcl \
+ tl \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_filter_html,\
+ boost_headers \
+ libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_filter_html,\
+ -I$(SRCDIR)/sw/inc \
+ -I$(SRCDIR)/sw/source/core/inc \
+ -I$(SRCDIR)/sw/source/uibase/inc \
+ -I$(SRCDIR)/sw/qa/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_filter_html,\
+ udkapi \
+ offapi \
+ oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sw_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sw_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_custom_headers,sw_filter_html,\
+ officecfg/registry \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_uiconfigs,sw_filter_html, \
+ modules/swriter \
+))
+
+$(eval $(call gb_CppunitTest_use_more_fonts,sw_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index a784af17d121..4c7a9d4dbbaa 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -151,6 +151,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_core_view \
CppunitTest_sw_core_attr \
CppunitTest_sw_filter_ww8 \
+ CppunitTest_sw_filter_html \
CppunitTest_sw_a11y \
))
diff --git a/sw/qa/filter/html/data/empty-paragraph.xhtml b/sw/qa/filter/html/data/empty-paragraph.xhtml
new file mode 100644
index 000000000000..2a4ba3f65459
--- /dev/null
+++ b/sw/qa/filter/html/data/empty-paragraph.xhtml
@@ -0,0 +1,3 @@
+<reqif-xhtml:div><reqif-xhtml:p>a</reqif-xhtml:p>
+<reqif-xhtml:p></reqif-xhtml:p>
+</reqif-xhtml:div>
diff --git a/sw/qa/filter/html/html.cxx b/sw/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..1b75903383d0
--- /dev/null
+++ b/sw/qa/filter/html/html.cxx
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <swmodeltestbase.hxx>
+
+#include <comphelper/propertyvalue.hxx>
+
+namespace
+{
+constexpr OUStringLiteral DATA_DIRECTORY = u"/sw/qa/filter/html/data/";
+
+/**
+ * Covers sw/source/filter/html/ fixes.
+ *
+ * Note that these tests are meant to be simple: either load a file and assert some result or build
+ * a document model with code, export and assert that result.
+ *
+ * Keep using the various sw_<format>import/export suites for multiple filter calls inside a single
+ * test.
+ */
+class Test : public SwModelTestBase
+{
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testEmptyParagraph)
+{
+ // Given a document with 2 paragraphs, the second is empty:
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "empty-paragraph.xhtml";
+ uno::Sequence<beans::PropertyValue> aLoadArgs = {
+ comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")),
+ comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")),
+ };
+
+ // When loading that file:
+ mxComponent = loadFromDesktop(aURL, OUString(), aLoadArgs);
+
+ // Then make sure that the resulting document has a 2nd empty paragraph:
+ getParagraph(1, "a");
+ // Without the accompanying fix in place, this test would have failed with:
+ // An uncaught exception of type com.sun.star.container.NoSuchElementException
+ // i.e. the 2nd paragraph was lost.
+ getParagraph(2);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 9fcb2e0032dd..fa45f91406e3 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -4030,10 +4030,11 @@ void SwHTMLParser::EndPara( bool bReal )
#endif
}
- // Netscape skips empty paragraphs, we do the same.
+ // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping
+ // the source document to the doc model 1:1 if possible.
if( bReal )
{
- if( m_pPam->GetPoint()->GetContentIndex() )
+ if (m_pPam->GetPoint()->GetContentIndex() || m_bXHTML)
AppendTextNode( AM_SPACE );
else
AddParSpace();