diff options
author | Miklos Vajna <vmiklos@collabora.com> | 2022-09-05 16:15:21 +0200 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.com> | 2022-09-05 21:00:44 +0200 |
commit | 16ed6110313cae310799a82294fc566ce75855a4 (patch) | |
tree | c2e5b2e40ffc288becef6a662579b868dc8708eb | |
parent | ac9ae6fed0c237b27861b718686c2cc23fd1ba5e (diff) |
sw XHTML import: fix lost empty paragraphs
Plain HTML import ignores empty paragraphs, because browsers ignore such
paragraphs as well.
This has the benefit of layout compatibility, but it breaks the
semantics of documents when roundtripping them from Writer's document
model to XHTML and back.
Fix the problem by disabling this tweak for XHTML: the idea is that when
it comes to paragraph / line breaks, XHTML is meant to preserve the
semantics of the original document model, even if that results in slight
differences in HTML rendering. So where Writer/ODT doesn't collapse
multiple line breaks but browsers/HTML do, we apply workarounds
in the HTML case, but not in the XHTML case.
Change-Id: I55de8880503ee2d48fbd7a6af3891f2754f0d172
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139439
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Tested-by: Jenkins
-rw-r--r-- | sw/CppunitTest_sw_filter_html.mk | 75 | ||||
-rw-r--r-- | sw/Module_sw.mk | 1 | ||||
-rw-r--r-- | sw/qa/filter/html/data/empty-paragraph.xhtml | 3 | ||||
-rw-r--r-- | sw/qa/filter/html/html.cxx | 54 | ||||
-rw-r--r-- | sw/source/filter/html/swhtml.cxx | 5 |
5 files changed, 136 insertions, 2 deletions
diff --git a/sw/CppunitTest_sw_filter_html.mk b/sw/CppunitTest_sw_filter_html.mk new file mode 100644 index 000000000000..130afe370dc7 --- /dev/null +++ b/sw/CppunitTest_sw_filter_html.mk @@ -0,0 +1,75 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +#************************************************************************* +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +#************************************************************************* + +$(eval $(call gb_CppunitTest_CppunitTest,sw_filter_html)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_filter_html)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sw_filter_html, \ + sw/qa/filter/html/html \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sw_filter_html, \ + comphelper \ + cppu \ + cppuhelper \ + editeng \ + sal \ + sfx \ + svl \ + svx \ + svxcore \ + sw \ + swqahelper \ + test \ + unotest \ + utl \ + vcl \ + tl \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sw_filter_html,\ + boost_headers \ + libxml2 \ +)) + +$(eval $(call gb_CppunitTest_set_include,sw_filter_html,\ + -I$(SRCDIR)/sw/inc \ + -I$(SRCDIR)/sw/source/core/inc \ + -I$(SRCDIR)/sw/source/uibase/inc \ + -I$(SRCDIR)/sw/qa/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_CppunitTest_use_api,sw_filter_html,\ + udkapi \ + offapi \ + oovbaapi \ +)) + +$(eval $(call gb_CppunitTest_use_ure,sw_filter_html)) +$(eval $(call gb_CppunitTest_use_vcl,sw_filter_html)) + +$(eval $(call gb_CppunitTest_use_rdb,sw_filter_html,services)) + +$(eval $(call gb_CppunitTest_use_custom_headers,sw_filter_html,\ + officecfg/registry \ +)) + +$(eval $(call gb_CppunitTest_use_configuration,sw_filter_html)) + +$(eval $(call gb_CppunitTest_use_uiconfigs,sw_filter_html, \ + modules/swriter \ +)) + +$(eval $(call 
gb_CppunitTest_use_more_fonts,sw_filter_html)) + +# vim: set noet sw=4 ts=4: diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk index a784af17d121..4c7a9d4dbbaa 100644 --- a/sw/Module_sw.mk +++ b/sw/Module_sw.mk @@ -151,6 +151,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\ CppunitTest_sw_core_view \ CppunitTest_sw_core_attr \ CppunitTest_sw_filter_ww8 \ + CppunitTest_sw_filter_html \ CppunitTest_sw_a11y \ )) diff --git a/sw/qa/filter/html/data/empty-paragraph.xhtml b/sw/qa/filter/html/data/empty-paragraph.xhtml new file mode 100644 index 000000000000..2a4ba3f65459 --- /dev/null +++ b/sw/qa/filter/html/data/empty-paragraph.xhtml @@ -0,0 +1,3 @@ +<reqif-xhtml:div><reqif-xhtml:p>a</reqif-xhtml:p> +<reqif-xhtml:p></reqif-xhtml:p> +</reqif-xhtml:div> diff --git a/sw/qa/filter/html/html.cxx b/sw/qa/filter/html/html.cxx new file mode 100644 index 000000000000..1b75903383d0 --- /dev/null +++ b/sw/qa/filter/html/html.cxx @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <swmodeltestbase.hxx> + +#include <comphelper/propertyvalue.hxx> + +namespace +{ +constexpr OUStringLiteral DATA_DIRECTORY = u"/sw/qa/filter/html/data/"; + +/** + * Covers sw/source/filter/html/ fixes. + * + * Note that these tests are meant to be simple: either load a file and assert some result or build + * a document model with code, export and assert that result. + * + * Keep using the various sw_<format>import/export suites for multiple filter calls inside a single + * test. 
+ */ +class Test : public SwModelTestBase +{ +}; + +CPPUNIT_TEST_FIXTURE(Test, testEmptyParagraph) +{ + // Given a document with 2 paragraphs, the second is empty: + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "empty-paragraph.xhtml"; + uno::Sequence<beans::PropertyValue> aLoadArgs = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + }; + + // When loading that file: + mxComponent = loadFromDesktop(aURL, OUString(), aLoadArgs); + + // Then make sure that the resulting document has a 2nd empty paragraph: + getParagraph(1, "a"); + // Without the accompanying fix in place, this test would have failed with: + // An uncaught exception of type com.sun.star.container.NoSuchElementException + // i.e. the 2nd paragraph was lost. + getParagraph(2); +} +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx index 9fcb2e0032dd..fa45f91406e3 100644 --- a/sw/source/filter/html/swhtml.cxx +++ b/sw/source/filter/html/swhtml.cxx @@ -4030,10 +4030,11 @@ void SwHTMLParser::EndPara( bool bReal ) #endif } - // Netscape skips empty paragraphs, we do the same. + // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping + // the source document to the doc model 1:1 if possible. if( bReal ) { - if( m_pPam->GetPoint()->GetContentIndex() ) + if (m_pPam->GetPoint()->GetContentIndex() || m_bXHTML) AppendTextNode( AM_SPACE ); else AddParSpace(); |