diff options
author | Miklos Vajna <vmiklos@collabora.com> | 2024-02-01 13:10:26 +0100 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.com> | 2024-02-01 15:36:39 +0100 |
commit | e6e5660b726ecf3b0c39b277568568973b43c9f0 (patch) | |
tree | 93c846666990fdd7220d9b915a9ab71ba71437ad | |
parent | 829374381d9c9191746d528b52411ad757e39a08 (diff) |
tdf#159483 sc HTML import: handle data-sheets-value attribute for the text case
The A2 cell in the bugdoc has 01 in it, which was auto-converted to the
numeric (float) value 1 on import, even though it was originally text.
This is hard to solve for HTML in general, which is not typed, but this
input is coming from google sheets, which has an additional
data-sheets-value attribute on <td> that does tell us about the type of
the cell.
Fix the problem by handling that attribute, and in case it explicitly
says it's text, then apply the matching number format.
Other types are not yet handled.
Change-Id: I2986ef864e97d9c46d191aba25ca5740a1151a71
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162869
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Tested-by: Jenkins
-rw-r--r-- | include/svtools/htmlkywd.hxx | 1 | ||||
-rw-r--r-- | include/svtools/htmltokn.h | 1 | ||||
-rw-r--r-- | sc/CppunitTest_sc_filter_html.mk | 82 | ||||
-rw-r--r-- | sc/Module_sc.mk | 1 | ||||
-rw-r--r-- | sc/qa/filter/html/data/text.html | 8 | ||||
-rw-r--r-- | sc/qa/filter/html/html.cxx | 62 | ||||
-rw-r--r-- | sc/source/filter/html/htmlpars.cxx | 22 | ||||
-rw-r--r-- | svtools/source/svhtml/htmlkywd.cxx | 1 |
8 files changed, 178 insertions, 0 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx index 00c8260749bd..23e836ea7cea 100644 --- a/include/svtools/htmlkywd.hxx +++ b/include/svtools/htmlkywd.hxx @@ -445,6 +445,7 @@ #define OOO_STRING_SVTOOLS_HTML_O_title "title" #define OOO_STRING_SVTOOLS_HTML_O_value "value" #define OOO_STRING_SVTOOLS_HTML_O_SDval "sdval" +#define OOO_STRING_SVTOOLS_HTML_O_DSval "data-sheets-value" #define OOO_STRING_SVTOOLS_HTML_O_SDnum "sdnum" #define OOO_STRING_SVTOOLS_HTML_O_sdlibrary "sdlibrary" #define OOO_STRING_SVTOOLS_HTML_O_sdmodule "sdmodule" diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h index 4a333ee2f6d9..27370e5cb869 100644 --- a/include/svtools/htmltokn.h +++ b/include/svtools/htmltokn.h @@ -344,6 +344,7 @@ STRING_START = BOOL_END, TITLE, VALUE, SDVAL, // StarDiv NumberValue + DSVAL, SDNUM, // StarDiv NumberFormat SDLIBRARY, SDMODULE, diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk new file mode 100644 index 000000000000..b78349d64703 --- /dev/null +++ b/sc/CppunitTest_sc_filter_html.mk @@ -0,0 +1,82 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_CppunitTest_CppunitTest,sc_filter_html)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sc_filter_html)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sc_filter_html, \ + sc/qa/filter/html/html \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sc_filter_html, \ + boost_headers \ + libxml2 \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \ + basegfx \ + comphelper \ + cppu \ + cppuhelper \ + drawinglayer \ + drawinglayercore \ + editeng \ + for \ + forui \ + i18nlangtag \ + msfilter \ + oox \ + sal \ + salhelper \ + sax \ + sc \ + scqahelper \ + sfx \ + sot \ + subsequenttest \ + svl \ + svt \ + svx \ + svxcore \ + test \ + tk \ + tl \ + ucbhelper \ + unotest \ + utl \ + vcl \ + xo \ +)) + +$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\ + -I$(SRCDIR)/sc/source/ui/inc \ + -I$(SRCDIR)/sc/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_CppunitTest_use_api,sc_filter_html,\ + udkapi \ + offapi \ + oovbaapi \ +)) + +$(eval $(call gb_CppunitTest_use_packages,sc_filter_html, \ + filter_xhtml \ + filter_xslt \ +)) + +$(eval $(call gb_CppunitTest_use_ure,sc_filter_html)) +$(eval $(call gb_CppunitTest_use_vcl,sc_filter_html)) + +$(eval $(call gb_CppunitTest_use_rdb,sc_filter_html,services)) + +$(eval $(call gb_CppunitTest_use_configuration,sc_filter_html)) + +# vim: set noet sw=4 ts=4: diff --git a/sc/Module_sc.mk b/sc/Module_sc.mk index a159c957d988..71488d5439e6 100644 --- a/sc/Module_sc.mk +++ b/sc/Module_sc.mk @@ -97,6 +97,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sc, \ CppunitTest_sc_uicalc2 \ CppunitTest_sc_vba_macro_test \ CppunitTest_sc_a11y \ + CppunitTest_sc_filter_html \ )) ifneq ($(ENABLE_JUMBO_SHEETS),) diff --git a/sc/qa/filter/html/data/text.html b/sc/qa/filter/html/data/text.html new file mode 100644 index 000000000000..eadb34b5e1f8 --- /dev/null +++ b/sc/qa/filter/html/data/text.html @@ -0,0 +1,8 @@ +<table> + <tr> + <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:1}">1</td> + </tr> + <tr> 
+ <td data-sheets-value="{&quot;1&quot;:2,&quot;2&quot;:&quot;01&quot;,&quot;6&quot;:1}">01</td> + </tr> +</table> diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx new file mode 100644 index 000000000000..76413c6455b4 --- /dev/null +++ b/sc/qa/filter/html/html.cxx @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <test/unoapixml_test.hxx> +#include <test/htmltesttools.hxx> + +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/sheet/XSpreadsheetDocument.hpp> +#include <com/sun/star/table/XCellRange.hpp> + +#include <comphelper/propertyvalue.hxx> + +using namespace com::sun::star; + +namespace +{ +/// Covers sc/source/filter/html/ fixes. +class Test : public UnoApiXmlTest, public HtmlTestTools +{ +public: + Test() + : UnoApiXmlTest("/sc/qa/filter/html/data/") + { + } +}; + +CPPUNIT_TEST_FIXTURE(Test, testTdAsText) +{ + // Given a document with an A2 cell that contains "01" as text: + OUString aURL = createFileURL(u"text.html"); + + // When loading that document to Calc: + uno::Sequence<beans::PropertyValue> aParams = { + comphelper::makePropertyValue("DocumentService", + OUString("com.sun.star.sheet.SpreadsheetDocument")), + }; + loadWithParams(aURL, aParams); + + // Then make sure "01" is not auto-converted to 1, as a number: + uno::Reference<sheet::XSpreadsheetDocument> xDocument(mxComponent, uno::UNO_QUERY); + uno::Reference<container::XIndexAccess> xSheets(xDocument->getSheets(), uno::UNO_QUERY); + uno::Reference<table::XCellRange> xSheet(xSheets->getByIndex(0), uno::UNO_QUERY); + uno::Reference<beans::XPropertySet> xCell(xSheet->getCellByPosition(0, 1), uno::UNO_QUERY); + table::CellContentType eType{}; + 
xCell->getPropertyValue("CellContentType") >>= eType; + // Without the accompanying fix in place, this test would have failed with: + // - Expected: 2 (TEXT) + // - Actual : 1 (VALUE) + // i.e. data-sheets-value was ignored on import. + CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType); +} +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx index 43ff3cca2de5..5d46d12dabe3 100644 --- a/sc/source/filter/html/htmlpars.cxx +++ b/sc/source/filter/html/htmlpars.cxx @@ -64,6 +64,7 @@ #include <rangelst.hxx> #include <orcus/css_parser.hpp> +#include <boost/property_tree/json_parser.hpp> #include <com/sun/star/document/XDocumentProperties.hpp> #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp> @@ -2126,6 +2127,27 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo ) } } break; + case HtmlOptionId::DSVAL: + { + // data-sheets-value from google sheets, value is a JSON. + OString aEncodedOption = rOption.GetString().toUtf8(); + const char* pEncodedOption = aEncodedOption.getStr(); + std::stringstream aStream(pEncodedOption); + boost::property_tree::ptree aTree; + boost::property_tree::read_json(aStream, aTree); + // The "1" key describes the original data type. + auto it = aTree.find("1"); + if (it != aTree.not_found()) + { + int nValueType = std::stoi(it->second.get_value<std::string>()); + // 2 is text. 
+ if (nValueType == 2) + { + nNumberFormat = NF_STANDARD_FORMAT_TEXT; + } + } + } + break; default: break; } } diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx index d1b0ea2ee03e..f5799434b72a 100644 --- a/svtools/source/svhtml/htmlkywd.cxx +++ b/svtools/source/svhtml/htmlkywd.cxx @@ -524,6 +524,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = { {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title), HtmlOptionId::TITLE}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value), HtmlOptionId::VALUE}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval), HtmlOptionId::SDVAL}, // StarDiv NumberValue + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSval), HtmlOptionId::DSVAL}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum), HtmlOptionId::SDNUM}, // StarDiv NumberFormat {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule), HtmlOptionId::SDMODULE}, |