summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiklos Vajna <vmiklos@collabora.com>2024-02-01 13:10:26 +0100
committerMiklos Vajna <vmiklos@collabora.com>2024-02-01 15:36:39 +0100
commite6e5660b726ecf3b0c39b277568568973b43c9f0 (patch)
tree93c846666990fdd7220d9b915a9ab71ba71437ad
parent829374381d9c9191746d528b52411ad757e39a08 (diff)
tdf#159483 sc HTML import: handle data-sheets-value attribute for the text case
The A2 cell in the bugdoc has 01 in it, which was auto-converted to 1 (float) value on import, even if it was text originally. This is hard to solve for HTML in general, which is not typed, but this input is coming from google sheets, which has an additional data-sheets-value attribute on <td> that does tell us about the type of the cell. Fix the problem by handling that attribute, and in case it explicitly says it's text, then apply the matching number format. Other types are not yet handled. Change-Id: I2986ef864e97d9c46d191aba25ca5740a1151a71 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162869 Reviewed-by: Miklos Vajna <vmiklos@collabora.com> Tested-by: Jenkins
-rw-r--r--include/svtools/htmlkywd.hxx1
-rw-r--r--include/svtools/htmltokn.h1
-rw-r--r--sc/CppunitTest_sc_filter_html.mk82
-rw-r--r--sc/Module_sc.mk1
-rw-r--r--sc/qa/filter/html/data/text.html8
-rw-r--r--sc/qa/filter/html/html.cxx62
-rw-r--r--sc/source/filter/html/htmlpars.cxx22
-rw-r--r--svtools/source/svhtml/htmlkywd.cxx1
8 files changed, 178 insertions, 0 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 00c8260749bd..23e836ea7cea 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -445,6 +445,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_title "title"
#define OOO_STRING_SVTOOLS_HTML_O_value "value"
#define OOO_STRING_SVTOOLS_HTML_O_SDval "sdval"
+#define OOO_STRING_SVTOOLS_HTML_O_DSval "data-sheets-value"
#define OOO_STRING_SVTOOLS_HTML_O_SDnum "sdnum"
#define OOO_STRING_SVTOOLS_HTML_O_sdlibrary "sdlibrary"
#define OOO_STRING_SVTOOLS_HTML_O_sdmodule "sdmodule"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index 4a333ee2f6d9..27370e5cb869 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -344,6 +344,7 @@ STRING_START = BOOL_END,
TITLE,
VALUE,
SDVAL, // StarDiv NumberValue
+ DSVAL,
SDNUM, // StarDiv NumberFormat
SDLIBRARY,
SDMODULE,
diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
new file mode 100644
index 000000000000..b78349d64703
--- /dev/null
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -0,0 +1,82 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sc_filter_html, \
+ sc/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sc_filter_html, \
+ boost_headers \
+ libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
+ basegfx \
+ comphelper \
+ cppu \
+ cppuhelper \
+ drawinglayer \
+ drawinglayercore \
+ editeng \
+ for \
+ forui \
+ i18nlangtag \
+ msfilter \
+ oox \
+ sal \
+ salhelper \
+ sax \
+ sc \
+ scqahelper \
+ sfx \
+ sot \
+ subsequenttest \
+ svl \
+ svt \
+ svx \
+ svxcore \
+ test \
+ tk \
+ tl \
+ ucbhelper \
+ unotest \
+ utl \
+ vcl \
+ xo \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
+ -I$(SRCDIR)/sc/source/ui/inc \
+ -I$(SRCDIR)/sc/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sc_filter_html,\
+ udkapi \
+ offapi \
+ oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_packages,sc_filter_html, \
+ filter_xhtml \
+ filter_xslt \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sc_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sc_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_configuration,sc_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sc/Module_sc.mk b/sc/Module_sc.mk
index a159c957d988..71488d5439e6 100644
--- a/sc/Module_sc.mk
+++ b/sc/Module_sc.mk
@@ -97,6 +97,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sc, \
CppunitTest_sc_uicalc2 \
CppunitTest_sc_vba_macro_test \
CppunitTest_sc_a11y \
+ CppunitTest_sc_filter_html \
))
ifneq ($(ENABLE_JUMBO_SHEETS),)
diff --git a/sc/qa/filter/html/data/text.html b/sc/qa/filter/html/data/text.html
new file mode 100644
index 000000000000..eadb34b5e1f8
--- /dev/null
+++ b/sc/qa/filter/html/data/text.html
@@ -0,0 +1,8 @@
+<table>
+ <tr>
+ <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:1}">1</td>
+ </tr>
+ <tr>
+ <td data-sheets-value="{&quot;1&quot;:2,&quot;2&quot;:&quot;01&quot;,&quot;6&quot;:1}">01</td>
+ </tr>
+</table>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..76413c6455b4
--- /dev/null
+++ b/sc/qa/filter/html/html.cxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <test/unoapixml_test.hxx>
+#include <test/htmltesttools.hxx>
+
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
+#include <com/sun/star/table/XCellRange.hpp>
+
+#include <comphelper/propertyvalue.hxx>
+
+using namespace com::sun::star;
+
+namespace
+{
+/// Covers sc/source/filter/html/ fixes; hands the /sc/qa/filter/html/data/ directory to UnoApiXmlTest.
+class Test : public UnoApiXmlTest, public HtmlTestTools
+{
+public:
+ Test()
+ : UnoApiXmlTest("/sc/qa/filter/html/data/")
+ {
+ }
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
+{
+ // Given a document with an A2 cell that contains "01" as text:
+ OUString aURL = createFileURL(u"text.html");
+
+ // When loading that document to Calc:
+ uno::Sequence<beans::PropertyValue> aParams = {
+ comphelper::makePropertyValue("DocumentService",
+ OUString("com.sun.star.sheet.SpreadsheetDocument")),
+ };
+ loadWithParams(aURL, aParams);
+
+ // Then make sure "01" is not auto-converted to 1, as a number:
+ uno::Reference<sheet::XSpreadsheetDocument> xDocument(mxComponent, uno::UNO_QUERY);
+ uno::Reference<container::XIndexAccess> xSheets(xDocument->getSheets(), uno::UNO_QUERY);
+ uno::Reference<table::XCellRange> xSheet(xSheets->getByIndex(0), uno::UNO_QUERY);
+ uno::Reference<beans::XPropertySet> xCell(xSheet->getCellByPosition(0, 1), uno::UNO_QUERY);
+ table::CellContentType eType{};
+ xCell->getPropertyValue("CellContentType") >>= eType;
+ // Without the accompanying fix in place, this test would have failed with:
+ // - Expected: 2 (TEXT)
+ // - Actual : 1 (VALUE)
+ // i.e. data-sheets-value was ignored on import.
+ CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 43ff3cca2de5..5d46d12dabe3 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -64,6 +64,7 @@
#include <rangelst.hxx>
#include <orcus/css_parser.hpp>
+#include <boost/property_tree/json_parser.hpp>
#include <com/sun/star/document/XDocumentProperties.hpp>
#include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
@@ -2126,6 +2127,27 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
}
}
break;
+ case HtmlOptionId::DSVAL:
+ {
+     // data-sheets-value from google sheets, value is a JSON.
+     //
+     // The attribute text comes straight from the imported HTML, so it
+     // cannot be trusted to be well-formed: read_json() throws on
+     // invalid JSON and std::stoi() throws when the "1" value is not a
+     // number or is out of range. No handler is visible around this
+     // switch, so catch those failures here and treat them as "no type
+     // information" instead of letting them abort the import.
+     OString aEncodedOption = rOption.GetString().toUtf8();
+     std::stringstream aStream(aEncodedOption.getStr());
+     boost::property_tree::ptree aTree;
+     try
+     {
+         boost::property_tree::read_json(aStream, aTree);
+         // The "1" key describes the original data type.
+         auto it = aTree.find("1");
+         if (it != aTree.not_found())
+         {
+             int nValueType = std::stoi(it->second.get_value<std::string>());
+             // 2 is text.
+             if (nValueType == 2)
+             {
+                 nNumberFormat = NF_STANDARD_FORMAT_TEXT;
+             }
+         }
+     }
+     catch (const std::exception&)
+     {
+         // Malformed data-sheets-value: leave nNumberFormat untouched.
+     }
+ }
+ break;
default: break;
}
}
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index d1b0ea2ee03e..f5799434b72a 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -524,6 +524,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title), HtmlOptionId::TITLE},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value), HtmlOptionId::VALUE},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval), HtmlOptionId::SDVAL}, // StarDiv NumberValue
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSval), HtmlOptionId::DSVAL},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum), HtmlOptionId::SDNUM}, // StarDiv NumberFormat
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule), HtmlOptionId::SDMODULE},