summary refs log tree commit diff
diff options
context:
space:
mode:
author Kohei Yoshida <kohei@libreoffice.org> 2023-05-31 21:33:56 -0400
committer Kohei Yoshida <kohei@libreoffice.org> 2023-10-25 03:59:57 +0200
commit b14583ba37a6d7ce398ccd3cf339f954785b03d8 (patch)
tree cf48ebcb363c1f782b3f238f5a49dea367345b84
parent b5d194d1d37131f921853b78d659c63178de2bc6 (diff)
Support conditional loading of Apache Parquet files into Calc

Also, use orcus::create_filter() and simplify the logic a bit. This requires orcus 0.19.1 or newer.

Note that this change makes it possible to load Apache Parquet files if and only if orcus has been built with the parquet import filter enabled. Using orcus without the parquet import filter enabled will not break the build or run-time behavior; you just can't load parquet files.

Change-Id: I9f8820998b7b0667d1e7cd532c32b1c7e55ca999
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158411
Tested-by: Jenkins
Reviewed-by: Kohei Yoshida <kohei@libreoffice.org>
-rw-r--r-- configure.ac 2
-rw-r--r-- filter/Configuration_filter.mk 2
-rw-r--r-- filter/source/config/cache/typedetection.cxx 1
-rw-r--r-- filter/source/config/fragments/filters/calc_Parquet.xcu 19
-rw-r--r-- filter/source/config/fragments/types/calc_Parquet.xcu 17
-rw-r--r-- sc/inc/orcusfilters.hxx 18
-rw-r--r-- sc/source/filter/inc/orcusfiltersimpl.hxx 7
-rw-r--r-- sc/source/filter/orcus/filterdetect.cxx 2
-rw-r--r-- sc/source/filter/orcus/orcusfiltersimpl.cxx 70
-rw-r--r-- sc/source/ui/docshell/docsh.cxx 63
10 files changed, 102 insertions, 99 deletions
diff --git a/configure.ac b/configure.ac
index 999ab64289e8..c8466ae65e01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10878,7 +10878,7 @@ fi
dnl ===================================================================
dnl Orcus
dnl ===================================================================
-libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.18.0])
+libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.19.1])
if test "$with_system_orcus" != "yes"; then
if test "$SYSTEM_BOOST" = "TRUE"; then
dnl Link with Boost.System
diff --git a/filter/Configuration_filter.mk b/filter/Configuration_filter.mk
index bd3d3486234e..8a323e83fd54 100644
--- a/filter/Configuration_filter.mk
+++ b/filter/Configuration_filter.mk
@@ -477,6 +477,7 @@ $(eval $(call filter_Configuration_add_types,fcfg_langpack,fcfg_calc_types.xcu,f
generic_Text \
calc_Gnumeric \
calc_Lotus \
+ calc_Parquet \
calc_QPro \
calc_MS_Excel_40 \
calc_MS_Excel_40_VorlageTemplate \
@@ -535,6 +536,7 @@ $(eval $(call filter_Configuration_add_filters,fcfg_langpack,fcfg_calc_filters.x
calc_HTML_WebQuery \
calc_StarOffice_XML_Calc_Template \
calc_pdf_Export \
+ calc_Parquet \
dBase \
calc8 \
calc8_template \
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 18d13a8f7796..a83a1406d0c1 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -211,6 +211,7 @@ int getFlatTypeRank(std::u16string_view rType)
"calc_SYLK",
"calc_DIF",
"calc_dBase",
+ "Apache Parquet",
// Binary (raster and vector image files)
"emf_MS_Windows_Metafile",
diff --git a/filter/source/config/fragments/filters/calc_Parquet.xcu b/filter/source/config/fragments/filters/calc_Parquet.xcu
new file mode 100644
index 000000000000..5b0fea8257bb
--- /dev/null
+++ b/filter/source/config/fragments/filters/calc_Parquet.xcu
@@ -0,0 +1,19 @@
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-->
+<node oor:name="Apache Parquet Spreadsheet" oor:op="replace">
+ <prop oor:name="Flags"><value>IMPORT ALIEN PREFERRED</value></prop>
+ <prop oor:name="UIComponent"/>
+ <prop oor:name="FilterService"/>
+ <prop oor:name="UserData"/>
+ <prop oor:name="Type"><value>Apache Parquet</value></prop>
+ <prop oor:name="TemplateName"/>
+ <prop oor:name="DocumentService"><value>com.sun.star.sheet.SpreadsheetDocument</value></prop>
+ <prop oor:name="UIName">
+ <value xml:lang="en-US">Apache Parquet Spreadsheet</value>
+ </prop>
+</node>
diff --git a/filter/source/config/fragments/types/calc_Parquet.xcu b/filter/source/config/fragments/types/calc_Parquet.xcu
new file mode 100644
index 000000000000..6c29d886c92f
--- /dev/null
+++ b/filter/source/config/fragments/types/calc_Parquet.xcu
@@ -0,0 +1,17 @@
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-->
+<node oor:name="Apache Parquet" oor:op="replace" >
+ <prop oor:name="DetectService"><value>com.sun.star.comp.sc.OrcusFilterDetect</value></prop>
+ <prop oor:name="URLPattern"/>
+ <prop oor:name="Extensions"><value>parquet</value></prop>
+ <prop oor:name="MediaType"/>
+ <prop oor:name="Preferred"><value>true</value></prop>
+ <prop oor:name="PreferredFilter"><value>Apache Parquet Spreadsheet</value></prop>
+ <prop oor:name="UIName"><value xml:lang="en-US">Apache Parquet</value></prop>
+ <prop oor:name="ClipboardFormat"/>
+</node>
diff --git a/sc/inc/orcusfilters.hxx b/sc/inc/orcusfilters.hxx
index 6d17f3741ebb..f13f5cc630d2 100644
--- a/sc/inc/orcusfilters.hxx
+++ b/sc/inc/orcusfilters.hxx
@@ -25,17 +25,17 @@ namespace weld { class TreeView; }
class ScOrcusFilters
{
public:
- virtual ~ScOrcusFilters() {}
-
- virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
-
- virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+ enum class ImportResult
+ {
+ NotSupported,
+ Success,
+ Failure
+ };
- virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
-
- virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+ virtual ~ScOrcusFilters() {}
- virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+ virtual ImportResult importByName(
+ ScDocument& rDoc, SfxMedium& rMedium, const OUString& rFilterName) const = 0;
/**
* Used to import just the styles from an xml file.
diff --git a/sc/source/filter/inc/orcusfiltersimpl.hxx b/sc/source/filter/inc/orcusfiltersimpl.hxx
index 070d69aec03e..7bba1410dc5c 100644
--- a/sc/source/filter/inc/orcusfiltersimpl.hxx
+++ b/sc/source/filter/inc/orcusfiltersimpl.hxx
@@ -16,11 +16,8 @@
class ScOrcusFiltersImpl : public ScOrcusFilters
{
public:
- virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const override;
- virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const override;
- virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const override;
- virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const override;
- virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const override;
+ virtual ImportResult importByName(ScDocument& rDoc, SfxMedium& rMedium,
+ const OUString& rFilterName) const override;
virtual bool importODS_Styles(ScDocument& rDoc, OUString& aFileName) const override;
diff --git a/sc/source/filter/orcus/filterdetect.cxx b/sc/source/filter/orcus/filterdetect.cxx
index 5750932e95d7..06f6015a8f0c 100644
--- a/sc/source/filter/orcus/filterdetect.cxx
+++ b/sc/source/filter/orcus/filterdetect.cxx
@@ -90,6 +90,8 @@ OUString OrcusFormatDetect::detect(css::uno::Sequence<css::beans::PropertyValue>
return "Gnumeric XML";
case orcus::format_t::xls_xml:
return "calc_MS_Excel_2003_XML";
+ case orcus::format_t::parquet:
+ return "Apache Parquet";
default:
;
}
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index 1d3bc9c46234..2a13c761d5d5 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -19,11 +19,7 @@
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
-#include <orcus/orcus_csv.hpp>
-#include <orcus/orcus_gnumeric.hpp>
-#include <orcus/orcus_xlsx.hpp>
-#include <orcus/orcus_xls_xml.hpp>
-#include <orcus/orcus_ods.hpp>
+#include <orcus/format_detection.hpp>
#include <orcus/orcus_import_ods.hpp>
#include <orcus/stream.hpp>
#include <com/sun/star/task/XStatusIndicator.hpp>
@@ -70,49 +66,35 @@ bool loadFileContent(SfxMedium& rMedium, orcus::iface::import_filter& filter)
}
}
-bool ScOrcusFiltersImpl::importCSV(ScDocument& rDoc, SfxMedium& rMedium) const
+ScOrcusFilters::ImportResult ScOrcusFiltersImpl::importByName(ScDocument& rDoc, SfxMedium& rMedium,
+ const OUString& rFilterName) const
{
- ScOrcusFactory aFactory(rDoc);
- aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
- orcus::orcus_csv filter(&aFactory);
- return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const
-{
- ScOrcusFactory aFactory(rDoc);
- aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
- orcus::orcus_gnumeric filter(&aFactory);
- return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const
-{
- ScOrcusFactory aFactory(rDoc);
- aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
- orcus::orcus_xls_xml filter(&aFactory);
- return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const
-{
- ScOrcusFactory aFactory(rDoc);
- aFactory.setStatusIndicator(getStatusIndicator(rMedium));
+ const std::unordered_map<OUString, orcus::format_t> aMap = {
+ { "Apache Parquet Spreadsheet", orcus::format_t::parquet },
+ { "Gnumeric Spreadsheet", orcus::format_t::gnumeric },
+ { "MS Excel 2003 XML Orcus", orcus::format_t::xls_xml },
+ { "csv", orcus::format_t::csv },
+ { "gnumeric", orcus::format_t::gnumeric },
+ { "ods", orcus::format_t::ods },
+ { "parquet", orcus::format_t::parquet },
+ { "xls-xml", orcus::format_t::xls_xml },
+ { "xlsx", orcus::format_t::xlsx },
+ };
+
+ if (auto it = aMap.find(rFilterName); it != aMap.end())
+ {
+ ScOrcusFactory aFactory(rDoc);
+ aFactory.setStatusIndicator(getStatusIndicator(rMedium));
- orcus::orcus_xlsx filter(&aFactory);
- return loadFileContent(rMedium, filter);
-}
+ auto filter = orcus::create_filter(it->second, &aFactory);
+ if (!filter)
+ return ImportResult::Failure;
-bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const
-{
- ScOrcusFactory aFactory(rDoc);
- aFactory.setStatusIndicator(getStatusIndicator(rMedium));
+ bool res = loadFileContent(rMedium, *filter);
+ return res ? ImportResult::Success : ImportResult::Failure;
+ }
- orcus::orcus_ods filter(&aFactory);
- return loadFileContent(rMedium, filter);
+ return ImportResult::NotSupported;
}
bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx
index 7ba08a1cc5d5..175eb2f9c38a 100644
--- a/sc/source/ui/docshell/docsh.cxx
+++ b/sc/source/ui/docshell/docsh.cxx
@@ -1261,22 +1261,6 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium )
else
bRet = true;
}
- else if (aFltName == "Gnumeric Spreadsheet")
- {
- ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
- if (!pOrcus)
- return false;
-
- bRet = pOrcus->importGnumeric(*m_pDocument, rMedium);
- }
- else if (aFltName == "MS Excel 2003 XML Orcus")
- {
- ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
- if (!pOrcus)
- return false;
-
- bRet = pOrcus->importExcel2003XML(*m_pDocument, rMedium);
- }
else if (aFltName == SC_TEXT_CSV_FILTER_NAME)
{
ScAsciiOptions aOptions;
@@ -1604,10 +1588,27 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium )
}
else
{
- if (!GetErrorIgnoreWarning())
+ ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
+ if (!pOrcus)
+ return false;
+
+ switch (pOrcus->importByName(*m_pDocument, rMedium, aFltName))
{
- SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom");
- SetError(SCERR_IMPORT_NI);
+ case ScOrcusFilters::ImportResult::Success:
+ bRet = true;
+ break;
+ case ScOrcusFilters::ImportResult::Failure:
+ bRet = false;
+ break;
+ case ScOrcusFilters::ImportResult::NotSupported:
+ {
+ if (!GetErrorIgnoreWarning())
+ {
+ SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom");
+ SetError(SCERR_IMPORT_NI);
+ }
+ break;
+ }
}
}
@@ -1695,27 +1696,9 @@ bool ScDocShell::LoadExternal( SfxMedium& rMed )
if (!pOrcus)
return false;
- const OUString& rFilterName = pFilter->GetName();
- if (rFilterName == "gnumeric")
- {
- if (!pOrcus->importGnumeric(*m_pDocument, rMed))
- return false;
- }
- else if (rFilterName == "csv")
- {
- if (!pOrcus->importCSV(*m_pDocument, rMed))
- return false;
- }
- else if (rFilterName == "xlsx")
- {
- if (!pOrcus->importXLSX(*m_pDocument, rMed))
- return false;
- }
- else if (rFilterName == "ods")
- {
- if (!pOrcus->importODS(*m_pDocument, rMed))
- return false;
- }
+ auto res = pOrcus->importByName(*m_pDocument, rMed, pFilter->GetName());
+ if (res != ScOrcusFilters::ImportResult::Success)
+ return false;
FinishedLoading();
return true;