summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Kaganski <mike.kaganski@collabora.com> 2022-08-03 18:26:25 +0300
committer: Mike Kaganski <mike.kaganski@collabora.com> 2022-08-04 06:31:36 +0200
commit: c2e86396b741b956efc05e9dfea1e1c3286dfb9d (patch)
tree: 08eb4a1a7e9ac2c249b5c1612d6d3fc743bec698
parent: 86ed0105a4d70d481e3358ae1c6855766ef44d23 (diff)
tdf#150247: patch orcus to use UTF-16 paths on Windows
This adds UTF8->UTF16 conversion of the paths passed to the ctor of file_content on Windows, since both boost::filesystem::file_size and boost::interprocess::file_mapping take UTF-16 strings on this platform. So the assumption is that 8-bit path strings passed to orcus are UTF-8-encoded.

This partially reverts commit 75252e58d9b5d020bf7bd6ca66b3a9d780463051 (it keeps the use of osl_getThreadTextEncoding for platforms other than Windows).

Change-Id: Ie467f71a65945f4f07ff432136ea06b811c3f794
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137759
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r-- external/liborcus/UnpackedTarball_liborcus.mk | 1
-rw-r--r-- external/liborcus/win_path_utf16.patch | 33
-rw-r--r-- sc/source/filter/orcus/orcusfiltersimpl.cxx | 33
3 files changed, 38 insertions, 29 deletions
diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk
index 6e78c0ca2c3e..30d3f0d78d1c 100644
--- a/external/liborcus/UnpackedTarball_liborcus.mk
+++ b/external/liborcus/UnpackedTarball_liborcus.mk
@@ -47,6 +47,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
ifeq ($(OS),WNT)
$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
external/liborcus/windows-constants-hack.patch \
+ external/liborcus/win_path_utf16.patch \
))
endif
diff --git a/external/liborcus/win_path_utf16.patch b/external/liborcus/win_path_utf16.patch
new file mode 100644
index 000000000000..0a6781e728b3
--- /dev/null
+++ b/external/liborcus/win_path_utf16.patch
@@ -0,0 +1,33 @@
+diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp
+index 00395f59ff25..8f385fb8965a 100644
+--- a/src/parser/stream.cpp
++++ b/src/parser/stream.cpp
+@@ -147,6 +147,14 @@ std::tuple<std::string_view, size_t, size_t> find_line_with_offset(std::string_v
+ return std::make_tuple(line, line_num, offset_on_line);
+ }
+
++#ifdef _WIN32
++std::wstring to_wstring(std::string_view s)
++{
++ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conversion;
++ return conversion.from_bytes(s.data(), s.data() + s.size());
++}
++#endif
++
+ } // anonymous namespace
+
+ struct file_content::impl
+@@ -162,8 +170,13 @@ struct file_content::impl
+ impl() : content_size(0), content(nullptr) {}
+
+ impl(std::string_view filepath) :
++#ifdef _WIN32
++ content_size(fs::file_size(to_wstring(filepath))),
++ mapped_file(to_wstring(filepath).c_str(), bip::read_only),
++#else
+ content_size(fs::file_size(std::string{filepath}.c_str())),
+ mapped_file(std::string{filepath}.c_str(), bip::read_only),
++#endif
+ mapped_region(mapped_file, bip::read_only, 0, content_size),
+ content(nullptr)
+ {
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index db2d5705d51e..e7fdb44ca91e 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -11,8 +11,6 @@
#include <orcusinterface.hxx>
#include <tokenarray.hxx>
-#include <memory>
-
#include <osl/thread.hxx>
#include <sfx2/docfile.hxx>
#include <sfx2/frame.hxx>
@@ -30,13 +28,6 @@
#include <orcus/stream.hpp>
#include <com/sun/star/task/XStatusIndicator.hpp>
-#if defined _WIN32
-#include <boost/filesystem/operations.hpp> // for boost::filesystem::filesystem_error
-#include <o3tl/char16_t2wchar_t.hxx>
-#include <prewin.h>
-#include <postwin.h>
-#endif
-
using namespace com::sun::star;
namespace
@@ -129,33 +120,17 @@ bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const
bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
{
- OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
-
try
{
#if defined _WIN32
- std::unique_ptr<orcus::file_content> content;
- try
- {
- content = std::make_unique<orcus::file_content>(aPath8.getStr());
- }
- catch (const boost::filesystem::filesystem_error&)
- {
- // Maybe the path contains characters not representable in ACP. It's not
- // yet possible to pass Unicode path to orcus::file_content ctor - see
- // https://gitlab.com/orcus/orcus/-/issues/30; try short path.
- wchar_t buf[32767];
- if (GetShortPathNameW(o3tl::toW(aPath.getStr()), buf, std::size(buf)) == 0)
- throw;
- aPath8 = OUStringToOString(o3tl::toU(buf), osl_getThreadTextEncoding());
- content = std::make_unique<orcus::file_content>(aPath8);
- }
+ OString aPath8 = OUStringToOString(aPath, RTL_TEXTENCODING_UTF8);
#else
- auto content = std::make_unique<orcus::file_content>(aPath8);
+ OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
#endif
+ orcus::file_content content(aPath8);
ScOrcusFactory aFactory(rDoc);
ScOrcusStyles aStyles(aFactory);
- orcus::import_ods::read_styles(content->str(), &aStyles);
+ orcus::import_ods::read_styles(content.str(), &aStyles);
}
catch (const std::exception& e)
{