diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2022-08-03 18:26:25 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2022-08-04 06:31:36 +0200 |
commit | c2e86396b741b956efc05e9dfea1e1c3286dfb9d (patch) | |
tree | 08eb4a1a7e9ac2c249b5c1612d6d3fc743bec698 | |
parent | 86ed0105a4d70d481e3358ae1c6855766ef44d23 (diff) |
tdf#150247: patch orcus to use UTF-16 paths on Windows
This adds UTF-8 -> UTF-16 conversion of the paths passed to the
constructor of file_content on Windows, since both
boost::filesystem::file_size and boost::interprocess::file_mapping
take UTF-16 strings on this platform. The assumption, therefore, is
that 8-bit path strings passed to orcus are UTF-8-encoded.
This partially reverts commit 75252e58d9b5d020bf7bd6ca66b3a9d780463051
(it keeps the use of osl_getThreadTextEncoding on platforms other than
Windows).
Change-Id: Ie467f71a65945f4f07ff432136ea06b811c3f794
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137759
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r-- | external/liborcus/UnpackedTarball_liborcus.mk | 1 | ||||
-rw-r--r-- | external/liborcus/win_path_utf16.patch | 33 | ||||
-rw-r--r-- | sc/source/filter/orcus/orcusfiltersimpl.cxx | 33 |
3 files changed, 38 insertions, 29 deletions
diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk index 6e78c0ca2c3e..30d3f0d78d1c 100644 --- a/external/liborcus/UnpackedTarball_liborcus.mk +++ b/external/liborcus/UnpackedTarball_liborcus.mk @@ -47,6 +47,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\ ifeq ($(OS),WNT) $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\ external/liborcus/windows-constants-hack.patch \ + external/liborcus/win_path_utf16.patch \ )) endif diff --git a/external/liborcus/win_path_utf16.patch b/external/liborcus/win_path_utf16.patch new file mode 100644 index 000000000000..0a6781e728b3 --- /dev/null +++ b/external/liborcus/win_path_utf16.patch @@ -0,0 +1,33 @@ +diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp +index 00395f59ff25..8f385fb8965a 100644 +--- a/src/parser/stream.cpp ++++ b/src/parser/stream.cpp +@@ -147,6 +147,14 @@ std::tuple<std::string_view, size_t, size_t> find_line_with_offset(std::string_v + return std::make_tuple(line, line_num, offset_on_line); + } + ++#ifdef _WIN32 ++std::wstring to_wstring(std::string_view s) ++{ ++ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conversion; ++ return conversion.from_bytes(s.data(), s.data() + s.size()); ++} ++#endif ++ + } // anonymous namespace + + struct file_content::impl +@@ -162,8 +170,13 @@ struct file_content::impl + impl() : content_size(0), content(nullptr) {} + + impl(std::string_view filepath) : ++#ifdef _WIN32 ++ content_size(fs::file_size(to_wstring(filepath))), ++ mapped_file(to_wstring(filepath).c_str(), bip::read_only), ++#else + content_size(fs::file_size(std::string{filepath}.c_str())), + mapped_file(std::string{filepath}.c_str(), bip::read_only), ++#endif + mapped_region(mapped_file, bip::read_only, 0, content_size), + content(nullptr) + { diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx index db2d5705d51e..e7fdb44ca91e 100644 --- 
a/sc/source/filter/orcus/orcusfiltersimpl.cxx +++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx @@ -11,8 +11,6 @@ #include <orcusinterface.hxx> #include <tokenarray.hxx> -#include <memory> - #include <osl/thread.hxx> #include <sfx2/docfile.hxx> #include <sfx2/frame.hxx> @@ -30,13 +28,6 @@ #include <orcus/stream.hpp> #include <com/sun/star/task/XStatusIndicator.hpp> -#if defined _WIN32 -#include <boost/filesystem/operations.hpp> // for boost::filesystem::filesystem_error -#include <o3tl/char16_t2wchar_t.hxx> -#include <prewin.h> -#include <postwin.h> -#endif - using namespace com::sun::star; namespace @@ -129,33 +120,17 @@ bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const { - OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding()); - try { #if defined _WIN32 - std::unique_ptr<orcus::file_content> content; - try - { - content = std::make_unique<orcus::file_content>(aPath8.getStr()); - } - catch (const boost::filesystem::filesystem_error&) - { - // Maybe the path contains characters not representable in ACP. It's not - // yet possible to pass Unicode path to orcus::file_content ctor - see - // https://gitlab.com/orcus/orcus/-/issues/30; try short path. 
- wchar_t buf[32767]; - if (GetShortPathNameW(o3tl::toW(aPath.getStr()), buf, std::size(buf)) == 0) - throw; - aPath8 = OUStringToOString(o3tl::toU(buf), osl_getThreadTextEncoding()); - content = std::make_unique<orcus::file_content>(aPath8); - } + OString aPath8 = OUStringToOString(aPath, RTL_TEXTENCODING_UTF8); #else - auto content = std::make_unique<orcus::file_content>(aPath8); + OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding()); #endif + orcus::file_content content(aPath8); ScOrcusFactory aFactory(rDoc); ScOrcusStyles aStyles(aFactory); - orcus::import_ods::read_styles(content->str(), &aStyles); + orcus::import_ods::read_styles(content.str(), &aStyles); } catch (const std::exception& e) { |