summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Kaganski <mike.kaganski@collabora.com>2023-12-01 16:48:44 +0300
committerMike Kaganski <mike.kaganski@collabora.com>2023-12-01 16:13:38 +0100
commita5a49657dc17609a05dca59a8521fd71d14fe76e (patch)
treeb5d6e1f0df0d3da94135fc833bd5ef027d15a992
parent8a0015c35f3f137e4f3a80e40616bc078e265a1c (diff)
tdf#158442: fix opening hybrid PDFs on Windows
Commit 046e9545956d8ad1d69345d6b4a4c0a33714d179 (Try to revert to use of file_iterator from boost on Windows, 2023-10-31) had introduced a problem that pdfparse::PDFReader::read couldn't create file_iterator for files already opened with write access: mmap_file_iterator ctor on Windows used single FILE_SHARE_READ as dwSharedMode parameter for CreateFileA WinAPI; and that failed, when the file was already opened using GENERIC_WRITE in dwDesiredAccess - which happens when opening stream in TypeDetection::impl_detectTypeFlatAndDeep. Fix this by patching boosts' mmap_file_iterator constructor to use FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile. But there was a pre-existing problem of using char-based CreateFileA API, which disallows opening any files with names not representable in current Windows codepage. Such hybrid PDF files would still fail creation of the file_iterator, and open as PDF. Fix that by further patching boost to have wstring-based constructors for file_iterator and mmap_file_iterator on Windows, which would call CreateFileW. Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r--external/boost/UnpackedTarball_boost.mk2
-rw-r--r--external/boost/boost.file_iterator.sharing_win.patch158
-rw-r--r--filter/CppunitTest_filter_textfilterdetect.mk4
-rw-r--r--filter/qa/unit/data/hybrid_calc_абв_αβγ.pdfbin0 -> 10420 bytes
-rw-r--r--filter/qa/unit/data/hybrid_impress_абв_αβγ.pdfbin0 -> 21055 bytes
-rw-r--r--filter/qa/unit/data/hybrid_writer_абв_αβγ.pdfbin0 -> 10732 bytes
-rw-r--r--filter/qa/unit/textfilterdetect.cxx41
-rw-r--r--include/o3tl/char16_t2wchar_t.hxx1
-rw-r--r--sdext/source/pdfimport/filterdet.cxx5
-rw-r--r--sdext/source/pdfimport/inc/pdfparse.hxx5
-rw-r--r--sdext/source/pdfimport/pdfparse/pdfparse.cxx11
-rw-r--r--sdext/source/pdfimport/test/pdfunzip.cxx3
-rw-r--r--sdext/source/pdfimport/wrapper/wrapper.cxx3
13 files changed, 216 insertions, 17 deletions
diff --git a/external/boost/UnpackedTarball_boost.mk b/external/boost/UnpackedTarball_boost.mk
index acdc5d331c76..d8bd131ac8df 100644
--- a/external/boost/UnpackedTarball_boost.mk
+++ b/external/boost/UnpackedTarball_boost.mk
@@ -35,6 +35,8 @@ boost_patches += boost-ios.patch.0
# violations":
boost_patches += 0001-Avoid-boost-phoenix-placeholders-uarg1.10-ODR-violat.patch.2
+boost_patches += boost.file_iterator.sharing_win.patch
+
$(eval $(call gb_UnpackedTarball_UnpackedTarball,boost))
$(eval $(call gb_UnpackedTarball_set_tarball,boost,$(BOOST_TARBALL)))
diff --git a/external/boost/boost.file_iterator.sharing_win.patch b/external/boost/boost.file_iterator.sharing_win.patch
new file mode 100644
index 000000000000..b3b8bea3f3ff
--- /dev/null
+++ b/external/boost/boost.file_iterator.sharing_win.patch
@@ -0,0 +1,158 @@
+--- foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp
+@@ -181,67 +181,28 @@ public:
+ {}
+
+ explicit mmap_file_iterator(std::string const& fileName)
+- : m_filesize(0), m_curChar(0)
+- {
+- HANDLE hFile = ::CreateFileA(
++ : mmap_file_iterator(::CreateFileA(
+ fileName.c_str(),
+ GENERIC_READ,
+- FILE_SHARE_READ,
++ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_SEQUENTIAL_SCAN,
+ NULL
+- );
+-
+- if (hFile == INVALID_HANDLE_VALUE)
+- return;
+-
+- // Store the size of the file, it's used to construct
+- // the end iterator
+- m_filesize = ::GetFileSize(hFile, NULL);
++ ))
++ {}
+
+- HANDLE hMap = ::CreateFileMapping(
+- hFile,
++ explicit mmap_file_iterator(std::wstring const& fileName)
++ : mmap_file_iterator(::CreateFileW(
++ fileName.c_str(),
++ GENERIC_READ,
++ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+- PAGE_READONLY,
+- 0, 0,
++ OPEN_EXISTING,
++ FILE_FLAG_SEQUENTIAL_SCAN,
+ NULL
+- );
+-
+- if (hMap == NULL)
+- {
+- ::CloseHandle(hFile);
+- return;
+- }
+-
+- LPVOID pMem = ::MapViewOfFile(
+- hMap,
+- FILE_MAP_READ,
+- 0, 0, 0
+- );
+-
+- if (pMem == NULL)
+- {
+- ::CloseHandle(hMap);
+- ::CloseHandle(hFile);
+- return;
+- }
+-
+- // We hold both the file handle and the memory pointer.
+- // We can close the hMap handle now because Windows holds internally
+- // a reference to it since there is a view mapped.
+- ::CloseHandle(hMap);
+-
+- // It seems like we can close the file handle as well (because
+- // a reference is hold by the filemap object).
+- ::CloseHandle(hFile);
+-
+- // Store the handles inside the shared_ptr (with the custom destructors)
+- m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
+-
+- // Start of the file
+- m_curChar = m_mem.get();
+- }
++ ))
++ {}
+
+ mmap_file_iterator(const mmap_file_iterator& iter)
+ { *this = iter; }
+@@ -290,6 +251,59 @@ private:
+ boost::shared_ptr<CharT> m_mem;
+ std::size_t m_filesize;
+ CharT* m_curChar;
++
++ explicit mmap_file_iterator(HANDLE hFile)
++ : m_filesize(0), m_curChar(0)
++ {
++ if (hFile == INVALID_HANDLE_VALUE)
++ return;
++
++ // Store the size of the file, it's used to construct
++ // the end iterator
++ m_filesize = ::GetFileSize(hFile, NULL);
++
++ HANDLE hMap = ::CreateFileMapping(
++ hFile,
++ NULL,
++ PAGE_READONLY,
++ 0, 0,
++ NULL
++ );
++
++ if (hMap == NULL)
++ {
++ ::CloseHandle(hFile);
++ return;
++ }
++
++ LPVOID pMem = ::MapViewOfFile(
++ hMap,
++ FILE_MAP_READ,
++ 0, 0, 0
++ );
++
++ if (pMem == NULL)
++ {
++ ::CloseHandle(hMap);
++ ::CloseHandle(hFile);
++ return;
++ }
++
++ // We hold both the file handle and the memory pointer.
++ // We can close the hMap handle now because Windows holds internally
++ // a reference to it since there is a view mapped.
++ ::CloseHandle(hMap);
++
++ // It seems like we can close the file handle as well (because
++ // a reference is hold by the filemap object).
++ ::CloseHandle(hFile);
++
++ // Store the handles inside the shared_ptr (with the custom destructors)
++ m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
++
++ // Start of the file
++ m_curChar = m_mem.get();
++ }
+ };
+
+ #endif // BOOST_SPIRIT_FILEITERATOR_WINDOWS
+--- foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp
+@@ -170,6 +170,12 @@ public:
+ : base_t(adapted_t(fileName))
+ {}
+
++#ifdef BOOST_SPIRIT_FILEITERATOR_WINDOWS
++ file_iterator(std::wstring const& fileName)
++ : base_t(adapted_t(fileName))
++ {}
++#endif
++
+ file_iterator(const base_t& iter)
+ : base_t(iter)
+ {}
diff --git a/filter/CppunitTest_filter_textfilterdetect.mk b/filter/CppunitTest_filter_textfilterdetect.mk
index be2697a17406..e931a5741a1d 100644
--- a/filter/CppunitTest_filter_textfilterdetect.mk
+++ b/filter/CppunitTest_filter_textfilterdetect.mk
@@ -42,4 +42,8 @@ $(eval $(call gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
$(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
+$(eval $(call gb_CppunitTest_use_custom_headers,filter_textfilterdetect, \
+ officecfg/registry \
+))
+
# vim: set noet sw=4 ts=4:
diff --git a/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf b/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf
new file mode 100644
index 000000000000..b104113a5238
--- /dev/null
+++ b/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf
Binary files differ
diff --git a/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf b/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf
new file mode 100644
index 000000000000..78e7136211f4
--- /dev/null
+++ b/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf
Binary files differ
diff --git a/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf b/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf
new file mode 100644
index 000000000000..00cf3de44c5c
--- /dev/null
+++ b/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf
Binary files differ
diff --git a/filter/qa/unit/textfilterdetect.cxx b/filter/qa/unit/textfilterdetect.cxx
index e8de97a8df61..c88de0e8790d 100644
--- a/filter/qa/unit/textfilterdetect.cxx
+++ b/filter/qa/unit/textfilterdetect.cxx
@@ -16,7 +16,9 @@
#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
#include <com/sun/star/text/XTextDocument.hpp>
+#include <comphelper/configuration.hxx>
#include <comphelper/propertyvalue.hxx>
+#include <officecfg/Office/Common.hxx>
#include <sfx2/docfac.hxx>
#include <unotools/mediadescriptor.hxx>
#include <unotools/streamwrap.hxx>
@@ -31,6 +33,11 @@ using namespace com::sun::star;
namespace
{
+bool supportsService(const uno::Reference<lang::XComponent>& x, const OUString& s)
+{
+ return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
+}
+
/// Test class for PlainTextFilterDetect.
class TextFilterDetectTest : public UnoApiTest
{
@@ -63,10 +70,6 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
{
- auto supportsService = [](const uno::Reference<lang::XComponent>& x, const OUString& s) {
- return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
- };
-
// Given an empty file, with a pptx extension
// When loading the file
loadFromURL(u"empty.pptx");
@@ -172,6 +175,36 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
CPPUNIT_ASSERT_EQUAL(u"Writer template’s first line"_ustr, xParagraph->getString());
}
}
+
+CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testHybridPDFFile)
+{
+ // Make sure that file locking is ON
+ {
+ std::shared_ptr<comphelper::ConfigurationChanges> xChanges(
+ comphelper::ConfigurationChanges::create());
+ officecfg::Office::Common::Misc::UseDocumentSystemFileLocking::set(true, xChanges);
+ xChanges->commit();
+ }
+
+ // Given a hybrid PDF file
+
+ // Created in Writer
+ loadFromURL(u"hybrid_writer_абв_αβγ.pdf");
+ // Make sure it opens in Writer.
+ // Without the accompanying fix in place, this test would have failed on Windows, as it was
+ // opened in Draw instead.
+ CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.text.TextDocument"));
+
+ // Created in Calc
+ loadFromURL(u"hybrid_calc_абв_αβγ.pdf");
+ // Make sure it opens in Calc.
+ CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.sheet.SpreadsheetDocument"));
+
+ // Created in Impress
+ loadFromURL(u"hybrid_impress_абв_αβγ.pdf");
+ // Make sure it opens in Impress.
+ CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.presentation.PresentationDocument"));
+}
}
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/include/o3tl/char16_t2wchar_t.hxx b/include/o3tl/char16_t2wchar_t.hxx
index fcbbaf079ce3..c2640f627109 100644
--- a/include/o3tl/char16_t2wchar_t.hxx
+++ b/include/o3tl/char16_t2wchar_t.hxx
@@ -41,6 +41,7 @@ inline char16_t* toU(wchar_t* p) { return reinterpret_cast<char16_t*>(p); }
inline char16_t const* toU(wchar_t const* p) { return reinterpret_cast<char16_t const*>(p); }
inline std::u16string_view toU(std::wstring_view v) { return { toU(v.data()), v.size() }; }
+inline std::wstring_view toW(std::u16string_view v) { return { toW(v.data()), v.size() }; }
#endif
}
diff --git a/sdext/source/pdfimport/filterdet.cxx b/sdext/source/pdfimport/filterdet.cxx
index e8d2e11d30ba..d9fdcc03c6ab 100644
--- a/sdext/source/pdfimport/filterdet.cxx
+++ b/sdext/source/pdfimport/filterdet.cxx
@@ -555,7 +555,6 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
bool bMayUseUI )
{
uno::Reference< io::XStream > xEmbed;
- OString aPDFFile;
OUString aSysUPath;
if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None )
return xEmbed;
@@ -563,9 +562,7 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
if (!detectHasAdditionalStreams(aSysUPath))
return xEmbed;
- aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() );
-
- std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
+ std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(aSysUPath));
if( pEntry )
{
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx b/sdext/source/pdfimport/inc/pdfparse.hxx
index 7891419471d3..542a9ed4b1a5 100644
--- a/sdext/source/pdfimport/inc/pdfparse.hxx
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -292,10 +292,7 @@ struct PDFReader
{
PDFReader() = delete;
- static std::unique_ptr<PDFEntry> read( const char* pFileName );
-#ifdef _WIN32
- static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
-#endif
+ static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName);
};
} // namespace
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
index cdd3ac13ff35..8b3da7eb39d7 100644
--- a/sdext/source/pdfimport/pdfparse/pdfparse.cxx
+++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
@@ -36,7 +36,9 @@
#include <string.h>
+#include <o3tl/char16_t2wchar_t.hxx>
#include <o3tl/safeint.hxx>
+#include <osl/thread.h>
#include <rtl/strbuf.hxx>
#include <rtl/ustrbuf.hxx>
#include <sal/log.hxx>
@@ -558,9 +560,14 @@ public:
}
-std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
+std::unique_ptr<PDFEntry> PDFReader::read(std::u16string_view aFileName)
{
- file_iterator<> file_start( pFileName );
+#ifdef _WIN32
+ file_iterator<> file_start(std::wstring(o3tl::toW(aFileName)));
+#else
+ file_iterator<> file_start(
+ std::string(OUStringToOString(aFileName, osl_getThreadTextEncoding())));
+#endif
if( ! file_start )
return nullptr;
file_iterator<> file_end = file_start.make_end();
diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx b/sdext/source/pdfimport/test/pdfunzip.cxx
index b9bf3a62f14f..6db3b740d668 100644
--- a/sdext/source/pdfimport/test/pdfunzip.cxx
+++ b/sdext/source/pdfimport/test/pdfunzip.cxx
@@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
static int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
{
int nRet = 0;
- std::unique_ptr<PDFEntry> pEntry = pdfparse::PDFReader::read( pInFile );
+ std::unique_ptr<PDFEntry> pEntry
+ = pdfparse::PDFReader::read(OStringToOUString(pInFile, osl_getThreadTextEncoding()));
if( pEntry )
{
PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx b/sdext/source/pdfimport/wrapper/wrapper.cxx
index ad25f85ebc09..ade4dc5edb6b 100644
--- a/sdext/source/pdfimport/wrapper/wrapper.cxx
+++ b/sdext/source/pdfimport/wrapper/wrapper.cxx
@@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view i_rPa
)
{
bool bSuccess = false;
- OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() );
- std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
+ std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(i_rPath));
if( pEntry )
{
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());