From a5a49657dc17609a05dca59a8521fd71d14fe76e Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Fri, 1 Dec 2023 16:48:44 +0300 Subject: tdf#158442: fix opening hybrid PDFs on Windows Commit 046e9545956d8ad1d69345d6b4a4c0a33714d179 (Try to revert to use of file_iterator from boost on Windows, 2023-10-31) introduced a problem: pdfparse::PDFReader::read couldn't create a file_iterator for files already opened with write access. The mmap_file_iterator ctor on Windows passed only FILE_SHARE_READ as the dwShareMode parameter of the CreateFileA WinAPI, and that failed when the file was already opened using GENERIC_WRITE in dwDesiredAccess - which happens when opening the stream in TypeDetection::impl_detectTypeFlatAndDeep. Fix this by patching boost's mmap_file_iterator constructor to use FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile. But there was a pre-existing problem of using the char-based CreateFileA API, which cannot open files whose names are not representable in the current Windows codepage. Such hybrid PDF files would still fail creation of the file_iterator, and would open as PDF. Fix that by further patching boost to have wstring-based constructors for file_iterator and mmap_file_iterator on Windows, which call CreateFileW. 
Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218 Tested-by: Jenkins Reviewed-by: Mike Kaganski --- sdext/source/pdfimport/filterdet.cxx | 5 +---- sdext/source/pdfimport/inc/pdfparse.hxx | 5 +---- sdext/source/pdfimport/pdfparse/pdfparse.cxx | 11 +++++++++-- sdext/source/pdfimport/test/pdfunzip.cxx | 3 ++- sdext/source/pdfimport/wrapper/wrapper.cxx | 3 +-- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'sdext') diff --git a/sdext/source/pdfimport/filterdet.cxx b/sdext/source/pdfimport/filterdet.cxx index e8d2e11d30ba..d9fdcc03c6ab 100644 --- a/sdext/source/pdfimport/filterdet.cxx +++ b/sdext/source/pdfimport/filterdet.cxx @@ -555,7 +555,6 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString& bool bMayUseUI ) { uno::Reference< io::XStream > xEmbed; - OString aPDFFile; OUString aSysUPath; if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None ) return xEmbed; @@ -563,9 +562,7 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString& if (!detectHasAdditionalStreams(aSysUPath)) return xEmbed; - aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() ); - - std::unique_ptr pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() )); + std::unique_ptr pEntry(pdfparse::PDFReader::read(aSysUPath)); if( pEntry ) { pdfparse::PDFFile* pPDFFile = dynamic_cast(pEntry.get()); diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx b/sdext/source/pdfimport/inc/pdfparse.hxx index 7891419471d3..542a9ed4b1a5 100644 --- a/sdext/source/pdfimport/inc/pdfparse.hxx +++ b/sdext/source/pdfimport/inc/pdfparse.hxx @@ -292,10 +292,7 @@ struct PDFReader { PDFReader() = delete; - static std::unique_ptr read( const char* pFileName ); -#ifdef _WIN32 - static std::unique_ptr read( const char* pBuffer, unsigned int nLen ); -#endif + static std::unique_ptr read(std::u16string_view aFileName); }; } // namespace diff --git 
a/sdext/source/pdfimport/pdfparse/pdfparse.cxx b/sdext/source/pdfimport/pdfparse/pdfparse.cxx index cdd3ac13ff35..8b3da7eb39d7 100644 --- a/sdext/source/pdfimport/pdfparse/pdfparse.cxx +++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx @@ -36,7 +36,9 @@ #include +#include #include +#include #include #include #include @@ -558,9 +560,14 @@ public: } -std::unique_ptr PDFReader::read( const char* pFileName ) +std::unique_ptr PDFReader::read(std::u16string_view aFileName) { - file_iterator<> file_start( pFileName ); +#ifdef _WIN32 + file_iterator<> file_start(std::wstring(o3tl::toW(aFileName))); +#else + file_iterator<> file_start( + std::string(OUStringToOString(aFileName, osl_getThreadTextEncoding()))); +#endif if( ! file_start ) return nullptr; file_iterator<> file_end = file_start.make_end(); diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx b/sdext/source/pdfimport/test/pdfunzip.cxx index b9bf3a62f14f..6db3b740d668 100644 --- a/sdext/source/pdfimport/test/pdfunzip.cxx +++ b/sdext/source/pdfimport/test/pdfunzip.cxx @@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*); static int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl ) { int nRet = 0; - std::unique_ptr pEntry = pdfparse::PDFReader::read( pInFile ); + std::unique_ptr pEntry + = pdfparse::PDFReader::read(OStringToOUString(pInFile, osl_getThreadTextEncoding())); if( pEntry ) { PDFFile* pPDFFile = dynamic_cast(pEntry.get()); diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx b/sdext/source/pdfimport/wrapper/wrapper.cxx index ad25f85ebc09..ade4dc5edb6b 100644 --- a/sdext/source/pdfimport/wrapper/wrapper.cxx +++ b/sdext/source/pdfimport/wrapper/wrapper.cxx @@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view i_rPa ) { bool bSuccess = false; - OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() ); - std::unique_ptr pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() )); + std::unique_ptr 
pEntry(pdfparse::PDFReader::read(i_rPath)); if( pEntry ) { pdfparse::PDFFile* pPDFFile = dynamic_cast(pEntry.get()); -- cgit