From 98b27ea907517d585f5d951895ae733653a442d2 Mon Sep 17 00:00:00 2001 From: Mihai Date: Fri, 13 Feb 2015 12:56:11 +0200 Subject: SkipImages option for PDF import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This option allows images and drawings to be skipped while importing a PDF. The main reason is improved performance in cases where images are not needed. Change-Id: Ibca342ff6e7005bedf7b16869473832ce2576fb2 Reviewed-on: https://gerrit.libreoffice.org/14470 Reviewed-by: Caolán McNamara Tested-by: Caolán McNamara --- sdext/source/pdfimport/inc/wrapper.hxx | 6 ++- sdext/source/pdfimport/pdfiadaptor.cxx | 15 +++++-- sdext/source/pdfimport/pdfiadaptor.hxx | 3 +- sdext/source/pdfimport/wrapper/wrapper.cxx | 14 ++++--- .../pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx | 46 +++++++++++++++++++++- .../pdfimport/xpdfwrapper/pdfioutdev_gpl.hxx | 2 + sdext/source/pdfimport/xpdfwrapper/wrapper_gpl.cxx | 17 ++++++-- 7 files changed, 86 insertions(+), 17 deletions(-) (limited to 'sdext') diff --git a/sdext/source/pdfimport/inc/wrapper.hxx b/sdext/source/pdfimport/inc/wrapper.hxx index 8f0fc10f5a97..6bafce4939d7 100644 --- a/sdext/source/pdfimport/inc/wrapper.hxx +++ b/sdext/source/pdfimport/inc/wrapper.hxx @@ -40,7 +40,8 @@ namespace pdfi css::task::XInteractionHandler >& xIHdl, const OUString& rPwd, const css::uno::Reference< - css::uno::XComponentContext >& xContext ); + css::uno::XComponentContext >& xContext, + const OUString& rFilterOptions = OUString()); bool xpdf_ImportFromStream( const css::uno::Reference< css::io::XInputStream >& xInput, const ContentSinkSharedPtr& rSink, @@ -48,7 +49,8 @@ namespace pdfi css::task::XInteractionHandler >& xIHdl, const OUString& rPwd, const css::uno::Reference< - css::uno::XComponentContext >& xContext ); + css::uno::XComponentContext >& xContext, + const OUString& rFilterOptions = OUString() ); } #endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX diff --git
a/sdext/source/pdfimport/pdfiadaptor.cxx b/sdext/source/pdfimport/pdfiadaptor.cxx index a6cb0983d57d..6162d27b049d 100644 --- a/sdext/source/pdfimport/pdfiadaptor.cxx +++ b/sdext/source/pdfimport/pdfiadaptor.cxx @@ -221,7 +221,8 @@ bool PDFIRawAdaptor::parse( const uno::Reference& xInput const OUString& rPwd, const uno::Reference& xStatus, const XmlEmitterSharedPtr& rEmitter, - const OUString& rURL ) + const OUString& rURL, + const OUString& rFilterOptions ) { // container for metaformat boost::shared_ptr pSink( @@ -234,9 +235,11 @@ bool PDFIRawAdaptor::parse( const uno::Reference& xInput bool bSuccess=false; if( xInput.is() ) - bSuccess = xpdf_ImportFromStream( xInput, pSink, xIHdl, rPwd, m_xContext ); + bSuccess = xpdf_ImportFromStream( xInput, pSink, xIHdl, + rPwd, m_xContext, rFilterOptions ); else - bSuccess = xpdf_ImportFromFile( rURL, pSink, xIHdl, rPwd, m_xContext ); + bSuccess = xpdf_ImportFromFile( rURL, pSink, xIHdl, + rPwd, m_xContext, rFilterOptions ); if( bSuccess ) pSink->emit(*rEmitter,*m_pVisitorFactory); @@ -271,6 +274,7 @@ sal_Bool SAL_CALL PDFIRawAdaptor::importer( const uno::Sequence< beans::Property uno::Reference< task::XInteractionHandler > xInteractionHandler; OUString aURL; OUString aPwd; + OUString aFilterOptions; const beans::PropertyValue* pAttribs = rSourceData.getConstArray(); sal_Int32 nAttribs = rSourceData.getLength(); for( sal_Int32 i = 0; i < nAttribs; i++, pAttribs++ ) @@ -286,12 +290,15 @@ sal_Bool SAL_CALL PDFIRawAdaptor::importer( const uno::Sequence< beans::Property pAttribs->Value >>= xInteractionHandler; else if ( pAttribs->Name == "Password" ) pAttribs->Value >>= aPwd; + else if ( pAttribs->Name == "FilterOptions" ) + pAttribs->Value >>= aFilterOptions; } if( !xInput.is() ) return sal_False; XmlEmitterSharedPtr pEmitter = createSaxEmitter(rHdl); - const bool bSuccess = parse(xInput,xInteractionHandler, aPwd, xStatus,pEmitter,aURL); + const bool bSuccess = parse(xInput, xInteractionHandler, + aPwd, xStatus, pEmitter, aURL, 
aFilterOptions); // tell input stream that it is no longer needed xInput->closeInput(); diff --git a/sdext/source/pdfimport/pdfiadaptor.hxx b/sdext/source/pdfimport/pdfiadaptor.hxx index a88f3df44f47..0b7c749b08c5 100644 --- a/sdext/source/pdfimport/pdfiadaptor.hxx +++ b/sdext/source/pdfimport/pdfiadaptor.hxx @@ -88,7 +88,8 @@ namespace pdfi const OUString& rPwd, const css::uno::Reference& xStatus, const XmlEmitterSharedPtr& rEmitter, - const OUString& rURL ); + const OUString& rURL, + const OUString& rFilterOptions = OUString()); public: explicit PDFIRawAdaptor( const css::uno::Reference< diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx b/sdext/source/pdfimport/wrapper/wrapper.cxx index 331b7900dff1..8b0abf34281b 100644 --- a/sdext/source/pdfimport/wrapper/wrapper.cxx +++ b/sdext/source/pdfimport/wrapper/wrapper.cxx @@ -1011,7 +1011,8 @@ bool xpdf_ImportFromFile( const OUString& rURL, const ContentSinkSharedPtr& rSink, const uno::Reference< task::XInteractionHandler >& xIHdl, const OUString& rPwd, - const uno::Reference< uno::XComponentContext >& xContext ) + const uno::Reference< uno::XComponentContext >& xContext, + const OUString& rFilterOptions ) { OSL_ASSERT(rSink); @@ -1054,8 +1055,10 @@ bool xpdf_ImportFromFile( const OUString& rURL, // spawn separate process to keep LGPL/GPL code apart. - rtl_uString* args[] = { aSysUPath.pData, errPathname.pData }; - sal_Int32 nArgs = 2; + OUString aOptFlag("-o"); + rtl_uString* args[] = { aSysUPath.pData, errPathname.pData, + aOptFlag.pData, rFilterOptions.pData }; + sal_Int32 nArgs = rFilterOptions.isEmpty() ? 
2 : 4; oslProcess aProcess; oslFileHandle pIn = NULL; @@ -1160,7 +1163,8 @@ bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xI const ContentSinkSharedPtr& rSink, const uno::Reference& xIHdl, const OUString& rPwd, - const uno::Reference< uno::XComponentContext >& xContext ) + const uno::Reference< uno::XComponentContext >& xContext, + const OUString& rFilterOptions ) { OSL_ASSERT(xInput.is()); OSL_ASSERT(rSink); @@ -1203,7 +1207,7 @@ bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xI osl_closeFile( aFile ); if ( bSuccess ) - bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext ); + bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext, rFilterOptions ); osl_removeFile( aURL.pData ); return bSuccess; diff --git a/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx b/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx index d3e39887e32e..cf19b30834a7 100644 --- a/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx +++ b/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx @@ -532,7 +532,8 @@ void PDFOutDev::printPath( GfxPath* pPath ) const PDFOutDev::PDFOutDev( PDFDoc* pDoc ) : m_pDoc( pDoc ), m_aFontMap(), - m_pUtf8Map( new UnicodeMap((char*)"UTF-8", gTrue, &mapUTF8) ) + m_pUtf8Map( new UnicodeMap((char*)"UTF-8", gTrue, &mapUTF8) ), + m_bSkipImages(false) { } PDFOutDev::~PDFOutDev() @@ -632,6 +633,8 @@ void PDFOutDev::updateCTM(GfxState* state, void PDFOutDev::updateLineDash(GfxState *state) { + if (m_bSkipImages) + return; assert(state); double* dashArray; int arrayLen; double startOffset; @@ -649,36 +652,48 @@ void PDFOutDev::updateLineDash(GfxState *state) void PDFOutDev::updateFlatness(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "updateFlatness %d\n", state->getFlatness() ); } void PDFOutDev::updateLineJoin(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "updateLineJoin %d\n", state->getLineJoin() ); } void 
PDFOutDev::updateLineCap(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "updateLineCap %d\n", state->getLineCap() ); } void PDFOutDev::updateMiterLimit(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "updateMiterLimit %f\n", normalize(state->getMiterLimit()) ); } void PDFOutDev::updateLineWidth(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "updateLineWidth %f\n", normalize(state->getLineWidth()) ); } void PDFOutDev::updateFillColor(GfxState *state) { + if (m_bSkipImages) + return; assert(state); GfxRGB aRGB; @@ -693,6 +708,8 @@ void PDFOutDev::updateFillColor(GfxState *state) void PDFOutDev::updateStrokeColor(GfxState *state) { + if (m_bSkipImages) + return; assert(state); GfxRGB aRGB; @@ -707,11 +724,15 @@ void PDFOutDev::updateStrokeColor(GfxState *state) void PDFOutDev::updateFillOpacity(GfxState *state) { + if (m_bSkipImages) + return; updateFillColor(state); } void PDFOutDev::updateStrokeOpacity(GfxState *state) { + if (m_bSkipImages) + return; updateStrokeColor(state); } @@ -774,6 +795,8 @@ void PDFOutDev::updateRender(GfxState *state) void PDFOutDev::stroke(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "strokePath" ); @@ -783,6 +806,8 @@ void PDFOutDev::stroke(GfxState *state) void PDFOutDev::fill(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "fillPath" ); @@ -792,6 +817,8 @@ void PDFOutDev::fill(GfxState *state) void PDFOutDev::eoFill(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "eoFillPath" ); @@ -801,6 +828,8 @@ void PDFOutDev::eoFill(GfxState *state) void PDFOutDev::clip(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "clipPath" ); @@ -810,6 +839,8 @@ void PDFOutDev::clip(GfxState *state) void PDFOutDev::eoClip(GfxState *state) { + if (m_bSkipImages) + return; assert(state); printf( "eoClipPath" ); @@ -915,6 +946,8 @@ void PDFOutDev::drawImageMask(GfxState* pState, 
Object*, Stream* str, #endif GBool /*inlineImg*/ ) { + if (m_bSkipImages) + return; OutputBuffer aBuf; initBuf(aBuf); printf( "drawMask %d %d %d", width, height, invert ); @@ -944,6 +977,8 @@ void PDFOutDev::drawImage(GfxState*, Object*, Stream* str, #endif int* maskColors, GBool /*inlineImg*/ ) { + if (m_bSkipImages) + return; OutputBuffer aBuf; initBuf(aBuf); OutputBuffer aMaskBuf; @@ -999,6 +1034,8 @@ void PDFOutDev::drawMaskedImage(GfxState*, Object*, Stream* str, #endif ) { + if (m_bSkipImages) + return; OutputBuffer aBuf; initBuf(aBuf); printf( "drawImage %d %d 0", width, height ); writePng_( aBuf, str, width, height, colorMap, maskStr, maskWidth, maskHeight, maskInvert, true ); @@ -1019,6 +1056,8 @@ void PDFOutDev::drawSoftMaskedImage(GfxState*, Object*, Stream* str, #endif ) { + if (m_bSkipImages) + return; OutputBuffer aBuf; initBuf(aBuf); printf( "drawImage %d %d 0", width, height ); writePng_( aBuf, str, width, height, colorMap, maskStr, maskWidth, maskHeight, maskColorMap, true ); @@ -1031,6 +1070,11 @@ void PDFOutDev::setPageNum( int nNumPages ) printf("setPageNum %d\n", nNumPages); } +void PDFOutDev::setSkipImages( bool bSkipImages ) +{ + m_bSkipImages = bSkipImages; +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.hxx b/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.hxx index daba4454a4d8..cc64db43a9a4 100644 --- a/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.hxx +++ b/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.hxx @@ -138,6 +138,7 @@ namespace pdfi mutable std::unordered_map< long long, FontAttributes > m_aFontMap; UnicodeMap* m_pUtf8Map; + bool m_bSkipImages; int parseFont( long long nNewId, GfxFont* pFont, GfxState* state ) const; void writeFontFile( GfxFont* gfxFont ) const; @@ -267,6 +268,7 @@ namespace pdfi ) SAL_OVERRIDE; void setPageNum( int nNumPages ); + void setSkipImages ( bool bSkipImages ); }; } diff --git 
a/sdext/source/pdfimport/xpdfwrapper/wrapper_gpl.cxx b/sdext/source/pdfimport/xpdfwrapper/wrapper_gpl.cxx index 20c45c49a1dd..c72ba95d5f60 100644 --- a/sdext/source/pdfimport/xpdfwrapper/wrapper_gpl.cxx +++ b/sdext/source/pdfimport/xpdfwrapper/wrapper_gpl.cxx @@ -27,7 +27,8 @@ FILE* g_binary_out=stderr; static const char *ownerPassword = "\001"; static const char *userPassword = "\001"; -static const char *outputFile = "\001"; +static const char *outputFile = "\001"; +static const char *options = "\001"; int main(int argc, char **argv) { @@ -41,6 +42,14 @@ int main(int argc, char **argv) for (int j = k; j < argc; ++j) argv[j] = argv[j+2]; } + else if (!strcmp(argv[k], "-o")) + { + options = argv[k+1]; + argc -= 2; + for (int j = k; j < argc; ++j) + argv[j] = argv[j+2]; + } + else if (!strcmp(argv[k], "-opw")) { ownerPassword = argv[k+1]; @@ -58,9 +67,6 @@ int main(int argc, char **argv) ++k; } - if( argc != 3 ) - return 1; - // read config file globalParams = new GlobalParams(); globalParams->setErrQuiet(gTrue); @@ -119,6 +125,9 @@ int main(int argc, char **argv) PDFDoc &rDoc = aDoc.isOk()? aDoc: aErrDoc; pdfi::PDFOutDev aOutDev(&rDoc); + if (!strcmp(options, "SkipImages")) { + aOutDev.setSkipImages(true); + } // tell the receiver early - needed for proper progress calculation const int nPages = rDoc.isOk()? rDoc.getNumPages(): 0; -- cgit