From 0c24faee6b622971d7d8f989da36029200cbd2a5 Mon Sep 17 00:00:00 2001 From: Matúš Kukan Date: Fri, 3 Oct 2014 23:12:56 +0200 Subject: FastSerializer: Also use cache for writing to ForMerge if we are inside mark() To ensure the correct order of calling ForMerge methods, call flush always before touching maMarkStack. This was the missing piece in optimizing write() methods, because of writeBytes() checking each time what to call. E.g. for Calc documents we don't use maMarkStack at all. So, just transfer the output to proper "ForMerge" when inside mark() and allow optimizations. This commit makes write() methods almost 1/3 as fast. Change-Id: I96c13888206c81f87e29b998839f78ea9d5570af --- sax/source/tools/CachedOutputStream.hxx | 37 +++++++++++++++++++++-- sax/source/tools/fastserializer.cxx | 53 ++++++++++++++++++++------------- sax/source/tools/fastserializer.hxx | 12 ++++---- 3 files changed, 75 insertions(+), 27 deletions(-) (limited to 'sax') diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx index 8877bb779a4e..fc74118a63ea 100644 --- a/sax/source/tools/CachedOutputStream.hxx +++ b/sax/source/tools/CachedOutputStream.hxx @@ -17,9 +17,17 @@ #include #include +#include <boost/shared_ptr.hpp> namespace sax_fastparser { +class ForMergeBase +{ +public: + virtual ~ForMergeBase() {} + virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0; +}; + class CachedOutputStream { /// When buffer hits this size, it's written to mxOutputStream @@ -30,11 +38,16 @@ class CachedOutputStream sal_Int32 mnCacheWrittenSize; const css::uno::Sequence<sal_Int8> mpCache; uno_Sequence *pSeq; + bool mbWriteToOutStream; + /// ForMerge structure is used for sorting elements in Writer + boost::shared_ptr< ForMergeBase > mpForMerge; public: CachedOutputStream() : mnCacheWrittenSize(0) , mpCache(mnMaximumSize) , pSeq(mpCache.get()) + , mbWriteToOutStream(true) + , mpForMerge(NULL) {} ~CachedOutputStream() {} @@ -48,6 +61,20 @@ public: mxOutputStream = xOutputStream; } + void 
setOutput( boost::shared_ptr< ForMergeBase > pForMerge ) + { + flush(); + mbWriteToOutStream = false; + mpForMerge = pForMerge; + } + + void resetOutputToStream() + { + flush(); + mbWriteToOutStream = true; + mpForMerge.reset(); + } + /// cache string and if limit is hit, flush void writeBytes( const sal_Int8* pStr, sal_Int32 nLen ) { @@ -61,7 +88,10 @@ public: // In that case, just flush data and write immediately. if (nLen > mnMaximumSize) { - mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + if (mbWriteToOutStream) + mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + else + mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) ); return; } } @@ -75,7 +105,10 @@ public: { // resize the Sequence to written size pSeq->nElements = mnCacheWrittenSize; - mxOutputStream->writeBytes( mpCache ); + if (mbWriteToOutStream) + mxOutputStream->writeBytes( mpCache ); + else + mpForMerge->append( mpCache ); // and next time write to the beginning mnCacheWrittenSize = 0; } diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx index 0f05ec9a3ff0..ac8376b3be84 100644 --- a/sax/source/tools/fastserializer.cxx +++ b/sax/source/tools/fastserializer.cxx @@ -57,6 +57,7 @@ namespace sax_fastparser { FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) : maCachedOutputStream() , maMarkStack() + , mbMarkStackEmpty(true) , mpDoubleStr(NULL) , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE) { @@ -152,6 +153,7 @@ namespace sax_fastparser { void FastSaxSerializer::endDocument() { + assert(mbMarkStackEmpty && maMarkStack.empty()); maCachedOutputStream.flush(); } @@ -186,8 +188,11 @@ namespace sax_fastparser { void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList ) { - if ( !maMarkStack.empty() ) + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); maMarkStack.top()->setCurrentElement( Element ); + } #ifdef DBG_UTIL 
m_DebugStartedElements.push(Element); @@ -222,8 +227,11 @@ namespace sax_fastparser { void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList ) { - if ( !maMarkStack.empty() ) + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); maMarkStack.top()->setCurrentElement( Element ); + } writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); @@ -303,28 +311,47 @@ namespace sax_fastparser { { boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) ); maMarkStack.push( pSort ); + maCachedOutputStream.setOutput( pSort ); } else { boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) ); maMarkStack.push( pMerge ); + maCachedOutputStream.setOutput( pMerge ); } + mbMarkStackEmpty = false; } void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType ) { - if ( maMarkStack.empty() ) + SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); + if ( mbMarkStackEmpty ) return; + // flush, so that we get everything in getData() + maCachedOutputStream.flush(); + if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE) { - writeOutput( maMarkStack.top()->getData() ); + Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() ); maMarkStack.pop(); + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() ); return; } const Int8Sequence aMerge( maMarkStack.top()->getData() ); maMarkStack.pop(); + if (maMarkStack.empty()) + { + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + } + else + { + maCachedOutputStream.setOutput( maMarkStack.top() ); + } switch ( eMergeType ) { @@ -338,26 +365,12 @@ namespace sax_fastparser { void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) { - writeBytes( reinterpret_cast<const char*>(rData.getConstArray()), rData.getLength() ); + maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() ); } void FastSaxSerializer::writeBytes( const char* 
pStr, size_t nLen ) { - if ( maMarkStack.empty() ) - writeOutput( reinterpret_cast<const sal_Int8*>(pStr), nLen ); - else - maMarkStack.top()->append( Sequence< sal_Int8 >( - reinterpret_cast<const sal_Int8*>(pStr), nLen) ); - } - - void FastSaxSerializer::writeOutput( const Sequence< ::sal_Int8 >& aData ) - { - writeOutput( aData.getConstArray(), aData.getLength() ); - } - - void FastSaxSerializer::writeOutput( const sal_Int8* pStr, size_t nLen ) - { - maCachedOutputStream.writeBytes( pStr, nLen ); + maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen ); } FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx index 5b740cee338b..8500b680f65d 100644 --- a/sax/source/tools/fastserializer.hxx +++ b/sax/source/tools/fastserializer.hxx @@ -148,11 +148,14 @@ public: void mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType = sax_fastparser::MERGE_MARKS_APPEND ); private: - /// Helper class to cache data and write in chunks to XOutputStream + /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append. + * Its flush method needs to be called before touching maMarkStack + * to ensure correct order of ForSort methods. 
+ */ CachedOutputStream maCachedOutputStream; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxFastTokenHandler; - class ForMerge + class ForMerge : public ForMergeBase { Int8Sequence maData; Int8Sequence maPostponed; @@ -168,7 +171,7 @@ private: #endif virtual void prepend( const Int8Sequence &rWhat ); - virtual void append( const Int8Sequence &rWhat ); + virtual void append( const Int8Sequence &rWhat ) SAL_OVERRIDE; void postpone( const Int8Sequence &rWhat ); protected: @@ -205,6 +208,7 @@ private: }; ::std::stack< boost::shared_ptr< ForMerge > > maMarkStack; + bool mbMarkStackEmpty; // Would be better to use OStringBuffer instead of these two // but then we couldn't get the rtl_String* member :-( rtl_String *mpDoubleStr; @@ -217,8 +221,6 @@ private: void writeTokenValueList(); void writeFastAttributeList( FastAttributeList* pAttrList ); - void writeOutput( const sal_Int8* pStr, size_t nLen ); - void writeOutput( const css::uno::Sequence< ::sal_Int8 >& aData ); /** Forward the call to the output stream, or write to the stack. -- cgit