diff options
author | Matúš Kukan <matus.kukan@collabora.com> | 2014-10-03 23:12:56 +0200 |
---|---|---|
committer | Matúš Kukan <matus.kukan@collabora.com> | 2014-10-23 14:30:29 +0200 |
commit | 0c24faee6b622971d7d8f989da36029200cbd2a5 (patch) | |
tree | be33db223c5beba6aee04b58207cde009934b6c3 /sax/source/tools | |
parent | 143bbb20a3f4d757e2493fc078deea7dbaa1e14f (diff) |
FastSerializer: Also use cache for writing to ForMerge if we are inside mark()
To ensure that ForMerge methods are called in the correct order,
always call flush() before touching maMarkStack.
This was the missing piece in optimizing the write() methods,
because writeBytes() had to check on every call which output to write to.
E.g. for Calc documents we don't use maMarkStack at all.
So, just transfer the output to the proper "ForMerge" when inside mark()
and allow optimizations.
This commit makes write() methods almost 1/3 faster.
Change-Id: I96c13888206c81f87e29b998839f78ea9d5570af
Diffstat (limited to 'sax/source/tools')
-rw-r--r-- | sax/source/tools/CachedOutputStream.hxx | 37 | ||||
-rw-r--r-- | sax/source/tools/fastserializer.cxx | 53 | ||||
-rw-r--r-- | sax/source/tools/fastserializer.hxx | 12 |
3 files changed, 75 insertions, 27 deletions
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx index 8877bb779a4e..fc74118a63ea 100644 --- a/sax/source/tools/CachedOutputStream.hxx +++ b/sax/source/tools/CachedOutputStream.hxx @@ -17,9 +17,17 @@ #include <cstring> #include <cstdlib> +#include <boost/shared_ptr.hpp> namespace sax_fastparser { +class ForMergeBase +{ +public: + virtual ~ForMergeBase() {} + virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0; +}; + class CachedOutputStream { /// When buffer hits this size, it's written to mxOutputStream @@ -30,11 +38,16 @@ class CachedOutputStream sal_Int32 mnCacheWrittenSize; const css::uno::Sequence<sal_Int8> mpCache; uno_Sequence *pSeq; + bool mbWriteToOutStream; + /// ForMerge structure is used for sorting elements in Writer + boost::shared_ptr< ForMergeBase > mpForMerge; public: CachedOutputStream() : mnCacheWrittenSize(0) , mpCache(mnMaximumSize) , pSeq(mpCache.get()) + , mbWriteToOutStream(true) + , mpForMerge(NULL) {} ~CachedOutputStream() {} @@ -48,6 +61,20 @@ public: mxOutputStream = xOutputStream; } + void setOutput( boost::shared_ptr< ForMergeBase > pForMerge ) + { + flush(); + mbWriteToOutStream = false; + mpForMerge = pForMerge; + } + + void resetOutputToStream() + { + flush(); + mbWriteToOutStream = true; + mpForMerge.reset(); + } + /// cache string and if limit is hit, flush void writeBytes( const sal_Int8* pStr, sal_Int32 nLen ) { @@ -61,7 +88,10 @@ public: // In that case, just flush data and write immediately. 
if (nLen > mnMaximumSize) { - mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + if (mbWriteToOutStream) + mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + else + mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) ); return; } } @@ -75,7 +105,10 @@ public: { // resize the Sequence to written size pSeq->nElements = mnCacheWrittenSize; - mxOutputStream->writeBytes( mpCache ); + if (mbWriteToOutStream) + mxOutputStream->writeBytes( mpCache ); + else + mpForMerge->append( mpCache ); // and next time write to the beginning mnCacheWrittenSize = 0; } diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx index 0f05ec9a3ff0..ac8376b3be84 100644 --- a/sax/source/tools/fastserializer.cxx +++ b/sax/source/tools/fastserializer.cxx @@ -57,6 +57,7 @@ namespace sax_fastparser { FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) : maCachedOutputStream() , maMarkStack() + , mbMarkStackEmpty(true) , mpDoubleStr(NULL) , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE) { @@ -152,6 +153,7 @@ namespace sax_fastparser { void FastSaxSerializer::endDocument() { + assert(mbMarkStackEmpty && maMarkStack.empty()); maCachedOutputStream.flush(); } @@ -186,8 +188,11 @@ namespace sax_fastparser { void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList ) { - if ( !maMarkStack.empty() ) + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); maMarkStack.top()->setCurrentElement( Element ); + } #ifdef DBG_UTIL m_DebugStartedElements.push(Element); @@ -222,8 +227,11 @@ namespace sax_fastparser { void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList ) { - if ( !maMarkStack.empty() ) + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); maMarkStack.top()->setCurrentElement( Element ); + } writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); @@ -303,28 +311,47 @@ 
namespace sax_fastparser { { boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) ); maMarkStack.push( pSort ); + maCachedOutputStream.setOutput( pSort ); } else { boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) ); maMarkStack.push( pMerge ); + maCachedOutputStream.setOutput( pMerge ); } + mbMarkStackEmpty = false; } void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType ) { - if ( maMarkStack.empty() ) + SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); + if ( mbMarkStackEmpty ) return; + // flush, so that we get everything in getData() + maCachedOutputStream.flush(); + if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE) { - writeOutput( maMarkStack.top()->getData() ); + Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() ); maMarkStack.pop(); + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() ); return; } const Int8Sequence aMerge( maMarkStack.top()->getData() ); maMarkStack.pop(); + if (maMarkStack.empty()) + { + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + } + else + { + maCachedOutputStream.setOutput( maMarkStack.top() ); + } switch ( eMergeType ) { @@ -338,26 +365,12 @@ namespace sax_fastparser { void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) { - writeBytes( reinterpret_cast<const char*>(rData.getConstArray()), rData.getLength() ); + maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() ); } void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen ) { - if ( maMarkStack.empty() ) - writeOutput( reinterpret_cast<const sal_Int8*>(pStr), nLen ); - else - maMarkStack.top()->append( Sequence< sal_Int8 >( - reinterpret_cast<const sal_Int8*>(pStr), nLen) ); - } - - void FastSaxSerializer::writeOutput( const Sequence< ::sal_Int8 >& aData ) - { - writeOutput( aData.getConstArray(), aData.getLength() ); - } - - 
void FastSaxSerializer::writeOutput( const sal_Int8* pStr, size_t nLen ) - { - maCachedOutputStream.writeBytes( pStr, nLen ); + maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen ); } FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx index 5b740cee338b..8500b680f65d 100644 --- a/sax/source/tools/fastserializer.hxx +++ b/sax/source/tools/fastserializer.hxx @@ -148,11 +148,14 @@ public: void mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType = sax_fastparser::MERGE_MARKS_APPEND ); private: - /// Helper class to cache data and write in chunks to XOutputStream + /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append. + * Its flush method needs to be called before touching maMarkStack + * to ensure correct order of ForSort methods. + */ CachedOutputStream maCachedOutputStream; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxFastTokenHandler; - class ForMerge + class ForMerge : public ForMergeBase { Int8Sequence maData; Int8Sequence maPostponed; @@ -168,7 +171,7 @@ private: #endif virtual void prepend( const Int8Sequence &rWhat ); - virtual void append( const Int8Sequence &rWhat ); + virtual void append( const Int8Sequence &rWhat ) SAL_OVERRIDE; void postpone( const Int8Sequence &rWhat ); protected: @@ -205,6 +208,7 @@ private: }; ::std::stack< boost::shared_ptr< ForMerge > > maMarkStack; + bool mbMarkStackEmpty; // Would be better to use OStringBuffer instead of these two // but then we couldn't get the rtl_String* member :-( rtl_String *mpDoubleStr; @@ -217,8 +221,6 @@ private: void writeTokenValueList(); void writeFastAttributeList( FastAttributeList* pAttrList ); - void writeOutput( const sal_Int8* pStr, size_t nLen ); - void writeOutput( const css::uno::Sequence< ::sal_Int8 >& aData ); /** Forward the call to the output stream, or write to the stack. |