diff options
author | Matúš Kukan <matus.kukan@gmail.com> | 2013-10-08 23:28:32 +0200 |
---|---|---|
committer | Matúš Kukan <matus.kukan@gmail.com> | 2013-10-17 21:38:38 +0200 |
commit | 9612bdbfa9335823e864c354130717f0e7607bf1 (patch) | |
tree | 4c1a27c90d2015ae62b1ff288a8b4afaabc21d29 /sax/source/fastparser | |
parent | 2df047bfd1f618329872261ca0600fe232ad8cfe (diff) |
fastparser: implementation using two threads
Instead of calling the handler methods directly, the parser thread generates EventLists -
vectors of Events in which the arguments for the callee are stored - for the calling thread to consume.
Change-Id: I227a0ef3038566664ac8f294770152c8b445997b
Diffstat (limited to 'sax/source/fastparser')
-rw-r--r-- | sax/source/fastparser/fastparser.cxx | 192 | ||||
-rw-r--r-- | sax/source/fastparser/fastparser.hxx | 48 |
2 files changed, 214 insertions, 26 deletions
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 825160f998a0..6f92f59e82ac 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -21,6 +21,7 @@ #include <osl/diagnose.h> #include <rtl/ustrbuf.hxx> +#include <salhelper/thread.hxx> #include <com/sun/star/lang/DisposedException.hpp> #include <com/sun/star/xml/sax/SAXParseException.hpp> @@ -61,6 +62,25 @@ struct NamespaceDefine NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} }; +class ParserThread: public salhelper::Thread +{ + FastSaxParser *mpParser; +public: + ParserThread(FastSaxParser *pParser): Thread("Parser"), mpParser(pParser) {} +private: + virtual void execute() + { + try + { + mpParser->parse(); + } + catch (const SAXParseException& e) + { + mpParser->produce(Event( CallbackType::EXCEPTION )); + } + } +}; + // -------------------------------------------------------------------- // FastLocatorImpl // -------------------------------------------------------------------- @@ -167,31 +187,65 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) // -------------------------------------------------------------------- -ParserData::ParserData() +Event::Event(const CallbackType& t): maType(t) +{} + +Event::Event(const CallbackType& t, const OUString& sChars): Event(t) { + msChars = sChars; } -ParserData::~ParserData() +Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace, + const OUString& aElementName, FastAttributeList *pAttributes): Event(t) { + mnElementToken = nElementToken; + maNamespace = aNamespace; + maElementName = aElementName; + mpAttributes = rtl::Reference< FastAttributeList >(pAttributes); } +Event::~Event() +{} + +// -------------------------------------------------------------------- + +ParserData::ParserData() +{} + 
+ParserData::~ParserData() +{} + // -------------------------------------------------------------------- Entity::Entity( const ParserData& rData ) : ParserData( rData ) { - // performance-improvement. Reference is needed when calling the startTag callback. - // Handing out the same object with every call is allowed (see sax-specification) - mxAttributes.set( new FastAttributeList( mxTokenHandler ) ); + mpProducedEvents = 0; +} + +Entity::Entity( const Entity& e ) : + ParserData( e ) + ,maStructSource(e.maStructSource) + ,mpParser(e.mpParser) + ,maConverter(e.maConverter) + ,maSavedException(e.maSavedException) + ,maNamespaceStack(e.maNamespaceStack) + ,maContextStack(e.maContextStack) + ,maNamespaceCount(e.maNamespaceCount) + ,maNamespaceDefines(e.maNamespaceDefines) +{ + mpProducedEvents = 0; } Entity::~Entity() { } -void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace, - const OUString& aElementName, FastAttributeList *pAttributes ) +void Entity::startElement( Event *pEvent ) { + const sal_Int32& nElementToken = pEvent->mnElementToken.get(); + const OUString& aNamespace = pEvent->maNamespace.get(); + const OUString& aElementName = pEvent->maElementName.get(); Reference< XFastContextHandler > xParentContext; if( !maContextStack.empty() ) { @@ -207,7 +261,7 @@ void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace, try { - Reference< XFastAttributeList > xAttr( pAttributes ); + Reference< XFastAttributeList > xAttr( pEvent->mpAttributes.get().get() ); Reference< XFastContextHandler > xContext; if( nElementToken == FastToken::DONTKNOW ) { @@ -495,6 +549,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); pushEntity( entity ); + Entity& rEntity = getEntity(); try { // start the document @@ -505,7 +560,24 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx 
entity.mxDocumentHandler->startDocument(); } - parse(); + rtl::Reference<ParserThread> xParser; + xParser = new ParserThread(this); + xParser->launch(); + bool done = false; + do { + rEntity.maEventsPushed.wait(); + rEntity.maEventsPushed.reset(); + MutexGuard aGuard(rEntity.maEventProtector); + while (!rEntity.maPendingEvents.empty()) + { + EventList *pEventList = rEntity.maPendingEvents.front(); + rEntity.maPendingEvents.pop(); + if (!consume(pEventList)) + done = true; + } + } while (!done); + xParser->join(); + deleteUsedEvents(); // finish document if( entity.mxDocumentHandler.is() ) @@ -673,6 +745,88 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3 } // namespace +void FastSaxParser::deleteUsedEvents() +{ + Entity& rEntity = getEntity(); + while (!rEntity.maUsedEvents.empty()) + { + EventList *pEventList = rEntity.maUsedEvents.front(); + rEntity.maUsedEvents.pop(); + + delete pEventList; + } +} + +void FastSaxParser::produce(const Event& aEvent) +{ + Entity& rEntity = getEntity(); + if (!rEntity.mpProducedEvents) + { + rEntity.mpProducedEvents = new EventList(); + rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize); + } + rEntity.mpProducedEvents->push_back( aEvent ); + if (aEvent->maType == CallbackType::DONE || + aEvent->maType == CallbackType::EXCEPTION || + rEntity.mpProducedEvents->size() == rEntity.mnEventListSize) + { + MutexGuard aGuard(rEntity.maEventProtector); + rEntity.maPendingEvents.push(rEntity.mpProducedEvents); + rEntity.mpProducedEvents = 0; + deleteUsedEvents(); + rEntity.maEventsPushed.set(); + } +} + +bool FastSaxParser::consume(EventList *pEventList) +{ + Entity& rEntity = getEntity(); + bool bIsParserFinished = false; + for (EventList::iterator aEventIt = pEventList->begin(); + aEventIt != pEventList->end(); ++aEventIt) + { + switch ((*aEventIt).maType) + { + case CallbackType::START_ELEMENT: + rEntity.startElement( &(*aEventIt) ); + break; + case CallbackType::END_ELEMENT: + 
rEntity.endElement(); + break; + case CallbackType::CHARACTERS: + rEntity.characters( (*aEventIt).msChars.get() ); + break; + case CallbackType::DONE: + bIsParserFinished = true; + assert(aEventIt+1 == pEventList->end()); + break; + case CallbackType::EXCEPTION: + assert( rEntity.maSavedException.hasValue() ); + // Error during parsing ! + XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser ); + OUString sSystemId = mxDocumentLocator->getSystemId(); + sal_Int32 nLine = mxDocumentLocator->getLineNumber(); + + SAXParseException aExcept( + lclGetErrorMessage( xmlE, sSystemId, nLine ), + Reference< XInterface >(), + Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ), + mxDocumentLocator->getPublicId(), + mxDocumentLocator->getSystemId(), + mxDocumentLocator->getLineNumber(), + mxDocumentLocator->getColumnNumber() + ); + // error handler is set, it may throw the exception + if( rEntity.mxErrorHandler.is() ) + rEntity.mxErrorHandler->fatalError( Any( aExcept ) ); + + throw aExcept; + } + } + rEntity.maUsedEvents.push(pEventList); + return !bIsParserFinished; +} + // starts parsing with actual parser ! 
void FastSaxParser::parse() { @@ -720,6 +874,7 @@ void FastSaxParser::parse() } } while( nRead > 0 ); + produce(Event( CallbackType::DONE )); } //------------------------------------------ @@ -741,14 +896,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() ); } - rEntity.mxAttributes->clear(); - // create attribute map and process namespace instructions sal_Int32 nNameLen, nPrefixLen; const XML_Char *pName; const XML_Char *pPrefix; OUString sNamespace; sal_Int32 nNamespaceToken = FastToken::DONTKNOW; + FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler ); if (!rEntity.maNamespaceStack.empty()) { sNamespace = rEntity.maNamespaceStack.top().msName; @@ -796,9 +950,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char { sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); if( nAttributeToken != FastToken::DONTKNOW ) - rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); + pAttributes->add( nAttributeToken, awAttributes[i+1] ); else - rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ), + pAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ), OString(pName, nNameLen), awAttributes[i+1] ); } } @@ -808,9 +962,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char { sal_Int32 nAttributeToken = GetToken( pName, nNameLen ); if( nAttributeToken != FastToken::DONTKNOW ) - rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); + pAttributes->add( nAttributeToken, awAttributes[i+1] ); else - rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] ); + pAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] ); } } } @@ -832,8 +986,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char } rEntity.maNamespaceStack.push( NameWithToken(sNamespace, 
nNamespaceToken) ); - rEntity.startElement( nElementToken, sNamespace, - OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() ); + produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace, + OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes )); } catch (const Exception& e) { @@ -852,13 +1006,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* ) if( !rEntity.maNamespaceStack.empty() ) rEntity.maNamespaceStack.pop(); - rEntity.endElement(); + produce(Event( CallbackType::END_ELEMENT )); } void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen ) { - getEntity().characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) ); + produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) )); } void FastSaxParser::callbackEntityDecl( diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx index d604a8421f19..b23c3ff53a9c 100644 --- a/sax/source/fastparser/fastparser.hxx +++ b/sax/source/fastparser/fastparser.hxx @@ -20,11 +20,13 @@ #ifndef _SAX_FASTPARSER_HXX_ #define _SAX_FASTPARSER_HXX_ +#include <queue> #include <vector> #include <stack> #include <boost/optional.hpp> #include <boost/shared_ptr.hpp> #include <boost/unordered_map.hpp> +#include <osl/conditn.hxx> #include <rtl/ref.hxx> #include <com/sun/star/xml/sax/XFastContextHandler.hpp> #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp> @@ -43,6 +45,7 @@ namespace sax_fastparser { +struct Event; class FastLocatorImpl; struct NamespaceDefine; @@ -59,6 +62,24 @@ struct NameWithToken msName(sName), mnToken(nToken) {} }; +typedef std::vector<Event> EventList; + +enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION }; + +struct Event { + boost::optional< OUString > msChars; + boost::optional< sal_Int32 > mnElementToken; + boost::optional< OUString > maNamespace; + boost::optional< OUString > maElementName; + boost::optional< rtl::Reference< 
FastAttributeList > > mpAttributes; + CallbackType maType; + Event(const CallbackType& t); + Event(const CallbackType& t, const OUString& sChars); + Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace, + const OUString& aElementName, FastAttributeList *pAttributes); + ~Event(); +}; + // -------------------------------------------------------------------- struct SaxContext @@ -86,13 +107,24 @@ struct ParserData // -------------------------------------------------------------------- -// Entity binds all information needed for a single file +// Entity binds all information needed for a single file | single call of parseStream struct Entity : public ParserData { + // Amount of work producer sends to consumer in one iteration: + static const size_t mnEventListSize = 1000; + // unique for each Entity instance: + + EventList *mpProducedEvents; + std::queue< EventList * > maPendingEvents; + std::queue< EventList * > maUsedEvents; + osl::Mutex maEventProtector; + osl::Condition maEventsPushed; + + // copied in copy constructor: + ::com::sun::star::xml::sax::InputSource maStructSource; XML_Parser mpParser; ::sax_expatwrap::XMLFile2UTFConverter maConverter; - ::rtl::Reference< FastAttributeList > mxAttributes; // Exceptions cannot be thrown through the C-XmlParser (possible resource leaks), // therefore the exception must be saved somewhere. 
@@ -108,9 +140,9 @@ struct Entity : public ParserData ::std::vector< NamespaceDefineRef > maNamespaceDefines; explicit Entity( const ParserData& rData ); + Entity( const Entity& rEntity ); ~Entity(); - void startElement( sal_Int32 nElementToken, const OUString& aNamespace, - const OUString& aElementName, FastAttributeList *pAttributes ); + void startElement( Event *pEvent ); void characters( const OUString& sChars ); void endElement(); }; @@ -155,9 +187,12 @@ public: inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); } inline void popEntity() { maEntities.pop(); } Entity& getEntity() { return maEntities.top(); } + void parse(); + void produce( const Event& ); private: - void parse(); + bool consume(EventList *); + void deleteUsedEvents(); sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 ); sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException); @@ -173,8 +208,7 @@ private: void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ); private: - ::osl::Mutex maMutex; - + osl::Mutex maMutex; ///< Protecting whole parseStream() execution ::rtl::Reference< FastLocatorImpl > mxDocumentLocator; NamespaceMap maNamespaceMap; |