summaryrefslogtreecommitdiff
path: root/sax/source/fastparser
diff options
context:
space:
mode:
authorMatúš Kukan <matus.kukan@gmail.com>2013-10-08 23:28:32 +0200
committerMatúš Kukan <matus.kukan@gmail.com>2013-10-17 21:38:38 +0200
commit9612bdbfa9335823e864c354130717f0e7607bf1 (patch)
tree4c1a27c90d2015ae62b1ff288a8b4afaabc21d29 /sax/source/fastparser
parent2df047bfd1f618329872261ca0600fe232ad8cfe (diff)
fastparser: implementation using two threads
Instead of calling the handler methods directly, generate an EventList (a vector of Events) in which the arguments for the callee are stored. Change-Id: I227a0ef3038566664ac8f294770152c8b445997b
Diffstat (limited to 'sax/source/fastparser')
-rw-r--r--sax/source/fastparser/fastparser.cxx192
-rw-r--r--sax/source/fastparser/fastparser.hxx48
2 files changed, 214 insertions, 26 deletions
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 825160f998a0..6f92f59e82ac 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -21,6 +21,7 @@
#include <osl/diagnose.h>
#include <rtl/ustrbuf.hxx>
+#include <salhelper/thread.hxx>
#include <com/sun/star/lang/DisposedException.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
@@ -61,6 +62,25 @@ struct NamespaceDefine
NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
};
+class ParserThread: public salhelper::Thread // Thread named "Parser" whose body runs FastSaxParser::parse().
+{
+ FastSaxParser *mpParser; // parser driven by this thread; never deleted by this class
+public:
+ ParserThread(FastSaxParser *pParser): Thread("Parser"), mpParser(pParser) {}
+private:
+ virtual void execute() // thread body: run the parser and report a parse failure as an event
+ {
+ try
+ {
+ mpParser->parse();
+ }
+ catch (const SAXParseException&) // left unnamed: the exception object itself is not used here
+ {
+ mpParser->produce(Event( CallbackType::EXCEPTION )); // signal the failure through the event queue
+ }
+ } // NOTE(review): only SAXParseException is caught -- confirm parse() cannot throw anything else
+};
+
// --------------------------------------------------------------------
// FastLocatorImpl
// --------------------------------------------------------------------
@@ -167,31 +187,65 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
// --------------------------------------------------------------------
-ParserData::ParserData()
+Event::Event(const CallbackType& t): maType(t) // event without arguments: only the type is stored
+{}
+
+Event::Event(const CallbackType& t, const OUString& sChars): Event(t) // NOTE(review): delegating constructor, needs C++11 -- confirm all supported compilers accept it
{
+ msChars = sChars; // text argument of the event
}
-ParserData::~ParserData()
+Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes): Event(t) // element event; delegates to Event(t) for the type
{
+ mnElementToken = nElementToken;
+ maNamespace = aNamespace;
+ maElementName = aElementName;
+ mpAttributes = rtl::Reference< FastAttributeList >(pAttributes); // the event holds a reference to the attribute list
}
+Event::~Event()
+{}
+
+// --------------------------------------------------------------------
+
+ParserData::ParserData() // members are default-constructed; no explicit initialisation
+{}
+
+ParserData::~ParserData() // empty body; no explicit cleanup
+{}
+
// --------------------------------------------------------------------
Entity::Entity( const ParserData& rData ) :
ParserData( rData )
{
- // performance-improvement. Reference is needed when calling the startTag callback.
- // Handing out the same object with every call is allowed (see sax-specification)
- mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
+ mpProducedEvents = 0; // no batch is being filled yet; produce() allocates one when it sees a null pointer
+}
+
+Entity::Entity( const Entity& e ) : // copies the members listed below; the event queues, mutex and condition are not in the list
+ ParserData( e )
+ ,maStructSource(e.maStructSource)
+ ,mpParser(e.mpParser)
+ ,maConverter(e.maConverter)
+ ,maSavedException(e.maSavedException)
+ ,maNamespaceStack(e.maNamespaceStack)
+ ,maContextStack(e.maContextStack)
+ ,maNamespaceCount(e.maNamespaceCount)
+ ,maNamespaceDefines(e.maNamespaceDefines)
+{
+ mpProducedEvents = 0; // the copy never takes over the source's partially filled batch
}
Entity::~Entity()
{
}
-void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes )
+void Entity::startElement( Event *pEvent )
{
+ const sal_Int32& nElementToken = pEvent->mnElementToken.get();
+ const OUString& aNamespace = pEvent->maNamespace.get();
+ const OUString& aElementName = pEvent->maElementName.get();
Reference< XFastContextHandler > xParentContext;
if( !maContextStack.empty() )
{
@@ -207,7 +261,7 @@ void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
try
{
- Reference< XFastAttributeList > xAttr( pAttributes );
+ Reference< XFastAttributeList > xAttr( pEvent->mpAttributes.get().get() );
Reference< XFastContextHandler > xContext;
if( nElementToken == FastToken::DONTKNOW )
{
@@ -495,6 +549,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
pushEntity( entity );
+ Entity& rEntity = getEntity();
try
{
// start the document
@@ -505,7 +560,24 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
entity.mxDocumentHandler->startDocument();
}
- parse();
+ rtl::Reference<ParserThread> xParser;
+ xParser = new ParserThread(this);
+ xParser->launch();
+ bool done = false;
+ do {
+ rEntity.maEventsPushed.wait();
+ rEntity.maEventsPushed.reset();
+ MutexGuard aGuard(rEntity.maEventProtector);
+ while (!rEntity.maPendingEvents.empty())
+ {
+ EventList *pEventList = rEntity.maPendingEvents.front();
+ rEntity.maPendingEvents.pop();
+ if (!consume(pEventList))
+ done = true;
+ }
+ } while (!done);
+ xParser->join();
+ deleteUsedEvents();
// finish document
if( entity.mxDocumentHandler.is() )
@@ -673,6 +745,88 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3
} // namespace
+void FastSaxParser::deleteUsedEvents() // Deletes every EventList queued in the current entity's maUsedEvents.
+{
+ Entity& rEntity = getEntity();
+ while (!rEntity.maUsedEvents.empty())
+ {
+ EventList *pEventList = rEntity.maUsedEvents.front();
+ rEntity.maUsedEvents.pop();
+
+ delete pEventList; // the queue stores raw pointers, so each list is freed by hand
+ }
+}
+
+void FastSaxParser::produce(const Event& aEvent) // Appends aEvent to the current batch and hands the batch over when it is full or final.
+{
+ Entity& rEntity = getEntity();
+ if (!rEntity.mpProducedEvents) // no batch in progress: start a new one
+ {
+ rEntity.mpProducedEvents = new EventList();
+ rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize);
+ }
+ rEntity.mpProducedEvents->push_back( aEvent ); // the event is copied into the batch
+ if (aEvent.maType == CallbackType::DONE || // DONE and EXCEPTION force a hand-over ...
+ aEvent.maType == CallbackType::EXCEPTION ||
+ rEntity.mpProducedEvents->size() == rEntity.mnEventListSize) // ... as does a full batch
+ {
+ MutexGuard aGuard(rEntity.maEventProtector); // guards the pending/used queues
+ rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
+ rEntity.mpProducedEvents = 0; // the next produce() call starts a fresh batch
+ deleteUsedEvents(); // free the batches already queued in maUsedEvents
+ rEntity.maEventsPushed.set(); // signal that pending events are available
+ }
+}
+
+bool FastSaxParser::consume(EventList *pEventList) // Replays one batch on the current entity; returns false once DONE was seen, throws SAXParseException on EXCEPTION.
+{
+ Entity& rEntity = getEntity();
+ bool bIsParserFinished = false;
+ for (EventList::iterator aEventIt = pEventList->begin();
+ aEventIt != pEventList->end(); ++aEventIt)
+ {
+ switch ((*aEventIt).maType)
+ {
+ case CallbackType::START_ELEMENT:
+ rEntity.startElement( &(*aEventIt) );
+ break;
+ case CallbackType::END_ELEMENT:
+ rEntity.endElement();
+ break;
+ case CallbackType::CHARACTERS:
+ rEntity.characters( (*aEventIt).msChars.get() );
+ break;
+ case CallbackType::DONE:
+ bIsParserFinished = true;
+ assert(aEventIt+1 == pEventList->end()); // DONE is expected to be the last event of its batch
+ break;
+ case CallbackType::EXCEPTION:
+ assert( rEntity.maSavedException.hasValue() );
+ // Error during parsing !
+ XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
+ OUString sSystemId = mxDocumentLocator->getSystemId();
+ sal_Int32 nLine = mxDocumentLocator->getLineNumber();
+ delete pEventList; // this path always leaves by exception and never reaches maUsedEvents.push() below, so free the batch here
+ SAXParseException aExcept(
+ lclGetErrorMessage( xmlE, sSystemId, nLine ),
+ Reference< XInterface >(),
+ Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
+ mxDocumentLocator->getPublicId(),
+ mxDocumentLocator->getSystemId(),
+ mxDocumentLocator->getLineNumber(),
+ mxDocumentLocator->getColumnNumber()
+ );
+ // error handler is set, it may throw the exception
+ if( rEntity.mxErrorHandler.is() )
+ rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
+
+ throw aExcept;
+ }
+ }
+ rEntity.maUsedEvents.push(pEventList); // processed batch is queued for deleteUsedEvents()
+ return !bIsParserFinished;
+}
+
// starts parsing with actual parser !
void FastSaxParser::parse()
{
@@ -720,6 +874,7 @@ void FastSaxParser::parse()
}
}
while( nRead > 0 );
+ produce(Event( CallbackType::DONE ));
}
//------------------------------------------
@@ -741,14 +896,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
}
- rEntity.mxAttributes->clear();
-
// create attribute map and process namespace instructions
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
OUString sNamespace;
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
+ FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler );
if (!rEntity.maNamespaceStack.empty())
{
sNamespace = rEntity.maNamespaceStack.top().msName;
@@ -796,9 +950,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
+ pAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
+ pAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
OString(pName, nNameLen), awAttributes[i+1] );
}
}
@@ -808,9 +962,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
+ pAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
+ pAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
}
}
}
@@ -832,8 +986,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
}
rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
- rEntity.startElement( nElementToken, sNamespace,
- OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() );
+ produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
+ OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
}
catch (const Exception& e)
{
@@ -852,13 +1006,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
if( !rEntity.maNamespaceStack.empty() )
rEntity.maNamespaceStack.pop();
- rEntity.endElement();
+ produce(Event( CallbackType::END_ELEMENT ));
}
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{
- getEntity().characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
+ produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) )); // queue the text as a CHARACTERS event instead of calling characters() directly
}
void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index d604a8421f19..b23c3ff53a9c 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -20,11 +20,13 @@
#ifndef _SAX_FASTPARSER_HXX_
#define _SAX_FASTPARSER_HXX_
+#include <queue>
#include <vector>
#include <stack>
#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
+#include <osl/conditn.hxx>
#include <rtl/ref.hxx>
#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
@@ -43,6 +45,7 @@
namespace sax_fastparser {
+struct Event;
class FastLocatorImpl;
struct NamespaceDefine;
@@ -59,6 +62,24 @@ struct NameWithToken
msName(sName), mnToken(nToken) {}
};
+typedef std::vector<Event> EventList; // one batch of events, filled by produce() and replayed by consume()
+
+enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION }; // kind of an Event; NOTE(review): used as CallbackType::X elsewhere, which for a plain enum needs C++11 -- confirm
+
+struct Event { // one recorded parser callback: its type plus whichever arguments that type carries
+ boost::optional< OUString > msChars; // set only by the (type, chars) constructor
+ boost::optional< sal_Int32 > mnElementToken; // this and the next three are set only by the element constructor
+ boost::optional< OUString > maNamespace;
+ boost::optional< OUString > maElementName;
+ boost::optional< rtl::Reference< FastAttributeList > > mpAttributes;
+ CallbackType maType; // always set; selects the branch taken in consume()
+ Event(const CallbackType& t);
+ Event(const CallbackType& t, const OUString& sChars);
+ Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes);
+ ~Event();
+};
+
// --------------------------------------------------------------------
struct SaxContext
@@ -86,13 +107,24 @@ struct ParserData
// --------------------------------------------------------------------
-// Entity binds all information needed for a single file
+// Entity binds all information needed for a single file | single call of parseStream
struct Entity : public ParserData
{
+ // Amount of work producer sends to consumer in one iteration:
+ static const size_t mnEventListSize = 1000;
+ // unique for each Entity instance:
+
+ EventList *mpProducedEvents;
+ std::queue< EventList * > maPendingEvents;
+ std::queue< EventList * > maUsedEvents;
+ osl::Mutex maEventProtector;
+ osl::Condition maEventsPushed;
+
+ // copied in copy constructor:
+
::com::sun::star::xml::sax::InputSource maStructSource;
XML_Parser mpParser;
::sax_expatwrap::XMLFile2UTFConverter maConverter;
- ::rtl::Reference< FastAttributeList > mxAttributes;
// Exceptions cannot be thrown through the C-XmlParser (possible resource leaks),
// therefore the exception must be saved somewhere.
@@ -108,9 +140,9 @@ struct Entity : public ParserData
::std::vector< NamespaceDefineRef > maNamespaceDefines;
explicit Entity( const ParserData& rData );
+ Entity( const Entity& rEntity );
~Entity();
- void startElement( sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes );
+ void startElement( Event *pEvent );
void characters( const OUString& sChars );
void endElement();
};
@@ -155,9 +187,12 @@ public:
inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); }
inline void popEntity() { maEntities.pop(); }
Entity& getEntity() { return maEntities.top(); }
+ void parse();
+ void produce( const Event& );
private:
- void parse();
+ bool consume(EventList *);
+ void deleteUsedEvents();
sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
@@ -173,8 +208,7 @@ private:
void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen );
private:
- ::osl::Mutex maMutex;
-
+ osl::Mutex maMutex; ///< Protecting whole parseStream() execution
::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
NamespaceMap maNamespaceMap;