diff options
author | Michael Meeks <michael.meeks@collabora.com> | 2013-11-26 16:26:12 +0000 |
---|---|---|
committer | Michael Meeks <michael.meeks@collabora.com> | 2013-11-26 17:33:44 +0000 |
commit | d7280af9740819971f46a855108334b84778b47d (patch) | |
tree | 12f13fe1c9afcaa2b2e4ab3a7d22a70bda710c23 | |
parent | 2d681ad020d3cd6beb53a0de1c3057537f31c8df (diff) |
fastparser: Avoid copying all tokens into a sequence.
-rw-r--r-- | include/oox/core/fasttokenhandler.hxx | 7 | ||||
-rw-r--r-- | include/oox/token/tokenmap.hxx | 22 | ||||
-rw-r--r-- | include/sax/fastattribs.hxx | 14 | ||||
-rw-r--r-- | oox/source/core/fasttokenhandler.cxx | 5 | ||||
-rw-r--r-- | oox/source/token/tokenmap.cxx | 12 | ||||
-rw-r--r-- | sax/source/fastparser/fastparser.cxx | 14 | ||||
-rw-r--r-- | sax/source/fastparser/fastparser.hxx | 1 | ||||
-rw-r--r-- | sax/source/tools/fastattribs.cxx | 30 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx | 23 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx | 7 |
10 files changed, 87 insertions, 48 deletions
diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx index b79472c7e466..fb4e0e16fac6 100644 --- a/include/oox/core/fasttokenhandler.hxx +++ b/include/oox/core/fasttokenhandler.hxx @@ -23,6 +23,7 @@ #include <com/sun/star/lang/XServiceInfo.hpp> #include <com/sun/star/xml/sax/XFastTokenHandler.hpp> #include <cppuhelper/implbase2.hxx> +#include <sax/fastattribs.hxx> namespace oox { class TokenMap; } @@ -36,7 +37,8 @@ typedef ::cppu::WeakImplHelper2< ::com::sun::star::lang::XServiceInfo, ::com::su /** Wrapper implementing the com.sun.star.xml.sax.XFastTokenHandler API interface that provides access to the tokens generated from the internal token name list. */ -class FastTokenHandler : public FastTokenHandler_BASE +class FastTokenHandler : public FastTokenHandler_BASE, + public sax_fastparser::FastTokenHandlerBase { public: explicit FastTokenHandler(); @@ -53,6 +55,9 @@ public: virtual ::com::sun::star::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) throw (::com::sun::star::uno::RuntimeException); virtual sal_Int32 SAL_CALL getTokenFromUTF8( const ::com::sun::star::uno::Sequence< sal_Int8 >& Identifier ) throw (::com::sun::star::uno::RuntimeException); + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const; + private: const TokenMap& mrTokenMap; ///< Reference to global token map singleton. }; diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx index 495fa2d6eb3e..bddc0ce6a34d 100644 --- a/include/oox/token/tokenmap.hxx +++ b/include/oox/token/tokenmap.hxx @@ -47,9 +47,29 @@ public: /** Returns the token identifier for the passed UTF8 token name. */ sal_Int32 getTokenFromUtf8( - const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const; + const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const + { + return getTokenFromUTF8( reinterpret_cast< const char * >( + rUtf8Name.getConstArray() ), + rUtf8Name.getLength() ); + } + + /** Returns the token identifier for a UTF8 string passed in pToken */ + sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const + { + // 50% of OOXML tokens are primarily 1 lower-case character, a-z + if( nLength == 1) + { + sal_Char c = pToken[0]; + if (c >= 'a' && c <= 'z') + return mnAlphaTokens[ c - 'a' ]; + } + return getTokenPerfectHash( pToken, nLength ); + } private: + sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ) const; + struct TokenName { OUString maUniName; diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx index 42b285c9e847..f1f64dd4b9ed 100644 --- a/include/sax/fastattribs.hxx +++ b/include/sax/fastattribs.hxx @@ -49,6 +49,14 @@ struct UnknownAttribute typedef std::vector< UnknownAttribute > UnknownAttributeList; +/// A native C++ interface to tokenisation +class SAX_DLLPUBLIC FastTokenHandlerBase +{ + public: + virtual ~FastTokenHandlerBase() {} + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const = 0; +}; + /// avoid constantly allocating and freeing sequences. class SAX_DLLPUBLIC FastTokenLookup { @@ -58,13 +66,15 @@ public: FastTokenLookup(); sal_Int32 getTokenFromChars( const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &mxTokenHandler, + FastTokenHandlerBase *pTokenHandler, const char *pStr, size_t nLength = 0 ); }; class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList > { public: - FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler ); + FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler, + FastTokenHandlerBase *pOptHandlerBase = NULL ); virtual ~FastAttributeList(); void clear(); @@ -100,6 +110,8 @@ private: std::vector< sal_Int32 > maAttributeTokens; UnknownAttributeList maUnknownAttributes; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; + FastTokenHandlerBase *mpTokenHandler; + FastTokenLookup maTokenLookup; }; diff --git a/oox/source/core/fasttokenhandler.cxx b/oox/source/core/fasttokenhandler.cxx index f57739c92d1c..510240863845 100644 --- a/oox/source/core/fasttokenhandler.cxx +++ b/oox/source/core/fasttokenhandler.cxx @@ -92,6 +92,11 @@ sal_Int32 FastTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& rIdent return mrTokenMap.getTokenFromUtf8( rIdentifier ); } +sal_Int32 FastTokenHandler::getTokenDirect( const char *pToken, sal_Int32 nLength ) const +{ + return mrTokenMap.getTokenFromUTF8( pToken, nLength ); +} + // ============================================================================ } // namespace core diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx index ea3e621e3792..7728b26bff06 100644 --- a/oox/source/token/tokenmap.cxx +++ b/oox/source/token/tokenmap.cxx @@ -113,17 +113,9 @@ Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const return Sequence< sal_Int8 >(); } -sal_Int32 TokenMap::getTokenFromUtf8( const Sequence< sal_Int8 >& rUtf8Name ) const +sal_Int32 TokenMap::getTokenPerfectHash( const char *pStr, sal_Int32 nLength ) const { - // 50% of OOXML tokens are primarily 1 lower-case character, a-z - if( rUtf8Name.getLength() == 1) - { - sal_Char c = rUtf8Name[0]; - if (c >= 'a' && c <= 'z') - return mnAlphaTokens[ c - 'a' ]; - } - struct xmltoken* pToken = Perfect_Hash::in_word_set( - reinterpret_cast< const char* >( rUtf8Name.getConstArray() ), rUtf8Name.getLength() ); + struct xmltoken* pToken = Perfect_Hash::in_word_set( pStr, nLength ); return pToken ? pToken->nToken : XML_TOKEN_INVALID; } diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 415747c63614..4c75e0cf6d8a 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -190,6 +190,7 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) // -------------------------------------------------------------------- ParserData::ParserData() + : mpTokenHandler( NULL ) {} ParserData::~ParserData() @@ -382,7 +383,9 @@ void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNa sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) { - return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, pToken, nLen ); + return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, + getEntity().mpTokenHandler, + pToken, nLen ); } // -------------------------------------------------------------------- @@ -623,9 +626,10 @@ void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandle maData.mxDocumentHandler = Handler; } -void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException) +void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& xHandler ) throw (RuntimeException) { - maData.mxTokenHandler = Handler; + maData.mxTokenHandler = xHandler; + maData.mpTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ); } void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) @@ -918,7 +922,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char if (rEvent.mxAttributes.is()) rEvent.mxAttributes->clear(); else - rEvent.mxAttributes.set( new FastAttributeList( rEntity.mxTokenHandler ) ); + rEvent.mxAttributes.set( + new FastAttributeList( rEntity.mxTokenHandler, + rEntity.mpTokenHandler ) ); sal_Int32 nNameLen, nPrefixLen; const XML_Char *pName; diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx index 35deb0c76b5e..d2bf1a73c354 100644 --- a/sax/source/fastparser/fastparser.hxx +++ b/sax/source/fastparser/fastparser.hxx @@ -92,6 +92,7 @@ struct ParserData { ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; + FastTokenHandlerBase *mpTokenHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler > mxErrorHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver > mxEntityResolver; ::com::sun::star::lang::Locale maLocale; diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx index ee65cc600c80..bc63b3eeeb2a 100644 --- a/sax/source/tools/fastattribs.cxx +++ b/sax/source/tools/fastattribs.cxx @@ -47,8 +47,10 @@ void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const } } -FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler ) -: mxTokenHandler( xTokenHandler ) +FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler, + sax_fastparser::FastTokenHandlerBase *pTokenHandler) +: mxTokenHandler( xTokenHandler ), + mpTokenHandler( pTokenHandler ) { // random initial size of buffer to store attribute values mnChunkLength = 58; @@ -114,7 +116,7 @@ sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXExcept { for (size_t i = 0; i < maAttributeTokens.size(); ++i) if (maAttributeTokens[i] == Token) - return maTokenLookup.getTokenFromChars( mxTokenHandler, + return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler, mpChunk + maAttributeValues[ i ], AttributeValueLength( i ) ); @@ -125,7 +127,7 @@ sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int { for (size_t i = 0; i < maAttributeTokens.size(); ++i) if (maAttributeTokens[i] == Token) - return maTokenLookup.getTokenFromChars( mxTokenHandler, + return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler, mpChunk + maAttributeValues[ i ], AttributeValueLength( i ) ); @@ -227,6 +229,7 @@ FastTokenLookup::FastTokenLookup() */ sal_Int32 FastTokenLookup::getTokenFromChars( const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &xTokenHandler, + FastTokenHandlerBase *pTokenHandler, const char *pToken, size_t nLen /* = 0 */ ) { sal_Int32 nRet; @@ -234,23 +237,12 @@ sal_Int32 FastTokenLookup::getTokenFromChars( if( !nLen ) nLen = strlen( pToken ); - if ( static_cast<sal_Int32>(nLen) < mnUtf8BufferSize ) - { - // Get intimate with the underlying sequence cf. sal/types.h - sal_Sequence *pSeq = maUtf8Buffer.get(); - - sal_Int32 nPreRefCount = pSeq->nRefCount; - - pSeq->nElements = nLen; - memcpy( pSeq->elements, pToken, nLen ); - nRet = xTokenHandler->getTokenFromUTF8( maUtf8Buffer ); - - (void)nPreRefCount; // for non-debug mode. - assert( pSeq->nRefCount == nPreRefCount ); // callee must not take ref - } + if( pTokenHandler ) + nRet = pTokenHandler->getTokenDirect( pToken, (sal_Int32) nLen ); else { - Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); // heap allocate & free + // heap allocate, copy & then free + Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); nRet = xTokenHandler->getTokenFromUTF8( aSeq ); } diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx index 096de7f0b9c1..4cf32f562a27 100644 --- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx @@ -108,23 +108,16 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi #endif } -::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8 -(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException) +::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenDirect( const char *pStr, sal_Int32 nLength ) const { - ::sal_Int32 nResult = OOXML_FAST_TOKENS_END; - struct tokenmap::token * pToken = - tokenmap::Perfect_Hash::in_word_set - (reinterpret_cast<const char *>(Identifier.getConstArray()), - Identifier.getLength()); + tokenmap::Perfect_Hash::in_word_set( pStr, nLength ); - if (pToken != NULL) - nResult = pToken->nToken; + sal_Int32 nResult = pToken != NULL ? pToken->nToken : OOXML_FAST_TOKENS_END; #ifdef DEBUG_TOKEN clog << "getTokenFromUTF8: " - << string(reinterpret_cast<const char *> - (Identifier.getConstArray()), Identifier.getLength()) + << string(pStr, nLength) << ", " << nResult << (pToken == NULL ? ", failed" : "") << endl; #endif @@ -132,6 +125,14 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi return nResult; } +::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8 +(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException) +{ + return getTokenDirect(reinterpret_cast<const char *> + (Identifier.getConstArray()), + Identifier.getLength()); +} + }} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx index 120c4e70a74f..ddb5b50d153a 100644 --- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx +++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx @@ -24,6 +24,7 @@ #include "com/sun/star/uno/XComponentContext.hpp" #include "cppuhelper/implbase1.hxx" #include "com/sun/star/xml/sax/XFastTokenHandler.hpp" +#include "sax/fastattribs.hxx" namespace writerfilter { namespace ooxml @@ -31,7 +32,8 @@ namespace ooxml class OOXMLFastTokenHandler: public ::cppu::WeakImplHelper1< - css::xml::sax::XFastTokenHandler> + css::xml::sax::XFastTokenHandler>, + public sax_fastparser::FastTokenHandlerBase { public: explicit OOXMLFastTokenHandler(css::uno::Reference< css::uno::XComponentContext > const & context); @@ -42,6 +44,9 @@ public: virtual css::uno::Sequence< ::sal_Int8 > SAL_CALL getUTF8Identifier(::sal_Int32 Token) throw (css::uno::RuntimeException); virtual ::sal_Int32 SAL_CALL getTokenFromUTF8(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException); + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const; + private: OOXMLFastTokenHandler(OOXMLFastTokenHandler &); // not defined void operator =(OOXMLFastTokenHandler &); // not defined |