From d7280af9740819971f46a855108334b84778b47d Mon Sep 17 00:00:00 2001 From: Michael Meeks Date: Tue, 26 Nov 2013 16:26:12 +0000 Subject: fastparser: Avoid copying all tokens into a sequence. --- include/oox/core/fasttokenhandler.hxx | 7 ++++++- include/oox/token/tokenmap.hxx | 22 +++++++++++++++++++++- include/sax/fastattribs.hxx | 14 +++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx index b79472c7e466..fb4e0e16fac6 100644 --- a/include/oox/core/fasttokenhandler.hxx +++ b/include/oox/core/fasttokenhandler.hxx @@ -23,6 +23,7 @@ #include <com/sun/star/lang/XServiceInfo.hpp> #include <com/sun/star/xml/sax/XFastTokenHandler.hpp> #include <cppuhelper/implbase2.hxx> +#include <sax/fastattribs.hxx> namespace oox { class TokenMap; } @@ -36,7 +37,8 @@ typedef ::cppu::WeakImplHelper2< ::com::sun::star::lang::XServiceInfo, ::com::su /** Wrapper implementing the com.sun.star.xml.sax.XFastTokenHandler API interface that provides access to the tokens generated from the internal token name list. */ -class FastTokenHandler : public FastTokenHandler_BASE +class FastTokenHandler : public FastTokenHandler_BASE, + public sax_fastparser::FastTokenHandlerBase { public: explicit FastTokenHandler(); @@ -53,6 +55,9 @@ public: virtual ::com::sun::star::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) throw (::com::sun::star::uno::RuntimeException); virtual sal_Int32 SAL_CALL getTokenFromUTF8( const ::com::sun::star::uno::Sequence< sal_Int8 >& Identifier ) throw (::com::sun::star::uno::RuntimeException); + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const; + private: const TokenMap& mrTokenMap; ///< Reference to global token map singleton. 
}; diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx index 495fa2d6eb3e..bddc0ce6a34d 100644 --- a/include/oox/token/tokenmap.hxx +++ b/include/oox/token/tokenmap.hxx @@ -47,9 +47,29 @@ public: /** Returns the token identifier for the passed UTF8 token name. */ sal_Int32 getTokenFromUtf8( - const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const; + const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const + { + return getTokenFromUTF8( reinterpret_cast< const char * >( + rUtf8Name.getConstArray() ), + rUtf8Name.getLength() ); + } + + /** Returns the token identifier for a UTF8 string passed in pToken */ + sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const + { + // 50% of OOXML tokens are primarily 1 lower-case character, a-z + if( nLength == 1) + { + sal_Char c = pToken[0]; + if (c >= 'a' && c <= 'z') + return mnAlphaTokens[ c - 'a' ]; + } + return getTokenPerfectHash( pToken, nLength ); + } private: + sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ) const; + struct TokenName { OUString maUniName; diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx index 42b285c9e847..f1f64dd4b9ed 100644 --- a/include/sax/fastattribs.hxx +++ b/include/sax/fastattribs.hxx @@ -49,6 +49,14 @@ struct UnknownAttribute typedef std::vector< UnknownAttribute > UnknownAttributeList; +/// A native C++ interface to tokenisation +class SAX_DLLPUBLIC FastTokenHandlerBase +{ + public: + virtual ~FastTokenHandlerBase() {} + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const = 0; +}; + /// avoid constantly allocating and freeing sequences. 
class SAX_DLLPUBLIC FastTokenLookup { @@ -58,13 +66,15 @@ public: FastTokenLookup(); sal_Int32 getTokenFromChars( const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &mxTokenHandler, + FastTokenHandlerBase *pTokenHandler, const char *pStr, size_t nLength = 0 ); }; class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList > { public: - FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler ); + FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler, + FastTokenHandlerBase *pOptHandlerBase = NULL ); virtual ~FastAttributeList(); void clear(); @@ -100,6 +110,8 @@ private: std::vector< sal_Int32 > maAttributeTokens; UnknownAttributeList maUnknownAttributes; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; + FastTokenHandlerBase *mpTokenHandler; + FastTokenLookup maTokenLookup; }; -- cgit