summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Meeks <michael.meeks@collabora.com>, 2013-11-26 16:26:12 +0000
committer: Michael Meeks <michael.meeks@collabora.com>, 2013-11-26 17:33:44 +0000
commit: d7280af9740819971f46a855108334b84778b47d (patch)
tree: 12f13fe1c9afcaa2b2e4ab3a7d22a70bda710c23
parent: 2d681ad020d3cd6beb53a0de1c3057537f31c8df (diff)
fastparser: Avoid copying all tokens into a sequence.
-rw-r--r-- include/oox/core/fasttokenhandler.hxx | 7
-rw-r--r-- include/oox/token/tokenmap.hxx | 22
-rw-r--r-- include/sax/fastattribs.hxx | 14
-rw-r--r-- oox/source/core/fasttokenhandler.cxx | 5
-rw-r--r-- oox/source/token/tokenmap.cxx | 12
-rw-r--r-- sax/source/fastparser/fastparser.cxx | 14
-rw-r--r-- sax/source/fastparser/fastparser.hxx | 1
-rw-r--r-- sax/source/tools/fastattribs.cxx | 30
-rw-r--r-- writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx | 23
-rw-r--r-- writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx | 7
10 files changed, 87 insertions, 48 deletions
diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx
index b79472c7e466..fb4e0e16fac6 100644
--- a/include/oox/core/fasttokenhandler.hxx
+++ b/include/oox/core/fasttokenhandler.hxx
@@ -23,6 +23,7 @@
#include <com/sun/star/lang/XServiceInfo.hpp>
#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
#include <cppuhelper/implbase2.hxx>
+#include <sax/fastattribs.hxx>
namespace oox { class TokenMap; }
@@ -36,7 +37,8 @@ typedef ::cppu::WeakImplHelper2< ::com::sun::star::lang::XServiceInfo, ::com::su
/** Wrapper implementing the com.sun.star.xml.sax.XFastTokenHandler API interface
that provides access to the tokens generated from the internal token name list.
*/
-class FastTokenHandler : public FastTokenHandler_BASE
+class FastTokenHandler : public FastTokenHandler_BASE,
+ public sax_fastparser::FastTokenHandlerBase
{
public:
explicit FastTokenHandler();
@@ -53,6 +55,9 @@ public:
virtual ::com::sun::star::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) throw (::com::sun::star::uno::RuntimeException);
virtual sal_Int32 SAL_CALL getTokenFromUTF8( const ::com::sun::star::uno::Sequence< sal_Int8 >& Identifier ) throw (::com::sun::star::uno::RuntimeException);
+ // Much faster direct C++ shortcut to the method that matters: implements sax_fastparser::FastTokenHandlerBase::getTokenDirect, taking a raw char pointer and length
+ virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const;
+
private:
const TokenMap& mrTokenMap; ///< Reference to global token map singleton.
};
diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx
index 495fa2d6eb3e..bddc0ce6a34d 100644
--- a/include/oox/token/tokenmap.hxx
+++ b/include/oox/token/tokenmap.hxx
@@ -47,9 +47,29 @@ public:
/** Returns the token identifier for the passed UTF8 token name. */
sal_Int32 getTokenFromUtf8(
- const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const;
+ const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const
+ {
+ return getTokenFromUTF8( reinterpret_cast< const char * >(
+ rUtf8Name.getConstArray() ),
+ rUtf8Name.getLength() );
+ }
+
+ /** Returns the token identifier for the nLength-byte UTF8 string starting at pToken. */
+ sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const
+ {
+ // Fast path: 50% of OOXML tokens are a single lower-case character a-z; those are read straight from mnAlphaTokens
+ if( nLength == 1)
+ {
+ sal_Char c = pToken[0];
+ if (c >= 'a' && c <= 'z')
+ return mnAlphaTokens[ c - 'a' ]; // table index 0..25, one slot per letter
+ }
+ return getTokenPerfectHash( pToken, nLength ); // every other name goes through getTokenPerfectHash()
+ }
private:
+ sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ) const;
+
struct TokenName
{
OUString maUniName;
diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx
index 42b285c9e847..f1f64dd4b9ed 100644
--- a/include/sax/fastattribs.hxx
+++ b/include/sax/fastattribs.hxx
@@ -49,6 +49,14 @@ struct UnknownAttribute
typedef std::vector< UnknownAttribute > UnknownAttributeList;
+/// A native C++ interface to tokenisation: implementations are queried with a raw char pointer plus length.
+class SAX_DLLPUBLIC FastTokenHandlerBase
+{
+ public:
+ virtual ~FastTokenHandlerBase() {} // virtual, as this class is used as a polymorphic base
+ virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const = 0; ///< token id for the nLength chars at pToken
+};
+
/// avoid constantly allocating and freeing sequences.
class SAX_DLLPUBLIC FastTokenLookup
{
@@ -58,13 +66,15 @@ public:
FastTokenLookup();
sal_Int32 getTokenFromChars(
const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &mxTokenHandler,
+ FastTokenHandlerBase *pTokenHandler,
const char *pStr, size_t nLength = 0 );
};
class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList >
{
public:
- FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler );
+ FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler,
+ FastTokenHandlerBase *pOptHandlerBase = NULL );
virtual ~FastAttributeList();
void clear();
@@ -100,6 +110,8 @@ private:
std::vector< sal_Int32 > maAttributeTokens;
UnknownAttributeList maUnknownAttributes;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler;
+ FastTokenHandlerBase *mpTokenHandler;
+
FastTokenLookup maTokenLookup;
};
diff --git a/oox/source/core/fasttokenhandler.cxx b/oox/source/core/fasttokenhandler.cxx
index f57739c92d1c..510240863845 100644
--- a/oox/source/core/fasttokenhandler.cxx
+++ b/oox/source/core/fasttokenhandler.cxx
@@ -92,6 +92,11 @@ sal_Int32 FastTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& rIdent
return mrTokenMap.getTokenFromUtf8( rIdentifier );
}
+sal_Int32 FastTokenHandler::getTokenDirect( const char *pToken, sal_Int32 nLength ) const
+{
+ return mrTokenMap.getTokenFromUTF8( pToken, nLength ); // delegate to the token map's pointer + length lookup
+}
+
// ============================================================================
} // namespace core
diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx
index ea3e621e3792..7728b26bff06 100644
--- a/oox/source/token/tokenmap.cxx
+++ b/oox/source/token/tokenmap.cxx
@@ -113,17 +113,9 @@ Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const
return Sequence< sal_Int8 >();
}
-sal_Int32 TokenMap::getTokenFromUtf8( const Sequence< sal_Int8 >& rUtf8Name ) const
+sal_Int32 TokenMap::getTokenPerfectHash( const char *pStr, sal_Int32 nLength ) const
{
- // 50% of OOXML tokens are primarily 1 lower-case character, a-z
- if( rUtf8Name.getLength() == 1)
- {
- sal_Char c = rUtf8Name[0];
- if (c >= 'a' && c <= 'z')
- return mnAlphaTokens[ c - 'a' ];
- }
- struct xmltoken* pToken = Perfect_Hash::in_word_set(
- reinterpret_cast< const char* >( rUtf8Name.getConstArray() ), rUtf8Name.getLength() );
+ struct xmltoken* pToken = Perfect_Hash::in_word_set( pStr, nLength );
return pToken ? pToken->nToken : XML_TOKEN_INVALID;
}
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 415747c63614..4c75e0cf6d8a 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -190,6 +190,7 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
// --------------------------------------------------------------------
ParserData::ParserData()
+ : mpTokenHandler( NULL )
{}
ParserData::~ParserData()
@@ -382,7 +383,9 @@ void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNa
sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
{
- return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, pToken, nLen );
+ return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler,
+ getEntity().mpTokenHandler,
+ pToken, nLen );
}
// --------------------------------------------------------------------
@@ -623,9 +626,10 @@ void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandle
maData.mxDocumentHandler = Handler;
}
-void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException)
+void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& xHandler ) throw (RuntimeException)
{
- maData.mxTokenHandler = Handler;
+ maData.mxTokenHandler = xHandler;
+ maData.mpTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
}
void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException)
@@ -918,7 +922,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
if (rEvent.mxAttributes.is())
rEvent.mxAttributes->clear();
else
- rEvent.mxAttributes.set( new FastAttributeList( rEntity.mxTokenHandler ) );
+ rEvent.mxAttributes.set(
+ new FastAttributeList( rEntity.mxTokenHandler,
+ rEntity.mpTokenHandler ) );
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 35deb0c76b5e..d2bf1a73c354 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -92,6 +92,7 @@ struct ParserData
{
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler;
+ FastTokenHandlerBase *mpTokenHandler;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler > mxErrorHandler;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver > mxEntityResolver;
::com::sun::star::lang::Locale maLocale;
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
index ee65cc600c80..bc63b3eeeb2a 100644
--- a/sax/source/tools/fastattribs.cxx
+++ b/sax/source/tools/fastattribs.cxx
@@ -47,8 +47,10 @@ void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const
}
}
-FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler )
-: mxTokenHandler( xTokenHandler )
+FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler,
+ sax_fastparser::FastTokenHandlerBase *pTokenHandler)
+: mxTokenHandler( xTokenHandler ),
+ mpTokenHandler( pTokenHandler )
{
// random initial size of buffer to store attribute values
mnChunkLength = 58;
@@ -114,7 +116,7 @@ sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXExcept
{
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
- return maTokenLookup.getTokenFromChars( mxTokenHandler,
+ return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler,
mpChunk + maAttributeValues[ i ],
AttributeValueLength( i ) );
@@ -125,7 +127,7 @@ sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int
{
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
- return maTokenLookup.getTokenFromChars( mxTokenHandler,
+ return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler,
mpChunk + maAttributeValues[ i ],
AttributeValueLength( i ) );
@@ -227,6 +229,7 @@ FastTokenLookup::FastTokenLookup()
*/
sal_Int32 FastTokenLookup::getTokenFromChars(
const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &xTokenHandler,
+ FastTokenHandlerBase *pTokenHandler,
const char *pToken, size_t nLen /* = 0 */ )
{
sal_Int32 nRet;
@@ -234,23 +237,12 @@ sal_Int32 FastTokenLookup::getTokenFromChars(
if( !nLen )
nLen = strlen( pToken );
- if ( static_cast<sal_Int32>(nLen) < mnUtf8BufferSize )
- {
- // Get intimate with the underlying sequence cf. sal/types.h
- sal_Sequence *pSeq = maUtf8Buffer.get();
-
- sal_Int32 nPreRefCount = pSeq->nRefCount;
-
- pSeq->nElements = nLen;
- memcpy( pSeq->elements, pToken, nLen );
- nRet = xTokenHandler->getTokenFromUTF8( maUtf8Buffer );
-
- (void)nPreRefCount; // for non-debug mode.
- assert( pSeq->nRefCount == nPreRefCount ); // callee must not take ref
- }
+ if( pTokenHandler )
+ nRet = pTokenHandler->getTokenDirect( pToken, (sal_Int32) nLen );
else
{
- Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); // heap allocate & free
+ // heap allocate, copy & then free
+ Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen );
nRet = xTokenHandler->getTokenFromUTF8( aSeq );
}
diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx
index 096de7f0b9c1..4cf32f562a27 100644
--- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx
@@ -108,23 +108,16 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi
#endif
}
-::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8
-(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException)
+::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenDirect( const char *pStr, sal_Int32 nLength ) const
{
- ::sal_Int32 nResult = OOXML_FAST_TOKENS_END;
-
struct tokenmap::token * pToken =
- tokenmap::Perfect_Hash::in_word_set
- (reinterpret_cast<const char *>(Identifier.getConstArray()),
- Identifier.getLength());
+ tokenmap::Perfect_Hash::in_word_set( pStr, nLength );
- if (pToken != NULL)
- nResult = pToken->nToken;
+ sal_Int32 nResult = pToken != NULL ? pToken->nToken : OOXML_FAST_TOKENS_END;
#ifdef DEBUG_TOKEN
clog << "getTokenFromUTF8: "
- << string(reinterpret_cast<const char *>
- (Identifier.getConstArray()), Identifier.getLength())
+ << string(pStr, nLength)
<< ", " << nResult
<< (pToken == NULL ? ", failed" : "") << endl;
#endif
@@ -132,6 +125,14 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi
return nResult;
}
+::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8
+(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException)
+{ // Sequence-based entry point: forwards the sequence's bytes and length to getTokenDirect()
+ return getTokenDirect(reinterpret_cast<const char *>
+ (Identifier.getConstArray()),
+ Identifier.getLength());
+}
+
}}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx
index 120c4e70a74f..ddb5b50d153a 100644
--- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx
@@ -24,6 +24,7 @@
#include "com/sun/star/uno/XComponentContext.hpp"
#include "cppuhelper/implbase1.hxx"
#include "com/sun/star/xml/sax/XFastTokenHandler.hpp"
+#include "sax/fastattribs.hxx"
namespace writerfilter {
namespace ooxml
@@ -31,7 +32,8 @@ namespace ooxml
class OOXMLFastTokenHandler:
public ::cppu::WeakImplHelper1<
- css::xml::sax::XFastTokenHandler>
+ css::xml::sax::XFastTokenHandler>,
+ public sax_fastparser::FastTokenHandlerBase
{
public:
explicit OOXMLFastTokenHandler(css::uno::Reference< css::uno::XComponentContext > const & context);
@@ -42,6 +44,9 @@ public:
virtual css::uno::Sequence< ::sal_Int8 > SAL_CALL getUTF8Identifier(::sal_Int32 Token) throw (css::uno::RuntimeException);
virtual ::sal_Int32 SAL_CALL getTokenFromUTF8(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException);
+ // Much faster direct C++ shortcut to the method that matters. NOTE(review): the definition in OOXMLFastTokenHandler.cxx is marked SAL_CALL but this declaration is not - confirm the two agree on every platform.
+ virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const;
+
private:
OOXMLFastTokenHandler(OOXMLFastTokenHandler &); // not defined
void operator =(OOXMLFastTokenHandler &); // not defined