From 84db6c1d9c57c3dc115dc22413442cf1aa5e69df Mon Sep 17 00:00:00 2001 From: Mohammed Abdul Azeem Date: Wed, 27 Jul 2016 22:28:39 +0530 Subject: GSOC - Avoid tokenizing strings for Unknown elements: Added a special case when token handler is not set, which avoids tokenizing strings. Change-Id: I749a7af22e45180cc8bfc55843832b7ccb529ac6 Reviewed-on: https://gerrit.libreoffice.org/27593 Tested-by: Jenkins Reviewed-by: Michael Meeks Tested-by: Michael Meeks --- sax/source/fastparser/fastparser.cxx | 109 ++++++++++++++++++----------- sax/source/fastparser/legacyfastparser.cxx | 16 ----- 2 files changed, 67 insertions(+), 58 deletions(-) (limited to 'sax/source') diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 49038b8d6357..ab324a1f40db 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -738,9 +738,6 @@ void FastSaxParserImpl::parseStream(const InputSource& maStructSource) Entity entity( maData ); entity.maStructSource = maStructSource; - if( !entity.mxTokenHandler.is() ) - throw SAXException("No token handler, use setTokenHandler()", Reference< XInterface >(), Any() ); - if( !entity.maStructSource.aInputStream.is() ) throw SAXException("No input source", Reference< XInterface >(), Any() ); @@ -1072,60 +1069,88 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm try { - /* #158414# Each element may define new namespaces, also for attribues. - First, process all namespaces, second, process the attributes after namespaces - have been initialized. */ - - // #158414# first: get namespaces - for (int i = 0; i < numNamespaces * 2; i += 2) + if ( rEntity.mxTokenHandler.is() ) { - // namespaces[] is (prefix/URI) - if( namespaces[ i ] != nullptr ) + /* #158414# Each element may define new namespaces, also for attribues. + First, process all namespaces, second, process the attributes after namespaces + have been initialized. */ + + // #158414# first: get namespaces + for (int i = 0; i < numNamespaces * 2; i += 2) { - DefineNamespace( OString( XML_CAST( namespaces[ i ] )), - OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 )); - if( rEntity.mxNamespaceHandler.is() ) - rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + // namespaces[] is (prefix/URI) + if( namespaces[ i ] != nullptr ) + { + DefineNamespace( OString( XML_CAST( namespaces[ i ] )), + OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 )); + } + else + { + // default namespace + sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); + nNamespaceToken = GetNamespaceToken( sNamespace ); + } } + + // #158414# second: fill attribute list with other attributes + for (int i = 0; i < numAttributes * 5; i += 5) + { + // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd ) + if( attributes[ i + 1 ] != nullptr ) + { + sal_Int32 nAttributeToken = GetTokenWithPrefix( attributes[ i + 1 ], strlen( XML_CAST( attributes[ i + 1 ] )), attributes[ i ], strlen( XML_CAST( attributes[ i ] ))); + if( nAttributeToken != FastToken::DONTKNOW ) + rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ); + else + rEvent.mxAttributes->addUnknown( OUString( XML_CAST( attributes[ i + 1 ] ), strlen( XML_CAST( attributes[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ), + OString( XML_CAST( attributes[ i ] )), OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + } + else + { + sal_Int32 nAttributeToken = GetToken( attributes[ i ], strlen( XML_CAST( attributes[ i ] ))); + if( nAttributeToken != FastToken::DONTKNOW ) + rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ); + else + rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + } + } + + if( prefix != nullptr ) + rEvent.mnElementToken = GetTokenWithPrefix( prefix, strlen( XML_CAST( prefix )), localName, strlen( XML_CAST( localName ))); + else if( !sNamespace.isEmpty() ) + rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, localName, strlen( XML_CAST( localName ))); else + rEvent.mnElementToken = GetToken( localName, strlen( XML_CAST( localName ))); + } + else + { + for (int i = 0; i < numNamespaces * 2; i += 2) { - // default namespace - sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); - nNamespaceToken = GetNamespaceToken( sNamespace ); if( rEntity.mxNamespaceHandler.is() ) - rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + { + if( namespaces[ i ] != nullptr ) + rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + else + { + sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); + rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + } + } } - } - // #158414# second: fill attribute list with other attributes - for (int i = 0; i < numAttributes * 5; i += 5) - { - if( attributes[ i + 1 ] != nullptr ) + for (int i = 0; i < numAttributes * 5; i += 5) { - sal_Int32 nAttributeToken = GetTokenWithPrefix( attributes[ i + 1 ], strlen( XML_CAST( attributes[ i + 1 ] )), attributes[ i ], strlen( XML_CAST( attributes[ i ] ))); - if( nAttributeToken != FastToken::DONTKNOW ) - rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ); - else + if( attributes[ i + 1 ] != nullptr ) rEvent.mxAttributes->addUnknown( OUString( XML_CAST( attributes[ i + 1 ] ), strlen( XML_CAST( attributes[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ), OString( XML_CAST( attributes[ i ] )), OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); - } - else - { - sal_Int32 nAttributeToken = GetToken( attributes[ i ], strlen( XML_CAST( attributes[ i ] ))); - if( nAttributeToken != FastToken::DONTKNOW ) - rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ); else rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), - OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); } - } - if( prefix != nullptr ) - rEvent.mnElementToken = GetTokenWithPrefix( prefix, strlen( XML_CAST( prefix )), localName, strlen( XML_CAST( localName ))); - else if( !sNamespace.isEmpty() ) - rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, localName, strlen( XML_CAST( localName ))); - else - rEvent.mnElementToken = GetToken( localName, strlen( XML_CAST( localName ))); + rEvent.mnElementToken = FastToken::DONTKNOW; + } if( rEvent.mnElementToken == FastToken::DONTKNOW ) { diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx index 1c82178d0526..166f740638d6 100644 --- a/sax/source/fastparser/legacyfastparser.cxx +++ b/sax/source/fastparser/legacyfastparser.cxx @@ -132,21 +132,6 @@ private: }; -class CallbackTokenHandler : public cppu::WeakImplHelper< XFastTokenHandler > -{ -public: - virtual sal_Int32 SAL_CALL getTokenFromUTF8( const Sequence& ) - throw (RuntimeException, exception) override - { - return FastToken::DONTKNOW; - } - virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 ) - throw (RuntimeException, exception) override - { - return Sequence(); - } -}; - class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler > { private: @@ -276,7 +261,6 @@ SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new Namespace { m_xParser = FastParser::create( ::comphelper::getProcessComponentContext() ); - m_xParser->setTokenHandler( new CallbackTokenHandler() ); m_xParser->setNamespaceHandler( m_aNamespaceHandler.get() ); } -- cgit