diff options
author | Mohammed Abdul Azeem <azeemmysore@gmail.com> | 2016-08-03 17:27:48 +0530 |
---|---|---|
committer | Noel Grandin <noelgrandin@gmail.com> | 2016-08-10 11:09:12 +0000 |
commit | 18edd88edc0c45d9c3b8f6faa45bab481ec078f5 (patch) | |
tree | a296122ddec37e8edd77b77306401c481d371fd2 /sax | |
parent | e5d24f50b7e527a5991a1d21f40edcb537eeb72d (diff) |
GSoC - Making legacyfastparser use tokens:
This tokenizes some elements, de-tokenizes them while consuming,
and emits the elements through the legacy interface. DummyTokenHandler
exists only to test the correctness of this round trip.
Change-Id: I1ea1e4d806ed4d426215f93b3f6b66a9776f6479
Reviewed-on: https://gerrit.libreoffice.org/27849
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Noel Grandin <noelgrandin@gmail.com>
Diffstat (limited to 'sax')
-rw-r--r-- | sax/qa/cppunit/xmlimport.cxx | 89 | ||||
-rw-r--r-- | sax/source/fastparser/fastparser.cxx | 77 | ||||
-rw-r--r-- | sax/source/fastparser/legacyfastparser.cxx | 94 |
3 files changed, 206 insertions, 54 deletions
diff --git a/sax/qa/cppunit/xmlimport.cxx b/sax/qa/cppunit/xmlimport.cxx index f7fcd739d4c3..c50a6749fef3 100644 --- a/sax/qa/cppunit/xmlimport.cxx +++ b/sax/qa/cppunit/xmlimport.cxx @@ -24,6 +24,7 @@ #include <test/bootstrapfixture.hxx> #include <cppuhelper/weak.hxx> #include <cppuhelper/implbase.hxx> +#include <com/sun/star/beans/Pair.hpp> #include <com/sun/star/xml/sax/XDocumentHandler.hpp> #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp> #include <com/sun/star/xml/sax/XFastAttributeList.hpp> @@ -40,6 +41,7 @@ #include <osl/conditn.hxx> #include <unotools/ucbstreamhelper.hxx> #include <unotools/streamwrap.hxx> +#include <sax/fastattribs.hxx> #include <string> #include <stack> #include <deque> @@ -264,6 +266,73 @@ void SAL_CALL NSDocumentHandler::startElement( const OUString& aName, const Refe CPPUNIT_ASSERT(false); } +class DummyTokenHandler : public cppu::WeakImplHelper< XFastTokenHandler >, + public sax_fastparser::FastTokenHandlerBase +{ +public: + const static OUString tokens[]; + const static OUString namespaceURIs[]; + const static OUString namespacePrefixes[]; + + // XFastTokenHandler + virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) + throw (css::uno::RuntimeException, std::exception) override; + virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier ) + throw (css::uno::RuntimeException, std::exception) override; + //FastTokenHandlerBase + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override; +}; + +const OUString DummyTokenHandler::tokens[] = { "Signature", "CanonicalizationMethod", "Algorithm", "Type", + "DigestMethod", "Reference", "document", + "spacing", "Player", "Height" }; + +const OUString DummyTokenHandler::namespaceURIs[] = { "http://www.w3.org/2000/09/xmldsig#", + "http://schemas.openxmlformats.org/wordprocessingml/2006/main/", + "xyzsports.com/players/football/" }; + +const OUString DummyTokenHandler::namespacePrefixes[] = { "", 
"w", "Player" }; + +Sequence< sal_Int8 > DummyTokenHandler::getUTF8Identifier( sal_Int32 nToken ) + throw (uno::RuntimeException, std::exception) +{ + OString aUtf8Token; + if ( ( ( nToken & 0xffff0000 ) != 0 ) ) //namespace + { + sal_uInt32 nNamespaceToken = ( nToken >> 16 ) - 1; + if ( nNamespaceToken < sizeof( namespacePrefixes ) / sizeof( OUString ) ) + aUtf8Token = OUStringToOString( namespacePrefixes[ nNamespaceToken ], RTL_TEXTENCODING_UTF8 ); + } + else //element or attribute + { + sal_uInt32 nElementToken = nToken & 0xffff; + if ( nElementToken < sizeof( tokens ) / sizeof( OUString ) ) + aUtf8Token = OUStringToOString( tokens[ nElementToken ], RTL_TEXTENCODING_UTF8 ); + } + Sequence< sal_Int8 > aSeq = Sequence< sal_Int8 >( reinterpret_cast< const sal_Int8* >( + aUtf8Token.getStr() ), aUtf8Token.getLength() ); + return aSeq; +} + +sal_Int32 DummyTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier ) + throw (uno::RuntimeException, std::exception) +{ + return getTokenDirect( reinterpret_cast< const char* >( + rIdentifier.getConstArray() ), rIdentifier.getLength() ); +} + +sal_Int32 DummyTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const +{ + OUString sToken( pToken, nLength, RTL_TEXTENCODING_UTF8 ); + for( sal_uInt16 i = 0; i < sizeof(tokens)/sizeof(OUString); i++ ) + { + if ( tokens[i] == sToken ) + return (sal_Int32)i; + } + return FastToken::DONTKNOW; +} + + class XMLImportTest : public test::BootstrapFixture { private: @@ -298,6 +367,26 @@ void XMLImportTest::setUp() m_xLegacyFastParser.set( xContext->getServiceManager()->createInstanceWithContext ( "com.sun.star.xml.sax.LegacyFastParser", xContext ), UNO_QUERY ); m_xLegacyFastParser->setDocumentHandler( m_xDocumentHandler.get() ); + + Reference< XFastTokenHandler > xTokenHandler; + xTokenHandler.set( new DummyTokenHandler() ); + uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser, + uno::UNO_QUERY_THROW); + uno::Sequence<uno::Any> 
args(1); + args[0] <<= xTokenHandler; + xInit->initialize( args ); + + sal_Int32 nNamespaceCount = sizeof( DummyTokenHandler::namespaceURIs ) / sizeof( OUString ); + uno::Sequence<uno::Any> namespaceArgs( nNamespaceCount + 1 ); + namespaceArgs[0] <<= OUString( "registerNamespaces" ); + for (sal_Int32 i = 1; i <= nNamespaceCount; i++ ) + { + css::beans::Pair <OUString, sal_Int32> rPair; + rPair = css::beans::Pair<OUString, sal_Int32>( DummyTokenHandler::namespaceURIs[i - 1], i << 16 ); + namespaceArgs[i] <<= rPair; + } + xInit->initialize( namespaceArgs ); + m_sDirPath = m_directories.getPathFromSrc( "/sax/qa/data/" ); } diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 8fa922379f64..24811fd1bc97 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -434,6 +434,17 @@ void Entity::startElement( Event *pEvent ) { Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() ); Reference< XFastContextHandler > xContext; + + if ( mxNamespaceHandler.is() ) + { + Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes(); + sal_uInt16 len = NSDeclAttribs.getLength(); + for (sal_uInt16 i = 0; i < len; i++) + { + mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value ); + } + } + if( nElementToken == FastToken::DONTKNOW ) { if( pParentContext ) @@ -441,16 +452,6 @@ void Entity::startElement( Event *pEvent ) else if( mxDocumentHandler.is() ) xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); - if ( mxNamespaceHandler.is() ) - { - Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes(); - sal_uInt16 len = NSDeclAttribs.getLength(); - for (sal_uInt16 i = 0; i < len; i++) - { - mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value ); - } - } - if( xContext.is() ) { xContext->startUnknownElement( aNamespace, aElementName, 
xAttr ); @@ -1103,29 +1104,33 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm try { - if ( rEntity.mxTokenHandler.is() ) - { - /* #158414# Each element may define new namespaces, also for attribues. - First, process all namespaces, second, process the attributes after namespaces - have been initialized. */ + /* #158414# Each element may define new namespaces, also for attribues. + First, process all namespaces, second, process the attributes after namespaces + have been initialized. */ - // #158414# first: get namespaces - for (int i = 0; i < numNamespaces * 2; i += 2) + // #158414# first: get namespaces + for (int i = 0; i < numNamespaces * 2; i += 2) + { + // namespaces[] is (prefix/URI) + if( namespaces[ i ] != nullptr ) { - // namespaces[] is (prefix/URI) - if( namespaces[ i ] != nullptr ) - { - DefineNamespace( OString( XML_CAST( namespaces[ i ] )), - OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 )); - } - else - { - // default namespace - sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); - nNamespaceToken = GetNamespaceToken( sNamespace ); - } + DefineNamespace( OString( XML_CAST( namespaces[ i ] )), + OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 )); + if( rEntity.mxNamespaceHandler.is() ) + rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); } + else + { + // default namespace + sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); + nNamespaceToken = GetNamespaceToken( sNamespace ); + if( rEntity.mxNamespaceHandler.is() ) + rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + } + } + if ( rEntity.mxTokenHandler.is() ) + { // #158414# second: fill 
attribute list with other attributes for (int i = 0; i < numAttributes * 5; i += 5) { @@ -1159,20 +1164,6 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm } else { - for (int i = 0; i < numNamespaces * 2; i += 2) - { - if( rEntity.mxNamespaceHandler.is() ) - { - if( namespaces[ i ] != nullptr ) - rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); - else - { - sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); - rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); - } - } - } - for (int i = 0; i < numAttributes * 5; i += 5) { if( attributes[ i + 1 ] != nullptr ) diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx index 166f740638d6..dbcc03ff97b4 100644 --- a/sax/source/fastparser/legacyfastparser.cxx +++ b/sax/source/fastparser/legacyfastparser.cxx @@ -22,6 +22,7 @@ #include <com/sun/star/xml/sax/FastParser.hpp> #include <com/sun/star/xml/sax/FastToken.hpp> #include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/beans/Pair.hpp> #include <comphelper/attributelist.hxx> #include <cppuhelper/supportsservice.hxx> #include <comphelper/processfactory.hxx> @@ -129,16 +130,24 @@ public: private: Reference< XFastParser > m_xParser; Reference< XDocumentHandler > m_xDocumentHandler; + Reference< XFastTokenHandler > m_xTokenHandler; }; + class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler > { private: Reference< XDocumentHandler > m_xDocumentHandler; + Reference< XFastTokenHandler > m_xTokenHandler; rtl::Reference< NamespaceHandler > m_aNamespaceHandler; + const OUString getNamespacePrefixFromToken( sal_Int32 nToken ); + const OUString getNameFromToken( sal_Int32 nToken ); + public: - CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, 
rtl::Reference< NamespaceHandler > const & rNamespaceHandler ); + CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, + rtl::Reference< NamespaceHandler > const & rNamespaceHandler, + Reference< XFastTokenHandler > const & xTokenHandler); // XFastDocumentHandler virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException, exception) override; @@ -156,10 +165,32 @@ public: }; -CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, rtl::Reference< NamespaceHandler > const & rNamespaceHandler ) +const OUString CallbackDocumentHandler::getNamespacePrefixFromToken( sal_Int32 nToken ) +{ + if ( ( nToken & 0xffff0000 ) != 0 ) + { + Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff0000 ); + return OUString( reinterpret_cast< const char* >( + aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 ); + } + else + return OUString(); +} + +const OUString CallbackDocumentHandler::getNameFromToken( sal_Int32 nToken ) +{ + Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff ); + return OUString( reinterpret_cast< const char* >( + aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 ); +} + +CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, + rtl::Reference< NamespaceHandler > const & rNamespaceHandler, + Reference< XFastTokenHandler > const & xTokenHandler) { m_xDocumentHandler.set( xDocumentHandler ); m_aNamespaceHandler.set( rNamespaceHandler.get() ); + m_xTokenHandler.set( xTokenHandler ); } void SAL_CALL CallbackDocumentHandler::startDocument() @@ -183,9 +214,11 @@ void SAL_CALL CallbackDocumentHandler::setDocumentLocator( const Reference< XLoc m_xDocumentHandler->setDocumentLocator( xLocator ); } -void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32/* nElement */, const Reference< XFastAttributeList >&/* Attribs */ ) +void SAL_CALL 
CallbackDocumentHandler::startFastElement( sal_Int32 nElement , const Reference< XFastAttributeList >& Attribs ) throw (SAXException, RuntimeException, exception) { + startUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ), + CallbackDocumentHandler::getNameFromToken( nElement ), Attribs ); } void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) @@ -201,15 +234,29 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name else elementName = Name; + Sequence< xml::FastAttribute > fastAttribs = Attribs->getFastAttributes(); + sal_uInt16 len = fastAttribs.getLength(); + for (sal_uInt16 i = 0; i < len; i++) + { + OUString& rAttrValue = fastAttribs[i].Value; + sal_Int32 nToken = fastAttribs[i].Token; + const OUString& rAttrNamespacePrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nToken ); + OUString sAttrName = CallbackDocumentHandler::getNameFromToken( nToken ); + if ( !rAttrNamespacePrefix.isEmpty() ) + sAttrName = rAttrNamespacePrefix + ":" + sAttrName; + + rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue ); + } + Sequence< xml::Attribute > unknownAttribs = Attribs->getUnknownAttributes(); - sal_uInt16 len = unknownAttribs.getLength(); + len = unknownAttribs.getLength(); for (sal_uInt16 i = 0; i < len; i++) { OUString& rAttrValue = unknownAttribs[i].Value; OUString sAttrName = unknownAttribs[i].Name; - OUString& rAttrNamespaceURL = unknownAttribs[i].NamespaceURL; - if ( !rAttrNamespaceURL.isEmpty() ) - sAttrName = rAttrNamespaceURL + ":" + sAttrName; + OUString& rAttrNamespacePrefix = unknownAttribs[i].NamespaceURL; + if ( !rAttrNamespacePrefix.isEmpty() ) + sAttrName = rAttrNamespacePrefix + ":" + sAttrName; rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue ); } @@ -217,9 +264,11 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name } } -void SAL_CALL 
CallbackDocumentHandler::endFastElement( sal_Int32/* nElement */) +void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32 nElement ) throw (SAXException, RuntimeException, exception) { + endUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ), + CallbackDocumentHandler::getNameFromToken( nElement ) ); } @@ -267,9 +316,30 @@ SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new Namespace void SAL_CALL SaxLegacyFastParser::initialize(Sequence< Any > const& rArguments ) throw (RuntimeException, Exception, exception) { - uno::Reference<lang::XInitialization> const xInit(m_xParser, + if (rArguments.getLength()) + { + Reference< XFastTokenHandler > xTokenHandler; + OUString str; + if ( ( rArguments[0] >>= xTokenHandler ) && xTokenHandler.is() ) + { + m_xTokenHandler.set( xTokenHandler ); + } + else if ( ( rArguments[0] >>= str ) && "registerNamespaces" == str ) + { + css::beans::Pair< OUString, sal_Int32 > rPair; + for (sal_Int32 i = 1; i < rArguments.getLength(); i++ ) + { + rArguments[i] >>= rPair; + m_xParser->registerNamespace( rPair.First, rPair.Second ); + } + } + else + { + uno::Reference<lang::XInitialization> const xInit(m_xParser, uno::UNO_QUERY_THROW); - xInit->initialize( rArguments ); + xInit->initialize( rArguments ); + } + } } void SaxLegacyFastParser::parseStream( const InputSource& structSource ) @@ -277,7 +347,9 @@ void SaxLegacyFastParser::parseStream( const InputSource& structSource ) IOException, RuntimeException, exception) { - m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(), m_aNamespaceHandler.get() ) ); + m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(), + m_aNamespaceHandler.get(), m_xTokenHandler.get() ) ); + m_xParser->setTokenHandler( m_xTokenHandler ); m_xParser->parseStream( structSource ); } |