From 3a37d8320c0b8a7bced8e67f7ed2581d4013e38b Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Thu, 12 Sep 2024 14:34:26 +0500 Subject: Optimize TokenMap and AttributeList in oox and xo Shaves lots of string allocations, and uses optimized code paths Change-Id: I8e33e2aecdc7e0d2f2c31b774daa36304b3973ac Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173179 Tested-by: Jenkins Reviewed-by: Mike Kaganski --- include/oox/core/fasttokenhandler.hxx | 10 +---- include/oox/helper/attributelist.hxx | 15 ++++--- include/oox/token/tokenmap.hxx | 75 ++++++++--------------------------- 3 files changed, 28 insertions(+), 72 deletions(-) (limited to 'include/oox') diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx index 396474b988a1..d47c81dd0130 100644 --- a/include/oox/core/fasttokenhandler.hxx +++ b/include/oox/core/fasttokenhandler.hxx @@ -29,8 +29,6 @@ #include #include -namespace oox { class TokenMap; } - namespace oox::core { @@ -41,8 +39,7 @@ class OOX_DLLPUBLIC FastTokenHandler final : public cppu::ImplInheritanceHelper< sax_fastparser::FastTokenHandlerBase, css::lang::XServiceInfo > { public: - explicit FastTokenHandler(); - virtual ~FastTokenHandler() override; + explicit FastTokenHandler() = default; // XServiceInfo virtual OUString SAL_CALL getImplementationName() override; @@ -54,10 +51,7 @@ public: virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier ) override; // Much faster direct C++ shortcut to the method that matters - virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override; - -private: - const TokenMap& mrTokenMap; ///< Reference to global token map singleton. 
+ virtual sal_Int32 getTokenDirect(std::string_view token) const override; }; diff --git a/include/oox/helper/attributelist.hxx b/include/oox/helper/attributelist.hxx index 25f2ebe4f823..7e3c773a5781 100644 --- a/include/oox/helper/attributelist.hxx +++ b/include/oox/helper/attributelist.hxx @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -83,10 +84,10 @@ class OOX_DLLPUBLIC AttributeList public: explicit AttributeList( const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ); + ~AttributeList(); /** Returns the wrapped com.sun.star.xml.sax.XFastAttributeList object. */ - const css::uno::Reference< css::xml::sax::XFastAttributeList >& - getFastAttributeList() const { return mxAttribs; } + css::uno::Reference getFastAttributeList() const; /** Returns true, if the specified attribute is present. */ bool hasAttribute( sal_Int32 nAttrToken ) const; @@ -180,10 +181,12 @@ public: std::vector getTokenList(sal_Int32 nAttrToken) const; private: - css::uno::Reference< css::xml::sax::XFastAttributeList > - mxAttribs; - mutable sax_fastparser::FastAttributeList *mpAttribList; - sax_fastparser::FastAttributeList *getAttribList() const; + AttributeList(const AttributeList&) = delete; + AttributeList(AttributeList&&) = delete; + void operator=(const AttributeList&) = delete; + void operator=(AttributeList&&) = delete; + + rtl::Reference mxAttribs; }; diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx index 4358822c360f..3c0c50d5a013 100644 --- a/include/oox/token/tokenmap.hxx +++ b/include/oox/token/tokenmap.hxx @@ -35,70 +35,29 @@ namespace oox { -class TokenMap +namespace TokenMap { -public: - explicit TokenMap(); - ~TokenMap(); +/** Returns the token identifier for a UTF8 string passed in pToken */ +sal_Int32 getTokenFromUtf8(std::string_view token); - /** Returns the token identifier for the passed Unicode token name. 
*/ - static sal_Int32 getTokenFromUnicode( std::u16string_view rUnicodeName ); - - /** Returns the UTF8 name of the passed token identifier as byte sequence. */ - css::uno::Sequence< sal_Int8 > const & - getUtf8TokenName( sal_Int32 nToken ) const - { - SAL_WARN_IF(nToken < 0 || nToken >= XML_TOKEN_COUNT, "oox", "Wrong nToken parameter"); - if (0 <= nToken && nToken < XML_TOKEN_COUNT) - return maTokenNames[ nToken ]; - return EMPTY_BYTE_SEQ; - } - - /** Returns the token identifier for the passed UTF8 token name. */ - sal_Int32 getTokenFromUtf8( - const css::uno::Sequence< sal_Int8 >& rUtf8Name ) const - { - return getTokenFromUTF8( reinterpret_cast< const char * >( - rUtf8Name.getConstArray() ), - rUtf8Name.getLength() ); - } - - /** Returns the token identifier for a UTF8 string passed in pToken */ - sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const - { - // 50% of OOXML tokens are primarily 1 lower-case character, a-z - if( nLength == 1) - { - char c = pToken[0]; - if (c >= 'a' && c <= 'z') - return mnAlphaTokens[ c - 'a' ]; - } - return getTokenPerfectHash( pToken, nLength ); - } - - /** Returns the name of the passed token identifier as OUString. */ - OUString getUnicodeTokenName(sal_Int32 nToken) const - { - SAL_WARN_IF(nToken < 0 || nToken >= XML_TOKEN_COUNT, "oox", "Wrong nToken parameter"); - OUString const ret((0 <= nToken && nToken < XML_TOKEN_COUNT) - ? rtl::OUString(reinterpret_cast<const char*>(maTokenNames[nToken].getConstArray()), - maTokenNames[nToken].getLength(), RTL_TEXTENCODING_UTF8) - : OUString()); - return ret; - } +/** Returns the token identifier for the passed Unicode token name. 
*/ +inline sal_Int32 getTokenFromUnicode(std::u16string_view rUnicodeName) +{ + return getTokenFromUtf8(OUStringToOString(rUnicodeName, RTL_TEXTENCODING_UTF8)); +} -private: - static sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ); - static const css::uno::Sequence< sal_Int8 > EMPTY_BYTE_SEQ; +/** Returns the UTF8 name of the passed token identifier as byte sequence. */ +css::uno::Sequence<sal_Int8> const& getUtf8TokenName(sal_Int32 nToken); - std::vector< css::uno::Sequence< sal_Int8 > > - maTokenNames; - sal_Int32 mnAlphaTokens[26]; +/** Returns the name of the passed token identifier as OUString. */ +inline OUString getUnicodeTokenName(sal_Int32 nToken) +{ + auto name = getUtf8TokenName(nToken); + return OUString(reinterpret_cast<const char*>(name.getConstArray()), name.getLength(), + RTL_TEXTENCODING_UTF8); +} }; - -TokenMap& StaticTokenMap(); - } // namespace oox #endif -- cgit