From 0a1d5af2a18d6a062c45d65689fbce619922dcc8 Mon Sep 17 00:00:00 2001
From: Michael Stahl
Date: Wed, 28 Feb 2018 13:49:14 +0100
Subject: tdf#115429 sax: assert if exporting an invalid XML attribute/element

Add a cheap check for this in both SaxWriter and FastSaxSerializer so
we can find such bugs earlier, e.g. with the weekly crashtesting.

Don't do a correct check but a cheap & fast one, let's ignore non-ASCII
characters for now as the only filter with such is UOF and that is
implemented with XSLT, not this sax code.

Change-Id: I4db8f70ffb23684d4cb4211468519edd6c7c465f
Reviewed-on: https://gerrit.libreoffice.org/50507
Tested-by: Jenkins
Reviewed-by: Michael Stahl
---
 sax/source/expatwrap/saxwriter.cxx  | 30 ++++++++++++++++++++++++++++++
 sax/source/tools/fastserializer.cxx | 15 +++++++++++----
 2 files changed, 41 insertions(+), 4 deletions(-)

(limited to 'sax')

diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index a9d8f280706c..b7de667bcf05 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -566,6 +566,33 @@ inline void SaxWriterHelper::startDocument()
         nCurrentPos = writeSequence();
 }
 
+void CheckValidName(OUString const& rName)
+{   // Debug-only sanity check: asserts that rName is non-empty, ASCII-only and has at most one ':'.
+#ifdef NDEBUG
+    (void) rName; // release builds: no check, just silence the unused-parameter warning
+#else
+    assert(!rName.isEmpty());
+    bool hasColon(false);
+    for (sal_Int32 i = 0; i < rName.getLength(); ++i)
+    {
+        auto const c(rName[i]);
+        if (c == ':')
+        {
+            if (hasColon)
+                assert(!"only one colon allowed"); // '!' is required: a bare string literal is always true, so the assert could never fire
+            else
+                hasColon = true;
+        }
+        else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.')
+        {   // note: this will also warn about non-ASCII characters which
+            // are allowed by XML but surely unexpected in LO filters
+            // (OTOH we don't warn about invalid start chars)
+            assert(!"unexpected character in attribute name");
+        }
+    }
+#endif
+}
+
 inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName, const Reference< XAttributeList >& xAttribs)
 {
     FinishStartElement();
@@ -581,6
+608,7 @@ inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rN
         nCurrentPos = writeSequence();
 
     SaxInvalidCharacterError eRet(SAX_NONE);
+    CheckValidName(rName); // assert-based check of the element name; compiled out when NDEBUG is defined
     if (!writeString(rName, false, false))
         eRet = SAX_ERROR;
 
@@ -598,6 +626,7 @@ inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rN
         assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
         DebugAttributes.insert(rAttrName);
 #endif
+        CheckValidName(rAttrName); // same name check, applied to the attribute name before it is written
         if (!writeString(rAttrName, false, false))
             eRet = SAX_ERROR;
 
@@ -658,6 +687,7 @@ inline bool SaxWriterHelper::endElement(const OUString& rName)
     if (nCurrentPos == SEQUENCESIZE)
         nCurrentPos = writeSequence();
 
+    CheckValidName(rName); // the closing tag's name gets the same check as the opening tag's
     bool bRet(writeString( rName, false, false));
 
     mp_Sequence[nCurrentPos] = '>';
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 6257853a64a8..9356f3e081d1 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -270,11 +270,18 @@ namespace sax_fastparser {
     void FastSaxSerializer::writeId( ::sal_Int32 nElement )
     {
         if( HAS_NAMESPACE( nElement ) ) {
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+            auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); // look up the prefix once so it can be checked before writing
+            assert(Namespace.getLength() != 0); // an empty prefix would serialize as ":name" (presumably an unknown token; confirm against the token handler)
+            writeBytes(Namespace);
             writeBytes(sColon, N_CHARS(sColon));
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
-        } else
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
+            auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); // local-name part of the namespaced token
+            assert(Element.getLength() != 0); // an empty local name would serialize as "prefix:"
+            writeBytes(Element);
+        } else {
+            auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement)); // no namespace part: the token is the whole name
+            assert(Element.getLength() != 0); // an empty identifier would produce an empty XML name
+            writeBytes(Element);
+        }
     }
 
 #ifdef DBG_UTIL
-- 
cgit