From 167fb3f7800e342b2009b98579a1d42d528b9b5a Mon Sep 17 00:00:00 2001
From: Caolán McNamara
Date: Thu, 15 Mar 2018 16:17:51 +0000
Subject: crashtesting: asserts on legal xml names on export

e.g. ooo82358-1.odt which has Chinese characters used in tags inside a parent xforms:instance tag

Change-Id: If2edf9cc13e5a4cc969f5a46618a6534c52f2877
Reviewed-on: https://gerrit.libreoffice.org/51352
Reviewed-by: Michael Stahl
Tested-by: Michael Stahl
Tested-by: Jenkins
---
 sax/source/expatwrap/saxwriter.cxx | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index b7de667bcf05..c9d0a9b48cce 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -566,6 +566,13 @@ inline void SaxWriterHelper::startDocument()
         nCurrentPos = writeSequence();
 }
 
+#ifndef NDEBUG
+bool inrange(sal_Unicode c, sal_Unicode start, sal_Unicode end)
+{
+    return c >= start && c <= end;
+}
+#endif
+
 void CheckValidName(OUString const& rName)
 {
 #ifdef NDEBUG
@@ -583,10 +590,17 @@ void CheckValidName(OUString const& rName)
             else
                 hasColon = true;
         }
-        else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.')
-        {   // note: this will also warn about non-ASCII characters which
-            // are allowed by XML but surely unexpected in LO filters
-            // (OTOH we don't warn about invalid start chars)
+        else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.' &&
+                 !inrange(c, 0x00C0, 0x00D6) && !inrange(c, 0x00D8, 0x00F6) &&
+                 !inrange(c, 0x00F8, 0x02FF) && !inrange(c, 0x0370, 0x037D) &&
+                 !inrange(c, 0x037F, 0x1FFF) && !inrange(c, 0x200C, 0x200D) &&
+                 !inrange(c, 0x2070, 0x218F) && !inrange(c, 0x2C00, 0x2FEF) &&
+                 !inrange(c, 0x3001, 0xD7FF) && !inrange(c, 0xF900, 0xFDCF) &&
+                 !inrange(c, 0xFDF0, 0xFFFD) && c != 0x00B7 &&
+                 !inrange(c, 0x0300, 0x036F) && !inrange(c, 0x203F, 0x2040))
+        {
+            // https://www.w3.org/TR/xml11/#NT-NameChar
+            // (currently we don't warn about invalid start chars)
             assert(!"unexpected character in attribute name");
         }
     }
-- 
cgit