From aff3ce8837e00db6e91d0deb6c4f2ccb0d520c54 Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Wed, 13 Oct 2021 13:17:23 +0200 Subject: Improve JsonWriter's escapement code It should now be conformant to JSON spec; and additionally, it escapes two characters that are valid in JSON, but invalid in JavaScript (as described in [1]). [1] http://web.archive.org/web/20201203234157/http://timelessrepo.com/json-isnt-a-javascript-subset Change-Id: I1081ade89a57fefefde672f2b8fa08e97627fc50 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/123510 Tested-by: Jenkins Reviewed-by: Mike Kaganski --- tools/source/misc/json_writer.cxx | 135 ++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 56 deletions(-) (limited to 'tools/source') diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx index 09f34c25c3c7..7024b580c7fd 100644 --- a/tools/source/misc/json_writer.cxx +++ b/tools/source/misc/json_writer.cxx @@ -119,6 +119,56 @@ void JsonWriter::endStruct() mbFirstFieldInNode = false; } +static char getEscapementChar(char ch) +{ + switch (ch) + { + case '\b': + return 'b'; + case '\t': + return 't'; + case '\n': + return 'n'; + case '\f': + return 'f'; + case '\r': + return 'r'; + default: + return ch; + } +} + +static bool writeEscapedSequence(sal_uInt32 ch, char*& pos) +{ + switch (ch) + { + case '\b': + case '\t': + case '\n': + case '\f': + case '\r': + case '"': + case '/': + case '\\': + *pos++ = '\\'; + *pos++ = getEscapementChar(ch); + return true; + // Special processing of U+2028 and U+2029, which are valid JSON, but invalid JavaScript + // Write them in escaped '\u2028' or '\u2029' form + case 0x2028: + case 0x2029: + *pos++ = '\\'; + *pos++ = 'u'; + *pos++ = '2'; + *pos++ = '0'; + *pos++ = '2'; + *pos++ = ch == 0x2028 ? 
'8' : '9'; + return true; + default: + return false; + } +} + void JsonWriter::writeEscapedOUString(const OUString& rPropVal) { // Convert from UTF-16 to UTF-8 and perform escaping @@ -126,42 +176,9 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal) while (i < rPropVal.getLength()) { sal_uInt32 ch = rPropVal.iterateCodePoints(&i); - if (ch == '\\') - { - *mPos = static_cast<char>(ch); - ++mPos; - *mPos = static_cast<char>(ch); - ++mPos; - } - else if (ch == '"') - { - *mPos = '\\'; - ++mPos; - *mPos = static_cast<char>(ch); - ++mPos; - } - else if (ch == '\n') - { - *mPos = '\\'; - ++mPos; - *mPos = 'n'; - ++mPos; - } - else if (ch == '\r') - { - *mPos = '\\'; - ++mPos; - *mPos = 'r'; - ++mPos; - } - else if (ch == '\f') - { - *mPos = '\\'; - ++mPos; - *mPos = 'f'; - ++mPos; - } - else if (ch <= 0x7F) + if (writeEscapedSequence(ch, mPos)) + continue; + if (ch <= 0x7F) { *mPos = static_cast<char>(ch); ++mPos; @@ -200,9 +217,8 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal) { auto nPropNameLength = strlen(pPropName); // But values can be any UTF-8, - // see rtl_ImplGetFastUTF8ByteLen in sal/rtl/string.cxx for why a factor 3 - // is the worst case - auto nWorstCasePropValLength = rPropVal.getLength() * 3; + // if the string only contains of 0x2028, it will be expanded 6 times (see writeEscapedSequence) + auto nWorstCasePropValLength = rPropVal.getLength() * 6; ensureSpace(nPropNameLength + nWorstCasePropValLength + 8); addCommaBeforeField(); @@ -241,24 +257,31 @@ void JsonWriter::put(const char* pPropName, std::string_view rPropVal) for (size_t i = 0; i < rPropVal.size(); ++i) { char ch = rPropVal[i]; - if (ch == '\\') - { - *mPos = ch; - ++mPos; - *mPos = ch; - ++mPos; - } - else if (ch == '"') + switch (ch) { - *mPos = '\\'; - ++mPos; - *mPos = ch; - ++mPos; - } - else - { - *mPos = ch; - ++mPos; + case '\b': + case '\t': + case '\n': + case '\f': + case '\r': + case '"': + case '/': + case '\\': + writeEscapedSequence(ch, mPos); + break; + case
'\xE2': // Special processing of U+2028 and U+2029 + if (i + 2 < rPropVal.size() && rPropVal[i + 1] == '\x80' + && (rPropVal[i + 2] == '\xA8' || rPropVal[i + 2] == '\xA9')) + { + writeEscapedSequence(rPropVal[i + 2] == '\xA8' ? 0x2028 : 0x2029, mPos); + i += 2; + break; + } + [[fallthrough]]; + default: + *mPos = ch; + ++mPos; + break; } } -- cgit