summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author Tor Lillqvist <tml@collabora.com> 2021-01-15 03:03:41 +0200
committer Tor Lillqvist <tml@collabora.com> 2021-01-15 08:54:12 +0100
commit 5aee16cf09f9d4ba50feaf804b2a7a649af276bc (patch)
tree 9fc4c9e3b4a9f1f201b00fbe142de93a2b24b0bd /tools
parent 84a6cfd9bb6532602ca811b0d5daf016bb9b4578 (diff)
Make JsonWriter::writeEscapedOUString() handle surrogate pairs properly
It is wrong to iterate over UTF-16 code units one by one. We have OUString::iterateCodePoints() to iterate over Unicode code points. The two UTF-16 code units of a surrogate pair (for a non-BMP code point) should not be encoded separately to UTF-8 bytes. It is the code point that should be encoded (to four bytes).

Change-Id: Ica4341308deb6618c9c2da8dcee8a11ef4e8238d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109318
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Reviewed-by: Tor Lillqvist <tml@collabora.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/source/misc/json_writer.cxx | 18
1 files changed, 15 insertions, 3 deletions
diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx
index a50e2ada967f..c326201eb9e5 100644
--- a/tools/source/misc/json_writer.cxx
+++ b/tools/source/misc/json_writer.cxx
@@ -123,9 +123,10 @@ void JsonWriter::endStruct()
void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
{
// Convert from UTF-16 to UTF-8 and perform escaping
- for (int i = 0; i < rPropVal.getLength(); ++i)
+ sal_Int32 i = 0;
+ while (i < rPropVal.getLength())
{
- sal_Unicode ch = rPropVal[i];
+ sal_uInt32 ch = rPropVal.iterateCodePoints(&i);
if (ch == '\\')
{
*mPos = static_cast<char>(ch);
@@ -173,7 +174,7 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
*mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
++mPos;
}
- else
+ else if (ch <= 0xFFFF)
{
*mPos = 0xE0 | (ch >> 12); /* 1110xxxx */
++mPos;
@@ -182,6 +183,17 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
*mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
++mPos;
}
+ else
+ {
+ *mPos = 0xF0 | (ch >> 18); /* 11110xxx */
+ ++mPos;
+ *mPos = 0x80 | ((ch >> 12) & 0x3F); /* 10xxxxxx */
+ ++mPos;
+ *mPos = 0x80 | ((ch >> 6) & 0x3F); /* 10xxxxxx */
+ ++mPos;
+ *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
+ ++mPos;
+ }
}
}