diff options
author | Tor Lillqvist <tml@collabora.com> | 2021-01-15 03:03:41 +0200 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2021-01-22 11:42:15 +0100 |
commit | 972c619f4f058d86c0cd0ed388bf141b94d9a912 (patch) | |
tree | 5c41d00fa2e95b8ea17318e25b5269da57d8897d /tools/source | |
parent | 2cca78f81e8acacb324a1ff1aa404ead1144549c (diff) |
Make JsonWriter::writeEscapedOUString() handle surrogate pairs properly
It is wrong to iterate over UTF-16 code units one by one. We have
OUString::iterateCodePoints() to iterate over Unicode code points.
The two UTF-16 code units of a surrogate pair (for a non-BMP code
point) must not each be encoded separately into UTF-8 bytes: doing so
produces two three-byte sequences, which is not valid UTF-8. It is the
code point itself that should be encoded (as four UTF-8 bytes).
Change-Id: Ica4341308deb6618c9c2da8dcee8a11ef4e8238d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109318
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Reviewed-by: Tor Lillqvist <tml@collabora.com>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109474
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Diffstat (limited to 'tools/source')
-rw-r--r-- | tools/source/misc/json_writer.cxx | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx
index 1ccee8569480..0b13a63fc038 100644
--- a/tools/source/misc/json_writer.cxx
+++ b/tools/source/misc/json_writer.cxx
@@ -136,9 +136,10 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
     mPos += 4;
 
     // Convert from UTF-16 to UTF-8 and perform escaping
-    for (int i = 0; i < rPropVal.getLength(); ++i)
+    sal_Int32 i = 0;
+    while (i < rPropVal.getLength())
     {
-        sal_Unicode ch = rPropVal[i];
+        sal_uInt32 ch = rPropVal.iterateCodePoints(&i);
         if (ch == '\\')
         {
             *mPos = static_cast<char>(ch);
@@ -165,7 +166,7 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
-        else
+        else if (ch <= 0xFFFF)
         {
             *mPos = 0xE0 | (ch >> 12); /* 1110xxxx */
             ++mPos;
@@ -174,6 +175,17 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
+        else
+        {
+            *mPos = 0xF0 | (ch >> 18); /* 11110xxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 12) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 6) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
+            ++mPos;
+        }
     }
 
     *mPos = '"';