summary | refs | log | tree | commit | diff
path: root/sax/source/expatwrap/saxwriter.cxx
diff options
context:
space:
mode:
author: Mike Kaganski <mike.kaganski@collabora.com> 2021-12-24 15:25:52 +0300
committer: Mike Kaganski <mike.kaganski@collabora.com> 2021-12-24 21:42:08 +0100
commit: 8b333c76945960fc62a01829666ba234f59a6d94 (patch)
tree: 4393e992348ac00faf04256d5f51f9e3aea2e4e6 /sax/source/expatwrap/saxwriter.cxx
parent: fd4acfaca9fc012313f03f46e927add6feb6a553 (diff)
Use rtl functions instead of own surrogate checking/combining
Change-Id: I3eb05d8f5b0761bc3b672d4c855eb469f8cc1a29
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/127375
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sax/source/expatwrap/saxwriter.cxx')
-rw-r--r-- sax/source/expatwrap/saxwriter.cxx | 40
1 file changed, 23 insertions, 17 deletions
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 37eb58f099db..e19a31211d98 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -447,20 +447,22 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
}
// Deal with other unicode cases
- if (c >= 0xd800 && c < 0xdc00)
+ if (rtl::isHighSurrogate(c))
{
// 1. surrogate: save (until 2. surrogate)
- OSL_ENSURE(nSurrogate == 0, "left-over Unicode surrogate");
- nSurrogate = ((c & 0x03ff) + 0x0040);
+ if (nSurrogate != 0) // left-over lone 1st Unicode surrogate
+ {
+ OSL_FAIL("left-over Unicode surrogate");
+ bRet = false;
+ }
+ nSurrogate = c;
}
- else if (c >= 0xdc00 && c < 0xe000)
+ else if (rtl::isLowSurrogate(c))
{
// 2. surrogate: write as UTF-8
- OSL_ENSURE(nSurrogate != 0, "lone 2nd Unicode surrogate");
-
- nSurrogate = (nSurrogate << 10) | (c & 0x03ff);
- if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000)
+ if (nSurrogate) // can only be 1st surrogate
{
+ nSurrogate = rtl::combineSurrogates(nSurrogate, c);
sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
@@ -479,7 +481,7 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
rPos++;
}
}
- else
+ else // lone 2nd surrogate
{
OSL_FAIL("illegal Unicode character");
bRet = false;
@@ -526,13 +528,18 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
rPos = writeSequence();
// reset left-over surrogate
- if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00))
+ if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
{
- OSL_ENSURE(nSurrogate != 0, "left-over Unicode surrogate");
+ OSL_FAIL("left-over Unicode surrogate");
nSurrogate = 0;
bRet = false;
}
}
+ if (nSurrogate != 0) // trailing lone 1st surrogate
+ {
+ OSL_FAIL("left-over Unicode surrogate");
+ bRet = false;
+ }
return bRet;
}
@@ -951,16 +958,15 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma
}
// Deal with other unicode cases
- if (c >= 0xd800 && c < 0xdc00)
+ if (rtl::isHighSurrogate(c))
{
// save surrogate
- nSurrogate = ((c & 0x03ff) + 0x0040);
+ nSurrogate = c;
}
- else if (c >= 0xdc00 && c < 0xe000)
+ else if (rtl::isLowSurrogate(c))
{
// 2. surrogate: write as UTF-8 (if range is OK
- nSurrogate = (nSurrogate << 10) | (c & 0x03ff);
- if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000)
+ if (nSurrogate)
nOutputLength += 4;
nSurrogate = 0;
}
@@ -975,7 +981,7 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma
}
// surrogate processing
- if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00))
+ if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
nSurrogate = 0;
}