summary refs log tree commit diff
path: root/sal
diff options
context:
space:
mode:
author Mike Kaganski <mike.kaganski@collabora.com> 2021-12-24 15:25:52 +0300
committer Mike Kaganski <mike.kaganski@collabora.com> 2021-12-24 21:42:08 +0100
commit 8b333c76945960fc62a01829666ba234f59a6d94 (patch)
tree 4393e992348ac00faf04256d5f51f9e3aea2e4e6 /sal
parent fd4acfaca9fc012313f03f46e927add6feb6a553 (diff)
Use rtl functions instead of own surrogate checking/combining
Change-Id: I3eb05d8f5b0761bc3b672d4c855eb469f8cc1a29
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/127375
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sal')
-rw-r--r-- sal/rtl/uri.cxx 31
-rw-r--r-- sal/rtl/ustrbuf.cxx 12
-rw-r--r-- sal/textenc/convertbig5hkscs.cxx 14
-rw-r--r-- sal/textenc/converteuctw.cxx 10
-rw-r--r-- sal/textenc/convertgb18030.cxx 13
-rw-r--r-- sal/textenc/convertisciidevangari.cxx 8
-rw-r--r-- sal/textenc/convertiso2022cn.cxx 10
-rw-r--r-- sal/textenc/convertiso2022jp.cxx 8
-rw-r--r-- sal/textenc/convertiso2022kr.cxx 8
-rw-r--r-- sal/textenc/convertsinglebytetobmpunicode.cxx 8
-rw-r--r-- sal/textenc/handleundefinedunicodetotextchar.cxx 4
-rw-r--r-- sal/textenc/tcvtutf8.cxx 13
-rw-r--r-- sal/textenc/unichars.hxx 24
13 files changed, 48 insertions, 115 deletions
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index 441a6c69f1e8..0c5479563405 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -219,21 +219,7 @@ namespace {
void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
{
- assert(rtl::isUnicodeCodePoint(nUtf32));
- if (nUtf32 <= 0xFFFF)
- {
- writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
- }
- else
- {
- nUtf32 -= 0x10000;
- writeUnicode(
- pBuffer, pCapacity,
- static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
- writeUnicode(
- pBuffer, pCapacity,
- static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
- }
+ rtl_uStringbuffer_insertUtf32(pBuffer, pCapacity, (*pBuffer)->length, nUtf32);
}
void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
@@ -284,20 +270,7 @@ bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
rtl_UnicodeToTextConverter aConverter
= rtl_createUnicodeToTextConverter(eCharset);
sal_Unicode aSrc[2];
- sal_Size nSrcSize;
- if (nUtf32 <= 0xFFFF)
- {
- aSrc[0] = static_cast< sal_Unicode >(nUtf32);
- nSrcSize = 1;
- }
- else
- {
- aSrc[0] = static_cast< sal_Unicode >(
- ((nUtf32 - 0x10000) >> 10) | 0xD800);
- aSrc[1] = static_cast< sal_Unicode >(
- ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
- nSrcSize = 2;
- }
+ sal_Size nSrcSize = rtl::splitSurrogates(nUtf32, aSrc);
char aDst[32]; // FIXME random value
sal_uInt32 nInfo;
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index 53e178e5881a..7c5b187edbcb 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -174,17 +174,7 @@ void rtl_uStringbuffer_insertUtf32(
SAL_THROW_EXTERN_C()
{
sal_Unicode buf[2];
- sal_Int32 len;
- OSL_ASSERT(rtl::isUnicodeScalarValue(c));
- if (c <= 0xFFFF) {
- buf[0] = static_cast<sal_Unicode>(c);
- len = 1;
- } else {
- c -= 0x10000;
- buf[0] = static_cast<sal_Unicode>((c >> 10) | 0xD800);
- buf[1] = static_cast<sal_Unicode>((c & 0x3FF) | 0xDC00);
- len = 2;
- }
+ sal_Int32 len = rtl::splitSurrogates(c, buf);
rtl_uStringbuffer_insert(pThis, capacity, offset, buf, len);
}
diff --git a/sal/textenc/convertbig5hkscs.cxx b/sal/textenc/convertbig5hkscs.cxx
index 77484666982e..bf3a2e2cf59d 100644
--- a/sal/textenc/convertbig5hkscs.cxx
+++ b/sal/textenc/convertbig5hkscs.cxx
@@ -131,7 +131,7 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
if (nUnicode == 0)
nUnicode = 0xFFFF;
- assert(!ImplIsHighSurrogate(nUnicode));
+ assert(!rtl::isHighSurrogate(nUnicode));
}
}
if (nUnicode == 0xFFFF)
@@ -192,11 +192,11 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
}
++p;
}
- assert(!ImplIsHighSurrogate(nUnicode));
+ assert(!rtl::isHighSurrogate(nUnicode));
}
if (nUnicode == 0xFFFF)
goto bad_input;
- if (ImplIsHighSurrogate(nUnicode))
+ if (rtl::isHighSurrogate(nUnicode))
if (pDestBufEnd - pDestBufPtr >= 2)
{
nOffset += nLast - nFirst + 1;
@@ -329,19 +329,19 @@ sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/converteuctw.cxx b/sal/textenc/converteuctw.cxx
index edb3c07fa934..8684b643c66e 100644
--- a/sal/textenc/converteuctw.cxx
+++ b/sal/textenc/converteuctw.cxx
@@ -206,7 +206,7 @@ sal_Size ImplConvertEucTwToUnicode(void const * pData,
= pCns116431992Data[nOffset + (nChar - nFirst)];
if (nUnicode == 0xFFFF)
goto bad_input;
- else if (ImplIsHighSurrogate(nUnicode))
+ else if (rtl::isHighSurrogate(nUnicode))
if (pDestBufEnd - pDestBufPtr >= 2)
{
nOffset += nLast - nFirst + 1;
@@ -340,19 +340,19 @@ sal_Size ImplConvertUnicodeToEucTw(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/convertgb18030.cxx b/sal/textenc/convertgb18030.cxx
index 88f5a999d87d..d7963762fa2b 100644
--- a/sal/textenc/convertgb18030.cxx
+++ b/sal/textenc/convertgb18030.cxx
@@ -172,10 +172,7 @@ sal_Size ImplConvertGb18030ToUnicode(void const * pData,
if (pDestBufEnd - pDestBufPtr >= 2)
{
nCode -= 189000 - 0x10000;
- *pDestBufPtr++
- = static_cast<sal_Unicode>(ImplGetHighSurrogate(nCode));
- *pDestBufPtr++
- = static_cast<sal_Unicode>(ImplGetLowSurrogate(nCode));
+ pDestBufPtr += rtl::splitSurrogates(nCode, pDestBufPtr);
startOfCurrentChar = nConverted + 1;
}
else
@@ -330,19 +327,19 @@ sal_Size ImplConvertUnicodeToGb18030(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/convertisciidevangari.cxx b/sal/textenc/convertisciidevangari.cxx
index 759328565690..75fcadcf7fe4 100644
--- a/sal/textenc/convertisciidevangari.cxx
+++ b/sal/textenc/convertisciidevangari.cxx
@@ -266,20 +266,20 @@ sal_Size UnicodeToIsciiDevanagari::convert(sal_Unicode const* pSrcBuf, sal_Size
char cSpecialChar = 0;
if (cHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(c))
+ if (rtl::isHighSurrogate(c))
{
cHighSurrogate = static_cast< sal_Unicode >(c);
continue;
}
- else if (ImplIsLowSurrogate(c))
+ else if (rtl::isLowSurrogate(c))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(c))
+ else if (rtl::isLowSurrogate(c))
{
- c = ImplCombineSurrogates(cHighSurrogate, c);
+ c = rtl::combineSurrogates(cHighSurrogate, c);
}
else
{
diff --git a/sal/textenc/convertiso2022cn.cxx b/sal/textenc/convertiso2022cn.cxx
index 9e89c27486db..60c2b57436ff 100644
--- a/sal/textenc/convertiso2022cn.cxx
+++ b/sal/textenc/convertiso2022cn.cxx
@@ -329,7 +329,7 @@ sal_Size ImplConvertIso2022CnToUnicode(void const * pData,
= pCns116431992Data[nOffset + (nChar - nFirst)];
if (nUnicode == 0xFFFF)
goto bad_input;
- else if (ImplIsHighSurrogate(nUnicode))
+ else if (rtl::isHighSurrogate(nUnicode))
if (pDestBufEnd - pDestBufPtr >= 2)
{
nOffset += nLast - nFirst + 1;
@@ -556,19 +556,19 @@ sal_Size ImplConvertUnicodeToIso2022Cn(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/convertiso2022jp.cxx b/sal/textenc/convertiso2022jp.cxx
index 4024653f04e5..935bc8515d77 100644
--- a/sal/textenc/convertiso2022jp.cxx
+++ b/sal/textenc/convertiso2022jp.cxx
@@ -375,19 +375,19 @@ sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/convertiso2022kr.cxx b/sal/textenc/convertiso2022kr.cxx
index 5c7971ba5af6..5aea5c66c877 100644
--- a/sal/textenc/convertiso2022kr.cxx
+++ b/sal/textenc/convertiso2022kr.cxx
@@ -353,19 +353,19 @@ sal_Size ImplConvertUnicodeToIso2022Kr(void const * pData,
sal_uInt32 nChar = *pSrcBuf++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar))
+ if (rtl::isHighSurrogate(nChar))
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar))
+ else if (rtl::isLowSurrogate(nChar))
{
bUndefined = false;
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar))
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar))
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
{
bUndefined = false;
diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx
index b948ba0fbc5c..43c2b9cf39ce 100644
--- a/sal/textenc/convertsinglebytetobmpunicode.cxx
+++ b/sal/textenc/convertsinglebytetobmpunicode.cxx
@@ -111,17 +111,17 @@ sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
bool undefined = true;
sal_uInt32 c = *srcBuf++;
if (highSurrogate == 0) {
- if (ImplIsHighSurrogate(c)) {
+ if (rtl::isHighSurrogate(c)) {
highSurrogate = static_cast< sal_Unicode >(c);
continue;
}
- else if (ImplIsLowSurrogate(c))
+ else if (rtl::isLowSurrogate(c))
{
undefined = false;
goto bad_input;
}
- } else if (ImplIsLowSurrogate(c)) {
- c = ImplCombineSurrogates(highSurrogate, c);
+ } else if (rtl::isLowSurrogate(c)) {
+ c = rtl::combineSurrogates(highSurrogate, c);
} else {
undefined = false;
goto bad_input;
diff --git a/sal/textenc/handleundefinedunicodetotextchar.cxx b/sal/textenc/handleundefinedunicodetotextchar.cxx
index 320562495e97..76aed03e1881 100644
--- a/sal/textenc/handleundefinedunicodetotextchar.cxx
+++ b/sal/textenc/handleundefinedunicodetotextchar.cxx
@@ -106,7 +106,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar(
/* Surrogates Characters should result in */
/* one replacement character */
- if (ImplIsHighSurrogate(c))
+ if (rtl::isHighSurrogate(c))
{
if ( ((*ppSrcBuf) + 1) == pEndSrcBuf )
{
@@ -115,7 +115,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar(
}
c = *((*ppSrcBuf)+1);
- if (ImplIsLowSurrogate(c))
+ if (rtl::isLowSurrogate(c))
(*ppSrcBuf)++;
else
{
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index ca29156c418f..f210b654d57f 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -199,10 +199,7 @@ sal_Size ImplConvertUtf8ToUnicode(
else
goto no_output;
else if (pDestBufEnd - pDestBufPtr >= 2)
- {
- *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetHighSurrogate(nUtf32));
- *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetLowSurrogate(nUtf32));
- }
+ pDestBufPtr += rtl::splitSurrogates(nUtf32, pDestBufPtr);
else
goto no_output;
}
@@ -349,18 +346,18 @@ sal_Size ImplConvertUnicodeToUtf8(
sal_uInt32 nChar = *pSrcBufPtr++;
if (nHighSurrogate == 0)
{
- if (ImplIsHighSurrogate(nChar) && !bJavaUtf8)
+ if (rtl::isHighSurrogate(nChar) && !bJavaUtf8)
{
nHighSurrogate = static_cast<sal_Unicode>(nChar);
continue;
}
- else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
+ else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8)
{
goto bad_input;
}
}
- else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
- nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+ else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8)
+ nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
else
goto bad_input;
diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx
index 8ca1021da005..e627d3c95c3e 100644
--- a/sal/textenc/unichars.hxx
+++ b/sal/textenc/unichars.hxx
@@ -31,34 +31,10 @@
bool ImplIsControlOrFormat(sal_uInt32 nUtf32);
-// All code points that are high-surrogates, as of Unicode 3.1.1.
-inline bool ImplIsHighSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; }
-
-// All code points that are low-surrogates, as of Unicode 3.1.1.
-inline bool ImplIsLowSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; }
-
bool ImplIsPrivateUse(sal_uInt32 nUtf32);
bool ImplIsZeroWidth(sal_uInt32 nUtf32);
-inline sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32)
-{
- assert(nUtf32 >= 0x10000);
- return ((nUtf32 - 0x10000) >> 10) | 0xD800;
-}
-
-inline sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32)
-{
- assert(nUtf32 >= 0x10000);
- return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00;
-}
-
-inline sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow)
-{
- assert(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow));
- return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000;
-}
-
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */