diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2021-12-24 15:25:52 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2021-12-24 21:42:08 +0100 |
commit | 8b333c76945960fc62a01829666ba234f59a6d94 (patch) | |
tree | 4393e992348ac00faf04256d5f51f9e3aea2e4e6 /sal | |
parent | fd4acfaca9fc012313f03f46e927add6feb6a553 (diff) |
Use rtl functions instead of own surrogate checking/combining
Change-Id: I3eb05d8f5b0761bc3b672d4c855eb469f8cc1a29
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/127375
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sal')
-rw-r--r-- | sal/rtl/uri.cxx | 31 | ||||
-rw-r--r-- | sal/rtl/ustrbuf.cxx | 12 | ||||
-rw-r--r-- | sal/textenc/convertbig5hkscs.cxx | 14 | ||||
-rw-r--r-- | sal/textenc/converteuctw.cxx | 10 | ||||
-rw-r--r-- | sal/textenc/convertgb18030.cxx | 13 | ||||
-rw-r--r-- | sal/textenc/convertisciidevangari.cxx | 8 | ||||
-rw-r--r-- | sal/textenc/convertiso2022cn.cxx | 10 | ||||
-rw-r--r-- | sal/textenc/convertiso2022jp.cxx | 8 | ||||
-rw-r--r-- | sal/textenc/convertiso2022kr.cxx | 8 | ||||
-rw-r--r-- | sal/textenc/convertsinglebytetobmpunicode.cxx | 8 | ||||
-rw-r--r-- | sal/textenc/handleundefinedunicodetotextchar.cxx | 4 | ||||
-rw-r--r-- | sal/textenc/tcvtutf8.cxx | 13 | ||||
-rw-r--r-- | sal/textenc/unichars.hxx | 24 |
13 files changed, 48 insertions, 115 deletions
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx index 441a6c69f1e8..0c5479563405 100644 --- a/sal/rtl/uri.cxx +++ b/sal/rtl/uri.cxx @@ -219,21 +219,7 @@ namespace { void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) { - assert(rtl::isUnicodeCodePoint(nUtf32)); - if (nUtf32 <= 0xFFFF) - { - writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); - } - else - { - nUtf32 -= 0x10000; - writeUnicode( - pBuffer, pCapacity, - static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); - writeUnicode( - pBuffer, pCapacity, - static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); - } + rtl_uStringbuffer_insertUtf32(pBuffer, pCapacity, (*pBuffer)->length, nUtf32); } void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, @@ -284,20 +270,7 @@ bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, rtl_UnicodeToTextConverter aConverter = rtl_createUnicodeToTextConverter(eCharset); sal_Unicode aSrc[2]; - sal_Size nSrcSize; - if (nUtf32 <= 0xFFFF) - { - aSrc[0] = static_cast< sal_Unicode >(nUtf32); - nSrcSize = 1; - } - else - { - aSrc[0] = static_cast< sal_Unicode >( - ((nUtf32 - 0x10000) >> 10) | 0xD800); - aSrc[1] = static_cast< sal_Unicode >( - ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); - nSrcSize = 2; - } + sal_Size nSrcSize = rtl::splitSurrogates(nUtf32, aSrc); char aDst[32]; // FIXME random value sal_uInt32 nInfo; diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx index 53e178e5881a..7c5b187edbcb 100644 --- a/sal/rtl/ustrbuf.cxx +++ b/sal/rtl/ustrbuf.cxx @@ -174,17 +174,7 @@ void rtl_uStringbuffer_insertUtf32( SAL_THROW_EXTERN_C() { sal_Unicode buf[2]; - sal_Int32 len; - OSL_ASSERT(rtl::isUnicodeScalarValue(c)); - if (c <= 0xFFFF) { - buf[0] = static_cast<sal_Unicode>(c); - len = 1; - } else { - c -= 0x10000; - buf[0] = static_cast<sal_Unicode>((c >> 10) | 0xD800); - buf[1] = static_cast<sal_Unicode>((c & 0x3FF) | 0xDC00); - len = 2; - } + sal_Int32 len = rtl::splitSurrogates(c, buf); rtl_uStringbuffer_insert(pThis, capacity, offset, buf, len); } diff --git a/sal/textenc/convertbig5hkscs.cxx b/sal/textenc/convertbig5hkscs.cxx index 77484666982e..bf3a2e2cf59d 100644 --- a/sal/textenc/convertbig5hkscs.cxx +++ b/sal/textenc/convertbig5hkscs.cxx @@ -131,7 +131,7 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData, nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n]; if (nUnicode == 0) nUnicode = 0xFFFF; - assert(!ImplIsHighSurrogate(nUnicode)); + assert(!rtl::isHighSurrogate(nUnicode)); } } if (nUnicode == 0xFFFF) @@ -192,11 +192,11 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData, } ++p; } - assert(!ImplIsHighSurrogate(nUnicode)); + assert(!rtl::isHighSurrogate(nUnicode)); } if (nUnicode == 0xFFFF) goto bad_input; - if (ImplIsHighSurrogate(nUnicode)) + if (rtl::isHighSurrogate(nUnicode)) if (pDestBufEnd - pDestBufPtr >= 2) { nOffset += nLast - nFirst + 1; @@ -329,19 +329,19 @@ sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/converteuctw.cxx b/sal/textenc/converteuctw.cxx index edb3c07fa934..8684b643c66e 100644 --- a/sal/textenc/converteuctw.cxx +++ b/sal/textenc/converteuctw.cxx @@ -206,7 +206,7 @@ sal_Size ImplConvertEucTwToUnicode(void const * pData, = pCns116431992Data[nOffset + (nChar - nFirst)]; if (nUnicode == 0xFFFF) goto bad_input; - else if (ImplIsHighSurrogate(nUnicode)) + else if (rtl::isHighSurrogate(nUnicode)) if (pDestBufEnd - pDestBufPtr >= 2) { nOffset += nLast - nFirst + 1; @@ -340,19 +340,19 @@ sal_Size ImplConvertUnicodeToEucTw(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/convertgb18030.cxx b/sal/textenc/convertgb18030.cxx index 88f5a999d87d..d7963762fa2b 100644 --- a/sal/textenc/convertgb18030.cxx +++ b/sal/textenc/convertgb18030.cxx @@ -172,10 +172,7 @@ sal_Size ImplConvertGb18030ToUnicode(void const * pData, if (pDestBufEnd - pDestBufPtr >= 2) { nCode -= 189000 - 0x10000; - *pDestBufPtr++ - = static_cast<sal_Unicode>(ImplGetHighSurrogate(nCode)); - *pDestBufPtr++ - = static_cast<sal_Unicode>(ImplGetLowSurrogate(nCode)); + pDestBufPtr += rtl::splitSurrogates(nCode, pDestBufPtr); startOfCurrentChar = nConverted + 1; } else @@ -330,19 +327,19 @@ sal_Size ImplConvertUnicodeToGb18030(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/convertisciidevangari.cxx b/sal/textenc/convertisciidevangari.cxx index 759328565690..75fcadcf7fe4 100644 --- a/sal/textenc/convertisciidevangari.cxx +++ b/sal/textenc/convertisciidevangari.cxx @@ -266,20 +266,20 @@ sal_Size UnicodeToIsciiDevanagari::convert(sal_Unicode const* pSrcBuf, sal_Size char cSpecialChar = 0; if (cHighSurrogate == 0) { - if (ImplIsHighSurrogate(c)) + if (rtl::isHighSurrogate(c)) { cHighSurrogate = static_cast< sal_Unicode >(c); continue; } - else if (ImplIsLowSurrogate(c)) + else if (rtl::isLowSurrogate(c)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(c)) + else if (rtl::isLowSurrogate(c)) { - c = ImplCombineSurrogates(cHighSurrogate, c); + c = rtl::combineSurrogates(cHighSurrogate, c); } else { diff --git a/sal/textenc/convertiso2022cn.cxx b/sal/textenc/convertiso2022cn.cxx index 9e89c27486db..60c2b57436ff 100644 --- a/sal/textenc/convertiso2022cn.cxx +++ b/sal/textenc/convertiso2022cn.cxx @@ -329,7 +329,7 @@ sal_Size ImplConvertIso2022CnToUnicode(void const * pData, = pCns116431992Data[nOffset + (nChar - nFirst)]; if (nUnicode == 0xFFFF) goto bad_input; - else if (ImplIsHighSurrogate(nUnicode)) + else if (rtl::isHighSurrogate(nUnicode)) if (pDestBufEnd - pDestBufPtr >= 2) { nOffset += nLast - nFirst + 1; @@ -556,19 +556,19 @@ sal_Size ImplConvertUnicodeToIso2022Cn(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/convertiso2022jp.cxx b/sal/textenc/convertiso2022jp.cxx index 4024653f04e5..935bc8515d77 100644 --- a/sal/textenc/convertiso2022jp.cxx +++ b/sal/textenc/convertiso2022jp.cxx @@ -375,19 +375,19 @@ sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/convertiso2022kr.cxx b/sal/textenc/convertiso2022kr.cxx index 5c7971ba5af6..5aea5c66c877 100644 --- a/sal/textenc/convertiso2022kr.cxx +++ b/sal/textenc/convertiso2022kr.cxx @@ -353,19 +353,19 @@ sal_Size ImplConvertUnicodeToIso2022Kr(void const * pData, sal_uInt32 nChar = *pSrcBuf++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar)) + if (rtl::isHighSurrogate(nChar)) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar)) + else if (rtl::isLowSurrogate(nChar)) { bUndefined = false; goto bad_input; } } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar)) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else { bUndefined = false; diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx index b948ba0fbc5c..43c2b9cf39ce 100644 --- a/sal/textenc/convertsinglebytetobmpunicode.cxx +++ b/sal/textenc/convertsinglebytetobmpunicode.cxx @@ -111,17 +111,17 @@ sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( bool undefined = true; sal_uInt32 c = *srcBuf++; if (highSurrogate == 0) { - if (ImplIsHighSurrogate(c)) { + if (rtl::isHighSurrogate(c)) { highSurrogate = static_cast< sal_Unicode >(c); continue; } - else if (ImplIsLowSurrogate(c)) + else if (rtl::isLowSurrogate(c)) { undefined = false; goto bad_input; } - } else if (ImplIsLowSurrogate(c)) { - c = ImplCombineSurrogates(highSurrogate, c); + } else if (rtl::isLowSurrogate(c)) { + c = rtl::combineSurrogates(highSurrogate, c); } else { undefined = false; goto bad_input; diff --git a/sal/textenc/handleundefinedunicodetotextchar.cxx b/sal/textenc/handleundefinedunicodetotextchar.cxx index 320562495e97..76aed03e1881 100644 --- a/sal/textenc/handleundefinedunicodetotextchar.cxx +++ b/sal/textenc/handleundefinedunicodetotextchar.cxx @@ -106,7 +106,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar( /* Surrogates Characters should result in */ /* one replacement character */ - if (ImplIsHighSurrogate(c)) + if (rtl::isHighSurrogate(c)) { if ( ((*ppSrcBuf) + 1) == pEndSrcBuf ) { @@ -115,7 +115,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar( } c = *((*ppSrcBuf)+1); - if (ImplIsLowSurrogate(c)) + if (rtl::isLowSurrogate(c)) (*ppSrcBuf)++; else { diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx index ca29156c418f..f210b654d57f 100644 --- a/sal/textenc/tcvtutf8.cxx +++ b/sal/textenc/tcvtutf8.cxx @@ -199,10 +199,7 @@ sal_Size ImplConvertUtf8ToUnicode( else goto no_output; else if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetHighSurrogate(nUtf32)); - *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetLowSurrogate(nUtf32)); - } + pDestBufPtr += rtl::splitSurrogates(nUtf32, pDestBufPtr); else goto no_output; } @@ -349,18 +346,18 @@ sal_Size ImplConvertUnicodeToUtf8( sal_uInt32 nChar = *pSrcBufPtr++; if (nHighSurrogate == 0) { - if (ImplIsHighSurrogate(nChar) && !bJavaUtf8) + if (rtl::isHighSurrogate(nChar) && !bJavaUtf8) { nHighSurrogate = static_cast<sal_Unicode>(nChar); continue; } - else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8) + else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8) { goto bad_input; } } - else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8) + nChar = rtl::combineSurrogates(nHighSurrogate, nChar); else goto bad_input; diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx index 8ca1021da005..e627d3c95c3e 100644 --- a/sal/textenc/unichars.hxx +++ b/sal/textenc/unichars.hxx @@ -31,34 +31,10 @@ bool ImplIsControlOrFormat(sal_uInt32 nUtf32); -// All code points that are high-surrogates, as of Unicode 3.1.1. -inline bool ImplIsHighSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; } - -// All code points that are low-surrogates, as of Unicode 3.1.1. -inline bool ImplIsLowSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; } - bool ImplIsPrivateUse(sal_uInt32 nUtf32); bool ImplIsZeroWidth(sal_uInt32 nUtf32); -inline sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) -{ - assert(nUtf32 >= 0x10000); - return ((nUtf32 - 0x10000) >> 10) | 0xD800; -} - -inline sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) -{ - assert(nUtf32 >= 0x10000); - return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; -} - -inline sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) -{ - assert(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow)); - return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; -} - #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |