diff options
-rw-r--r-- | include/rtl/character.hxx | 84 | ||||
-rw-r--r-- | include/rtl/surrogates.h | 57 | ||||
-rw-r--r-- | sal/rtl/string.cxx | 8 | ||||
-rw-r--r-- | sal/rtl/uri.cxx | 18 | ||||
-rw-r--r-- | sal/rtl/ustring.cxx | 21 | ||||
-rw-r--r-- | svl/source/misc/urihelper.cxx | 6 | ||||
-rw-r--r-- | sw/source/filter/ww8/ww8par3.cxx | 9 | ||||
-rw-r--r-- | tools/source/fsys/urlobj.cxx | 7 |
8 files changed, 118 insertions, 92 deletions
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx index f5c9490033ec..52151e8c10ed 100644 --- a/include/rtl/character.hxx +++ b/include/rtl/character.hxx @@ -211,6 +211,90 @@ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2) - static_cast<sal_Int32>(toAsciiLowerCase(code2)); } +/// @cond INTERNAL +namespace detail { + +sal_uInt32 const surrogatesHighFirst = 0xD800; +sal_uInt32 const surrogatesHighLast = 0xDBFF; +sal_uInt32 const surrogatesLowFirst = 0xDC00; +sal_uInt32 const surrogatesLowLast = 0xDFFF; + +} +/// @endcond + +/** Check for high surrogate. + + @param code A Unicode code point. + + @return True if code is a high surrogate code point (0xD800--0xDBFF). + + @since LibreOffice 5.0 +*/ +inline bool isHighSurrogate(sal_uInt32 code) { + assert(code <= 0x10FFFF); + return code >= detail::surrogatesHighFirst + && code <= detail::surrogatesHighLast; +} + +/** Check for low surrogate. + + @param code A Unicode code point. + + @return True if code is a low surrogate code point (0xDC00--0xDFFF). + + @since LibreOffice 5.0 +*/ +inline bool isLowSurrogate(sal_uInt32 code) { + assert(code <= 0x10FFFF); + return code >= detail::surrogatesLowFirst + && code <= detail::surrogatesLowLast; +} + +/** Get high surrogate half of a non-BMP Unicode code point. + + @param code A non-BMP Unicode code point. + + @return The UTF-16 high surrogate half for the give code point. + + @since LibreOffice 5.0 + */ +inline sal_Unicode getHighSurrogate(sal_uInt32 code) { + assert(code <= 0x10FFFF); + assert(code >= 0x10000); + return ((code - 0x10000) >> 10) | detail::surrogatesHighFirst; +} + +/** Get low surrogate half of a non-BMP Unicode code point. + + @param code A non-BMP Unicode code point. + + @return The UTF-16 low surrogate half for the give code point. + + @since LibreOffice 5.0 + */ +inline sal_Unicode getLowSurrogate(sal_uInt32 code) { + assert(code <= 0x10FFFF); + assert(code >= 0x10000); + return ((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst; +} + +/** Combine surrogates to form a code point. + + @param high A high surrogate code point. + + @param low A low surrogate code point. + + @return The code point represented by the surrogate pair. + + @since LibreOffice 5.0 +*/ +inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) { + assert(isHighSurrogate(high)); + assert(isLowSurrogate(low)); + return ((high - detail::surrogatesHighFirst) << 10) + + (low - detail::surrogatesLowFirst) + 0x10000; +} + } #endif diff --git a/include/rtl/surrogates.h b/include/rtl/surrogates.h deleted file mode 100644 index ab98cd666ca3..000000000000 --- a/include/rtl/surrogates.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - -#ifndef INCLUDED_RTL_SURROGATES_H -#define INCLUDED_RTL_SURROGATES_H - -#include <sal/config.h> - -#include <sal/types.h> - -#define SAL_RTL_FIRST_HIGH_SURROGATE 0xD800 -#define SAL_RTL_LAST_HIGH_SURROGATE 0xDBFF -#define SAL_RTL_FIRST_LOW_SURROGATE 0xDC00 -#define SAL_RTL_LAST_LOW_SURROGATE 0xDFFF - -#ifdef __cplusplus -extern "C" { -#endif - -inline bool isHighSurrogate(sal_uInt32 utf16) { - return utf16 >= SAL_RTL_FIRST_HIGH_SURROGATE - && utf16 <= SAL_RTL_LAST_HIGH_SURROGATE; -} - -inline bool isLowSurrogate(sal_uInt32 utf16) { - return utf16 >= SAL_RTL_FIRST_LOW_SURROGATE - && utf16 <= SAL_RTL_LAST_LOW_SURROGATE; -} - -inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) { - return ((high - SAL_RTL_FIRST_HIGH_SURROGATE) << 10) - + (low - SAL_RTL_FIRST_LOW_SURROGATE) + 0x10000; -} - -#ifdef __cplusplus -} -#endif - -#endif - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx index 68a85f0ae841..3647908c3ada 100644 --- a/sal/rtl/string.cxx +++ b/sal/rtl/string.cxx @@ -32,7 +32,7 @@ #include <rtl/tencinfo.h> #include "strimp.hxx" -#include <rtl/surrogates.h> +#include <rtl/character.hxx> #include <rtl/string.h> #include "rtl/math.h" @@ -154,7 +154,7 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen ) n += 2; else { - if ( !isHighSurrogate(c) ) + if ( !rtl::isHighSurrogate(c) ) n += 3; else { @@ -163,9 +163,9 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen ) if ( pStr+1 < pEndStr ) { c = *(pStr+1); - if ( isLowSurrogate(c) ) + if ( rtl::isLowSurrogate(c) ) { - nUCS4Char = combineSurrogates(nUCS4Char, c); + nUCS4Char = rtl::combineSurrogates(nUCS4Char, c); pStr++; } } diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx index a90b40ba83d3..ea895e5a8906 100644 --- a/sal/rtl/uri.cxx +++ b/sal/rtl/uri.cxx @@ -20,7 +20,6 @@ #include "osl/diagnose.h" #include "rtl/character.hxx" #include "rtl/strbuf.hxx" -#include "rtl/surrogates.h" #include "rtl/textenc.h" #include "rtl/textcvt.h" #include "rtl/uri.h" @@ -133,8 +132,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, p += 3; nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; } - if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) - && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) + if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF + && !rtl::isHighSurrogate(nEncoded) + && !rtl::isLowSurrogate(nEncoded)) { *pBegin = p; *pType = EscapeChar; @@ -171,10 +171,10 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, *pBegin = p; *pType = EscapeChar; assert( nDstSize == 1 - || (nDstSize == 2 && isHighSurrogate(aDst[0]) - && isLowSurrogate(aDst[1]))); + || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0]) + && rtl::isLowSurrogate(aDst[1]))); return nDstSize == 1 - ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); + ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]); } else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL && pEnd - p >= 3 && p[0] == cEscapePrefix @@ -205,9 +205,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, else { *pType = EscapeNo; - return isHighSurrogate(nChar) && *pBegin < pEnd - && isLowSurrogate(**pBegin) ? - combineSurrogates(nChar, *(*pBegin)++) : nChar; + return rtl::isHighSurrogate(nChar) && *pBegin < pEnd + && rtl::isLowSurrogate(**pBegin) ? + rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar; } } diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index a418c6ad5841..3c9c8b750a1f 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -39,7 +39,7 @@ #include "hash.hxx" #include "strimp.hxx" -#include <rtl/surrogates.h> +#include <rtl/character.hxx> #include <rtl/ustring.h> #include "rtl/math.h" @@ -588,9 +588,8 @@ void SAL_CALL rtl_uString_newFromCodePoints( if (c < 0x10000) { *p++ = (sal_Unicode) c; } else { - c -= 0x10000; - *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE); - *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE); + *p++ = rtl::getHighSurrogate(c); + *p++ = rtl::getLowSurrogate(c); } } RTL_LOG_STRING_NEW( *newString ); @@ -1049,8 +1048,8 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( while (incrementCodePoints < 0) { assert(n > 0); cu = string->buffer[--n]; - if (isLowSurrogate(cu) && n != 0 && - isHighSurrogate(string->buffer[n - 1])) + if (rtl::isLowSurrogate(cu) && n != 0 && + rtl::isHighSurrogate(string->buffer[n - 1])) { --n; } @@ -1058,18 +1057,18 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( } assert(n >= 0 && n < string->length); cu = string->buffer[n]; - if (isHighSurrogate(cu) && string->length - n >= 2 && - isLowSurrogate(string->buffer[n + 1])) + if (rtl::isHighSurrogate(cu) && string->length - n >= 2 && + rtl::isLowSurrogate(string->buffer[n + 1])) { - cp = combineSurrogates(cu, string->buffer[n + 1]); + cp = rtl::combineSurrogates(cu, string->buffer[n + 1]); } else { cp = cu; } while (incrementCodePoints > 0) { assert(n < string->length); cu = string->buffer[n++]; - if (isHighSurrogate(cu) && n != string->length && - isLowSurrogate(string->buffer[n])) + if (rtl::isHighSurrogate(cu) && n != string->length && + rtl::isLowSurrogate(string->buffer[n])) { ++n; } diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx index 7d7cc2c90cbf..06936b8bc76f 100644 --- a/svl/source/misc/urihelper.cxx +++ b/svl/source/misc/urihelper.cxx @@ -36,8 +36,8 @@ #include <com/sun/star/uri/XUriReferenceFactory.hpp> #include <comphelper/processfactory.hxx> #include <osl/diagnose.h> +#include <rtl/character.hxx> #include <rtl/instance.hxx> -#include <rtl/surrogates.h> #include <rtl/ustrbuf.hxx> #include <rtl/ustring.h> #include <rtl/ustring.hxx> @@ -281,9 +281,9 @@ namespace { inline sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos) { - return isHighSurrogate(rStr[nPos]) + return rtl::isHighSurrogate(rStr[nPos]) && rStr.getLength() - nPos >= 2 - && isLowSurrogate(rStr[nPos + 1]) ? + && rtl::isLowSurrogate(rStr[nPos + 1]) ? nPos + 2 : nPos + 1; } diff --git a/sw/source/filter/ww8/ww8par3.cxx b/sw/source/filter/ww8/ww8par3.cxx index 5bb17c9a2b9f..104052a2c841 100644 --- a/sw/source/filter/ww8/ww8par3.cxx +++ b/sw/source/filter/ww8/ww8par3.cxx @@ -79,7 +79,7 @@ #include <IMark.hxx> #include <unotools/fltrcfg.hxx> -#include <rtl/surrogates.h> +#include <rtl/character.hxx> #include <xmloff/odffields.hxx> #include <stdio.h> @@ -500,16 +500,17 @@ OUString sanitizeString(const OUString& rString) while (i < rString.getLength()) { sal_Unicode c = rString[i]; - if (isHighSurrogate(c)) + if (rtl::isHighSurrogate(c)) { - if (i+1 == rString.getLength() || !isLowSurrogate(rString[i+1])) + if (i+1 == rString.getLength() + || !rtl::isLowSurrogate(rString[i+1])) { SAL_WARN("sw.ww8", "Surrogate error: high without low"); return rString.copy(0, i); } ++i; //skip correct low } - if (isLowSurrogate(c)) //bare low without preceeding high + if (rtl::isLowSurrogate(c)) //bare low without preceeding high { SAL_WARN("sw.ww8", "Surrogate error: low without high"); return rString.copy(0, i); diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx index 4dfe58895bce..08c0c6da5a7e 100644 --- a/tools/source/fsys/urlobj.cxx +++ b/tools/source/fsys/urlobj.cxx @@ -31,7 +31,6 @@ #include <osl/file.hxx> #include <rtl/character.hxx> #include <rtl/string.h> -#include <rtl/surrogates.h> #include <rtl/textenc.h> #include <rtl/ustring.hxx> #include <sal/types.h> @@ -4778,9 +4777,9 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin, nShift -= 6; } if (bUTF8 && nEncoded >= nMin - && !isHighSurrogate(nEncoded) - && !isLowSurrogate(nEncoded) - && nEncoded <= 0x10FFFF) + && nEncoded <= 0x10FFFF + && !rtl::isHighSurrogate(nEncoded) + && !rtl::isLowSurrogate(nEncoded)) { rBegin = p; nUTF32 = nEncoded; |