diff options
-rw-r--r-- | i18nutil/source/utility/unicode.cxx | 4 | ||||
-rw-r--r-- | include/rtl/character.hxx | 60 | ||||
-rw-r--r-- | sal/rtl/uri.cxx | 8 | ||||
-rw-r--r-- | sal/rtl/ustrbuf.cxx | 4 | ||||
-rw-r--r-- | sal/rtl/ustring.cxx | 2 | ||||
-rw-r--r-- | sal/textenc/tcvtutf8.cxx | 2 | ||||
-rw-r--r-- | sal/textenc/unichars.hxx | 9 | ||||
-rw-r--r-- | sax/source/expatwrap/saxwriter.cxx | 5 | ||||
-rw-r--r-- | sc/source/core/tool/interpr1.cxx | 2 | ||||
-rw-r--r-- | stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx | 6 | ||||
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 2 | ||||
-rw-r--r-- | svtools/source/svrtf/svparser.cxx | 2 | ||||
-rw-r--r-- | tools/source/fsys/urlobj.cxx | 4 | ||||
-rw-r--r-- | tools/source/inet/inetmime.cxx | 6 | ||||
-rw-r--r-- | xmlreader/source/xmlreader.cxx | 9 |
15 files changed, 65 insertions, 60 deletions
diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index a7d3d4690f1c..6507479807fe 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -1190,14 +1190,14 @@ OUString ToggleUnicodeCodepoint::StringToReplace() { nUnicode = sIn.copy(0, nUPlus).toString().toUInt32(16); //prevent creating control characters or invalid Unicode values - if( nUnicode < 0x20 || nUnicode > 0x10ffff ) + if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20 ) maInput = sIn.copy(nUPlus); sIn = sIn.copy(nUPlus+2); nUPlus = sIn.indexOf("U+"); } nUnicode = sIn.toString().toUInt32(16); - if( nUnicode < 0x20 || nUnicode > 0x10ffff ) + if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20 ) maInput.truncate().append( sIn[sIn.getLength()-1] ); return maInput.toString(); } diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx index 49f6803821de..ba3088efdeda 100644 --- a/include/rtl/character.hxx +++ b/include/rtl/character.hxx @@ -29,6 +29,19 @@ namespace rtl { +/** Check for Unicode code point. + + @param code An integer. + + @return True if code is a Unicode code point. + + @since LibreOffice 5.2 +*/ +inline bool isUnicodeCodePoint(sal_uInt32 code) +{ + return code <= 0x10FFFF; +} + /** Check for ASCII character. @param code A Unicode code point. 
@@ -39,7 +52,7 @@ namespace rtl */ inline bool isAscii(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code <= 0x7F; } @@ -54,7 +67,7 @@ inline bool isAscii(sal_uInt32 code) */ inline bool isAsciiLowerCase(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= 'a' && code <= 'z'; } @@ -69,7 +82,7 @@ inline bool isAsciiLowerCase(sal_uInt32 code) */ inline bool isAsciiUpperCase(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= 'A' && code <= 'Z'; } @@ -84,7 +97,7 @@ inline bool isAsciiUpperCase(sal_uInt32 code) */ inline bool isAsciiAlpha(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiLowerCase(code) || isAsciiUpperCase(code); } @@ -99,7 +112,7 @@ inline bool isAsciiAlpha(sal_uInt32 code) */ inline bool isAsciiDigit(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= '0' && code <= '9'; } @@ -114,7 +127,7 @@ inline bool isAsciiDigit(sal_uInt32 code) */ inline bool isAsciiAlphanumeric(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiDigit(code) || isAsciiAlpha(code); } @@ -129,7 +142,7 @@ inline bool isAsciiAlphanumeric(sal_uInt32 code) */ inline bool isAsciiCanonicHexDigit(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiDigit(code) || (code >= 'A' && code <= 'F'); } @@ -144,7 +157,7 @@ inline bool isAsciiCanonicHexDigit(sal_uInt32 code) */ inline bool isAsciiHexDigit(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f'); } @@ -158,7 +171,7 @@ inline bool isAsciiHexDigit(sal_uInt32 code) */ inline bool isAsciiOctalDigit(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= '0' && code <= '7'; } @@ -173,7 
+186,7 @@ inline bool isAsciiOctalDigit(sal_uInt32 code) */ inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiLowerCase(code) ? code - 32 : code; } @@ -187,7 +200,7 @@ inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code) */ inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return isAsciiUpperCase(code) ? code + 32 : code; } @@ -205,8 +218,8 @@ inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code) */ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2) { - assert(code1 <= 0x10FFFF); - assert(code2 <= 0x10FFFF); + assert(isUnicodeCodePoint(code1)); + assert(isUnicodeCodePoint(code2)); return static_cast<sal_Int32>(toAsciiLowerCase(code1)) - static_cast<sal_Int32>(toAsciiLowerCase(code2)); } @@ -222,19 +235,6 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF; } /// @endcond -/** Check if a codepoint is accessible via utf16 per RFC3629 - - @param code A non-BMP Unicode code point. - - @return True if the code is a valid codepoint. - - @since LibreOffice 5.2 -*/ -inline bool isValidCodePoint( sal_uInt32 code) -{ - return code <= 0x10FFFF; -} - /** Check for high surrogate. @param code A Unicode code point. 
@@ -244,7 +244,7 @@ inline bool isValidCodePoint( sal_uInt32 code) @since LibreOffice 5.0 */ inline bool isHighSurrogate(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast; } @@ -258,7 +258,7 @@ inline bool isHighSurrogate(sal_uInt32 code) { @since LibreOffice 5.0 */ inline bool isLowSurrogate(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast; } @@ -272,7 +272,7 @@ inline bool isLowSurrogate(sal_uInt32 code) { @since LibreOffice 5.0 */ inline sal_Unicode getHighSurrogate(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); assert(code >= 0x10000); return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst); } @@ -286,7 +286,7 @@ inline sal_Unicode getHighSurrogate(sal_uInt32 code) { @since LibreOffice 5.0 */ inline sal_Unicode getLowSurrogate(sal_uInt32 code) { - assert(code <= 0x10FFFF); + assert(isUnicodeCodePoint(code)); assert(code >= 0x10000); return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst); } diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx index b028b3cf6664..0f3d6df52194 100644 --- a/sal/rtl/uri.cxx +++ b/sal/rtl/uri.cxx @@ -132,8 +132,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, p += 3; nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; } - if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF - && !rtl::isHighSurrogate(nEncoded) + if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded) + && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded) && !rtl::isLowSurrogate(nEncoded)) { *pBegin = p; @@ -213,7 +213,7 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) { - assert(nUtf32 <= 0x10FFFF); // bad UTF-32 
char + assert(rtl::isUnicodeCodePoint(nUtf32)); if (nUtf32 <= 0xFFFF) { writeUnicode( pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); @@ -245,7 +245,7 @@ void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) { - assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char + assert(rtl::isUnicodeCodePoint(nUtf32)); if (eCharset == RTL_TEXTENCODING_UTF8) { if (nUtf32 < 0x80) writeEscapeOctet(pBuffer, pCapacity, nUtf32); diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx index 89b897f21573..b73318b6e5f4 100644 --- a/sal/rtl/ustrbuf.cxx +++ b/sal/rtl/ustrbuf.cxx @@ -21,7 +21,7 @@ #include <osl/interlck.h> #include <osl/diagnose.h> - +#include <rtl/character.hxx> #include <rtl/ustrbuf.hxx> #include <strimp.hxx> @@ -169,7 +169,7 @@ void rtl_uStringbuffer_insertUtf32( { sal_Unicode buf[2]; sal_Int32 len; - OSL_ASSERT(c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF)); + OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF)); if (c <= 0xFFFF) { buf[0] = (sal_Unicode) c; len = 1; diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index 16e9b87b6288..db07cabb396b 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -578,7 +578,7 @@ void SAL_CALL rtl_uString_newFromCodePoints( } n = codePointCount; for (i = 0; i < codePointCount; ++i) { - OSL_ASSERT(codePoints[i] <= 0x10FFFF); + OSL_ASSERT(rtl::isUnicodeCodePoint(codePoints[i])); if (codePoints[i] >= 0x10000) { ++n; } diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx index 1f0b2bfc96ef..f9c9879c54a3 100644 --- a/sal/textenc/tcvtutf8.cxx +++ b/sal/textenc/tcvtutf8.cxx @@ -163,7 +163,7 @@ sal_Size ImplConvertUtf8ToUnicode( *pDestBufPtr++ = (sal_Unicode) nUtf32; else goto no_output; - else if (nUtf32 <= 0x10FFFF) + else if (rtl::isUnicodeCodePoint(nUtf32)) if (pDestBufEnd - pDestBufPtr >= 2) { *pDestBufPtr++ = (sal_Unicode) 
ImplGetHighSurrogate(nUtf32); diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx index 09652b98ce57..0bcd6f710518 100644 --- a/sal/textenc/unichars.hxx +++ b/sal/textenc/unichars.hxx @@ -20,9 +20,12 @@ #ifndef INCLUDED_SAL_TEXTENC_UNICHARS_HXX #define INCLUDED_SAL_TEXTENC_UNICHARS_HXX -#include "sal/config.h" +#include <sal/config.h> + #include <cassert> -#include "sal/types.h" + +#include <rtl/character.hxx> +#include <sal/types.h> #define RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER 0xFFFD @@ -30,7 +33,7 @@ inline bool ImplIsNoncharacter(sal_uInt32 nUtf32) { return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) || (nUtf32 & 0xFFFF) >= 0xFFFE - || nUtf32 > 0x10FFFF; + || !rtl::isUnicodeCodePoint(nUtf32); } // All code points that are noncharacters, as of Unicode 3.1.1. diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx index 09a78557c8cb..db75efe44d33 100644 --- a/sax/source/expatwrap/saxwriter.cxx +++ b/sax/source/expatwrap/saxwriter.cxx @@ -39,6 +39,7 @@ #include <cppuhelper/supportsservice.hxx> #include <osl/diagnose.h> +#include <rtl/character.hxx> #include <rtl/ref.hxx> #include <rtl/ustrbuf.hxx> @@ -388,7 +389,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr, OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" ); nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff ); - if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF ) + if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 ) { sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)), sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)), @@ -831,7 +832,7 @@ inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen, { // 2. 
surrogate: write as UTF-8 (if range is OK nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff ); - if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF ) + if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 ) nOutputLength += 4; nSurrogate = 0; } diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index 266b0d776741..d43f5f5b1a26 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -3324,7 +3324,7 @@ void ScInterpreter::ScUnichar() if ( MustHaveParamCount( GetByte(), 1 ) ) { double dVal = ::rtl::math::approxFloor( GetDouble() ); - if ((dVal < 0x000000) || (dVal > 0x10FFFF)) + if (!(dVal >= 0 && dVal <= 0x10FFFF)) /* range-check in double (NaN-safe): rtl::isUnicodeCodePoint takes sal_uInt32, and narrowing an out-of-range double to it is undefined */ PushIllegalArgument(); else { diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx index b2f6c6da2b3a..eaf874143a5e 100644 --- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx +++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx @@ -34,6 +34,7 @@ #include <cppuhelper/supportsservice.hxx> #include <cppuhelper/weak.hxx> #include <osl/mutex.hxx> +#include <rtl/character.hxx> #include <rtl/uri.hxx> #include <rtl/ustrbuf.hxx> #include <rtl/ustring.hxx> @@ -110,9 +111,8 @@ OUString parsePart( } encoded |= (n & 0x3F) << shift; } - if (!utf8 || encoded < min - || (encoded >= 0xD800 && encoded <= 0xDFFF) - || encoded > 0x10FFFF) + if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min + || (encoded >= 0xD800 && encoded <= 0xDFFF)) { break; } diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index a8eff6d0158f..d1a081bbabf8 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -502,7 +502,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) else nNextCh = 0U; - if ( ! 
rtl::isUnicodeCodePoint( cChar ) ) cChar = '?'; } else if( HTML_ISALPHA( nNextCh ) ) diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx index b862e66766ca..33504923a684 100644 --- a/svtools/source/svrtf/svparser.cxx +++ b/svtools/source/svrtf/svparser.cxx @@ -394,7 +394,7 @@ sal_uInt32 SvParser::GetNextChar() while( 0 == nChars && !bErr ); } - if ( ! rtl::isValidCodePoint( c ) ) + if ( ! rtl::isUnicodeCodePoint( c ) ) c = (sal_uInt32) '?' ; if( bErr ) diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx index 6ebb517540ae..03e550e96421 100644 --- a/tools/source/fsys/urlobj.cxx +++ b/tools/source/fsys/urlobj.cxx @@ -4744,8 +4744,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin, break; nShift -= 6; } - if (bUTF8 && nEncoded >= nMin - && nEncoded <= 0x10FFFF + if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded) + && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded) && !rtl::isLowSurrogate(nEncoded)) { diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx index 88b9f99d2306..d0b638a66a99 100644 --- a/tools/source/inet/inetmime.cxx +++ b/tools/source/inet/inetmime.cxx @@ -270,7 +270,7 @@ sal_Char * convertFromUnicode(const sal_Unicode * pBegin, inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer, sal_uInt32 nUTF32) { - DBG_ASSERT(nUTF32 <= 0x10FFFF, "putUTF32Character(): Bad char"); + DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char"); if (nUTF32 < 0x10000) *pBuffer++ = sal_Unicode(nUTF32); else @@ -375,7 +375,7 @@ bool translateUTF8Char(const sal_Char *& rBegin, else return false; - if (nUCS4 < nMin || nUCS4 > 0x10FFFF) + if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin) return false; if (eEncoding >= RTL_TEXTENCODING_UCS4) @@ -1279,7 +1279,7 @@ void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer) if (bEscape) { DBG_ASSERT( - nUTF32 < 0x10FFFF, + rtl::isUnicodeCodePoint(nUTF32), "INetMIMEEncodedWordOutputSink::finish():" " Bad 
char"); if (nUTF32 < 0x80) diff --git a/xmlreader/source/xmlreader.cxx b/xmlreader/source/xmlreader.cxx index 011a09485d2c..25b56847f692 100644 --- a/xmlreader/source/xmlreader.cxx +++ b/xmlreader/source/xmlreader.cxx @@ -28,6 +28,7 @@ #include <com/sun/star/uno/RuntimeException.hpp> #include <com/sun/star/uno/XInterface.hpp> #include <osl/file.h> +#include <rtl/character.hxx> #include <rtl/string.h> #include <rtl/ustring.hxx> #include <sal/log.hxx> @@ -399,7 +400,7 @@ char const * XmlReader::handleReference(char const * position, char const * end) ++position; if (*position == '#') { ++position; - sal_Int32 val = 0; + sal_uInt32 val = 0; char const * p; if (*position == 'x') { ++position; @@ -415,7 +416,7 @@ char const * XmlReader::handleReference(char const * position, char const * end) } else { break; } - if (val > 0x10FFFF) { // avoid overflow + if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow throw css::uno::RuntimeException( "'&#x...' too large in " + fileUrl_ ); } @@ -429,7 +430,7 @@ char const * XmlReader::handleReference(char const * position, char const * end) } else { break; } - if (val > 0x10FFFF) { // avoid overflow + if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow throw css::uno::RuntimeException( "'&#...' too large in " + fileUrl_ ); } @@ -439,7 +440,7 @@ char const * XmlReader::handleReference(char const * position, char const * end) throw css::uno::RuntimeException( "'&#...' missing ';' in " + fileUrl_ ); } - assert(val >= 0 && val <= 0x10FFFF); + assert(rtl::isUnicodeCodePoint(val)); if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) || (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF) { |