diff options
author | Eike Rathke <erack@redhat.com> | 2020-05-11 19:45:42 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2020-05-11 21:19:11 +0200 |
commit | 8a31ac7264d7a11146d4a29034e97b564164f635 (patch) | |
tree | f8c4f6b68d9138c4188b7dc3cae5c7fb01c856b7 /external/icu | |
parent | 9c5ffdbdd60385a3d4618f5e36034f550d9b15c9 (diff) |
Upgrade to internal ICU 67
Change-Id: I9b8d5cb6d6f4610f2b20c0e0f49eb674d55ce3b8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/94009
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
Diffstat (limited to 'external/icu')
-rw-r--r-- | external/icu/UnpackedTarball_icu.mk | 4 | ||||
-rw-r--r-- | external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 | 118 | ||||
-rw-r--r-- | external/icu/char8_t.patch | 19 | ||||
-rw-r--r-- | external/icu/icu4c-khmerbreakengine.patch.1 | 269 |
4 files changed, 25 insertions, 385 deletions
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk index 96dcd45c30a4..72fae09b1625 100644 --- a/external/icu/UnpackedTarball_icu.mk +++ b/external/icu/UnpackedTarball_icu.mk @@ -34,13 +34,11 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\ external/icu/icu4c-rtti.patch.1 \ external/icu/icu4c-clang-cl.patch.1 \ $(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \ - external/icu/icu4c-khmerbreakengine.patch.1 \ external/icu/gcc9.patch \ - external/icu/char8_t.patch \ external/icu/c++20-comparison.patch \ external/icu/ubsan.patch \ external/icu/Wdeprecated-copy-dtor.patch \ - external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 \ + external/icu/icu4c-khmerbreakengine.patch.1 \ )) $(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict)) diff --git a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 b/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 deleted file mode 100644 index 07b3db6774be..000000000000 --- a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 +++ /dev/null @@ -1,118 +0,0 @@ -From b7d08bc04a4296982fcef8b6b8a354a9e4e7afca Mon Sep 17 00:00:00 2001 -From: Frank Tang <ftang@chromium.org> -Date: Sat, 1 Feb 2020 02:39:04 +0000 -Subject: [PATCH] ICU-20958 Prevent SEGV_MAPERR in append - -See #971 ---- - icu4c/source/common/unistr.cpp | 6 ++- - icu4c/source/test/intltest/ustrtest.cpp | 62 +++++++++++++++++++++++++ - icu4c/source/test/intltest/ustrtest.h | 1 + - 3 files changed, 68 insertions(+), 1 deletion(-) - -diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp -index 901bb3358ba..077b4d6ef20 100644 ---- a/icu4c/source/common/unistr.cpp -+++ b/icu4c/source/common/unistr.cpp -@@ -1563,7 +1563,11 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng - } - - int32_t oldLength = length(); -- int32_t newLength = oldLength + srcLength; -+ int32_t newLength; -+ if (uprv_add32_overflow(oldLength, srcLength, &newLength)) { -+ setToBogus(); -+ return *this; -+ } - - // Check for append onto ourself - const UChar* oldArray = getArrayStart(); -diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp -index b6515ea813c..ad38bdf53a3 100644 ---- a/icu4c/source/test/intltest/ustrtest.cpp -+++ b/icu4c/source/test/intltest/ustrtest.cpp -@@ -67,6 +67,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & - TESTCASE_AUTO(TestWCharPointers); - TESTCASE_AUTO(TestNullPointers); - TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf); -+ TESTCASE_AUTO(TestLargeAppend); - TESTCASE_AUTO_END; - } - -@@ -2310,3 +2311,64 @@ void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() { - str.insert(2, sub); - assertEquals("", u"abbcdcde", str); - } -+ -+void UnicodeStringTest::TestLargeAppend() { -+ if(quick) return; -+ -+ IcuTestErrorCode status(*this, "TestLargeAppend"); -+ // Make a large UnicodeString -+ int32_t len = 0xAFFFFFF; -+ UnicodeString str; -+ char16_t *buf = str.getBuffer(len); -+ // A fast way to set buffer to valid Unicode. -+ // 4E4E is a valid unicode character -+ uprv_memset(buf, 0x4e, len * 2); -+ str.releaseBuffer(len); -+ UnicodeString dest; -+ // Append it 16 times -+ // 0xAFFFFFF times 16 is 0xA4FFFFF1, -+ // which is greater than INT32_MAX, which is 0x7FFFFFFF. -+ int64_t total = 0; -+ for (int32_t i = 0; i < 16; i++) { -+ dest.append(str); -+ total += len; -+ if (total <= INT32_MAX) { -+ assertFalse("dest is not bogus", dest.isBogus()); -+ } else { -+ assertTrue("dest should be bogus", dest.isBogus()); -+ } -+ } -+ dest.remove(); -+ total = 0; -+ for (int32_t i = 0; i < 16; i++) { -+ dest.append(str); -+ total += len; -+ if (total + len <= INT32_MAX) { -+ assertFalse("dest is not bogus", dest.isBogus()); -+ } else if (total <= INT32_MAX) { -+ // Check that a string of exactly the maximum size works -+ UnicodeString str2; -+ int32_t remain = INT32_MAX - total; -+ char16_t *buf2 = str2.getBuffer(remain); -+ if (buf2 == nullptr) { -+ // if somehow memory allocation fail, return the test -+ return; -+ } -+ uprv_memset(buf2, 0x4e, remain * 2); -+ str2.releaseBuffer(remain); -+ dest.append(str2); -+ total += remain; -+ assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total); -+ assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length()); -+ assertFalse("dest is not bogus", dest.isBogus()); -+ -+ // Check that a string size+1 goes bogus -+ str2.truncate(1); -+ dest.append(str2); -+ total++; -+ assertTrue("dest should be bogus", dest.isBogus()); -+ } else { -+ assertTrue("dest should be bogus", dest.isBogus()); -+ } -+ } -+} -diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h -index 218befdcc68..4a356a92c7a 100644 ---- a/icu4c/source/test/intltest/ustrtest.h -+++ b/icu4c/source/test/intltest/ustrtest.h -@@ -97,6 +97,7 @@ class UnicodeStringTest: public IntlTest { - void TestWCharPointers(); - void TestNullPointers(); - void TestUnicodeStringInsertAppendToSelf(); -+ void TestLargeAppend(); - }; - - #endif diff --git a/external/icu/char8_t.patch b/external/icu/char8_t.patch deleted file mode 100644 index d13b29634bc5..000000000000 --- a/external/icu/char8_t.patch +++ /dev/null @@ -1,19 +0,0 @@ ---- source/common/ucasemap.cpp -+++ source/common/ucasemap.cpp -@@ -687,13 +687,13 @@ - if (change) { - ByteSinkUtil::appendTwoBytes(upper, sink); - if ((data & HAS_EITHER_DIALYTIKA) != 0) { -- sink.Append(u8"\u0308", 2); // restore or add a dialytika -+ sink.Append(reinterpret_cast<char const *>(u8"\u0308"), 2); // restore or add a dialytika - } - if (addTonos) { -- sink.Append(u8"\u0301", 2); -+ sink.Append(reinterpret_cast<char const *>(u8"\u0301"), 2); - } - while (numYpogegrammeni > 0) { -- sink.Append(u8"\u0399", 2); -+ sink.Append(reinterpret_cast<char const *>(u8"\u0399"), 2); - --numYpogegrammeni; - } - } diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1 index 7992da6fc18f..272d0b8ab204 100644 --- a/external/icu/icu4c-khmerbreakengine.patch.1 +++ b/external/icu/icu4c-khmerbreakengine.patch.1 @@ -1,7 +1,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp ---- icu.org/source/common/dictbe.cpp 2018-10-02 00:39:56.000000000 +0200 -+++ icu/source/common/dictbe.cpp 2018-10-20 00:14:46.462039038 +0200 -@@ -29,7 +29,19 @@ +--- icu.org/source/common/dictbe.cpp 2020-04-22 22:04:20.000000000 +0200 ++++ icu/source/common/dictbe.cpp 2020-05-11 18:55:07.702282061 +0200 +@@ -32,7 +32,19 @@ ****************************************************************** */ @@ -22,7 +22,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp } DictionaryBreakEngine::~DictionaryBreakEngine() { -@@ -76,6 +88,169 @@ +@@ -79,6 +91,169 @@ fSet.compact(); } @@ -192,7 +192,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp /* ****************************************************************** * PossibleWord -@@ -282,7 +282,7 @@ +@@ -108,7 +283,7 @@ ~PossibleWord() {} // Fill the list of candidates if needed, select the longest, and return the number found @@ -201,7 +201,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp // Select the currently marked candidate, point after it in the text, and invalidate self int32_t acceptMarked( UText *text ); -@@ -303,12 +303,12 @@ +@@ -129,12 +304,12 @@ }; @@ -216,7 +216,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp // Dictionary leaves text after longest prefix, not longest word. Back up. if (count <= 0) { utext_setNativeIndex(text, start); -@@ -803,51 +978,28 @@ +@@ -815,53 +990,30 @@ * KhmerBreakEngine */ @@ -241,6 +241,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp + : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr"); - fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); + + clusterLimit = 3; @@ -277,10 +279,10 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp + fIgnoreSet.compact(); + fBaseSet.compact(); + fPuncSet.compact(); + UTRACE_EXIT_STATUS(status); } - KhmerBreakEngine::~KhmerBreakEngine() { -@@ -859,180 +1011,204 @@ +@@ -874,180 +1026,204 @@ int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks ) const { @@ -637,8 +639,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp #if !UCONFIG_NO_NORMALIZATION diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h ---- icu.org/source/common/dictbe.h 2018-09-29 02:34:41.000000000 +0200 -+++ icu/source/common/dictbe.h 2018-10-19 14:21:00.339942804 +0200 +--- icu.org/source/common/dictbe.h 2020-04-22 22:04:20.000000000 +0200 ++++ icu/source/common/dictbe.h 2020-05-11 19:08:24.754634732 +0200 @@ -34,7 +34,8 @@ * threads without synchronization.</p> */ @@ -735,206 +737,15 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h * <p>Virtual destructor.</p> */ virtual ~DictionaryBreakEngine(); -@@ -68,7 +139,7 @@ - * <p>Find any breaks within a run in the supplied text.</p> - * - * @param text A UText representing the text. The iterator is left at -- * the end of the run of characters which the engine is capable of handling -+ * the end of the run of characters which the engine is capable of handling - * that starts from the first character in the range. - * @param startPos The start of the run within the supplied text. - * @param endPos The end of the run within the supplied text. -@@ -218,118 +289,120 @@ - - }; - --/******************************************************************* -- * BurmeseBreakEngine -- */ -- --/** -- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a -- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> -- * -- * <p>After it is constructed a BurmeseBreakEngine may be shared between -- * threads without synchronization.</p> -- */ --class BurmeseBreakEngine : public DictionaryBreakEngine { -- private: -- /** -- * The set of characters handled by this engine -- * @internal -- */ -- -- UnicodeSet fBurmeseWordSet; +@@ -293,11 +364,13 @@ + */ + + UnicodeSet fKhmerWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - -- public: -- -- /** -- * <p>Default constructor.</p> -- * -- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the -- * engine is deleted. -- */ -- BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); -- -- /** -- * <p>Virtual destructor.</p> -- */ -- virtual ~BurmeseBreakEngine(); -- -- protected: -- /** -- * <p>Divide up a range of known dictionary characters.</p> -- * -- * @param text A UText representing the text -- * @param rangeStart The start of the range of dictionary characters -- * @param rangeEnd The end of the range of dictionary characters -- * @param foundBreaks Output of C array of int32_t break positions, or 0 -- * @return The number of breaks found -- */ -- virtual int32_t divideUpDictionaryRange( UText *text, -- int32_t rangeStart, -- int32_t rangeEnd, -- UVector32 &foundBreaks ) const; -- --}; -- --/******************************************************************* -- * KhmerBreakEngine -- */ -- --/** -- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a -- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> -- * -- * <p>After it is constructed a KhmerBreakEngine may be shared between -- * threads without synchronization.</p> -- */ --class KhmerBreakEngine : public DictionaryBreakEngine { -- private: -- /** -- * The set of characters handled by this engine -- * @internal -- */ -- -- UnicodeSet fKhmerWordSet; -- UnicodeSet fEndWordSet; -- UnicodeSet fBeginWordSet; -- UnicodeSet fMarkSet; -- DictionaryMatcher *fDictionary; -- -- public: -- -- /** -- * <p>Default constructor.</p> -- * -- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the -- * engine is deleted. -- */ -- KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); -- -- /** -- * <p>Virtual destructor.</p> -- */ -- virtual ~KhmerBreakEngine(); -- -- protected: -- /** -- * <p>Divide up a range of known dictionary characters.</p> -- * -- * @param text A UText representing the text -- * @param rangeStart The start of the range of dictionary characters -- * @param rangeEnd The end of the range of dictionary characters -- * @param foundBreaks Output of C array of int32_t break positions, or 0 -- * @return The number of breaks found -- */ -- virtual int32_t divideUpDictionaryRange( UText *text, -- int32_t rangeStart, -- int32_t rangeEnd, -- UVector32 &foundBreaks ) const; -- --}; -- -+/******************************************************************* -+ * BurmeseBreakEngine -+ */ -+ -+/** -+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a -+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> -+ * -+ * <p>After it is constructed a BurmeseBreakEngine may be shared between -+ * threads without synchronization.</p> -+ */ -+class BurmeseBreakEngine : public DictionaryBreakEngine { -+ private: -+ /** -+ * The set of characters handled by this engine -+ * @internal -+ */ -+ -+ UnicodeSet fBurmeseWordSet; -+ UnicodeSet fEndWordSet; -+ UnicodeSet fBeginWordSet; -+ UnicodeSet fMarkSet; -+ DictionaryMatcher *fDictionary; -+ -+ public: -+ -+ /** -+ * <p>Default constructor.</p> -+ * -+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the -+ * engine is deleted. -+ */ -+ BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); -+ -+ /** -+ * <p>Virtual destructor.</p> -+ */ -+ virtual ~BurmeseBreakEngine(); -+ -+ protected: -+ /** -+ * <p>Divide up a range of known dictionary characters.</p> -+ * -+ * @param text A UText representing the text -+ * @param rangeStart The start of the range of dictionary characters -+ * @param rangeEnd The end of the range of dictionary characters -+ * @param foundBreaks Output of C array of int32_t break positions, or 0 -+ * @return The number of breaks found -+ */ -+ virtual int32_t divideUpDictionaryRange( UText *text, -+ int32_t rangeStart, -+ int32_t rangeEnd, -+ UVector32 &foundBreaks ) const; -+ -+}; -+ -+/******************************************************************* -+ * KhmerBreakEngine -+ */ -+ -+/** -+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a -+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> -+ * -+ * <p>After it is constructed a KhmerBreakEngine may be shared between -+ * threads without synchronization.</p> -+ */ -+class KhmerBreakEngine : public DictionaryBreakEngine { -+ private: -+ /** -+ * The set of characters handled by this engine -+ * @internal -+ */ -+ -+ UnicodeSet fKhmerWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fPuncSet; + DictionaryMatcher *fDictionary; @@ -942,44 +753,12 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h + const uint32_t BADSNLP = 256 * 20; + const uint32_t kuint32max = 0x7FFFFFFF; + -+ public: -+ -+ /** -+ * <p>Default constructor.</p> -+ * -+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the -+ * engine is deleted. -+ */ -+ KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); -+ -+ /** -+ * <p>Virtual destructor.</p> -+ */ -+ virtual ~KhmerBreakEngine(); -+ -+ protected: -+ /** -+ * <p>Divide up a range of known dictionary characters.</p> -+ * -+ * @param text A UText representing the text -+ * @param rangeStart The start of the range of dictionary characters -+ * @param rangeEnd The end of the range of dictionary characters -+ * @param foundBreaks Output of C array of int32_t break positions, or 0 -+ * @return The number of breaks found -+ */ -+ virtual int32_t divideUpDictionaryRange( UText *text, -+ int32_t rangeStart, -+ int32_t rangeEnd, -+ UVector32 &foundBreaks ) const; -+ -+}; -+ - #if !UCONFIG_NO_NORMALIZATION - - /******************************************************************* + public: + + /** diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp ---- icu.org/source/common/dictionarydata.cpp 2018-09-29 02:34:41.000000000 +0200 -+++ icu/source/common/dictionarydata.cpp 2018-10-19 14:21:00.339942804 +0200 +--- icu.org/source/common/dictionarydata.cpp 2020-04-22 22:04:20.000000000 +0200 ++++ icu/source/common/dictionarydata.cpp 2020-05-11 18:50:43.703113749 +0200 @@ -44,7 +44,7 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, @@ -1027,8 +806,8 @@ diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionaryda if (values != NULL) { values[wordCount] = bt.getValue(); diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h ---- icu.org/source/common/dictionarydata.h 2018-09-29 02:34:41.000000000 +0200 -+++ icu/source/common/dictionarydata.h 2018-10-19 14:21:00.339942804 +0200 +--- icu.org/source/common/dictionarydata.h 2020-04-22 22:04:20.000000000 +0200 ++++ icu/source/common/dictionarydata.h 2020-05-11 18:50:43.704113746 +0200 @@ -21,6 +21,7 @@ #include "unicode/utext.h" #include "unicode/udata.h" |