1 files changed, 124 insertions, 122 deletions
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
index 6b45b3743611..9f134dd961b1 100644
--- a/external/icu/icu4c-khmerbreakengine.patch.1
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -1,16 +1,18 @@
 diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
---- icu.org/source/common/dictbe.cpp	2017-01-20 01:20:31.000000000 +0100
-+++ icu/source/common/dictbe.cpp	2017-04-21 23:14:23.845894374 +0200
-@@ -29,8 +29,17 @@
+--- icu.org/source/common/dictbe.cpp	2018-03-26 15:38:30.000000000 +0200
++++ icu/source/common/dictbe.cpp	2018-04-17 17:55:38.620944919 +0200
+@@ -29,7 +29,19 @@
   ******************************************************************
   */
  
--DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
-+DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) :
-+    clusterLimit(3)
-+{
+-DictionaryBreakEngine::DictionaryBreakEngine() {
++DictionaryBreakEngine::DictionaryBreakEngine()
++    : fTypes(0), clusterLimit(0) {
++}
++
++DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes)
++    : fTypes(breakTypes), clusterLimit(3) {
 +    UErrorCode status = U_ZERO_ERROR;
-     fTypes = breakTypes;
 +    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
 +
 +    // note Skip Sets contain fIgnoreSet characters too.
@@ -20,16 +22,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  }
  
  DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -92,7 +101,7 @@
-         result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
-         utext_setNativeIndex(text, current);
-     }
--    
-+
-     return result;
- }
- 
-@@ -103,6 +112,169 @@
+@@ -76,6 +88,169 @@
      fSet.compact();
  }
  
@@ -199,7 +192,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  /*
   ******************************************************************
   * PossibleWord
-@@ -130,35 +302,35 @@
+@@ -103,35 +278,35 @@
  public:
      PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {};
      ~PossibleWord() {};
@@ -244,7 +237,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
          // Dictionary leaves text after longest prefix, not longest word. Back up.
          if (count <= 0) {
              utext_setNativeIndex(text, start);
-@@ -830,51 +1002,28 @@
+@@ -803,51 +978,28 @@
   * KhmerBreakEngine
   */
  
@@ -265,7 +258,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
 -
  KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
-     : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
+-    : DictionaryBreakEngine(),
++    : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
        fDictionary(adoptDictionary)
  {
 -    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
@@ -301,13 +295,13 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -    fEndWordSet.compact();
 -    fBeginWordSet.compact();
 -//    fSuffixSet.compact();
-+	fIgnoreSet.compact();
-+	fBaseSet.compact();
-+	fPuncSet.compact();
++    fIgnoreSet.compact();
++    fBaseSet.compact();
++    fPuncSet.compact();
  }
  
  KhmerBreakEngine::~KhmerBreakEngine() {
-@@ -886,180 +1035,204 @@
+@@ -859,180 +1011,204 @@
                                                  int32_t rangeStart,
                                                  int32_t rangeEnd,
                                                  UVector32 &foundBreaks ) const {
@@ -350,17 +344,6 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +            foundBreaks.push(rangeEnd, status);
 +        return foundBreaks.size() - wordsFound;
 +    }
-+
-+    scanStart = rangeStart;
-+    scanWJ(text, scanStart, rangeEnd, before, after);
-+    if (startZwsp || initAfter >= before) {
-+        after = initAfter;
-+        before = 0;
-+    }
-+    if (!endZwsp && after > finalBefore && after < rangeEnd)
-+        endZwsp = true;
-+    if (endZwsp && before > finalBefore)
-+        before = finalBefore;
  
 -    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
 -        cuWordLength = 0;
@@ -375,7 +358,17 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -            cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
 -            wordsFound += 1;
 -        }
--
++    scanStart = rangeStart;
++    scanWJ(text, scanStart, rangeEnd, before, after);
++    if (startZwsp || initAfter >= before) {
++        after = initAfter;
++        before = 0;
++    }
++    if (!endZwsp && after > finalBefore && after < rangeEnd)
++        endZwsp = true;
++    if (endZwsp && before > finalBefore)
++        before = finalBefore;
+ 
 -        // If there was more than one, see which one can take us forward the most words
 -        else if (candidates > 1) {
 -            // If we're already at the end of the range, we're done
@@ -390,22 +383,6 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -                        words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
 -                        wordsMatched = 2;
 -                    }
--
--                    // If we're already at the end of the range, we're done
--                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
--                        goto foundBest;
--                    }
--
--                    // See if any of the possible second words is followed by a third word
--                    do {
--                        // If we find a third word, stop right away
--                        if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
--                            words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
--                            goto foundBest;
--                        }
--                    }
--                    while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
--                }
 +    utext_setNativeIndex(text, rangeStart);
 +    int32_t numCodePts = rangeEnd - rangeStart;
 +    // bestSnlp[i] is the snlp of the best segmentation of the first i
@@ -415,7 +392,11 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +    for(int32_t i = 1; i <= numCodePts; i++) {
 +        bestSnlp.addElement(kuint32max, status);
 +    }
-+
+ 
+-                    // If we're already at the end of the range, we're done
+-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+-                        goto foundBest;
+-                    }
 +    // prev[i] is the index of the last code point in the previous word in
 +    // the best segmentation of the first i characters. Note negative implies
 +	// that the code point is part of an unknown word.
@@ -423,7 +404,17 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +    for(int32_t i = 0; i <= numCodePts; i++) {
 +        prev.addElement(kuint32max, status);
 +    }
-+
+ 
+-                    // See if any of the possible second words is followed by a third word
+-                    do {
+-                        // If we find a third word, stop right away
+-                        if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+-                            words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+-                            goto foundBest;
+-                        }
+-                    }
+-                    while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
+-                }
 +    const int32_t maxWordSize = 20;
 +    UVector32 values(maxWordSize, status);
 +    values.setSize(maxWordSize);
@@ -528,27 +519,17 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -                if (cuWordLength <= 0) {
 -                    wordsFound += 1;
 -                }
--
--                // Update the length with the passed-over characters
--                cuWordLength += chars;
--            }
--            else {
--                // Back up to where we were for next iteration
--                utext_setNativeIndex(text, current+cuWordLength);
 +                } while (fMarkSet.contains(c) || fIgnoreSet.contains(c));
 +                values.setElementAt(BADSNLP, count);
 +                lengths.setElementAt(utext_getNativeIndex(text) - currix, count++);
 +            } else {
 +                values.setElementAt(BADSNLP, count);
 +                lengths.setElementAt(1, count++);
-             }
-         }
++            }
++        }
  
--        // Never stop before a combining mark.
--        int32_t currPos;
--        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
--            utext_next32(text);
--            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+-                // Update the length with the passed-over characters
+-                cuWordLength += chars;
 +        for (int32_t j = 0; j < count; j++) {
 +            uint32_t v = values.elementAti(j);
 +            int32_t newSnlp = bestSnlp.elementAti(i) + v;
@@ -559,7 +540,10 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +                ++ln;
 +                utext_next32(text);
 +                c = utext_current32(text);
-+            }
+             }
+-            else {
+-                // Back up to where we were for next iteration
+-                utext_setNativeIndex(text, current+cuWordLength);
 +            int32_t ln_j_i = ln + i;   // yes really i!
 +            if (newSnlp < bestSnlp.elementAti(ln_j_i)) {
 +                if (v == BADSNLP) {
@@ -572,9 +556,38 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +                else
 +                    prev.setElementAt(i, ln_j_i);
 +                bestSnlp.setElementAt(newSnlp, ln_j_i);
-+            }
+             }
          }
 -
+-        // Never stop before a combining mark.
+-        int32_t currPos;
+-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+-            utext_next32(text);
+-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
++    }
++    // Start pushing the optimal offset index into t_boundary (t for tentative).
++    // prev[numCodePts] is guaranteed to be meaningful.
++    // We'll first push in the reverse order, i.e.,
++    // t_boundary[0] = numCodePts, and afterwards do a swap.
++    UVector32 t_boundary(numCodePts+1, status);
++
++    int32_t numBreaks = 0;
++    // No segmentation found, set boundary to end of range
++    while (numCodePts >= 0 && (uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
++        --numCodePts;
++    }
++    if (numCodePts < 0) {
++        t_boundary.addElement(numCodePts, status);
++        numBreaks++;
++    } else {
++        for (int32_t i = numCodePts; (uint32_t)i != kuint32max; i = prev.elementAti(i)) {
++            if (i < 0) i = -i;
++            t_boundary.addElement(i, status);
++            numBreaks++;
+         }
++        U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
++    }
+ 
 -        // Look ahead for possible suffixes if a dictionary word does not follow.
 -        // We do this in code rather than using a rule so that the heuristic
 -        // resynch continues to function. For example, one of the suffix characters
@@ -616,30 +629,6 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 -        // Did we find a word on this iteration? If so, push it on the break stack
 -        if (cuWordLength > 0) {
 -            foundBreaks.push((current+cuWordLength), status);
-+    }
-+    // Start pushing the optimal offset index into t_boundary (t for tentative).
-+    // prev[numCodePts] is guaranteed to be meaningful.
-+    // We'll first push in the reverse order, i.e.,
-+    // t_boundary[0] = numCodePts, and afterwards do a swap.
-+    UVector32 t_boundary(numCodePts+1, status);
-+
-+    int32_t numBreaks = 0;
-+    // No segmentation found, set boundary to end of range
-+    while (numCodePts >= 0 && (uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
-+        --numCodePts;
-+    }
-+    if (numCodePts < 0) {
-+        t_boundary.addElement(numCodePts, status);
-+        numBreaks++;
-+    } else {
-+        for (int32_t i = numCodePts; (uint32_t)i != kuint32max; i = prev.elementAti(i)) {
-+            if (i < 0) i = -i;
-+            t_boundary.addElement(i, status);
-+            numBreaks++;
-+        }
-+        U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
-+    }
-+
 +    // Now that we're done, convert positions in t_boundary[] (indices in
 +    // the normalized input string) back to indices in the original input UText
 +    // while reversing t_boundary and pushing values to foundBreaks.
@@ -669,38 +658,35 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  
  #if !UCONFIG_NO_NORMALIZATION
 diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
---- icu.org/source/common/dictbe.h	2017-01-20 01:20:31.000000000 +0100
-+++ icu/source/common/dictbe.h	2017-04-21 23:14:23.845894374 +0200
-@@ -34,6 +34,15 @@
+--- icu.org/source/common/dictbe.h	2018-03-26 15:38:30.000000000 +0200
++++ icu/source/common/dictbe.h	2018-04-17 14:55:33.307639865 +0200
+@@ -34,7 +34,8 @@
+  * threads without synchronization.</p>
   */
  class DictionaryBreakEngine : public LanguageBreakEngine {
-  private:
-+
-+  /**
-+   * <p>Default constructor.</p>
-+   *
-+   */
-+  DictionaryBreakEngine();
-+
+- private:
 + protected:
 +
      /**
       * The set of characters handled by this engine
       * @internal
-@@ -48,11 +57,63 @@
+@@ -42,14 +43,84 @@
  
-   uint32_t      fTypes;
+   UnicodeSet    fSet;
  
 +  const int32_t WJ   = 0x2060;
 +  const int32_t ZWSP = 0x200B;
 +
-   /**
--   * <p>Default constructor.</p>
--   *
++  /**
++   * The break types it was constructed with
++   * @internal
++   */
++  uint32_t      fTypes;
++
++  /**
 +   * A Unicode set of all viramas
 +   * @internal
-    */
--  DictionaryBreakEngine();
++   */
 +  UnicodeSet    fViramaSet;
 +
 +  /**
@@ -751,10 +737,26 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
 +  bool scanAfterEnd(UText *text, int32_t rangeEnd, int32_t& end, bool &doBreak) const;
 +  void scanBackClusters(UText *text, int32_t textStart, int32_t& start) const;
 +  void scanFwdClusters(UText *text, int32_t textEnd, int32_t& end) const;
- 
++
   public:
  
-@@ -83,7 +144,7 @@
+   /**
+-   * <p>Constructor </p>
++   * <p>Default constructor.</p>
++   *
+    */
+   DictionaryBreakEngine();
+ 
+   /**
++   * <p>Constructor with break types.</p>
++   */
++  explicit DictionaryBreakEngine(uint32_t breakTypes);
++
++  /**
+    * <p>Virtual destructor.</p>
+    */
+   virtual ~DictionaryBreakEngine();
+@@ -68,7 +139,7 @@
     * <p>Find any breaks within a run in the supplied text.</p>
     *
     * @param text A UText representing the text. The iterator is left at
@@ -763,7 +765,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
     * that starts from the first character in the range.
     * @param startPos The start of the run within the supplied text.
     * @param endPos The end of the run within the supplied text.
-@@ -245,118 +306,120 @@
+@@ -218,118 +289,120 @@
  
  };
  
@@ -997,8 +999,8 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
  
  /*******************************************************************
 diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
---- icu.org/source/common/dictionarydata.cpp	2017-01-20 01:20:31.000000000 +0100
-+++ icu/source/common/dictionarydata.cpp	2017-04-21 23:14:23.846894372 +0200
+--- icu.org/source/common/dictionarydata.cpp	2018-03-26 15:38:30.000000000 +0200
++++ icu/source/common/dictionarydata.cpp	2018-04-17 14:04:50.775567214 +0200
 @@ -44,7 +44,7 @@
  
  int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
@@ -1046,8 +1048,8 @@ diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionaryda
                  if (values != NULL) {
                      values[wordCount] = bt.getValue();
 diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
---- icu.org/source/common/dictionarydata.h	2017-01-20 01:20:31.000000000 +0100
-+++ icu/source/common/dictionarydata.h	2017-04-21 23:14:23.846894372 +0200
+--- icu.org/source/common/dictionarydata.h	2018-03-26 15:38:30.000000000 +0200
++++ icu/source/common/dictionarydata.h	2018-04-17 14:04:50.775567214 +0200
 @@ -21,6 +21,7 @@
  #include "unicode/utext.h"
  #include "unicode/udata.h"
@@ -1084,8 +1086,8 @@ diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata
  private:
      UChar32 transform(UChar32 c) const;
 diff -ur icu.org/source/data/Makefile.in icu/source/data/Makefile.in
---- icu.org/source/data/Makefile.in	2017-04-21 23:13:03.248087545 +0200
-+++ icu/source/data/Makefile.in	2017-04-21 23:14:23.846894372 +0200
+--- icu.org/source/data/Makefile.in	2018-04-17 12:28:37.098707466 +0200
++++ icu/source/data/Makefile.in	2018-04-17 14:04:50.775567214 +0200
 @@ -183,7 +183,7 @@
  endif
  endif