diff options
author | Martin Hosken <martin_hosken@sil.org> | 2011-03-09 14:29:05 +0700 |
---|---|---|
committer | Martin Hosken <martin_hosken@sil.org> | 2011-03-10 22:40:13 +0700 |
commit | 52934a63ff91754fb3f75d4a34ee0867920ed819 (patch) | |
tree | 3f914bfb495763c297adc0914de3b8cdaa172909 /i18npool | |
parent | 8e1679399097ad30091decd6eff000adc01f8e7e (diff) |
Work around ICU bug regarding SMP ranges not working, so expand them
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/source/breakiterator/data/char.txt | 8 |
1 files changed, 3 insertions, 5 deletions
diff --git a/i18npool/source/breakiterator/data/char.txt b/i18npool/source/breakiterator/data/char.txt index c298b73dc77c..b2772db0676f 100644 --- a/i18npool/source/breakiterator/data/char.txt +++ b/i18npool/source/breakiterator/data/char.txt @@ -23,7 +23,7 @@ $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; # In effect it is [\p{Grapheme_Cluster_Break = SpacingMark} - \u0E30 \u0E32 \u0E45 \u0EB0 \u0EB2 \u102B \u102C \u1038 # \u1062-\u1064 \u1067-\u106D \u1083 \u1087-\u108C \u108F \u109A-\u109C \u19B0-\u19B4 \u19B8-\u19C0 \u19C8 \u19C9 # \u1A61 \u1A63 \u1A64 \u1BE7 \u1BEA-\u1BEC \u1BEE \u1BF2 \u1BF3 \uAA7B -$IndicSpacing = [\u0903 \u093B \u093E-\u0940 \u0949-\u094C \u094E \u094F \u0982 \u0983 \u09BF \u09C0 \u09C7 \u09C8 \u09CB \u09CC \u0A03 \u0A3E-\u0A40 \u0A83 \u0ABE-\u0AC0 \u0AC9 \u0ACB \u0ACC \u0B02 \u0B03 \u0B40 \u0B47 \u0B48 \u0B4B-\u0B4C \u0BBF \u0BC1 \u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0C01-\u0C03 \u0C41-\u0C44 \u0C82 \u0C83 \u0CBE \u0CC0 \u0CC1 \u0CC3 \u0CC4 \u0CC7 \u0CC8 \u0CCA \u0CCB \u0D02 \u0D03 \u0D3F \u0D40 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D82 \u0D83 \u0DD0 \u0DD1 \u0DD8-\u0DDE \u0F3E \u0F3F \u0F7F \u1923-\u1926 \u1929-\u192B \u1930 \u1931 \u1933-\u1938 \u1A19-\u1A1B \u1B04 \u1B35 \u1B3B \u1B3D-\u1B41 \u1B43 \u1B44 \u1B82 \u1BA1 \u1BA6 \u1BA7 \u1BAA \u1C24-\u1C2B \u1C34 \u1C35 \u1CE1 \u1CF2 \uA880 \uA881 \uA8B4-\uA8C3 \uA952 \uA953 \uA983 \uA9B4 \uA9B5 \uA9BA \uA9BB \uA9BD-\uA9C0 \uAA2F \uAA30 \uAA33 \uAA34 \uABE3 \uABE4 \uABE6 \uABE7 \uABE9 \uABEA \uABEC \u11000 \u11002 \u11082 \u110B0-\u110B2 \u110B7 \u100B8 \u1D166 \u1D16D]; +$IndicSpacing = [\u0903 \u093B \u093E-\u0940 \u0949-\u094C \u094E \u094F \u0982 \u0983 \u09BF \u09C0 \u09C7 \u09C8 \u09CB \u09CC \u0A03 \u0A3E-\u0A40 \u0A83 \u0ABE-\u0AC0 \u0AC9 \u0ACB \u0ACC \u0B02 \u0B03 \u0B40 \u0B47 \u0B48 \u0B4B-\u0B4C \u0BBF \u0BC1 \u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0C01-\u0C03 \u0C41-\u0C44 \u0C82 \u0C83 \u0CBE \u0CC0 \u0CC1 \u0CC3 \u0CC4 \u0CC7 \u0CC8 \u0CCA \u0CCB \u0D02 \u0D03 \u0D3F \u0D40 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D82 \u0D83 \u0DD0 \u0DD1 \u0DD8-\u0DDE \u0F3E \u0F3F \u0F7F \u1923-\u1926 \u1929-\u192B \u1930 \u1931 \u1933-\u1938 \u1A19-\u1A1B \u1B04 \u1B35 \u1B3B \u1B3D-\u1B41 \u1B43 \u1B44 \u1B82 \u1BA1 \u1BA6 \u1BA7 \u1BAA \u1C24-\u1C2B \u1C34 \u1C35 \u1CE1 \u1CF2 \uA880 \uA881 \uA8B4-\uA8C3 \uA952 \uA953 \uA983 \uA9B4 \uA9B5 \uA9BA \uA9BB \uA9BD-\uA9C0 \uAA2F \uAA30 \uAA33 \uAA34 \uABE3 \uABE4 \uABE6 \uABE7 \uABE9 \uABEA \uABEC \u11000 \u11002 \u11082 \u110B0 \u110B1 \u110B2 \u110B7 \u100B8 \u1D166 \u1D16D]; # SEAsian (Thai, Lao, Burmese, Tai Lue, Tai Tham, Batak) are cluster based not syllable based $SEASpacing = [\u0E33 \u0EB3 \u1031 \u103B \u103C \u1056 \u1057 \u1084 \u17B6 \u17BE-\u17C5 \u17C7 \u17C8 \u19B5-\u19B7 \u19BA \u1A55 \u1A57 \u1A6D-\u1A72 \uA823 \uA824 \uA827 \uAA4D]; $BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1]; @@ -79,8 +79,7 @@ $L ($L | $V | $LV | $LVT); [^$Control $CR $LF] $Extend; -[^$Control $CR $LF] $IndicSpacing; -[^$Control $CR $LF] $SEASpacing; +[^$Control $CR $LF] ($IndicSpacing | $SEASpacing); #[^$Control $CR $LF] $SpacingMark; # $Prepend [^$Control $CR $LF]; @@ -103,8 +102,7 @@ $LF $CR; $T ($LVT | $T); $Extend [^$Control $CR $LF]; -$IndicSpacing [^$Control $CR $LF]; -$SEASpacing [^$Control $CR $LF]; +($IndicSpacing | $SEASpacing) [^$Control $CR $LF]; #$SpacingMark [^$Control $CR $LF]; # [^$Control $CR $LF] $Prepend; |