summary | refs | log | tree | commit | diff
path: root/i18npool/source/breakiterator
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2012-07-25 11:37:58 +0100
committer Caolán McNamara <caolanm@redhat.com> 2012-07-25 11:37:58 +0100
commit 071a0dc02c90e939a23f99af1f9c4c65982bdd7d (patch)
tree e9162edee1e4543a9afe1b226175ffa6db86d32b /i18npool/source/breakiterator
parent 4f1e5b34211a714baad8a05ab38f3f953ebd4cce (diff)
Related: #i85411# catalan word breaking rules out of sync with ZWNJ
I can see no reason to have specific Catalan rules; old examples work fine with default rules.

Change-Id: Ifacb7b46204d8aed543ab0c77fe80d1d5c5de738
Diffstat (limited to 'i18npool/source/breakiterator')
-rw-r--r-- i18npool/source/breakiterator/data/README 80
-rw-r--r-- i18npool/source/breakiterator/data/dict_word_ca.txt 21
2 files changed, 16 insertions, 85 deletions
diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README
index cf74ec4435a5..b058a2d575e3 100644
--- a/i18npool/source/breakiterator/data/README
+++ b/i18npool/source/breakiterator/data/README
@@ -69,85 +69,6 @@ Date: Thu Oct 2 13:51:29 2008 +0000
#i80412#
-commit 672a654fa6b447df0397942c1fa6594bb63264b9
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:31:04 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.2.132); FILE MERGED
- 2008/07/23 23:07:46 khong 1.2.132.1: #i85411# Apply patch for ZWSP
-
-commit c75401da0c36bb518c41971d07660010ec745dd0
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:30:52 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.2.230); FILE MERGED
- 2008/07/23 23:07:46 khong 1.2.230.1: #i85411# Apply patch for ZWSP
-
-commit 43f49bd7d04fcc64941b5576a804f1b8bab76423
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:30:39 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.3.314); FILE MERGED
- 2008/07/23 23:07:46 khong 1.3.314.1: #i85411# Apply patch for ZWSP
-
-commit 8c4bc258ab77b586325a868d75094b1e041bd57e
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:30:26 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED
- 2008/07/23 23:07:45 khong 1.5.214.1: #i85411# Apply patch for ZWSP
-
-commit 0c008c4b9b1957fffb62175a31a7085f98afbd6a
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:30:05 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.3.214); FILE MERGED
- 2008/07/23 23:07:45 khong 1.3.214.1: #i85411# Apply patch for ZWSP
-
-commit 01a7b977a133a910845c7226f36640f2edaf2ce9
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:29:53 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.2.184); FILE MERGED
- 2008/07/23 23:07:45 khong 1.2.184.1: #i85411# Apply patch for ZWSP
-
-commit 77cd396b673caa67dc1d56ecf44ee5f619244e77
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:29:40 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.2.114); FILE MERGED
- 2008/07/23 23:07:45 khong 1.2.114.1: #i85411# Apply patch for ZWSP
-
-commit 1e8949e19eb5f63504ab634c9a3e55b4b48484e0
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:29:27 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.4.214); FILE MERGED
- 2008/07/23 23:07:45 khong 1.4.214.1: #i85411# Apply patch for ZWSP
-
-commit 601733f145bf518eec4d29c2319c1f61ebd83d96
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:29:14 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED
- 2008/07/23 23:07:45 khong 1.5.214.2: #i85411# Apply patch for ZWSP
- 2008/07/23 07:35:04 khong 1.5.214.1: #i85411# Apply patch for ZWSP
-
-commit 744a220b2950f488c50e7380fd45232e24921438
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:28:18 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.3.18); FILE MERGED
- 2008/07/23 23:07:45 khong 1.3.18.1: #i85411# Apply patch for ZWSP
-
-commit 8ead581613efb4ecd6121a195e04c4f5a7bc8bf1
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Thu Aug 14 15:27:36 2008 +0000
-
- INTEGRATION: CWS i18n44 (1.27.6); FILE MERGED
- 2008/07/24 16:12:44 khong 1.27.6.2: #i85411# Apply patch for ZWSP
- 2008/07/23 23:07:44 khong 1.27.6.1: #i85411# Apply patch for ZWSP
-
commit 9964a76ef58786bba47d409970512d7ded6c8889
Author: Rüdiger Timm <rt@openoffice.org>
Date: Wed Jul 2 07:53:05 2008 +0000
@@ -700,6 +621,7 @@ Date: Tue Jan 20 12:20:28 2004 +0000
done, regression tests added:
+#i85411# Apply patch for ZWSP
#i17155# fix line breakiterator rule to make slash and hyphen as part of word when doing line break
#i13451# add '-' as midLetter for Catalan dictionary word breakiterator
#i13494# fix word breakiterator rule to handle punctuations and signs correctly
diff --git a/i18npool/source/breakiterator/data/dict_word_ca.txt b/i18npool/source/breakiterator/data/dict_word_ca.txt
index 6ad6a0bbb915..b1666f44daab 100644
--- a/i18npool/source/breakiterator/data/dict_word_ca.txt
+++ b/i18npool/source/breakiterator/data/dict_word_ca.txt
@@ -21,18 +21,24 @@ $Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND M
[:name = HALFWIDTH KATAKANA VOICED SOUND MARK:]
[:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]];
+$Ideographic = [:Ideographic:];
+$Hangul = [:Script = HANGUL:];
$ALetter = [[:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:]
+ - $Ideographic
- $Katakana
+ - $Hangul
- [:Script = Thai:]
- [:Script = Lao:]
- [:Script = Hiragana:]];
-$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:]
+$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:]
[:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:]
- [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] [:name = HYPHEN-MINUS:]];
-
+ [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:]
+ [:name = HYPHEN-MINUS:] ];
+
$SufixLetter = [:name= FULL STOP:];
+
$MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:]
[:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:]
@@ -60,7 +66,7 @@ $Extend = [[:Grapheme_Extend = TRUE:]];
#
####################################################################################
-$Format = [[:Cf:]];
+$Format = [[:Cf:] - $TheZWSP];
@@ -80,6 +86,8 @@ $MidNumEx = $MidNum $Extend*;
$MidLetterEx = $MidLetter $Extend*;
$SufixLetterEx= $SufixLetter $Extend*;
$KatakanaEx = $Katakana $Extend*;
+$IdeographicEx= $Ideographic $Extend*;
+$HangulEx = $Hangul $Extend*;
$FormatEx = $Format $Extend*;
@@ -111,7 +119,8 @@ $KatakanaEx ($FormatEx* $KatakanaEx)* {300};
# Separated from the "Everything Else" rule, below, only so that they
# can be tagged with a return value. TODO: is this what we want?
#
-# [:IDEOGRAPHIC:] $Extend* {400};
+$IdeographicEx ($FormatEx* $IdeographicEx)* {400};
+$HangulEx ($FormatEx* $HangulEx)* {400};
#
# Everything Else, with no tag.
@@ -132,7 +141,7 @@ $CR $LF;
# reaches something that can only be the start (and probably only) char in a "word".
# A space or punctuation meets the test.
#
-$NonStarters = [$Numeric $ALetter $Katakana [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format];
+$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format];
#!.*;
! ($NonStarters* | \n \r) .;