diff options
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/qa/cppunit/test_breakiterator.cxx | 38 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/README | 80 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word_ca.txt | 21 |
3 files changed, 54 insertions, 85 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 403c71bdbf3a..b04dc531ebec 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -397,6 +397,44 @@ void TestBreakIterator::testWordBoundaries() while (nPos++ < aTest.getLength()); CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected)); } + + //See https://issues.apache.org/ooo/show_bug.cgi?id=85411 + for (int j = 0; j < 2; ++j) + { + switch (j) + { + case 0: + aLocale.Language = rtl::OUString("en"); + aLocale.Country = rtl::OUString("US"); + break; + case 1: + aLocale.Language = rtl::OUString("ca"); + aLocale.Country = rtl::OUString("ES"); + break; + default: + CPPUNIT_ASSERT(false); + break; + } + + const sal_Unicode TEST[] = + { + 'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o' + }; + rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST)); + + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {1, 6, 9, 12}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected)); + nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true).endPos; + CPPUNIT_ASSERT(aExpected[i++] == nPos); + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected)); + } } //See http://qa.openoffice.org/issues/show_bug.cgi?id=111152 diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README index cf74ec4435a5..b058a2d575e3 100644 --- a/i18npool/source/breakiterator/data/README +++ b/i18npool/source/breakiterator/data/README @@ -69,85 +69,6 @@ Date: Thu Oct 2 13:51:29 2008 +0000 #i80412# -commit 672a654fa6b447df0397942c1fa6594bb63264b9 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:31:04 2008 +0000 - - INTEGRATION: CWS i18n44 (1.2.132); FILE MERGED - 2008/07/23 23:07:46 khong 1.2.132.1: #i85411# Apply patch for ZWSP - -commit c75401da0c36bb518c41971d07660010ec745dd0 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:30:52 2008 +0000 - - INTEGRATION: CWS i18n44 (1.2.230); FILE MERGED - 2008/07/23 23:07:46 khong 1.2.230.1: #i85411# Apply patch for ZWSP - -commit 43f49bd7d04fcc64941b5576a804f1b8bab76423 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:30:39 2008 +0000 - - INTEGRATION: CWS i18n44 (1.3.314); FILE MERGED - 2008/07/23 23:07:46 khong 1.3.314.1: #i85411# Apply patch for ZWSP - -commit 8c4bc258ab77b586325a868d75094b1e041bd57e -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:30:26 2008 +0000 - - INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED - 2008/07/23 23:07:45 khong 1.5.214.1: #i85411# Apply patch for ZWSP - -commit 0c008c4b9b1957fffb62175a31a7085f98afbd6a -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:30:05 2008 +0000 - - INTEGRATION: CWS i18n44 (1.3.214); FILE MERGED - 2008/07/23 23:07:45 khong 1.3.214.1: #i85411# Apply patch for ZWSP - -commit 01a7b977a133a910845c7226f36640f2edaf2ce9 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:29:53 2008 +0000 - - INTEGRATION: CWS i18n44 (1.2.184); FILE MERGED - 2008/07/23 23:07:45 khong 1.2.184.1: #i85411# Apply patch for ZWSP - -commit 77cd396b673caa67dc1d56ecf44ee5f619244e77 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:29:40 2008 +0000 - - INTEGRATION: CWS i18n44 (1.2.114); FILE MERGED - 2008/07/23 23:07:45 khong 1.2.114.1: #i85411# Apply patch for ZWSP - -commit 1e8949e19eb5f63504ab634c9a3e55b4b48484e0 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:29:27 2008 +0000 - - INTEGRATION: CWS i18n44 (1.4.214); FILE MERGED - 2008/07/23 23:07:45 khong 1.4.214.1: #i85411# Apply patch for ZWSP - -commit 601733f145bf518eec4d29c2319c1f61ebd83d96 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:29:14 2008 +0000 - - INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED - 2008/07/23 23:07:45 khong 1.5.214.2: #i85411# Apply patch for ZWSP - 2008/07/23 07:35:04 khong 1.5.214.1: #i85411# Apply patch for ZWSP - -commit 744a220b2950f488c50e7380fd45232e24921438 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:28:18 2008 +0000 - - INTEGRATION: CWS i18n44 (1.3.18); FILE MERGED - 2008/07/23 23:07:45 khong 1.3.18.1: #i85411# Apply patch for ZWSP - -commit 8ead581613efb4ecd6121a195e04c4f5a7bc8bf1 -Author: Kurt Zenker <kz@openoffice.org> -Date: Thu Aug 14 15:27:36 2008 +0000 - - INTEGRATION: CWS i18n44 (1.27.6); FILE MERGED - 2008/07/24 16:12:44 khong 1.27.6.2: #i85411# Apply patch for ZWSP - 2008/07/23 23:07:44 khong 1.27.6.1: #i85411# Apply patch for ZWSP - commit 9964a76ef58786bba47d409970512d7ded6c8889 Author: Rüdiger Timm <rt@openoffice.org> Date: Wed Jul 2 07:53:05 2008 +0000 @@ -700,6 +621,7 @@ Date: Tue Jan 20 12:20:28 2004 +0000 done, regression tests added: +#i85411# Apply patch for ZWSP #i17155# fix line breakiterator rule to make slash and hyphen as part of word when doing line break #i13451# add '-' as midLetter for Catalan dictionary word breakiterator #i13494# fix word breakiterator rule to handle punctuations and signs correctly diff --git a/i18npool/source/breakiterator/data/dict_word_ca.txt b/i18npool/source/breakiterator/data/dict_word_ca.txt index 6ad6a0bbb915..b1666f44daab 100644 --- a/i18npool/source/breakiterator/data/dict_word_ca.txt +++ b/i18npool/source/breakiterator/data/dict_word_ca.txt @@ -21,18 +21,24 @@ $Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND M [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:] [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]]; +$Ideographic = [:Ideographic:]; +$Hangul = [:Script = HANGUL:]; $ALetter = [[:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:] + - $Ideographic - $Katakana + - $Hangul - [:Script = Thai:] - [:Script = Lao:] - [:Script = Hiragana:]]; -$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] +$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] - [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] [:name = HYPHEN-MINUS:]]; - + [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] + [:name = HYPHEN-MINUS:] ]; + $SufixLetter = [:name= FULL STOP:]; + $MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:] [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:] @@ -60,7 +66,7 @@ $Extend = [[:Grapheme_Extend = TRUE:]]; # #################################################################################### -$Format = [[:Cf:]]; +$Format = [[:Cf:] - $TheZWSP]; @@ -80,6 +86,8 @@ $MidNumEx = $MidNum $Extend*; $MidLetterEx = $MidLetter $Extend*; $SufixLetterEx= $SufixLetter $Extend*; $KatakanaEx = $Katakana $Extend*; +$IdeographicEx= $Ideographic $Extend*; +$HangulEx = $Hangul $Extend*; $FormatEx = $Format $Extend*; @@ -111,7 +119,8 @@ $KatakanaEx ($FormatEx* $KatakanaEx)* {300}; # Separated from the "Everything Else" rule, below, only so that they # can be tagged with a return value. TODO: is this what we want? # -# [:IDEOGRAPHIC:] $Extend* {400}; +$IdeographicEx ($FormatEx* $IdeographicEx)* {400}; +$HangulEx ($FormatEx* $HangulEx)* {400}; # # Everything Else, with no tag. @@ -132,7 +141,7 @@ $CR $LF; # reaches something that can only be the start (and probably only) char in a "word". # A space or punctuation meets the test. # -$NonStarters = [$Numeric $ALetter $Katakana [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; +$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; #!.*; ! ($NonStarters* | \n \r) .; |