diff options
author | Thomas Lange <tl@openoffice.org> | 2009-07-13 12:26:46 +0000 |
---|---|---|
committer | Thomas Lange <tl@openoffice.org> | 2009-07-13 12:26:46 +0000 |
commit | 1d9708f5e1f732eb84813e0eda6da8dd48623b82 (patch) | |
tree | 0c9cb8b39b7bdd8a5d5669f1c1bfb91c72bdce16 /i18npool | |
parent | f74002df591f62452ef9181068cda76e355f75e7 (diff) |
undoing r273932 which was accidently commited on the master m51
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/inc/breakiterator_ctl.hxx | 1 | ||||
-rw-r--r-- | i18npool/inc/breakiterator_unicode.hxx | 6 | ||||
-rw-r--r-- | i18npool/inc/xdictionary.hxx | 4 | ||||
-rw-r--r-- | i18npool/source/breakiterator/breakiterator_cjk.cxx | 4 | ||||
-rw-r--r-- | i18npool/source/breakiterator/breakiterator_ctl.cxx | 7 | ||||
-rw-r--r-- | i18npool/source/breakiterator/breakiterator_unicode.cxx | 29 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word.txt | 3 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word_dash.txt | 148 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word_nodash.txt | 147 | ||||
-rw-r--r-- | i18npool/source/breakiterator/data/dict_word_prepostdash.txt | 157 | ||||
-rw-r--r-- | i18npool/source/breakiterator/makefile.mk | 2 | ||||
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 30 | ||||
-rw-r--r-- | i18npool/source/localedata/data/de_DE.xml | 9 | ||||
-rw-r--r-- | i18npool/source/localedata/data/nl_NL.xml | 35 | ||||
-rw-r--r-- | i18npool/source/localedata/data/pl_PL.xml | 7 | ||||
-rw-r--r-- | i18npool/source/localedata/data/sv_SE.xml | 7 |
16 files changed, 191 insertions, 405 deletions
diff --git a/i18npool/inc/breakiterator_ctl.hxx b/i18npool/inc/breakiterator_ctl.hxx index 3e687b5e8e68..9e753e8ae16f 100644 --- a/i18npool/inc/breakiterator_ctl.hxx +++ b/i18npool/inc/breakiterator_ctl.hxx @@ -58,7 +58,6 @@ protected: sal_Int32* nextCellIndex; sal_Int32* previousCellIndex; sal_Int32 cellIndexSize; - virtual void SAL_CALL makeIndex(const rtl::OUString& text, sal_Int32 pos) throw(com::sun::star::uno::RuntimeException); }; diff --git a/i18npool/inc/breakiterator_unicode.hxx b/i18npool/inc/breakiterator_unicode.hxx index 654df424b237..3a0720f27545 100644 --- a/i18npool/inc/breakiterator_unicode.hxx +++ b/i18npool/inc/breakiterator_unicode.hxx @@ -86,16 +86,12 @@ protected: const sal_Char *cBreakIterator, *wordRule, *lineRule; Boundary result; // for word break iterator - struct BI_Data { + struct { UnicodeString aICUText; icu::BreakIterator *aBreakIterator; - - BI_Data() : aICUText(), aBreakIterator(NULL) {} } character, word, sentence, line, *icuBI; - com::sun::star::lang::Locale aLocale; sal_Int16 aBreakType, aWordType; - void SAL_CALL loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale, sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char* name, const rtl::OUString& rText) throw(com::sun::star::uno::RuntimeException); }; diff --git a/i18npool/inc/xdictionary.hxx b/i18npool/inc/xdictionary.hxx index 04c5836aaaea..32ffdbbfe377 100644 --- a/i18npool/inc/xdictionary.hxx +++ b/i18npool/inc/xdictionary.hxx @@ -45,13 +45,11 @@ namespace com { namespace sun { namespace star { namespace i18n { // cache structure. struct WordBreakCache { + sal_Bool equals(const sal_Unicode *str, Boundary& boundary); // checking cached string sal_Int32 length; // contents length saved here. sal_Unicode *contents; // seperated segment contents. sal_Int32* wordboundary; // word boundaries in segments. sal_Int32 size; // size of wordboundary - - WordBreakCache(); - sal_Bool equals(const sal_Unicode *str, Boundary& boundary); // checking cached string }; class xdictionary diff --git a/i18npool/source/breakiterator/breakiterator_cjk.cxx b/i18npool/source/breakiterator/breakiterator_cjk.cxx index 3a44c02edc23..3d684b8788df 100644 --- a/i18npool/source/breakiterator/breakiterator_cjk.cxx +++ b/i18npool/source/breakiterator/breakiterator_cjk.cxx @@ -46,9 +46,7 @@ namespace com { namespace sun { namespace star { namespace i18n { // class BreakIterator_CJK // ----------------------------------------------------; -BreakIterator_CJK::BreakIterator_CJK() : - dict( NULL ), - hangingCharacters() +BreakIterator_CJK::BreakIterator_CJK() : dict(NULL) { cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK"; } diff --git a/i18npool/source/breakiterator/breakiterator_ctl.cxx b/i18npool/source/breakiterator/breakiterator_ctl.cxx index 0e1bee869cac..a42014615f1a 100644 --- a/i18npool/source/breakiterator/breakiterator_ctl.cxx +++ b/i18npool/source/breakiterator/breakiterator_ctl.cxx @@ -45,14 +45,11 @@ namespace com { namespace sun { namespace star { namespace i18n { /** * Constructor. */ -BreakIterator_CTL::BreakIterator_CTL() : - cachedText(), - nextCellIndex( NULL ), - previousCellIndex( NULL ), - cellIndexSize( 512 ) +BreakIterator_CTL::BreakIterator_CTL() { cBreakIterator = "com.sun.star.i18n.BreakIterator_CTL"; // to improve performance, alloc big enough memory in construct. + cellIndexSize = 512; nextCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32)); previousCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32)); memset(nextCellIndex, 0, cellIndexSize * sizeof(sal_Int32)); diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index f0710c996858..b832bb02e7e9 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -51,23 +51,20 @@ namespace com { namespace sun { namespace star { namespace i18n { #define ERROR ::com::sun::star::uno::RuntimeException() -//#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode"; - - -BreakIterator_Unicode::BreakIterator_Unicode() : - cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ), // implementation name - wordRule( "word" ), - lineRule( "line" ), - result(), - character(), - word(), - sentence(), - line(), - icuBI( NULL ), - aLocale(), - aBreakType(), - aWordType() +#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode"; + + +BreakIterator_Unicode::BreakIterator_Unicode() { + wordRule="word"; + lineRule="line"; + character.aBreakIterator=word.aBreakIterator=sentence.aBreakIterator=line.aBreakIterator=NULL; + character.aICUText=UnicodeString(); + word.aICUText=UnicodeString(); + sentence.aICUText=UnicodeString(); + line.aICUText=UnicodeString(); + cBreakIterator = ImplementName; + icuBI=NULL; } diff --git a/i18npool/source/breakiterator/data/dict_word.txt b/i18npool/source/breakiterator/data/dict_word.txt index 367a82db9e6f..4c5c80823041 100644 --- a/i18npool/source/breakiterator/data/dict_word.txt +++ b/i18npool/source/breakiterator/data/dict_word.txt @@ -34,8 +34,7 @@ $ALetter = [\u0002 [:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCT $MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] - [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] - [:name = HYPHEN-MINUS:] [:name = EN DASH:] [:name = EM DASH:] ]; + [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] ]; $SufixLetter = [:name= FULL STOP:]; diff --git a/i18npool/source/breakiterator/data/dict_word_dash.txt b/i18npool/source/breakiterator/data/dict_word_dash.txt index e69de29bb2d1..7f861c7911ca 100644 --- a/i18npool/source/breakiterator/data/dict_word_dash.txt +++ b/i18npool/source/breakiterator/data/dict_word_dash.txt @@ -0,0 +1,148 @@ +# +# Copyright (C) 2002-2003, International Business Machines Corporation and others. +# All Rights Reserved. +# +# file: dict_word.txt +# +# ICU Word Break Rules +# See Unicode Standard Annex #29. +# These rules are based on Version 4.0.0, dated 2003-04-17 +# + + + +#################################################################################### +# +# Character class definitions from TR 29 +# +#################################################################################### +$Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND MARK:] + [:name = HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK:] + [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:] + [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]]; + +$Ideographic = [:Ideographic:]; +$Hangul = [:Script = HANGUL:]; + +$ALetter = [\u0002 [:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:] + - $Ideographic + - $Katakana + - $Hangul + - [:Script = Thai:] + - [:Script = Lao:] + - [:Script = Hiragana:]]; + +$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] + [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] + [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] + [:name = HYPHEN-MINUS:] [:name = EN DASH:] ]; + +$SufixLetter = [:name= FULL STOP:]; + + +$MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:] + [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:] + [:name = PRIME:]]; +$Numeric = [:LineBreak = Numeric:]; + + +$TheZWSP = \u200b; + +# +# Character Class Definitions. +# The names are those from TR29. +# +$CR = \u000d; +$LF = \u000a; +$Control = [[[:Zl:] [:Zp:] [:Cc:] [:Cf:]] - $TheZWSP]; +$Extend = [[:Grapheme_Extend = TRUE:]]; + + + + +#################################################################################### +# +# Word Break Rules. Definitions and Rules specific to word break begin Here. +# +#################################################################################### + +$Format = [[:Cf:] - $TheZWSP]; + + + +# Rule 3: Treat a grapheme cluster as if it were a single character. +# Hangul Syllables are easier to deal with here than they are in Grapheme Clusters +# because we don't need to find the boundaries between adjacent syllables - +# they won't be word boundaries. +# + + +# +# "Extended" definitions. Grapheme Cluster + Format Chars, treated like the base char. +# +$ALetterEx = $ALetter $Extend*; +$NumericEx = $Numeric $Extend*; +$MidNumEx = $MidNum $Extend*; +$MidLetterEx = $MidLetter $Extend*; +$SufixLetterEx= $SufixLetter $Extend*; +$KatakanaEx = $Katakana $Extend*; +$IdeographicEx= $Ideographic $Extend*; +$HangulEx = $Hangul $Extend*; +$FormatEx = $Format $Extend*; + + +# +# Numbers. Rules 8, 11, 12 form the TR. +# +$NumberSequence = $NumericEx ($FormatEx* $MidNumEx? $FormatEx* $NumericEx)*; +$NumberSequence {100}; + +# +# Words. Alpha-numerics. Rule 5, 6, 7, 9, 10 +# - must include at least one letter. +# - may include both letters and numbers. +# - may include MideLetter, MidNumber punctuation. +# +$LetterSequence = $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)*; # rules #6, #7 +($NumberSequence $FormatEx*)? $LetterSequence ($FormatEx* ($NumberSequence | $LetterSequence))* $SufixLetterEx? {200}; + +[[:P:][:S:]]*; + +# +# Do not break between Katakana. Rule #13. +# +$KatakanaEx ($FormatEx* $KatakanaEx)* {300}; +[:Hiragana:] $Extend* {300}; + +# +# Ideographic Characters. Stand by themselves as words. +# Separated from the "Everything Else" rule, below, only so that they +# can be tagged with a return value. TODO: is this what we want? +# +$IdeographicEx ($FormatEx* $IdeographicEx)* {400}; +$HangulEx ($FormatEx* $HangulEx)* {400}; + +# +# Everything Else, with no tag. +# Non-Control chars combine with $Extend (combining) chars. +# Controls are do not. +# +[^$Control [:Ideographic:]] $Extend*; +$CR $LF; + +# +# Reverse Rules. Back up over any of the chars that can group together. +# (Reverse rules do not need to be exact; they can back up too far, +# but must back up at least enough, and must stop on a boundary.) +# + +# NonStarters are the set of all characters that can appear at the 2nd - nth position of +# a word. (They may also be the first.) The reverse rule skips over these, until it +# reaches something that can only be the start (and probably only) char in a "word". +# A space or punctuation meets the test. +# +$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; + +#!.*; +! ($NonStarters* | \n \r) .; + diff --git a/i18npool/source/breakiterator/data/dict_word_nodash.txt b/i18npool/source/breakiterator/data/dict_word_nodash.txt deleted file mode 100644 index 4c5c80823041..000000000000 --- a/i18npool/source/breakiterator/data/dict_word_nodash.txt +++ /dev/null @@ -1,147 +0,0 @@ -# -# Copyright (C) 2002-2003, International Business Machines Corporation and others. -# All Rights Reserved. -# -# file: dict_word.txt -# -# ICU Word Break Rules -# See Unicode Standard Annex #29. -# These rules are based on Version 4.0.0, dated 2003-04-17 -# - - - -#################################################################################### -# -# Character class definitions from TR 29 -# -#################################################################################### -$Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:] - [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]]; - -$Ideographic = [:Ideographic:]; -$Hangul = [:Script = HANGUL:]; - -$ALetter = [\u0002 [:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:] - - $Ideographic - - $Katakana - - $Hangul - - [:Script = Thai:] - - [:Script = Lao:] - - [:Script = Hiragana:]]; - -$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] - [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] - [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] ]; - -$SufixLetter = [:name= FULL STOP:]; - - -$MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:] - [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:] - [:name = PRIME:]]; -$Numeric = [:LineBreak = Numeric:]; - - -$TheZWSP = \u200b; - -# -# Character Class Definitions. -# The names are those from TR29. -# -$CR = \u000d; -$LF = \u000a; -$Control = [[[:Zl:] [:Zp:] [:Cc:] [:Cf:]] - $TheZWSP]; -$Extend = [[:Grapheme_Extend = TRUE:]]; - - - - -#################################################################################### -# -# Word Break Rules. Definitions and Rules specific to word break begin Here. -# -#################################################################################### - -$Format = [[:Cf:] - $TheZWSP]; - - - -# Rule 3: Treat a grapheme cluster as if it were a single character. -# Hangul Syllables are easier to deal with here than they are in Grapheme Clusters -# because we don't need to find the boundaries between adjacent syllables - -# they won't be word boundaries. -# - - -# -# "Extended" definitions. Grapheme Cluster + Format Chars, treated like the base char. -# -$ALetterEx = $ALetter $Extend*; -$NumericEx = $Numeric $Extend*; -$MidNumEx = $MidNum $Extend*; -$MidLetterEx = $MidLetter $Extend*; -$SufixLetterEx= $SufixLetter $Extend*; -$KatakanaEx = $Katakana $Extend*; -$IdeographicEx= $Ideographic $Extend*; -$HangulEx = $Hangul $Extend*; -$FormatEx = $Format $Extend*; - - -# -# Numbers. Rules 8, 11, 12 form the TR. -# -$NumberSequence = $NumericEx ($FormatEx* $MidNumEx? $FormatEx* $NumericEx)*; -$NumberSequence {100}; - -# -# Words. Alpha-numerics. Rule 5, 6, 7, 9, 10 -# - must include at least one letter. -# - may include both letters and numbers. -# - may include MideLetter, MidNumber punctuation. -# -$LetterSequence = $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)*; # rules #6, #7 -($NumberSequence $FormatEx*)? $LetterSequence ($FormatEx* ($NumberSequence | $LetterSequence))* $SufixLetterEx? {200}; - -[[:P:][:S:]]*; - -# -# Do not break between Katakana. Rule #13. -# -$KatakanaEx ($FormatEx* $KatakanaEx)* {300}; -[:Hiragana:] $Extend* {300}; - -# -# Ideographic Characters. Stand by themselves as words. -# Separated from the "Everything Else" rule, below, only so that they -# can be tagged with a return value. TODO: is this what we want? -# -$IdeographicEx ($FormatEx* $IdeographicEx)* {400}; -$HangulEx ($FormatEx* $HangulEx)* {400}; - -# -# Everything Else, with no tag. -# Non-Control chars combine with $Extend (combining) chars. -# Controls are do not. -# -[^$Control [:Ideographic:]] $Extend*; -$CR $LF; - -# -# Reverse Rules. Back up over any of the chars that can group together. -# (Reverse rules do not need to be exact; they can back up too far, -# but must back up at least enough, and must stop on a boundary.) -# - -# NonStarters are the set of all characters that can appear at the 2nd - nth position of -# a word. (They may also be the first.) The reverse rule skips over these, until it -# reaches something that can only be the start (and probably only) char in a "word". -# A space or punctuation meets the test. -# -$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; - -#!.*; -! ($NonStarters* | \n \r) .; - diff --git a/i18npool/source/breakiterator/data/dict_word_prepostdash.txt b/i18npool/source/breakiterator/data/dict_word_prepostdash.txt deleted file mode 100644 index 1bf94451fae2..000000000000 --- a/i18npool/source/breakiterator/data/dict_word_prepostdash.txt +++ /dev/null @@ -1,157 +0,0 @@ -# -# Copyright (C) 2002-2003, International Business Machines Corporation and others. -# All Rights Reserved. -# -# file: dict_word.txt -# -# ICU Word Break Rules -# See Unicode Standard Annex #29. -# These rules are based on Version 4.0.0, dated 2003-04-17 -# - - - -#################################################################################### -# -# Character class definitions from TR 29 -# -#################################################################################### -$Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:] - [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]]; - -$Ideographic = [:Ideographic:]; -$Hangul = [:Script = HANGUL:]; - -# list of dashes or hyphens that should be accepted as part of the word if a single one of these -# pre- or postfixes a word. E.g. in German: "Arbeits-" or "-nehmer" where that hyphen needs to -# be part of the word in order to have it properly spell checked etc. -$PrePostDashHyphen = [ [:name = HYPHEN-MINUS:] [:name = EN DASH:] [:name = EM DASH:] ]; - - -$ALetter = [\u0002 [:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:] - - $Ideographic - - $Katakana - - $Hangul - - [:Script = Thai:] - - [:Script = Lao:] - - [:Script = Hiragana:]]; - -$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] - [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] - [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] - [:name = HYPHEN-MINUS:] [:name = EN DASH:] [:name = EM DASH:] ]; - -$SufixLetter = [:name= FULL STOP:]; - - -$MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:] - [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:] - [:name = PRIME:]]; -$Numeric = [:LineBreak = Numeric:]; - - -$TheZWSP = \u200b; - -# -# Character Class Definitions. -# The names are those from TR29. -# -$CR = \u000d; -$LF = \u000a; -$Control = [[[:Zl:] [:Zp:] [:Cc:] [:Cf:]] - $TheZWSP]; -$Extend = [[:Grapheme_Extend = TRUE:]]; - - - - -#################################################################################### -# -# Word Break Rules. Definitions and Rules specific to word break begin Here. -# -#################################################################################### - -$Format = [[:Cf:] - $TheZWSP]; - - - -# Rule 3: Treat a grapheme cluster as if it were a single character. -# Hangul Syllables are easier to deal with here than they are in Grapheme Clusters -# because we don't need to find the boundaries between adjacent syllables - -# they won't be word boundaries. -# - - -# -# "Extended" definitions. Grapheme Cluster + Format Chars, treated like the base char. -# -$ALetterEx = $ALetter $Extend*; -$NumericEx = $Numeric $Extend*; -$MidNumEx = $MidNum $Extend*; -$MidLetterEx = $MidLetter $Extend*; -$SufixLetterEx= $SufixLetter $Extend*; -$KatakanaEx = $Katakana $Extend*; -$IdeographicEx= $Ideographic $Extend*; -$HangulEx = $Hangul $Extend*; -$FormatEx = $Format $Extend*; - - -# -# Numbers. Rules 8, 11, 12 form the TR. -# -$NumberSequence = $NumericEx ($FormatEx* $MidNumEx? $FormatEx* $NumericEx)*; -$NumberSequence {100}; - -# -# Words. Alpha-numerics. Rule 5, 6, 7, 9, 10 -# - must include at least one letter. -# - may include both letters and numbers. -# - may include MideLetter, MidNumber punctuation. -# -# At most one leading or trailing dash/hyphen should be accepted as well. -# E.g. in German: "Arbeits-" or "-nehmer" where that hyphen needs to -# be part of the word in order to have it properly spell checked etc. -$LetterSequence = $PrePostDashHyphen? $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)* $PrePostDashHyphen?; # rules #6, #7 -($NumberSequence $FormatEx*)? $LetterSequence ($FormatEx* ($NumberSequence | $LetterSequence))* $SufixLetterEx? {200}; - -[[:P:][:S:]]*; - -# -# Do not break between Katakana. Rule #13. -# -$KatakanaEx ($FormatEx* $KatakanaEx)* {300}; -[:Hiragana:] $Extend* {300}; - -# -# Ideographic Characters. Stand by themselves as words. -# Separated from the "Everything Else" rule, below, only so that they -# can be tagged with a return value. TODO: is this what we want? -# -$IdeographicEx ($FormatEx* $IdeographicEx)* {400}; -$HangulEx ($FormatEx* $HangulEx)* {400}; - -# -# Everything Else, with no tag. -# Non-Control chars combine with $Extend (combining) chars. -# Controls are do not. -# -[^$Control [:Ideographic:]] $Extend*; -$CR $LF; - -# -# Reverse Rules. Back up over any of the chars that can group together. -# (Reverse rules do not need to be exact; they can back up too far, -# but must back up at least enough, and must stop on a boundary.) -# - -# NonStarters are the set of all characters that can appear at the 2nd - nth position of -# a word. (They may also be the first.) The reverse rule skips over these, until it -# reaches something that can only be the start (and probably only) char in a "word". -# A space or punctuation meets the test. -# -$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; - -#!.*; -! ($NonStarters* | \n \r) .; - diff --git a/i18npool/source/breakiterator/makefile.mk b/i18npool/source/breakiterator/makefile.mk index 50e437e98b91..7ba9cd3e753d 100644 --- a/i18npool/source/breakiterator/makefile.mk +++ b/i18npool/source/breakiterator/makefile.mk @@ -93,7 +93,7 @@ $(MISC)$/%_brk.c : $(MISC)$/%.brk # The output of gencmn generates warnings under Windows. We want to minimize the patches to external tools, # so the output (OpenOffice_icu_dat.c) is changed here to include a pragma to disable the warnings. # Output of gencmn is redirected to OpenOffice_icu_tmp.c with the -t switch. -$(MISC)$/OpenOffice_%.c : $(MY_BRK_BRKFILES:s/.brk/_brk.c/) +$(MISC)$/OpenOffice_%.c : $(WRAPCMD) $(GENCMN) -n OpenOffice -t tmp -S -d $(MISC) O $(mktmp $(subst,$(MISC)$/, $(MY_BRK_BRKFILES:t"\n"))) echo $(USQ)#ifdef _MSC_VER$(USQ) > $@ echo $(USQ)#pragma warning( disable : 4229 4668 )$(USQ) >> $@ diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index f286dd2449ac..fb832f0b6468 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -54,21 +54,7 @@ namespace com { namespace sun { namespace star { namespace i18n { extern "C" { static void SAL_CALL thisModule() {} } -xdictionary::xdictionary(const sal_Char *lang) : - existMark( NULL ), - index1( NULL ), - index2( NULL ), - lenArray( NULL ), - dataArea( NULL ), - hModule( NULL ), - boundary(), - japaneseWordBreak( sal_False ) -#if USE_CELL_BOUNDARY_CODE - // For CTL breakiterator, where the word boundary should not be inside cell. - , - useCellBoundary( sal_False ), - cellBoundary( NULL ) -#endif +xdictionary::xdictionary(const sal_Char *lang) { index1 = 0; #ifdef SAL_DLLPREFIX @@ -106,7 +92,6 @@ xdictionary::xdictionary(const sal_Char *lang) : #if USE_CELL_BOUNDARY_CODE useCellBoundary = sal_False; - cellBoundary = NULL; #endif japaneseWordBreak = sal_False; } @@ -164,19 +149,6 @@ sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen) { return 0; } - -/* - * c-tor - */ - -WordBreakCache::WordBreakCache() : - length( 0 ), - contents( NULL ), - wordboundary( NULL ), - size( 0 ) -{ -} - /* * Compare two unicode string, */ diff --git a/i18npool/source/localedata/data/de_DE.xml b/i18npool/source/localedata/data/de_DE.xml index a83b3dbb791d..295b826a5e9f 100644 --- a/i18npool/source/localedata/data/de_DE.xml +++ b/i18npool/source/localedata/data/de_DE.xml @@ -328,14 +328,7 @@ </LC_CURRENCY> <LC_TRANSLITERATION ref="en_US"/> <LC_MISC> - <BreakIteratorRules> - <EditMode/> - <DictionaryMode>dict_word_prepostdash</DictionaryMode> - <WordCountMode/> - <CharacterMode/> - <LineMode/> - </BreakIteratorRules> - <ReservedWords> + <ReservedWords> <trueWord>wahr</trueWord> <falseWord>falsch</falseWord> <quarter1Word>1. Quartal</quarter1Word> diff --git a/i18npool/source/localedata/data/nl_NL.xml b/i18npool/source/localedata/data/nl_NL.xml index 5a91c9c3e42e..eedfdc146b5e 100644 --- a/i18npool/source/localedata/data/nl_NL.xml +++ b/i18npool/source/localedata/data/nl_NL.xml @@ -360,27 +360,20 @@ </LC_CURRENCY> <LC_TRANSLITERATION ref="en_US"/> <LC_MISC> - <BreakIteratorRules> - <EditMode/> - <DictionaryMode>dict_word_prepostdash</DictionaryMode> - <WordCountMode/> - <CharacterMode/> - <LineMode/> - </BreakIteratorRules> - <ReservedWords> - <trueWord>waar</trueWord> - <falseWord>onwaar</falseWord> - <quarter1Word>1ste kwartaal</quarter1Word> - <quarter2Word>2de kwartaal</quarter2Word> - <quarter3Word>3de kwartaal</quarter3Word> - <quarter4Word>4de kwartaal</quarter4Word> - <aboveWord>boven</aboveWord> - <belowWord>onder</belowWord> - <quarter1Abbreviation>K1</quarter1Abbreviation> - <quarter2Abbreviation>K2</quarter2Abbreviation> - <quarter3Abbreviation>K3</quarter3Abbreviation> - <quarter4Abbreviation>K4</quarter4Abbreviation> - </ReservedWords> +<ReservedWords> +<trueWord>waar</trueWord> +<falseWord>onwaar</falseWord> +<quarter1Word>1ste kwartaal</quarter1Word> +<quarter2Word>2de kwartaal</quarter2Word> +<quarter3Word>3de kwartaal</quarter3Word> +<quarter4Word>4de kwartaal</quarter4Word> +<aboveWord>boven</aboveWord> +<belowWord>onder</belowWord> +<quarter1Abbreviation>K1</quarter1Abbreviation> +<quarter2Abbreviation>K2</quarter2Abbreviation> +<quarter3Abbreviation>K3</quarter3Abbreviation> +<quarter4Abbreviation>K4</quarter4Abbreviation> +</ReservedWords> </LC_MISC> <LC_NumberingLevel ref="en_US"/> <LC_OutLineNumberingLevel ref="en_US"/> diff --git a/i18npool/source/localedata/data/pl_PL.xml b/i18npool/source/localedata/data/pl_PL.xml index 195689666247..4119060ea7df 100644 --- a/i18npool/source/localedata/data/pl_PL.xml +++ b/i18npool/source/localedata/data/pl_PL.xml @@ -326,6 +326,13 @@ </LC_CURRENCY> <LC_TRANSLITERATION ref="en_US"/> <LC_MISC> + <BreakIteratorRules> + <EditMode/> + <DictionaryMode>dict_word_dash</DictionaryMode> + <WordCountMode/> + <CharacterMode/> + <LineMode/> + </BreakIteratorRules> <ReservedWords> <trueWord>prawda</trueWord> <falseWord>fałsz</falseWord> diff --git a/i18npool/source/localedata/data/sv_SE.xml b/i18npool/source/localedata/data/sv_SE.xml index 211f95c3e894..333690a0ef09 100644 --- a/i18npool/source/localedata/data/sv_SE.xml +++ b/i18npool/source/localedata/data/sv_SE.xml @@ -315,13 +315,6 @@ </LC_CURRENCY> <LC_TRANSLITERATION ref="en_US"/> <LC_MISC> - <BreakIteratorRules> - <EditMode/> - <DictionaryMode>dict_word_prepostdash</DictionaryMode> - <WordCountMode/> - <CharacterMode/> - <LineMode/> - </BreakIteratorRules> <ReservedWords> <trueWord>sant</trueWord> <falseWord>falskt</falseWord> |