summaryrefslogtreecommitdiff
path: root/i18npool/source/breakiterator
diff options
context:
space:
mode:
authorLászló Németh <nemeth@numbertext.org>2018-03-14 15:54:57 +0100
committerLászló Németh <nemeth@numbertext.org>2018-03-15 17:05:22 +0100
commit6ef7c2729fb85959dfd76f028166f7631886399c (patch)
tree6f629f414b2b6ff7b8dd62e7dfd0263fad89c7e3 /i18npool/source/breakiterator
parent3c3f10da500967fc6ffecc2f3e076d8ff43e7503 (diff)
tdf#116072 Add PrefixLetter ")" in Hungarian word breaking
to handle bad word breaking of ")-ban", ")-ben" after reference fields. (Field content is not expanded for spell checking, resulting in red-underlined "ban" and "ben" in the correct forms "a)-ban", "b)-ben" etc.; see the test file of the issue.) Change-Id: Ic4b1fd2c99bdd2509d85dd6f2aa43e2a53becaa7 Reviewed-on: https://gerrit.libreoffice.org/51284 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'i18npool/source/breakiterator')
-rw-r--r--i18npool/source/breakiterator/data/dict_word_hu.txt10
1 files changed, 9 insertions, 1 deletions
diff --git a/i18npool/source/breakiterator/data/dict_word_hu.txt b/i18npool/source/breakiterator/data/dict_word_hu.txt
index d76c6d189984..b0a0276b36a8 100644
--- a/i18npool/source/breakiterator/data/dict_word_hu.txt
+++ b/i18npool/source/breakiterator/data/dict_word_hu.txt
@@ -24,6 +24,14 @@ $Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND M
$Ideographic = [:Ideographic:];
$Hangul = [:Script = HANGUL:];
+
+# Fix spell checking of "a)-ban", "b)-ben" when the letter is a reference
+# field, which otherwise results in the bad word breaks "ban" and "ben"
+# (reference fields are not yet expanded for spell checking, only
+# for grammar checking).
+
+$PrefixLetter = [[:name = RIGHT PARENTHESIS:]];
+
$ALetter = [[:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:]
[:name = PERCENT SIGN:] [:name = PER MILLE SIGN:] [:name = PER TEN THOUSAND SIGN:]
[:name = SECTION SIGN:] [:name = DEGREE SIGN:] [:name = EURO SIGN:]
@@ -123,7 +131,7 @@ $NumberSequence {100};
# - may include both letters and numbers.
# - may include MidLetter, MidNumber punctuation.
#
-$LetterSequence = $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)*; # rules #6, #7
+$LetterSequence = $PrefixLetter? $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)*; # rules #6, #7
($NumberSequence $FormatEx*)? $LetterSequence ($FormatEx* ($NumberSequence | $LetterSequence))* $SufixLetterEx? {200};
[[:P:][:S:]]*;