From af0bd8e2d9f532cdda3d8ed5d32056cf65fc3c44 Mon Sep 17 00:00:00 2001 From: László Németh Date: Fri, 24 Mar 2017 15:26:49 +0100 Subject: tdf#106751 fix regressions in Hungarian spell checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit using recent fixes of Hunspell code base Change-Id: I180a2ecba924180419c5eb1a0e78b5c84e7242c4 Reviewed-on: https://gerrit.libreoffice.org/35670 Tested-by: Jenkins Tested-by: László Németh Reviewed-by: László Németh Reviewed-by: Andras Timar (cherry picked from commit 23c028ba014f521bf1b70c83439e0915e56c296f) --- .../0002-fix-other-regression-in-compounding.patch | 43 ++++++++++++++ ...llable-counting-in-compound-word-handling.patch | 66 ++++++++++++++++++++++ external/hunspell/UnpackedTarball_hunspell.mk | 2 + 3 files changed, 111 insertions(+) create mode 100644 external/hunspell/0002-fix-other-regression-in-compounding.patch create mode 100644 external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch new file mode 100644 index 000000000000..cbf29e54f93a --- /dev/null +++ b/external/hunspell/0002-fix-other-regression-in-compounding.patch @@ -0,0 +1,43 @@ +From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= + +Date: Sun, 19 Mar 2017 13:19:29 +0100 +Subject: [PATCH 2/7] fix other regression in compounding +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Allow compound words again with +starting "kor", "alak", "asszony", "úr" +related to the "REP kor _kor" etc. rules +using the Hungarian spelling dictionary. + +regression from... 
+ +commit 73b1cad1af7ab94252f75784fa6724cf062a6966 +Author: Martin Hosken +Date: Mon Apr 18 16:28:26 2016 +0700 + + Add support for bounded conversion +--- + src/hunspell/affixmgr.cxx | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 78c70e7..ec2093d 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) { + // search every occurence of the pattern in the word + while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) { + std::string candidate(word); +- size_t type = r == word ? 1 : 0; +- if (r - word + reptable[i].pattern.size() == lenp) ++ size_t type = r == word && langnum != LANG_hu ? 1 : 0; ++ if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu) + type += 2; + candidate.replace(r - word, lenp, reptable[i].outstrings[type]); + if (candidate_check(candidate.c_str(), candidate.size())) +-- +2.7.4 + diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch new file mode 100644 index 000000000000..670d938e5441 --- /dev/null +++ b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch @@ -0,0 +1,66 @@ +From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= + +Date: Thu, 23 Mar 2017 16:40:52 +0100 +Subject: [PATCH 5/7] fix syllable counting in compound word handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Note: one of the fixed regressions is related to an old +hidden mistake: using clen instead of blen of the stem +word lengths was indifferent with the original get_syllable(), +because blen == clen at 8-bit encodings, and UTF-8 +words were handled by null-termination. 
Implementing Unicode +support in Hunspell, clen was changed only in +compound_check_morph() to blen accidentally, but not +in compound_check(), resulting in problems from the +recent std::string conversion. + +Now this commit is a real fix for the regression from the +commit c63c93237e4decdba5544a96093448605ac549c2, +instead of the following bad fix: + +commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619 +Author: László Németh +Date: Fri Mar 17 15:11:23 2017 +0100 + + fix Hungarian compound word handling +--- + src/hunspell/affixmgr.cxx | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 2ed8233..3d65539 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + // LANG_hu section: spec. Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word +- numsyllable += get_syllable(st.substr(i)); ++ numsyllable += get_syllable(st.substr(0, i)); + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey()) > 1)) +@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && +- (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <= ++ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <= + cpdmaxsyllable))) && + ( + // test CHECKCOMPOUNDPATTERN +@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word, + // LANG_hu section: spec. 
Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word +- numsyllable += get_syllable(st.substr(i)); ++ numsyllable += get_syllable(st.substr(0, i)); + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) +-- +2.7.4 + diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk index b30bd083b407..40a4a101a8f0 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -28,6 +28,8 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \ external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \ external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \ + external/hunspell/0002-fix-other-regression-in-compounding.patch \ + external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \ )) # vim: set noet sw=4 ts=4: -- cgit