diff options
author | Caolán McNamara <caolanm@redhat.com> | 2017-09-20 16:40:39 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2017-09-21 14:30:34 +0200 |
commit | 917bcea4b4660c516bb18691e4f8ee60313804ef (patch) | |
tree | 8fb4f31bdd9c7b0f3db9d161093236e51ae2fc23 | |
parent | 3c543f87d97060f8d9acb2166cc5c0ad01fa95b4 (diff) |
bump hunspell to 1.6.2
Change-Id: I91d4d58f2b8ba69067de1d08476a8cebbb780535
Reviewed-on: https://gerrit.libreoffice.org/42555
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
11 files changed, 2 insertions, 786 deletions
diff --git a/download.lst b/download.lst index dd89c4a73008..08fe31d337e5 100644 --- a/download.lst +++ b/download.lst @@ -87,8 +87,8 @@ export HARFBUZZ_SHA256SUM := ccec4930ff0bb2d0c40aee203075447954b64a8c2695202413c export HARFBUZZ_TARBALL := harfbuzz-1.4.8.tar.bz2 export HSQLDB_SHA256SUM := d30b13f4ba2e3b6a2d4f020c0dee0a9fb9fc6fbcc2d561f36b78da4bf3802370 export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip -export HUNSPELL_SHA256SUM := 512e7d2ee69dad0b35ca011076405e56e0f10963a02d4859dbcc4faf53ca68e2 -export HUNSPELL_TARBALL := 047c3feb121261b76dc16cdb62f54483-hunspell-1.6.0.tar.gz +export HUNSPELL_SHA256SUM := 3cd9ceb062fe5814f668e4f22b2fa6e3ba0b339b921739541ce180cac4d6f4c4 +export HUNSPELL_TARBALL := hunspell-1.6.2.tar.gz export HYPHEN_SHA256SUM := 304636d4eccd81a14b6914d07b84c79ebb815288c76fe027b9ebff6ff24d5705 export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz export ICU_SHA256SUM := 7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch deleted file mode 100644 index 276ddd2e705b..000000000000 --- a/external/hunspell/0001-cppcheck-redundant-c_str.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 11:43:53 +0000 -Subject: [PATCH] cppcheck: redundant c_str - ---- - src/hunspell/suggestmgr.cxx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index b998341..8d46dd6 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - int sc2; - if (utf8) { - w_f.clear(); -- u8_u16(w_f, f.c_str()); -+ u8_u16(w_f, f); - sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(w_word, w_f); - } else { -@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - std::string target2 = phonet(candidate, *ph); - w_target2.clear(); - if (utf8) { -- u8_u16(w_target2, target2.c_str()); -+ u8_u16(w_target2, target2); - scphon = 2 * ngram(3, w_target, w_target2, - NGRAM_LONGER_WORSE); - } else { --- -2.9.3 - diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch deleted file mode 100644 index bfcdf490a0a9..000000000000 --- a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 12:05:07 +0000 -Subject: [PATCH] cppcheck: rv is reassigned before old value used - ---- - src/hunspell/affixmgr.cxx | 6 ++---- - src/hunspell/suggestmgr.cxx | 3 +-- - 2 files changed, 3 insertions(+), 6 deletions(-) - -diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx -index 680cbe9..21cf384 100644 ---- a/src/hunspell/affixmgr.cxx -+++ b/src/hunspell/affixmgr.cxx -@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words, - } - - inline int AffixMgr::candidate_check(const char* word, int len) { -- struct hentry* rv = NULL; - -- rv = lookup(word); -+ struct hentry* rv = lookup(word); - if (rv) - return 1; - -@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word, - int len, - const FLAG needflag, - char in_compound) { -- struct hentry* rv = NULL; - - // check all prefixes (also crossed with suffixes if allowed) -- rv = prefix_check(word, len, in_compound, needflag); -+ struct hentry* rv = prefix_check(word, len, in_compound, needflag); - if (rv) - return rv; - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index 8d46dd6..54a474f 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { - if (HENTRY_DATA(rv)) - p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); - while (p) { -- struct hentry* rv2 = NULL; - p += MORPH_TAG_LEN; - int plen = fieldlen(p); - std::string allomorph(p, plen); -- rv2 = pAMgr->lookup(allomorph.c_str()); -+ struct hentry* rv2 = pAMgr->lookup(allomorph.c_str()); - while (rv2) { - // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= - // sfxcount) { --- -2.9.3 - diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch deleted file mode 100644 index 6ecdd769e3bf..000000000000 --- a/external/hunspell/0001-loop-via-iterators.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Fri, 10 Feb 2017 15:37:11 +0000 -Subject: [PATCH 1/4] loop via iterators - ---- - src/hunspell/csutil.cxx | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx -index c1666a5..2408677 100644 ---- a/src/hunspell/csutil.cxx -+++ b/src/hunspell/csutil.cxx -@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) { - size_t ncap = 0; - size_t nneutral = 0; - size_t firstcap = 0; -- for (size_t i = 0; i < word.size(); ++i) { -- unsigned short idx = (word[i].h << 8) + word[i].l; -+ -+ std::vector<w_char>::const_iterator it = word.begin(); -+ std::vector<w_char>::const_iterator it_end = word.end(); -+ while (it != it_end) { -+ unsigned short idx = (it->h << 8) + it->l; - unsigned short lwridx = unicodetolower(idx, langnum); - if (idx != lwridx) - ncap++; - if (unicodetoupper(idx, langnum) == lwridx) - nneutral++; -+ ++it; - } - if (ncap) { - unsigned short idx = (word[0].h << 8) + word[0].l; --- -2.9.3 - diff --git a/external/hunspell/0001-unroll-this-a-bit.patch b/external/hunspell/0001-unroll-this-a-bit.patch deleted file mode 100644 index 607a51a5fd1b..000000000000 --- a/external/hunspell/0001-unroll-this-a-bit.patch +++ /dev/null @@ -1,116 +0,0 @@ -From bf05e232805f6c1fae5dea3c223de8bdaab425e9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 13:26:53 +0000 -Subject: [PATCH 1/3] unroll this a bit - ---- - src/hunspell/csutil.cxx | 49 ++++++++++++++++++++++++++++--------------------- - 1 file changed, 28 insertions(+), 21 deletions(-) - -diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx -index ac5cd98..c1666a5 100644 ---- a/src/hunspell/csutil.cxx -+++ b/src/hunspell/csutil.cxx -@@ -518,18 +518,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) { - - w_char upper_utf(w_char u, int langnum) { - unsigned short idx = (u.h << 8) + u.l; -- if (idx != unicodetoupper(idx, langnum)) { -- u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); -- u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); -+ unsigned short upridx = unicodetoupper(idx, langnum); -+ if (idx != upridx) { -+ u.h = (unsigned char)(upridx >> 8); -+ u.l = (unsigned char)(upridx & 0x00FF); - } - return u; - } - - w_char lower_utf(w_char u, int langnum) { - unsigned short idx = (u.h << 8) + u.l; -- if (idx != unicodetolower(idx, langnum)) { -- u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8); -- u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); -+ unsigned short lwridx = unicodetolower(idx, langnum); -+ if (idx != lwridx) { -+ u.h = (unsigned char)(lwridx >> 8); -+ u.l = (unsigned char)(lwridx & 0x00FF); - } - return u; - } -@@ -551,12 +553,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { - } - - std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, -- int langnum) { -+ int langnum) { - for (size_t i = 0; i < u.size(); ++i) { - unsigned short idx = (u[i].h << 8) + u[i].l; -- if (idx != unicodetolower(idx, langnum)) { -- u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); -- u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); -+ unsigned short lwridx = unicodetolower(idx, langnum); -+ if (idx != lwridx) { -+ u[i].h = (unsigned char)(lwridx >> 8); -+ u[i].l = (unsigned char)(lwridx & 0x00FF); - } - } - return u; -@@ -565,9 +568,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, - std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { - for (size_t i = 0; i < u.size(); i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; -- if (idx != unicodetoupper(idx, langnum)) { -- u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); -- u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); -+ unsigned short upridx = unicodetoupper(idx, langnum); -+ if (idx != upridx) { -+ u[i].h = (unsigned char)(upridx >> 8); -+ u[i].l = (unsigned char)(upridx & 0x00FF); - } - } - return u; -@@ -583,9 +587,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { - std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { - if (!u.empty()) { - unsigned short idx = (u[0].h << 8) + u[0].l; -- if (idx != unicodetoupper(idx, langnum)) { -- u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); -- u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); -+ unsigned short upridx = unicodetoupper(idx, langnum); -+ if (idx != upridx) { -+ u[0].h = (unsigned char)(upridx >> 8); -+ u[0].l = (unsigned char)(upridx & 0x00FF); - } - } - return u; -@@ -601,9 +606,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { - std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { - if (!u.empty()) { - unsigned short idx = (u[0].h << 8) + u[0].l; -- if (idx != unicodetolower(idx, langnum)) { -- u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); -- u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); -+ unsigned short lwridx = unicodetolower(idx, langnum); -+ if (idx != lwridx) { -+ u[0].h = (unsigned char)(lwridx >> 8); -+ u[0].l = (unsigned char)(lwridx & 0x00FF); - } - } - return u; -@@ -2533,9 +2539,10 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) { - size_t firstcap = 0; - for (size_t i = 0; i < word.size(); ++i) { - unsigned short idx = (word[i].h << 8) + word[i].l; -- if (idx != unicodetolower(idx, langnum)) -+ unsigned short lwridx = unicodetolower(idx, langnum); -+ if (idx != lwridx) - ncap++; -- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) -+ if (unicodetoupper(idx, langnum) == lwridx) - nneutral++; - } - if (ncap) { --- -2.9.3 - diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch deleted file mode 100644 index 88695ec027d5..000000000000 --- a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Fri, 10 Feb 2017 15:49:17 +0000 -Subject: [PATCH 2/4] add a get_clen_and_captype varient that takes a buffer - -kcachegrind reports 1,057,506,901 -> 830,529,143 on - -echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL ---- - src/hunspell/hashmgr.cxx | 16 +++++++++++----- - src/hunspell/hashmgr.hxx | 1 + - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx -index 1de1690..4844b49 100644 ---- a/src/hunspell/hashmgr.cxx -+++ b/src/hunspell/hashmgr.cxx -@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, - } - - // detect captype and modify word length for UTF-8 encoding --int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { -+int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) { - int len; - if (utf8) { -- std::vector<w_char> dest_utf; -- len = u8_u16(dest_utf, word); -- *captype = get_captype_utf8(dest_utf, langnum); -+ len = u8_u16(workbuf, word); -+ *captype = get_captype_utf8(workbuf, langnum); - } else { - len = word.size(); - *captype = get_captype(word, csconv); -@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { - return len; - } - -+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { -+ std::vector<w_char> workbuf; -+ return get_clen_and_captype(word, captype, workbuf); -+} -+ - // remove word (personal dictionary function for standalone applications) - int HashMgr::remove(const std::string& word) { - struct hentry* dp = lookup(word.c_str()); -@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) { - // loop through all words on much list and add to hash - // table and create word and affix strings - -+ std::vector<w_char> workbuf; -+ - while (dict->getline(ts)) { - mychomp(ts); - // split each line into word and morphological description -@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) { - } - - int captype; -- int wcl = get_clen_and_captype(ts, &captype); -+ int wcl = get_clen_and_captype(ts, &captype, workbuf); - const std::string *dp_str = dp.empty() ? NULL : &dp; - // add the word and its index plus its capitalized form optionally - if (add_word(ts, wcl, flags, al, dp_str, false) || -diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx -index 812171a..5a09c45 100644 ---- a/src/hunspell/hashmgr.hxx -+++ b/src/hunspell/hashmgr.hxx -@@ -125,6 +125,7 @@ class HashMgr { - - private: - int get_clen_and_captype(const std::string& word, int* captype); -+ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf); - int load_tables(const char* tpath, const char* key); - int add_word(const std::string& word, - int wcl, --- -2.9.3 - diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch deleted file mode 100644 index cbf29e54f93a..000000000000 --- a/external/hunspell/0002-fix-other-regression-in-compounding.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= - <laszlo.nemeth@collabora.com> -Date: Sun, 19 Mar 2017 13:19:29 +0100 -Subject: [PATCH 2/7] fix other regression in compounding -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Allow compound words again with -starting "kor", "alak", "asszony", "úr" -related to the "REP kor _kor" etc. rules -using the Hungarian spelling dictionary. - -regression from... - -commit 73b1cad1af7ab94252f75784fa6724cf062a6966 -Author: Martin Hosken <martin_hosken@sil.org> -Date: Mon Apr 18 16:28:26 2016 +0700 - - Add support for bounded conversion ---- - src/hunspell/affixmgr.cxx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx -index 78c70e7..ec2093d 100644 ---- a/src/hunspell/affixmgr.cxx -+++ b/src/hunspell/affixmgr.cxx -@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) { - // search every occurence of the pattern in the word - while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) { - std::string candidate(word); -- size_t type = r == word ? 1 : 0; -- if (r - word + reptable[i].pattern.size() == lenp) -+ size_t type = r == word && langnum != LANG_hu ? 1 : 0; -+ if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu) - type += 2; - candidate.replace(r - word, lenp, reptable[i].outstrings[type]); - if (candidate_check(candidate.c_str(), candidate.size())) --- -2.7.4 - diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch deleted file mode 100644 index ff2530cfe23d..000000000000 --- a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch +++ /dev/null @@ -1,264 +0,0 @@ -From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Fri, 10 Feb 2017 16:36:27 +0000 -Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest - -only lower when we have to and reuse scratch buffers as -tolower destination - -kcachegrind reports 830,529,143 -> 779,887,690 on - -echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL ---- - src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++--------------- - 1 file changed, 95 insertions(+), 48 deletions(-) - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index 54a474f..ea52707 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - u8_u16(w_target, target); - } - -- std::vector<w_char> w_entry; - std::string f; - std::vector<w_char> w_f; -- std::vector<w_char> w_target2; - - for (size_t i = 0; i < rHMgr.size(); ++i) { - while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { -@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - continue; - - if (utf8) { -- w_entry.clear(); -- u8_u16(w_entry, HENTRY_WORD(hp)); -- sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) + -- leftcommonsubstring(w_word, w_entry); -+ w_f.clear(); -+ u8_u16(w_f, HENTRY_WORD(hp)); -+ -+ int leftcommon = leftcommonsubstring(w_word, w_f); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall_utf(w_f, langnum); -+ } -+ sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; - } else { -- sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + -- leftcommonsubstring(word, HENTRY_WORD(hp)); -+ f.assign(HENTRY_WORD(hp)); -+ -+ int leftcommon = leftcommonsubstring(word, f.c_str()); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall(f, csconv); -+ } -+ sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; - } - - // check special pronounciation -@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - if (utf8) { - w_f.clear(); - u8_u16(w_f, f); -- sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) + -- leftcommonsubstring(w_word, w_f); -+ -+ int leftcommon = leftcommonsubstring(w_word, w_f); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall_utf(w_f, langnum); -+ } -+ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; - } else { -- sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + -- leftcommonsubstring(word, f.c_str()); -+ int leftcommon = leftcommonsubstring(word, f.c_str()); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall(f, csconv); -+ } -+ sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; - } - if (sc2 > sc) - sc = sc2; -@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - candidate = HENTRY_WORD(hp); - mkallcap(candidate, csconv); - } -- std::string target2 = phonet(candidate, *ph); -- w_target2.clear(); -+ f = phonet(candidate, *ph); -+ w_f.clear(); - if (utf8) { -- u8_u16(w_target2, target2); -- scphon = 2 * ngram(3, w_target, w_target2, -+ u8_u16(w_f, f); -+ scphon = 2 * ngram(3, w_target, w_f, - NGRAM_LONGER_WORSE); - } else { -- scphon = 2 * ngram(3, target, target2, -+ scphon = 2 * ngram(3, target, f, - NGRAM_LONGER_WORSE); - } - } -@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - w_mw[k].l = '*'; - w_mw[k].h = 0; - } -- thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low); -+ -+ if (low) { -+ // lowering dictionary word -+ mkallsmall_utf(w_mw, langnum); -+ } -+ -+ thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH); - } else { - std::string mw = word; - for (int k = sp; k < n; k += 4) - mw[k] = '*'; -- thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); -+ -+ if (low) { -+ // lowering dictionary word -+ mkallsmall(mw, csconv); -+ } -+ -+ thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH); - } - } - thresh = thresh / 3; -@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - return; - } - -- std::vector<w_char> w_glst_word; - for (int i = 0; i < MAX_ROOTS; i++) { - if (roots[i]) { - struct hentry* rp = roots[i]; -@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - - for (int k = 0; k < nw; k++) { - if (utf8) { -- w_glst_word.clear(); -- u8_u16(w_glst_word, glst[k].word); -- sc = ngram(n, w_word, w_glst_word, -- NGRAM_ANY_MISMATCH + low) + -- leftcommonsubstring(w_word, w_glst_word); -+ w_f.clear(); -+ u8_u16(w_f, glst[k].word); -+ -+ int leftcommon = leftcommonsubstring(w_word, w_f); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall_utf(w_f, langnum); -+ } -+ -+ sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon; - } else { -- sc = ngram(n, word, glst[k].word, -- NGRAM_ANY_MISMATCH + low) + -- leftcommonsubstring(word, glst[k].word); -+ f = glst[k].word; -+ -+ int leftcommon = leftcommonsubstring(word, f.c_str()); -+ if (low) { -+ // lowering dictionary word -+ mkallsmall(f, csconv); -+ } -+ -+ sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon; - } - - if (sc > thresh) { -@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - w_gl.clear(); - if (utf8) { - u8_u16(w_gl, gl); -- re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + -- ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); -+ //w_gl is lowercase already at this point -+ re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ if (low) { -+ w_f = w_word; -+ // lowering dictionary word -+ mkallsmall_utf(w_f, langnum); -+ re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ } else { -+ re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ } - } else { -- re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + -- ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); -+ //gl is lowercase already at this point -+ re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ if (low) { -+ f = word; -+ // lowering dictionary word -+ mkallsmall(f, csconv); -+ re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ } else { -+ re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); -+ } - } - - int ngram_score, leftcommon_score; - if (utf8) { -- ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low); -+ //w_gl is lowercase already at this point -+ ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH); - leftcommon_score = leftcommonsubstring(w_word, w_gl); - } else { -- ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low); -+ //gl is lowercase already at this point -+ ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH); - leftcommon_score = leftcommonsubstring(word, gl.c_str()); - } - gscore[i] = -@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n, - l2 = su2.size(); - if (l2 == 0) - return 0; -- // lowering dictionary word -- const std::vector<w_char>* p_su2 = &su2; -- std::vector<w_char> su2_copy; -- if (opt & NGRAM_LOWERING) { -- su2_copy = su2; -- mkallsmall_utf(su2_copy, langnum); -- p_su2 = &su2_copy; -- } - for (int j = 1; j <= n; j++) { - ns = 0; - for (int i = 0; i <= (l1 - j); i++) { -@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n, - for (int l = 0; l <= (l2 - j); l++) { - for (k = 0; k < j; k++) { - const w_char& c1 = su1[i + k]; -- const w_char& c2 = (*p_su2)[l + k]; -+ const w_char& c2 = su2[l + k]; - if ((c1.l != c2.l) || (c1.h != c2.h)) - break; - } -@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n, - if (l2 == 0) - return 0; - l1 = s1.size(); -- std::string t(s2); -- if (opt & NGRAM_LOWERING) -- mkallsmall(t, csconv); - for (int j = 1; j <= n; j++) { - ns = 0; - for (int i = 0; i <= (l1 - j); i++) { -- //t is haystack, s1[i..i+j) is needle -- if (t.find(s1.c_str()+i, 0, j) != std::string::npos) { -+ //s2 is haystack, s1[i..i+j) is needle -+ if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) { - ns++; - } else if (opt & NGRAM_WEIGHTED) { - ns--; --- -2.9.3 - diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch deleted file mode 100644 index 6c8a108d6719..000000000000 --- a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch +++ /dev/null @@ -1,81 +0,0 @@ -From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Fri, 10 Feb 2017 17:14:35 +0000 -Subject: [PATCH 4/4] either clear will be called anyway before use, or its - unused afterwards - ---- - src/hunspell/suggestmgr.cxx | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index ea52707..ae34535 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - continue; - - if (utf8) { -- w_f.clear(); - u8_u16(w_f, HENTRY_WORD(hp)); - - int leftcommon = leftcommonsubstring(w_word, w_f); -@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { - int sc2; - if (utf8) { -- w_f.clear(); - u8_u16(w_f, f); - - int leftcommon = leftcommonsubstring(w_word, w_f); -@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - int scphon = -20000; - if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { - if (utf8) { -- w_candidate.clear(); - u8_u16(w_candidate, HENTRY_WORD(hp)); - mkallcap_utf(w_candidate, langnum); - u16_u8(candidate, w_candidate); -@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - mkallcap(candidate, csconv); - } - f = phonet(candidate, *ph); -- w_f.clear(); - if (utf8) { - u8_u16(w_f, f); - scphon = 2 * ngram(3, w_target, w_f, -@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - - for (int k = 0; k < nw; k++) { - if (utf8) { -- w_f.clear(); - u8_u16(w_f, glst[k].word); - - int leftcommon = leftcommonsubstring(w_word, w_f); -@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - std::string gl; - int len; - if (utf8) { -- w_gl.clear(); - len = u8_u16(w_gl, guess[i]); - mkallsmall_utf(w_gl, langnum); - u16_u8(gl, w_gl); -@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - } - // using 2-gram instead of 3, and other weightening - -- w_gl.clear(); - if (utf8) { - u8_u16(w_gl, gl); - //w_gl is lowercase already at this point -@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - // lowering rootphon[i] - std::string gl; - int len; -- w_gl.clear(); - if (utf8) { - len = u8_u16(w_gl, rootsphon[i]); - mkallsmall_utf(w_gl, langnum); --- -2.9.3 - diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch deleted file mode 100644 index 670d938e5441..000000000000 --- a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch +++ /dev/null @@ -1,66 +0,0 @@ -From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= - <laszlo.nemeth@collabora.com> -Date: Thu, 23 Mar 2017 16:40:52 +0100 -Subject: [PATCH 5/7] fix syllable counting in compound word handling -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Note: one of the fixed regressions is related to an old -hidden mistake: using clen instead of blen of the stem -word lengths was indifferent with the original get_syllable(), -because blen == clen at 8-bit encodings, and UTF-8 -words were handled by null-termination. Implementing Unicode -support in Hunspell, clen was changed only in -compound_check_morph() to blen accidentally, but not -in compound_check(), resulting problems from the -recent std::string conversion. - -Now this commit is a real fix for the regression from the -commit c63c93237e4decdba5544a96093448605ac549c2, -instead of the following bad fix: - -commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619 -Author: László Németh <laszlo.nemeth@collabora.com> -Date: Fri Mar 17 15:11:23 2017 +0100 - - fix Hungarian compound word handling ---- - src/hunspell/affixmgr.cxx | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx -index 2ed8233..3d65539 100644 ---- a/src/hunspell/affixmgr.cxx -+++ b/src/hunspell/affixmgr.cxx -@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, - // LANG_hu section: spec. Hungarian rule - if (langnum == LANG_hu) { - // calculate syllable number of the word -- numsyllable += get_syllable(st.substr(i)); -+ numsyllable += get_syllable(st.substr(0, i)); - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) - if (pfx && (get_syllable(pfx->getKey()) > 1)) -@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, - (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && - (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || - ((cpdmaxsyllable != 0) && -- (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <= -+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <= - cpdmaxsyllable))) && - ( - // test CHECKCOMPOUNDPATTERN -@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word, - // LANG_hu section: spec. Hungarian rule - if (langnum == LANG_hu) { - // calculate syllable number of the word -- numsyllable += get_syllable(st.substr(i)); -+ numsyllable += get_syllable(st.substr(0, i)); - - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) --- -2.7.4 - diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk index 40a4a101a8f0..3bb7e5e42dc7 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -21,15 +21,6 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1)) $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \ - external/hunspell/0001-unroll-this-a-bit.patch \ - external/hunspell/0001-cppcheck-redundant-c_str.patch \ - external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \ - external/hunspell/0001-loop-via-iterators.patch \ - external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \ - external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \ - external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \ - external/hunspell/0002-fix-other-regression-in-compounding.patch \ - external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \ )) # vim: set noet sw=4 ts=4: |