diff options
author | Caolán McNamara <caolanm@redhat.com> | 2017-02-12 17:20:56 +0000 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2017-02-12 21:16:59 +0000 |
commit | 163435fa23fbfc237a7718c9d440a98847e4f626 (patch) | |
tree | da35610109170ae3b5de1a86acddb12061768800 | |
parent | 125194ff4b983ccdfb2390449c0a4b4d4345b29b (diff) |
use alternative optimizations for buffer creation bottleneck
Change-Id: I9f29e8d3e5e97fe403a3e0d7d03c6ac01c7689c4
11 files changed, 556 insertions, 1238 deletions
diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch new file mode 100644 index 000000000000..276ddd2e705b --- /dev/null +++ b/external/hunspell/0001-cppcheck-redundant-c_str.patch @@ -0,0 +1,34 @@ +From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Mon, 23 Jan 2017 11:43:53 +0000 +Subject: [PATCH] cppcheck: redundant c_str + +--- + src/hunspell/suggestmgr.cxx | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index b998341..8d46dd6 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + int sc2; + if (utf8) { + w_f.clear(); +- u8_u16(w_f, f.c_str()); ++ u8_u16(w_f, f); + sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) + + leftcommonsubstring(w_word, w_f); + } else { +@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + std::string target2 = phonet(candidate, *ph); + w_target2.clear(); + if (utf8) { +- u8_u16(w_target2, target2.c_str()); ++ u8_u16(w_target2, target2); + scphon = 2 * ngram(3, w_target, w_target2, + NGRAM_LONGER_WORSE); + } else { +-- +2.9.3 + diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch new file mode 100644 index 000000000000..bfcdf490a0a9 --- /dev/null +++ b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch @@ -0,0 +1,57 @@ +From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Mon, 23 Jan 2017 12:05:07 +0000 +Subject: [PATCH] cppcheck: rv is reassigned before old value used + +--- + src/hunspell/affixmgr.cxx | 6 ++---- + src/hunspell/suggestmgr.cxx | 3 +-- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 680cbe9..21cf384 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words, + } + + inline int AffixMgr::candidate_check(const char* word, int len) { +- struct hentry* rv = NULL; + +- rv = lookup(word); ++ struct hentry* rv = lookup(word); + if (rv) + return 1; + +@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word, + int len, + const FLAG needflag, + char in_compound) { +- struct hentry* rv = NULL; + + // check all prefixes (also crossed with suffixes if allowed) +- rv = prefix_check(word, len, in_compound, needflag); ++ struct hentry* rv = prefix_check(word, len, in_compound, needflag); + if (rv) + return rv; + +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index 8d46dd6..54a474f 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { + if (HENTRY_DATA(rv)) + p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); + while (p) { +- struct hentry* rv2 = NULL; + p += MORPH_TAG_LEN; + int plen = fieldlen(p); + std::string allomorph(p, plen); +- rv2 = pAMgr->lookup(allomorph.c_str()); ++ struct hentry* rv2 = pAMgr->lookup(allomorph.c_str()); + while (rv2) { + // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= + // sfxcount) { +-- +2.9.3 + diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch new file mode 100644 index 000000000000..6ecdd769e3bf --- /dev/null +++ b/external/hunspell/0001-loop-via-iterators.patch @@ -0,0 +1,36 @@ +From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Fri, 10 Feb 2017 15:37:11 +0000 +Subject: [PATCH 1/4] loop via iterators + +--- + src/hunspell/csutil.cxx | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx +index c1666a5..2408677 100644 +--- a/src/hunspell/csutil.cxx ++++ b/src/hunspell/csutil.cxx +@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) { + size_t ncap = 0; + size_t nneutral = 0; + size_t firstcap = 0; +- for (size_t i = 0; i < word.size(); ++i) { +- unsigned short idx = (word[i].h << 8) + word[i].l; ++ ++ std::vector<w_char>::const_iterator it = word.begin(); ++ std::vector<w_char>::const_iterator it_end = word.end(); ++ while (it != it_end) { ++ unsigned short idx = (it->h << 8) + it->l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) + ncap++; + if (unicodetoupper(idx, langnum) == lwridx) + nneutral++; ++ ++it; + } + if (ncap) { + unsigned short idx = (word[0].h << 8) + word[0].l; +-- +2.9.3 + diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch new file mode 100644 index 000000000000..88695ec027d5 --- /dev/null +++ b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch @@ -0,0 +1,78 @@ +From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Fri, 10 Feb 2017 15:49:17 +0000 +Subject: [PATCH 2/4] add a get_clen_and_captype varient that takes a buffer + +kcachegrind reports 1,057,506,901 -> 830,529,143 on + +echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL +--- + src/hunspell/hashmgr.cxx | 16 +++++++++++----- + src/hunspell/hashmgr.hxx | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx +index 1de1690..4844b49 100644 +--- a/src/hunspell/hashmgr.cxx ++++ b/src/hunspell/hashmgr.cxx +@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, + } + + // detect captype and modify word length for UTF-8 encoding +-int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { ++int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) { + int len; + if (utf8) { +- std::vector<w_char> dest_utf; +- len = u8_u16(dest_utf, word); +- *captype = get_captype_utf8(dest_utf, langnum); ++ len = u8_u16(workbuf, word); ++ *captype = get_captype_utf8(workbuf, langnum); + } else { + len = word.size(); + *captype = get_captype(word, csconv); +@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { + return len; + } + ++int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { ++ std::vector<w_char> workbuf; ++ return get_clen_and_captype(word, captype, workbuf); ++} ++ + // remove word (personal dictionary function for standalone applications) + int HashMgr::remove(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); +@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) { + // loop through all words on much list and add to hash + // table and create word and affix strings + ++ std::vector<w_char> workbuf; ++ + while (dict->getline(ts)) { + mychomp(ts); + // split each line into word and morphological description +@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) { + } + + int captype; +- int wcl = get_clen_and_captype(ts, &captype); ++ int wcl = get_clen_and_captype(ts, &captype, workbuf); + const std::string *dp_str = dp.empty() ? NULL : &dp; + // add the word and its index plus its capitalized form optionally + if (add_word(ts, wcl, flags, al, dp_str, false) || +diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx +index 812171a..5a09c45 100644 +--- a/src/hunspell/hashmgr.hxx ++++ b/src/hunspell/hashmgr.hxx +@@ -125,6 +125,7 @@ class HashMgr { + + private: + int get_clen_and_captype(const std::string& word, int* captype); ++ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf); + int load_tables(const char* tpath, const char* key); + int add_word(const std::string& word, + int wcl, +-- +2.9.3 + diff --git a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch b/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch deleted file mode 100644 index 31b8c048157b..000000000000 --- a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch +++ /dev/null @@ -1,912 +0,0 @@ -From 3a935abd0539143ee952d2f86ec513be6a056d5e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 13:35:13 +0000 -Subject: [PATCH 2/3] rename std::vector<w_char> to wide::string - ---- - src/hunspell/affixmgr.cxx | 8 ++--- - src/hunspell/affixmgr.hxx | 10 +++---- - src/hunspell/csutil.cxx | 28 +++++++++--------- - src/hunspell/csutil.hxx | 26 ++++++++-------- - src/hunspell/hashmgr.cxx | 12 ++++---- - src/hunspell/hashmgr.hxx | 2 +- - src/hunspell/hunspell.cxx | 46 ++++++++++++++--------------- - src/hunspell/hunspell.hxx | 2 +- - src/hunspell/suggestmgr.cxx | 72 ++++++++++++++++++++++----------------------- - src/hunspell/suggestmgr.hxx | 12 ++++---- - src/hunspell/w_char.hxx | 6 ++++ - src/parsers/textparser.cxx | 2 +- - src/tools/hunspell.cxx | 10 +++---- - 13 files changed, 121 insertions(+), 115 deletions(-) - -diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx -index 21cf384..4f64721 100644 ---- a/src/hunspell/affixmgr.cxx -+++ b/src/hunspell/affixmgr.cxx -@@ -1338,7 +1338,7 @@ int AffixMgr::cpdcase_check(const char* word, int pos) { - for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--) - ; - std::string pair(p); -- std::vector<w_char> pair_u; -+ wide::string pair_u; - u8_u16(pair_u, pair); - unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0; - unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0; -@@ -1523,7 +1523,7 @@ short AffixMgr::get_syllable(const std::string& word) { - } - } - } else if (!cpdvowels_utf16.empty()) { -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, word); - for (size_t i = 0; i < w.size(); ++i) { - if (std::binary_search(cpdvowels_utf16.begin(), -@@ -3505,7 +3505,7 @@ const char* AffixMgr::get_ignore() const { - } - - // return the preferred ignore string for suggestions --const std::vector<w_char>& AffixMgr::get_ignore_utf16() const { -+const wide::string& AffixMgr::get_ignore_utf16() const { - return ignorechars_utf16; - } - -@@ -3528,7 +3528,7 @@ const std::string& AffixMgr::get_wordchars() const { - return wordchars; - } - --const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const { -+const wide::string& AffixMgr::get_wordchars_utf16() const { - return wordchars_utf16; - } - -diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx -index 83a4b42..11f1a67 100644 ---- a/src/hunspell/affixmgr.hxx -+++ b/src/hunspell/affixmgr.hxx -@@ -146,7 +146,7 @@ class AffixMgr { - int cpdwordmax; - int cpdmaxsyllable; - std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit, -- std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding -+ wide::string cpdvowels_utf16; //vowels for UTF-8 encoding - std::string cpdsyllablenum; // syllable count incrementing flag - const char* pfxappnd; // BUG: not stateless - const char* sfxappnd; // BUG: not stateless -@@ -157,9 +157,9 @@ class AffixMgr { - PfxEntry* pfx; // BUG: not stateless - int checknum; - std::string wordchars; // letters + spec. word characters -- std::vector<w_char> wordchars_utf16; -+ wide::string wordchars_utf16; - std::string ignorechars; // letters + spec. word characters -- std::vector<w_char> ignorechars_utf16; -+ wide::string ignorechars_utf16; - std::string version; // affix and dictionary file version string - std::string lang; // language - int langnum; -@@ -306,9 +306,9 @@ class AffixMgr { - char* get_key_string(); - char* get_try_string() const; - const std::string& get_wordchars() const; -- const std::vector<w_char>& get_wordchars_utf16() const; -+ const wide::string& get_wordchars_utf16() const; - const char* get_ignore() const; -- const std::vector<w_char>& get_ignore_utf16() const; -+ const wide::string& get_ignore_utf16() const; - int get_compound() const; - FLAG get_compoundflag() const; - FLAG get_forbiddenword() const; -diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx -index c1666a5..2f59b3d 100644 ---- a/src/hunspell/csutil.cxx -+++ b/src/hunspell/csutil.cxx -@@ -143,10 +143,10 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod - stream.open(path, mode); - } - --std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { -+std::string& u16_u8(std::string& dest, const wide::string& src) { - dest.clear(); -- std::vector<w_char>::const_iterator u2 = src.begin(); -- std::vector<w_char>::const_iterator u2_max = src.end(); -+ wide::string::const_iterator u2 = src.begin(); -+ wide::string::const_iterator u2_max = src.end(); - while (u2 < u2_max) { - signed char u8; - if (u2->h) { // > 0xFF -@@ -180,7 +180,7 @@ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { - return dest; - } - --int u8_u16(std::vector<w_char>& dest, const std::string& src) { -+int u8_u16(wide::string& dest, const std::string& src) { - dest.clear(); - std::string::const_iterator u8 = src.begin(); - std::string::const_iterator u8_max = src.end(); -@@ -474,7 +474,7 @@ size_t reverseword(std::string& word) { - - // reverse word - size_t reverseword_utf(std::string& word) { -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, word); - std::reverse(w.begin(), w.end()); - u16_u8(word, w); -@@ -552,7 +552,7 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { - return s; - } - --std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, -+wide::string& mkallsmall_utf(wide::string& u, - int langnum) { - for (size_t i = 0; i < u.size(); ++i) { - unsigned short idx = (u[i].h << 8) + u[i].l; -@@ -565,7 +565,7 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, - return u; - } - --std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { -+wide::string& mkallcap_utf(wide::string& u, int langnum) { - for (size_t i = 0; i < u.size(); i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; - unsigned short upridx = unicodetoupper(idx, langnum); -@@ -584,7 +584,7 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { - return s; - } - --std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { -+wide::string& mkinitcap_utf(wide::string& u, int langnum) { - if (!u.empty()) { - unsigned short idx = (u[0].h << 8) + u[0].l; - unsigned short upridx = unicodetoupper(idx, langnum); -@@ -603,7 +603,7 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { - return s; - } - --std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { -+wide::string& mkinitsmall_utf(wide::string& u, int langnum) { - if (!u.empty()) { - unsigned short idx = (u[0].h << 8) + u[0].l; - unsigned short lwridx = unicodetolower(idx, langnum); -@@ -2532,7 +2532,7 @@ int get_captype(const std::string& word, cs_info* csconv) { - return HUHCAP; - } - --int get_captype_utf8(const std::vector<w_char>& word, int langnum) { -+int get_captype_utf8(const wide::string& word, int langnum) { - // now determine the capitalization type of the first nl letters - size_t ncap = 0; - size_t nneutral = 0; -@@ -2565,9 +2565,9 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) { - - // strip all ignored characters in the string - size_t remove_ignored_chars_utf(std::string& word, -- const std::vector<w_char>& ignored_chars) { -- std::vector<w_char> w; -- std::vector<w_char> w2; -+ const wide::string& ignored_chars) { -+ wide::string w; -+ wide::string w2; - u8_u16(w, word); - - for (size_t i = 0; i < w.size(); ++i) { -@@ -2626,7 +2626,7 @@ bool parse_string(const std::string& line, std::string& out, int ln) { - - bool parse_array(const std::string& line, - std::string& out, -- std::vector<w_char>& out_utf16, -+ wide::string& out_utf16, - int utf8, - int ln) { - if (!parse_string(line, out, ln)) -diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx -index 302d7e9..313672e 100644 ---- a/src/hunspell/csutil.hxx -+++ b/src/hunspell/csutil.hxx -@@ -134,10 +134,10 @@ LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path, - - // convert UTF-16 characters to UTF-8 - LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest, -- const std::vector<w_char>& src); -+ const wide::string& src); - - // convert UTF-8 characters to UTF-16 --LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest, -+LIBHUNSPELL_DLL_EXPORTED int u8_u16(wide::string& dest, - const std::string& src); - - // remove end of line char(s) -@@ -219,31 +219,31 @@ LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s, - const struct cs_info* csconv); - - // convert first letter of UTF-8 string to capital --LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& --mkinitcap_utf(std::vector<w_char>& u, int langnum); -+LIBHUNSPELL_DLL_EXPORTED wide::string& -+mkinitcap_utf(wide::string& u, int langnum); - - // convert UTF-8 string to little --LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& --mkallsmall_utf(std::vector<w_char>& u, int langnum); -+LIBHUNSPELL_DLL_EXPORTED wide::string& -+mkallsmall_utf(wide::string& u, int langnum); - - // convert first letter of UTF-8 string to little --LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& --mkinitsmall_utf(std::vector<w_char>& u, int langnum); -+LIBHUNSPELL_DLL_EXPORTED wide::string& -+mkinitsmall_utf(wide::string& u, int langnum); - - // convert UTF-8 string to capital --LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& --mkallcap_utf(std::vector<w_char>& u, int langnum); -+LIBHUNSPELL_DLL_EXPORTED wide::string& -+mkallcap_utf(wide::string& u, int langnum); - - // get type of capitalization - LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*); - - // get type of capitalization (UTF-8) --LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum); -+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const wide::string& q, int langnum); - - // strip all ignored characters in the string - LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf( - std::string& word, -- const std::vector<w_char>& ignored_chars); -+ const wide::string& ignored_chars); - - // strip all ignored characters in the string - LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars( -@@ -256,7 +256,7 @@ LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line, - - LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line, - std::string& out, -- std::vector<w_char>& out_utf16, -+ wide::string& out_utf16, - int utf8, - int ln); - -diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx -index 1de1690..6d92e9b 100644 ---- a/src/hunspell/hashmgr.cxx -+++ b/src/hunspell/hashmgr.cxx -@@ -345,7 +345,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, - flags2[flagslen] = ONLYUPCASEFLAG; - if (utf8) { - std::string st; -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, word); - mkallsmall_utf(w, langnum); - mkinitcap_utf(w, langnum); -@@ -366,7 +366,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, - int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { - int len; - if (utf8) { -- std::vector<w_char> dest_utf; -+ wide::string dest_utf; - len = u8_u16(dest_utf, word); - *captype = get_captype_utf8(dest_utf, langnum); - } else { -@@ -688,7 +688,7 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil - break; - } - case FLAG_UNI: { // UTF-8 characters -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, flags); - len = w.size(); - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); -@@ -760,7 +760,7 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin - break; - } - case FLAG_UNI: { // UTF-8 characters -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, flags); - size_t len = w.size(); - size_t origsize = result.size(); -@@ -793,7 +793,7 @@ unsigned short HashMgr::decode_flag(const char* f) const { - s = (unsigned short)i; - break; - case FLAG_UNI: { -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, f); - if (!w.empty()) - memcpy(&s, &w[0], 1 * sizeof(short)); -@@ -820,7 +820,7 @@ char* HashMgr::encode_flag(unsigned short f) const { - ch = stream.str(); - } else if (flag_mode == FLAG_UNI) { - const w_char* w_c = (const w_char*)&f; -- std::vector<w_char> w(w_c, w_c + 1); -+ wide::string w(w_c, w_c + 1); - u16_u8(ch, w); - } else { - ch.push_back((unsigned char)(f)); -diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx -index 812171a..312c8ba 100644 ---- a/src/hunspell/hashmgr.hxx -+++ b/src/hunspell/hashmgr.hxx -@@ -96,7 +96,7 @@ class HashMgr { - std::string lang; - struct cs_info* csconv; - std::string ignorechars; -- std::vector<w_char> ignorechars_utf16; -+ wide::string ignorechars_utf16; - int numaliasf; // flag vector `compression' with aliases - unsigned short** aliasf; - unsigned short* aliasflen; -diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx -index a8d78dc..46f1df9 100644 ---- a/src/hunspell/hunspell.cxx -+++ b/src/hunspell/hunspell.cxx -@@ -103,7 +103,7 @@ public: - bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); - std::vector<std::string> suggest(const std::string& word); - const std::string& get_wordchars() const; -- const std::vector<w_char>& get_wordchars_utf16() const; -+ const wide::string& get_wordchars_utf16() const; - const std::string& get_dict_encoding() const; - int add(const std::string& word); - int add_with_affix(const std::string& word, const std::string& example); -@@ -127,15 +127,15 @@ private: - private: - void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev); - size_t cleanword2(std::string& dest, -- std::vector<w_char>& dest_u, -+ wide::string& dest_u, - const std::string& src, - int* pcaptype, - size_t* pabbrev); - void mkinitcap(std::string& u8); -- int mkinitcap2(std::string& u8, std::vector<w_char>& u16); -- int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); -+ int mkinitcap2(std::string& u8, wide::string& u16); -+ int mkinitsmall2(std::string& u8, wide::string& u16); - void mkallcap(std::string& u8); -- int mkallsmall2(std::string& u8, std::vector<w_char>& u16); -+ int mkallsmall2(std::string& u8, wide::string& u16); - struct hentry* checkword(const std::string& source, int* info, std::string* root); - std::string sharps_u8_l1(const std::string& source); - hentry* -@@ -231,7 +231,7 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) { - // return the length of the "cleaned" (and UTF-8 encoded) word - - size_t HunspellImpl::cleanword2(std::string& dest, -- std::vector<w_char>& dest_utf, -+ wide::string& dest_utf, - const std::string& src, - int* pcaptype, - size_t* pabbrev) { -@@ -313,7 +313,7 @@ void HunspellImpl::cleanword(std::string& dest, - // remember to terminate the destination string - firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase; - } else { -- std::vector<w_char> t; -+ wide::string t; - u8_u16(t, src); - for (size_t i = 0; i < t.size(); ++i) { - unsigned short idx = (t[i].h << 8) + t[i].l; -@@ -346,7 +346,7 @@ void HunspellImpl::cleanword(std::string& dest, - - void HunspellImpl::mkallcap(std::string& u8) { - if (utf8) { -- std::vector<w_char> u16; -+ wide::string u16; - u8_u16(u16, u8); - ::mkallcap_utf(u16, langnum); - u16_u8(u8, u16); -@@ -355,7 +355,7 @@ void HunspellImpl::mkallcap(std::string& u8) { - } - } - --int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) { -+int HunspellImpl::mkallsmall2(std::string& u8, wide::string& u16) { - if (utf8) { - ::mkallsmall_utf(u16, langnum); - u16_u8(u8, u16); -@@ -438,7 +438,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) - size_t wl = 0; - - std::string scw; -- std::vector<w_char> sunicw; -+ wide::string sunicw; - - // input conversion - RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL; -@@ -519,7 +519,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) - std::string part1 = scw.substr(0, apos+1); - std::string part2 = scw.substr(apos+1); - if (utf8) { -- std::vector<w_char> part1u, part2u; -+ wide::string part1u, part2u; - u8_u16(part1u, part1); - u8_u16(part2u, part2); - mkinitcap2(part2, part2u); -@@ -704,7 +704,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str - if (ignoredchars != NULL) { - w2.assign(w); - if (utf8) { -- const std::vector<w_char>& ignoredchars_utf16 = -+ const wide::string& ignoredchars_utf16 = - pAMgr->get_ignore_utf16(); - remove_ignored_chars_utf(w2, ignoredchars_utf16); - } else { -@@ -855,7 +855,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { - size_t wl = 0; - - std::string scw; -- std::vector<w_char> sunicw; -+ wide::string sunicw; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -@@ -909,7 +909,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { - std::string postdot = scw.substr(dot_pos + 1); - int captype_; - if (utf8) { -- std::vector<w_char> postdotu; -+ wide::string postdotu; - u8_u16(postdotu, postdot); - captype_ = get_captype_utf8(postdotu, langnum); - } else { -@@ -951,7 +951,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { - if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { - std::string first(slst[j].c_str(), space + 1); - std::string second(space + 1); -- std::vector<w_char> w; -+ wide::string w; - if (utf8) - u8_u16(w, second); - mkinitcap2(second, w); -@@ -1109,7 +1109,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { - for (size_t j = 0; j < slst.size(); ++j) { - if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { - std::string s; -- std::vector<w_char> w; -+ wide::string w; - if (utf8) { - u8_u16(w, slst[j]); - } else { -@@ -1262,17 +1262,17 @@ const std::string& HunspellImpl::get_wordchars() const { - return pAMgr->get_wordchars(); - } - --const std::vector<w_char>& Hunspell::get_wordchars_utf16() const { -+const wide::string& Hunspell::get_wordchars_utf16() const { - return m_Impl->get_wordchars_utf16(); - } - --const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const { -+const wide::string& HunspellImpl::get_wordchars_utf16() const { - return pAMgr->get_wordchars_utf16(); - } - - void HunspellImpl::mkinitcap(std::string& u8) { - if (utf8) { -- std::vector<w_char> u16; -+ wide::string u16; - u8_u16(u16, u8); - ::mkinitcap_utf(u16, langnum); - u16_u8(u8, u16); -@@ -1281,7 +1281,7 @@ void HunspellImpl::mkinitcap(std::string& u8) { - } - } - --int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { -+int HunspellImpl::mkinitcap2(std::string& u8, wide::string& u16) { - if (utf8) { - ::mkinitcap_utf(u16, langnum); - u16_u8(u8, u16); -@@ -1291,7 +1291,7 @@ int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { - return u8.size(); - } - --int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { -+int HunspellImpl::mkinitsmall2(std::string& u8, wide::string& u16) { - if (utf8) { - ::mkinitsmall_utf(u16, langnum); - u16_u8(u8, u16); -@@ -1379,7 +1379,7 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) { - size_t wl = 0; - - std::string scw; -- std::vector<w_char> sunicw; -+ wide::string sunicw; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -@@ -1994,7 +1994,7 @@ std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_wo - if (ignoredchars != NULL) { - w2.assign(root_word); - if (utf8) { -- const std::vector<w_char>& ignoredchars_utf16 = -+ const wide::string& ignoredchars_utf16 = - pAMgr->get_ignore_utf16(); - remove_ignored_chars_utf(w2, ignoredchars_utf16); - } else { -diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx -index 43af66b..375a7da 100644 ---- a/src/hunspell/hunspell.hxx -+++ b/src/hunspell/hunspell.hxx -@@ -215,7 +215,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell { - /* get extra word characters definied in affix file for tokenization */ - const char* get_wordchars() const; - const std::string& get_wordchars_cpp() const; -- const std::vector<w_char>& get_wordchars_utf16() const; -+ const wide::string& get_wordchars_utf16() const; - - struct cs_info* get_csconv(); - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index 54a474f..1deec96 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -179,7 +179,7 @@ void SuggestMgr::suggest(std::vector<std::string>& slst, - const char* w, - int* onlycompoundsug) { - int nocompoundtwowords = 0; -- std::vector<w_char> word_utf; -+ wide::string word_utf; - int wl = 0; - size_t nsugorig = slst.size(); - std::string w2; -@@ -313,7 +313,7 @@ void SuggestMgr::capchars_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - mkallcap_utf(candidate_utf, langnum); - std::string candidate; - u16_u8(candidate, candidate_utf); -@@ -491,7 +491,7 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst, - if (word[i] == word[i - 2]) { - state++; - if (state == 3) { -- std::vector<w_char> candidate_utf(word, word + i - 1); -+ wide::string candidate_utf(word, word + i - 1); - candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); - std::string candidate; - u16_u8(candidate, candidate_utf); -@@ -549,7 +549,7 @@ int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst, - int wl, - int cpdsuggest) { - std::string candidate; -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - // swap out each char one by one and try all the tryme - // chars in its place to see if that makes a good word - for (int i = 0; i < wl; i++) { -@@ -614,7 +614,7 @@ int SuggestMgr::badchar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - std::string candidate; - clock_t timelimit = clock(); - int timer = MINTIMER; -@@ -641,7 +641,7 @@ int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - if (candidate_utf.size() < 2) - return wlst.size(); - // try omitting one char of word at a time -@@ -703,7 +703,7 @@ int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - clock_t timelimit = clock(); - int timer = MINTIMER; - -@@ -852,7 +852,7 @@ int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - if (candidate_utf.size() < 2) - return wlst.size(); - -@@ -909,10 +909,10 @@ int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - // try swapping not adjacent chars -- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { -- for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { -+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { -+ for (wide::string::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { - if (std::abs(std::distance(q, p)) > 1) { - std::swap(*p, *q); - std::string candidate; -@@ -962,13 +962,13 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst, - const w_char* word, - int wl, - int cpdsuggest) { -- std::vector<w_char> candidate_utf(word, word + wl); -+ wide::string candidate_utf(word, word + wl); - if (candidate_utf.size() < 2) - return wlst.size(); - - // try moving a char -- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { -- for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { -+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { -+ for (wide::string::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -979,8 +979,8 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst, - std::copy(word, word + candidate_utf.size(), candidate_utf.begin()); - } - -- for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { -- for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) { -+ for (wide::string::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { -+ for (wide::string::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -1032,7 +1032,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - word = w2.c_str(); - } - -- std::vector<w_char> u8; -+ wide::string u8; - int nc = strlen(word); - int n = (utf8) ? u8_u16(u8, word) : nc; - -@@ -1050,7 +1050,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; - std::string target; - std::string candidate; -- std::vector<w_char> w_candidate; -+ wide::string w_candidate; - if (ph) { - if (utf8) { - u8_u16(w_candidate, word); -@@ -1069,16 +1069,16 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL; - FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL; - -- std::vector<w_char> w_word, w_target; -+ wide::string w_word, w_target; - if (utf8) { - u8_u16(w_word, word); - u8_u16(w_target, target); - } - -- std::vector<w_char> w_entry; -+ wide::string w_entry; - std::string f; -- std::vector<w_char> w_f; -- std::vector<w_char> w_target2; -+ wide::string w_f; -+ wide::string w_target2; - - for (size_t i = 0; i < rHMgr.size(); ++i) { - while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { -@@ -1168,7 +1168,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - // find minimum threshold for a passable suggestion - // mangle original word three differnt ways - // and score them to generate a minimum acceptable score -- std::vector<w_char> w_mw; -+ wide::string w_mw; - int thresh = 0; - for (int sp = 1; sp < 4; sp++) { - if (utf8) { -@@ -1210,7 +1210,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - return; - } - -- std::vector<w_char> w_glst_word; -+ wide::string w_glst_word; - for (int i = 0; i < MAX_ROOTS; i++) { - if (roots[i]) { - struct hentry* rp = roots[i]; -@@ -1288,7 +1288,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - fact = (10.0 - maxd) / 5.0; - } - -- std::vector<w_char> w_gl; -+ wide::string w_gl; - for (int i = 0; i < MAX_GUESS; i++) { - if (guess[i]) { - // lowering guess[i] -@@ -1789,8 +1789,8 @@ std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const - - // generate an n-gram score comparing s1 and s2, UTF16 version - int SuggestMgr::ngram(int n, -- const std::vector<w_char>& su1, -- const std::vector<w_char>& su2, -+ const wide::string& su1, -+ const wide::string& su2, - int opt) { - int nscore = 0; - int ns; -@@ -1803,8 +1803,8 @@ int SuggestMgr::ngram(int n, - if (l2 == 0) - return 0; - // lowering dictionary word -- const std::vector<w_char>* p_su2 = &su2; -- std::vector<w_char> su2_copy; -+ const wide::string* p_su2 = &su2; -+ wide::string su2_copy; - if (opt & NGRAM_LOWERING) { - su2_copy = su2; - mkallsmall_utf(su2_copy, langnum); -@@ -1894,8 +1894,8 @@ int SuggestMgr::ngram(int n, - - // length of the left common substring of s1 and (decapitalised) s2, UTF version - int SuggestMgr::leftcommonsubstring( -- const std::vector<w_char>& su1, -- const std::vector<w_char>& su2) { -+ const wide::string& su1, -+ const wide::string& su2) { - int l1 = su1.size(); - int l2 = su2.size(); - // decapitalize dictionary word -@@ -1948,8 +1948,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1, - int diffpos[2]; - *is_swap = 0; - if (utf8) { -- std::vector<w_char> su1; -- std::vector<w_char> su2; -+ wide::string su1; -+ wide::string su2; - int l1 = u8_u16(su1, s1); - int l2 = u8_u16(su2, s2); - -@@ -2004,7 +2004,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1, - - int SuggestMgr::mystrlen(const char* word) { - if (utf8) { -- std::vector<w_char> w; -+ wide::string w; - return u8_u16(w, word); - } else - return strlen(word); -@@ -2044,8 +2044,8 @@ void SuggestMgr::lcs(const char* s, - int* l2, - char** result) { - int n, m; -- std::vector<w_char> su; -- std::vector<w_char> su2; -+ wide::string su; -+ wide::string su2; - char* b; - char* c; - int i; -diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx -index 6ba9dc8..9bfa933 100644 ---- a/src/hunspell/suggestmgr.hxx -+++ b/src/hunspell/suggestmgr.hxx -@@ -107,11 +107,11 @@ class SuggestMgr { - private: - char* ckey; - size_t ckeyl; -- std::vector<w_char> ckey_utf; -+ wide::string ckey_utf; - - char* ctry; - size_t ctryl; -- std::vector<w_char> ctry_utf; -+ wide::string ctry_utf; - - AffixMgr* pAMgr; - unsigned int maxSug; -@@ -173,12 +173,12 @@ class SuggestMgr { - const std::vector<mapentry>&, - int*, - clock_t*); -- int ngram(int n, const std::vector<w_char>& su1, -- const std::vector<w_char>& su2, int opt); -+ int ngram(int n, const wide::string& su1, -+ const wide::string& su2, int opt); - int ngram(int n, const std::string& s1, const std::string& s2, int opt); - int mystrlen(const char* word); -- int leftcommonsubstring(const std::vector<w_char>& su1, -- const std::vector<w_char>& su2); -+ int leftcommonsubstring(const wide::string& su1, -+ const wide::string& su2); - int leftcommonsubstring(const char* s1, const char* s2); - int commoncharacterpositions(const char* s1, const char* s2, int* is_swap); - void bubblesort(char** rwd, char** rwd2, int* rsc, int n); -diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx -index c561ffc..84ae13c 100644 ---- a/src/hunspell/w_char.hxx -+++ b/src/hunspell/w_char.hxx -@@ -42,6 +42,7 @@ - #define W_CHAR_HXX_ - - #include <string> -+#include <vector> - - #ifndef GCC - struct w_char { -@@ -72,4 +73,9 @@ struct replentry { - std::string outstrings[4]; // med, ini, fin, isol - }; - -+namespace wide -+{ -+ typedef std::vector<w_char> string; -+} -+ - #endif -diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx -index 53548e4..8e43f79 100644 ---- a/src/parsers/textparser.cxx -+++ b/src/parsers/textparser.cxx -@@ -81,7 +81,7 @@ int TextParser::is_wordchar(const char* w) { - if (*w == '\0') - return 0; - if (utf8) { -- std::vector<w_char> wc; -+ wide::string wc; - unsigned short idx; - u8_u16(wc, w); - if (wc.empty()) -diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx -index 3172409..c39f148 100644 ---- a/src/tools/hunspell.cxx -+++ b/src/tools/hunspell.cxx -@@ -199,7 +199,7 @@ enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF }; - std::string wordchars; - char* dicpath = NULL; - const w_char* wordchars_utf16 = NULL; --std::vector<w_char> new_wordchars_utf16; -+wide::string new_wordchars_utf16; - int wordchars_utf16_len; - char* dicname = NULL; - char* privdicname = NULL; -@@ -311,7 +311,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) { - } - - if (io_utf8) { -- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16(); -+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16(); - const std::string& vec_wordchars = pMS->get_wordchars_cpp(); - wordchars_utf16_len = vec_wordchars_utf16.size(); - wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL; -@@ -356,7 +356,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) { - ch[1] = '\0'; - size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2); - if (res != (size_t)-1) { -- std::vector<w_char> w; -+ wide::string w; - u8_u16(w, std::string(u8, dest)); - unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l; - if (unicodeisalpha(idx)) { -@@ -395,7 +395,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) { - } - #else - if (strcmp(denc, "UTF-8") == 0) { -- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16(); -+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16(); - wordchars_utf16 = &vec_wordchars_utf16[0]; - wordchars_utf16_len = vec_wordchars_utf16.size(); - io_utf8 = 1; -@@ -1199,7 +1199,7 @@ void dialogscreen(TextParser* parser, - std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) { - std::string utf8str(token); - chenc(utf8str, ioenc, "UTF-8"); -- std::vector<w_char> u; -+ wide::string u; - u8_u16(u, utf8str); - if (!u.empty()) { - unsigned short idx = (u[0].h << 8) + u[0].l; --- -2.9.3 - diff --git a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch b/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch deleted file mode 100644 index ba48f9f5e93a..000000000000 --- a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 7c7f56e1c6fe510a2c5e826cc49aeae3f6614f86 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 13:36:20 +0000 -Subject: [PATCH 3/3] Related: hunspell#406 use a basic_string<w_char> instead - of vector - -kcachegrind reports 1,066,887,723 -> 894,015,631 on - -echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL ---- - src/hunspell/w_char.hxx | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx -index 84ae13c..e112b5c 100644 ---- a/src/hunspell/w_char.hxx -+++ b/src/hunspell/w_char.hxx -@@ -42,7 +42,6 @@ - #define W_CHAR_HXX_ - - #include <string> --#include <vector> - - #ifndef GCC - struct w_char { -@@ -75,7 +74,7 @@ struct replentry { - - namespace wide - { -- typedef std::vector<w_char> string; -+ typedef std::basic_string<w_char> string; - } - - #endif --- -2.9.3 - diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch new file mode 100644 index 000000000000..ff2530cfe23d --- /dev/null +++ b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch @@ -0,0 +1,264 @@ +From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Fri, 10 Feb 2017 16:36:27 +0000 +Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest + +only lower when we have to and reuse scratch buffers as +tolower destination + +kcachegrind reports 830,529,143 -> 779,887,690 on + +echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL +--- + src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++--------------- + 1 file changed, 95 insertions(+), 48 deletions(-) + +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index 54a474f..ea52707 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + u8_u16(w_target, target); + } + +- std::vector<w_char> w_entry; + std::string f; + std::vector<w_char> w_f; +- std::vector<w_char> w_target2; + + for (size_t i = 0; i < rHMgr.size(); ++i) { + while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { +@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + continue; + + if (utf8) { +- w_entry.clear(); +- u8_u16(w_entry, HENTRY_WORD(hp)); +- sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) + +- leftcommonsubstring(w_word, w_entry); ++ w_f.clear(); ++ u8_u16(w_f, HENTRY_WORD(hp)); ++ ++ int leftcommon = leftcommonsubstring(w_word, w_f); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall_utf(w_f, langnum); ++ } ++ sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; + } else { +- sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + +- leftcommonsubstring(word, HENTRY_WORD(hp)); ++ f.assign(HENTRY_WORD(hp)); ++ ++ int leftcommon = leftcommonsubstring(word, f.c_str()); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall(f, csconv); ++ } ++ sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; + } + + // check special pronounciation +@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + if (utf8) { + w_f.clear(); + u8_u16(w_f, f); +- sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) + +- leftcommonsubstring(w_word, w_f); ++ ++ int leftcommon = leftcommonsubstring(w_word, w_f); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall_utf(w_f, langnum); ++ } ++ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; + } else { +- sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + +- leftcommonsubstring(word, f.c_str()); ++ int leftcommon = leftcommonsubstring(word, f.c_str()); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall(f, csconv); ++ } ++ sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; + } + if (sc2 > sc) + sc = sc2; +@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + candidate = HENTRY_WORD(hp); + mkallcap(candidate, csconv); + } +- std::string target2 = phonet(candidate, *ph); +- w_target2.clear(); ++ f = phonet(candidate, *ph); ++ w_f.clear(); + if (utf8) { +- u8_u16(w_target2, target2); +- scphon = 2 * ngram(3, w_target, w_target2, ++ u8_u16(w_f, f); ++ scphon = 2 * ngram(3, w_target, w_f, + NGRAM_LONGER_WORSE); + } else { +- scphon = 2 * ngram(3, target, target2, ++ scphon = 2 * ngram(3, target, f, + NGRAM_LONGER_WORSE); + } + } +@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + w_mw[k].l = '*'; + w_mw[k].h = 0; + } +- thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low); ++ ++ if (low) { ++ // lowering dictionary word ++ mkallsmall_utf(w_mw, langnum); ++ } ++ ++ thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH); + } else { + std::string mw = word; + for (int k = sp; k < n; k += 4) + mw[k] = '*'; +- thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); ++ ++ if (low) { ++ // lowering dictionary word ++ mkallsmall(mw, csconv); ++ } ++ ++ thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH); + } + } + thresh = thresh / 3; +@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + return; + } + +- std::vector<w_char> w_glst_word; + for (int i = 0; i < MAX_ROOTS; i++) { + if (roots[i]) { + struct hentry* rp = roots[i]; +@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + + for (int k = 0; k < nw; k++) { + if (utf8) { +- w_glst_word.clear(); +- u8_u16(w_glst_word, glst[k].word); +- sc = ngram(n, w_word, w_glst_word, +- NGRAM_ANY_MISMATCH + low) + +- leftcommonsubstring(w_word, w_glst_word); ++ w_f.clear(); ++ u8_u16(w_f, glst[k].word); ++ ++ int leftcommon = leftcommonsubstring(w_word, w_f); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall_utf(w_f, langnum); ++ } ++ ++ sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon; + } else { +- sc = ngram(n, word, glst[k].word, +- NGRAM_ANY_MISMATCH + low) + +- leftcommonsubstring(word, glst[k].word); ++ f = glst[k].word; ++ ++ int leftcommon = leftcommonsubstring(word, f.c_str()); ++ if (low) { ++ // lowering dictionary word ++ mkallsmall(f, csconv); ++ } ++ ++ sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon; + } + + if (sc > thresh) { +@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + w_gl.clear(); + if (utf8) { + u8_u16(w_gl, gl); +- re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + +- ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); ++ //w_gl is lowercase already at this point ++ re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ if (low) { ++ w_f = w_word; ++ // lowering dictionary word ++ mkallsmall_utf(w_f, langnum); ++ re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ } else { ++ re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ } + } else { +- re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + +- ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); ++ //gl is lowercase already at this point ++ re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ if (low) { ++ f = word; ++ // lowering dictionary word ++ mkallsmall(f, csconv); ++ re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ } else { ++ re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); ++ } + } + + int ngram_score, leftcommon_score; + if (utf8) { +- ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low); ++ //w_gl is lowercase already at this point ++ ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH); + leftcommon_score = leftcommonsubstring(w_word, w_gl); + } else { +- ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low); ++ //gl is lowercase already at this point ++ ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH); + leftcommon_score = leftcommonsubstring(word, gl.c_str()); + } + gscore[i] = +@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n, + l2 = su2.size(); + if (l2 == 0) + return 0; +- // lowering dictionary word +- const std::vector<w_char>* p_su2 = &su2; +- std::vector<w_char> su2_copy; +- if (opt & NGRAM_LOWERING) { +- su2_copy = su2; +- mkallsmall_utf(su2_copy, langnum); +- p_su2 = &su2_copy; +- } + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { +@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n, + for (int l = 0; l <= (l2 - j); l++) { + for (k = 0; k < j; k++) { + const w_char& c1 = su1[i + k]; +- const w_char& c2 = (*p_su2)[l + k]; ++ const w_char& c2 = su2[l + k]; + if ((c1.l != c2.l) || (c1.h != c2.h)) + break; + } +@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n, + if (l2 == 0) + return 0; + l1 = s1.size(); +- std::string t(s2); +- if (opt & NGRAM_LOWERING) +- mkallsmall(t, csconv); + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { +- //t is haystack, s1[i..i+j) is needle +- if (t.find(s1.c_str()+i, 0, j) != std::string::npos) { ++ //s2 is haystack, s1[i..i+j) is needle ++ if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) { + ns++; + } else if (opt & NGRAM_WEIGHTED) { + ns--; +-- +2.9.3 + diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch new file mode 100644 index 000000000000..6c8a108d6719 --- /dev/null +++ b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch @@ -0,0 +1,81 @@ +From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Fri, 10 Feb 2017 17:14:35 +0000 +Subject: [PATCH 4/4] either clear will be called anyway before use, or its + unused afterwards + +--- + src/hunspell/suggestmgr.cxx | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index ea52707..ae34535 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + continue; + + if (utf8) { +- w_f.clear(); + u8_u16(w_f, HENTRY_WORD(hp)); + + int leftcommon = leftcommonsubstring(w_word, w_f); +@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { + int sc2; + if (utf8) { +- w_f.clear(); + u8_u16(w_f, f); + + int leftcommon = leftcommonsubstring(w_word, w_f); +@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + int scphon = -20000; + if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { + if (utf8) { +- w_candidate.clear(); + u8_u16(w_candidate, HENTRY_WORD(hp)); + mkallcap_utf(w_candidate, langnum); + u16_u8(candidate, w_candidate); +@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + mkallcap(candidate, csconv); + } + f = phonet(candidate, *ph); +- w_f.clear(); + if (utf8) { + u8_u16(w_f, f); + scphon = 2 * ngram(3, w_target, w_f, +@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + + for (int k = 0; k < nw; k++) { + if (utf8) { +- w_f.clear(); + u8_u16(w_f, glst[k].word); + + int leftcommon = leftcommonsubstring(w_word, w_f); +@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + std::string gl; + int len; + if (utf8) { +- w_gl.clear(); + len = u8_u16(w_gl, guess[i]); + mkallsmall_utf(w_gl, langnum); + u16_u8(gl, w_gl); +@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + } + // using 2-gram instead of 3, and other weightening + +- w_gl.clear(); + if (utf8) { + u8_u16(w_gl, gl); + //w_gl is lowercase already at this point +@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + // lowering rootphon[i] + std::string gl; + int len; +- w_gl.clear(); + if (utf8) { + len = u8_u16(w_gl, rootsphon[i]); + mkallsmall_utf(w_gl, langnum); +-- +2.9.3 + diff --git a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch deleted file mode 100644 index 1d896c6deb49..000000000000 --- a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 1393bd64581d6010a65d368e1031641391bdb154 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 14:30:13 +0000 -Subject: [PATCH 1/2] use a per-hashmgr persistent wide string scratch buffer - -kcachegrind reports 894,015,631 -> 845,183,693 ---- - src/hunspell/hashmgr.cxx | 40 ++++++++++++++++++---------------------- - src/hunspell/hashmgr.hxx | 1 + - 2 files changed, 19 insertions(+), 22 deletions(-) - -diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx -index 6d92e9b..8d6189b 100644 ---- a/src/hunspell/hashmgr.cxx -+++ b/src/hunspell/hashmgr.cxx -@@ -345,11 +345,10 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, - flags2[flagslen] = ONLYUPCASEFLAG; - if (utf8) { - std::string st; -- wide::string w; -- u8_u16(w, word); -- mkallsmall_utf(w, langnum); -- mkinitcap_utf(w, langnum); -- u16_u8(st, w); -+ u8_u16(workbuf, word); -+ mkallsmall_utf(workbuf, langnum); -+ mkinitcap_utf(workbuf, langnum); -+ u16_u8(st, workbuf); - return add_word(st, wcl, flags2, flagslen + 1, dp, true); - } else { - std::string new_word(word); -@@ -366,9 +365,8 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, - int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { - int len; - if (utf8) { -- wide::string dest_utf; -- len = u8_u16(dest_utf, word); -- *captype = get_captype_utf8(dest_utf, langnum); -+ len = u8_u16(workbuf, word); -+ *captype = get_captype_utf8(workbuf, langnum); - } else { - len = word.size(); - *captype = get_captype(word, csconv); -@@ -688,13 +686,12 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil - break; - } - case FLAG_UNI: { // UTF-8 characters -- wide::string w; -- u8_u16(w, flags); -- len = w.size(); -+ u8_u16(workbuf, flags); -+ len = workbuf.size(); - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); - if (!*result) - return -1; -- memcpy(*result, &w[0], len * sizeof(short)); -+ memcpy(*result, &workbuf[0], len * sizeof(short)); - break; - } - default: { // Ispell's one-character flags (erfg -> e r f g) -@@ -760,12 +757,11 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin - break; - } - case FLAG_UNI: { // UTF-8 characters -- wide::string w; -- u8_u16(w, flags); -- size_t len = w.size(); -+ u8_u16(workbuf, flags); -+ size_t len = workbuf.size(); - size_t origsize = result.size(); - result.resize(origsize + len); -- memcpy(&result[origsize], &w[0], len * sizeof(short)); -+ memcpy(&result[origsize], &workbuf[0], len * sizeof(short)); - break; - } - default: { // Ispell's one-character flags (erfg -> e r f g) -@@ -793,10 +789,9 @@ unsigned short HashMgr::decode_flag(const char* f) const { - s = (unsigned short)i; - break; - case FLAG_UNI: { -- wide::string w; -- u8_u16(w, f); -- if (!w.empty()) -- memcpy(&s, &w[0], 1 * sizeof(short)); -+ u8_u16(workbuf, f); -+ if (!workbuf.empty()) -+ memcpy(&s, &workbuf[0], 1 * sizeof(short)); - break; - } - default: -@@ -820,8 +815,9 @@ char* HashMgr::encode_flag(unsigned short f) const { - ch = stream.str(); - } else if (flag_mode == FLAG_UNI) { - const w_char* w_c = (const w_char*)&f; -- wide::string w(w_c, w_c + 1); -- u16_u8(ch, w); -+ workbuf.clear(); -+ workbuf.push_back(*w_c); -+ u16_u8(ch, workbuf); - } else { - ch.push_back((unsigned char)(f)); - } -diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx -index 312c8ba..78ffb44 100644 ---- a/src/hunspell/hashmgr.hxx -+++ b/src/hunspell/hashmgr.hxx -@@ -97,6 +97,7 @@ class HashMgr { - struct cs_info* csconv; - std::string ignorechars; - wide::string ignorechars_utf16; -+ mutable wide::string workbuf; - int numaliasf; // flag vector `compression' with aliases - unsigned short** aliasf; - unsigned short* aliasflen; --- -2.9.3 - diff --git a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch deleted file mode 100644 index a892f74b7843..000000000000 --- a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 5c7bfa8d36b87a0649f6f88b20624c38a3a5f0ae Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> -Date: Mon, 23 Jan 2017 14:43:19 +0000 -Subject: [PATCH 2/2] use a per-hashmgr persistent wide string scratch buffer - -kcachegrind reports 845,183,693 -> 812,760,392 ---- - src/hunspell/suggestmgr.cxx | 55 ++++++++++++++++++++------------------------- - src/hunspell/suggestmgr.hxx | 3 +++ - 2 files changed, 27 insertions(+), 31 deletions(-) - -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index 1deec96..f5ea01b 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -491,10 +491,11 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst, - if (word[i] == word[i - 2]) { - state++; - if (state == 3) { -- wide::string candidate_utf(word, word + i - 1); -- candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); -+ workbuf1.clear(); -+ workbuf1.insert(workbuf1.end(), word, word + i - 1); -+ workbuf1.insert(workbuf1.end(), word + i + 1, word + wl); - std::string candidate; -- u16_u8(candidate, candidate_utf); -+ u16_u8(candidate, workbuf1); - testsug(wlst, candidate, cpdsuggest, NULL, NULL); - state = 0; - } -@@ -1050,12 +1051,11 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; - std::string target; - std::string candidate; -- wide::string w_candidate; - if (ph) { - if (utf8) { -- u8_u16(w_candidate, word); -- mkallcap_utf(w_candidate, langnum); -- u16_u8(candidate, w_candidate); -+ u8_u16(workbuf1, word); -+ mkallcap_utf(workbuf1, langnum); -+ u16_u8(candidate, workbuf1); - } else { - candidate.assign(word); - if (!nonbmp) -@@ -1121,10 +1121,9 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, - int scphon = -20000; - if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { - if (utf8) { -- w_candidate.clear(); -- u8_u16(w_candidate, HENTRY_WORD(hp)); -- mkallcap_utf(w_candidate, langnum); -- u16_u8(candidate, w_candidate); -+ u8_u16(workbuf1, HENTRY_WORD(hp)); -+ mkallcap_utf(workbuf1, langnum); -+ u16_u8(candidate, workbuf1); - } else { - candidate = HENTRY_WORD(hp); - mkallcap(candidate, csconv); -@@ -1804,11 +1803,10 @@ int SuggestMgr::ngram(int n, - return 0; - // lowering dictionary word - const wide::string* p_su2 = &su2; -- wide::string su2_copy; - if (opt & NGRAM_LOWERING) { -- su2_copy = su2; -- mkallsmall_utf(su2_copy, langnum); -- p_su2 = &su2_copy; -+ workbuf1 = su2; -+ mkallsmall_utf(workbuf1, langnum); -+ p_su2 = &workbuf1; - } - for (int j = 1; j <= n; j++) { - ns = 0; -@@ -1948,22 +1946,20 @@ int SuggestMgr::commoncharacterpositions(const char* s1, - int diffpos[2]; - *is_swap = 0; - if (utf8) { -- wide::string su1; -- wide::string su2; -- int l1 = u8_u16(su1, s1); -- int l2 = u8_u16(su2, s2); -+ int l1 = u8_u16(workbuf1, s1); -+ int l2 = u8_u16(workbuf2, s2); - - if (l1 <= 0 || l2 <= 0) - return 0; - - // decapitalize dictionary word - if (complexprefixes) { -- su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum); -+ workbuf2[l2 - 1] = lower_utf(workbuf2[l2 - 1], langnum); - } else { -- su2[0] = lower_utf(su2[0], langnum); -+ workbuf2[0] = lower_utf(workbuf2[0], langnum); - } - for (int i = 0; (i < l1) && (i < l2); i++) { -- if (su1[i] == su2[i]) { -+ if (workbuf1[i] == workbuf2[i]) { - num++; - } else { - if (diff < 2) -@@ -1972,8 +1968,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1, - } - } - if ((diff == 2) && (l1 == l2) && -- (su1[diffpos[0]] == su2[diffpos[1]]) && -- (su1[diffpos[1]] == su2[diffpos[0]])) -+ (workbuf1[diffpos[0]] == workbuf2[diffpos[1]]) && -+ (workbuf1[diffpos[1]] == workbuf2[diffpos[0]])) - *is_swap = 1; - } else { - size_t i; -@@ -2004,8 +2000,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1, - - int SuggestMgr::mystrlen(const char* word) { - if (utf8) { -- wide::string w; -- return u8_u16(w, word); -+ return u8_u16(workbuf1, word); - } else - return strlen(word); - } -@@ -2044,15 +2039,13 @@ void SuggestMgr::lcs(const char* s, - int* l2, - char** result) { - int n, m; -- wide::string su; -- wide::string su2; - char* b; - char* c; - int i; - int j; - if (utf8) { -- m = u8_u16(su, s); -- n = u8_u16(su2, s2); -+ m = u8_u16(workbuf1, s); -+ n = u8_u16(workbuf2, s2); - } else { - m = strlen(s); - n = strlen(s2); -@@ -2073,7 +2066,7 @@ void SuggestMgr::lcs(const char* s, - c[j] = 0; - for (i = 1; i <= m; i++) { - for (j = 1; j <= n; j++) { -- if (((utf8) && (su[i - 1] == su2[j - 1])) || -+ if (((utf8) && (workbuf1[i - 1] == workbuf2[j - 1])) || - ((!utf8) && (s[i - 1] == s2[j - 1]))) { - c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1; - b[i * (n + 1) + j] = LCS_UPLEFT; -diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx -index 9bfa933..80b0fd7 100644 ---- a/src/hunspell/suggestmgr.hxx -+++ b/src/hunspell/suggestmgr.hxx -@@ -105,6 +105,9 @@ class SuggestMgr { - SuggestMgr& operator=(const SuggestMgr&); - - private: -+ mutable wide::string workbuf1; -+ mutable wide::string workbuf2; -+ - char* ckey; - size_t ckeyl; - wide::string ckey_utf; --- -2.9.3 - diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk index e493366e25b4..b30bd083b407 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -22,10 +22,12 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1)) $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \ external/hunspell/0001-unroll-this-a-bit.patch \ - external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch \ - external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch \ - external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \ - external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \ + external/hunspell/0001-cppcheck-redundant-c_str.patch \ + external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \ + external/hunspell/0001-loop-via-iterators.patch \ + external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \ + external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \ + external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \ )) # vim: set noet sw=4 ts=4: |