From afd635082e4d06832877f70b7da9b187a6b21fbc Mon Sep 17 00:00:00 2001 From: Noel Grandin Date: Fri, 3 Nov 2023 11:16:42 +0200 Subject: speed up hunspell inner loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit which calls into the kernel to get elapsed time, instead of using the VDSO-based CLOCK_MONOTONIC (or at least, the C++ equivalent of that) , which is much faster Change-Id: I99d958b0ab64b75360db4e0c8a951c37af2505bc Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158809 Tested-by: Jenkins Tested-by: Caolán McNamara Reviewed-by: Caolán McNamara --- external/hunspell/UnpackedTarball_hunspell.mk | 1 + external/hunspell/clock-monotonic.patch.1 | 105 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 external/hunspell/clock-monotonic.patch.1 (limited to 'external') diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk index 9075d98659e7..0d5986798265 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -26,6 +26,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch \ external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch \ external/hunspell/bit_cast.patch.0 \ + external/hunspell/clock-monotonic.patch.1 \ )) # vim: set noet sw=4 ts=4: diff --git a/external/hunspell/clock-monotonic.patch.1 b/external/hunspell/clock-monotonic.patch.1 new file mode 100644 index 000000000000..dd873f6c3908 --- /dev/null +++ b/external/hunspell/clock-monotonic.patch.1 @@ -0,0 +1,105 @@ +From 5737bdb3d7e5819528e33c360a73372e0e93a6be Mon Sep 17 00:00:00 2001 +From: Noel Grandin +Date: Fri, 3 Nov 2023 12:04:30 +0000 +Subject: [PATCH] speed up hunspell inner loop + +which calls into the kernel to get elapsed time, instead of using the +VDSO-based CLOCK_MONOTONIC (or at least, the C++ 
equivalent of that), +which is much faster + +https://gerrit.libreoffice.org/c/core/+/158809 +--- + src/hunspell/affixmgr.cxx | 43 +++++++++++++++++++++++---------------- + 1 file changed, 25 insertions(+), 18 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 2cad09f..a3c93cd 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -75,6 +75,7 @@ + #include + + #include ++#include <chrono> + #include + #include + #include +@@ -1590,17 +1591,20 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + +- HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ HUNSPELL_THREAD_LOCAL std::chrono::steady_clock::time_point clock_time_start; ++ HUNSPELL_THREAD_LOCAL bool timelimit_exceeded; ++ ++ // get the current time ++ std::chrono::steady_clock::time_point clock_now = std::chrono::steady_clock::now(); + + if (wordnum == 0) { +- // get the start time, seeing as we're reusing this set to 0 +- // to flag timeout, use clock() + 1 to avoid start clock() +- // of 0 as being a timeout +- timelimit = clock() + 1; +- } +- else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { +- timelimit = 0; ++ // set the start time ++ clock_time_start = clock_now; ++ timelimit_exceeded = false; + } ++ else if (std::chrono::duration_cast<std::chrono::milliseconds>(clock_now - clock_time_start).count() ++ > TIMELIMIT * CLOCKS_PER_SEC * 1000) ++ timelimit_exceeded = true; + + setcminmax(&cmin, &cmax, word.c_str(), len); + +@@ -1626,7 +1630,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + + do { // simplified checkcompoundpattern loop + +- if (timelimit == 0) ++ if (timelimit_exceeded) + return 0; + + if (scpd > 0) { +@@ -2216,17 +2220,20 @@ int AffixMgr::compound_check_morph(const std::string& word, + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + +- HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ 
HUNSPELL_THREAD_LOCAL std::chrono::steady_clock::time_point clock_time_start; ++ HUNSPELL_THREAD_LOCAL bool timelimit_exceeded; ++ ++ // get the current time ++ std::chrono::steady_clock::time_point clock_now = std::chrono::steady_clock::now(); + + if (wordnum == 0) { +- // get the start time, seeing as we're reusing this set to 0 +- // to flag timeout, use clock() + 1 to avoid start clock() +- // of 0 as being a timeout +- timelimit = clock() + 1; +- } +- else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { +- timelimit = 0; ++ // set the start time ++ clock_time_start = clock_now; ++ timelimit_exceeded = false; + } ++ else if (std::chrono::duration_cast<std::chrono::milliseconds>(clock_now - clock_time_start).count() ++ > TIMELIMIT * CLOCKS_PER_SEC * 1000) ++ timelimit_exceeded = true; + + setcminmax(&cmin, &cmax, word.c_str(), len); + +@@ -2246,7 +2253,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + + do { // onlycpdrule loop + +- if (timelimit == 0) ++ if (timelimit_exceeded) + return 0; + + oldnumsyllable = numsyllable; +-- +2.41.0 + -- cgit