diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2023-03-04 12:46:56 +0300 |
---|---|---|
committer | Xisco Fauli <xiscofauli@libreoffice.org> | 2023-03-08 19:25:03 +0000 |
commit | 3b17f67f35a771c75e683f0671b9b9d2a010ffd4 (patch) | |
tree | b37688f8d931eb9de52debfa5946228a7238159e | |
parent | 2860a771f93e43b95250beee78351a58bffba1ff (diff) |
tdf#153617: percent-encode the text sent to LanguageTool API.
Change-Id: I0bb55c70f5602444440fca6e3c13b3d75418e49d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148236
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148299
-rw-r--r-- | lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx index 3d9336f446da..4fa88ac0118f 100644 --- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx @@ -41,6 +41,7 @@ #include <tools/long.hxx> #include <com/sun/star/uno/Any.hxx> #include <comphelper/propertyvalue.hxx> +#include <rtl/uri.hxx> using namespace osl; using namespace com::sun::star; @@ -73,6 +74,20 @@ Sequence<PropertyValue> lcl_GetLineColorPropertyFromErrorId(const std::string& r Sequence<PropertyValue> aProperties{ comphelper::makePropertyValue("LineColor", aColor) }; return aProperties; } + +OString encodeTextForLanguageTool(const OUString& text) +{ + // Let's be a bit conservative. I don't find a good description what needs encoding (and in + // which way) at https://languagetool.org/http-api/; the "Try it out!" function shows that + // different cases are handled differently by the demo; some percent-encode the UTF-8 + // representation, like %D0%90 (for cyrillic А); some turn into entities like &#33; (for + // exclamation mark !); some other to things like \u0027 (for apostrophe '). 
+ static constexpr auto myCharClass + = rtl::createUriCharClass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + return OUStringToOString( + rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, RTL_TEXTENCODING_UTF8), + RTL_TEXTENCODING_ASCII_US); +} } LanguageToolGrammarChecker::LanguageToolGrammarChecker() @@ -215,9 +230,8 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( xRes.nBehindEndOfSentencePosition = std::min(xRes.nStartOfNextSentencePosition, aText.getLength()); - OUString langTag(aLocale.Language + "-" + aLocale.Country); - OString postData = OUStringToOString(Concat2View("text=" + aText + "&language=" + langTag), - RTL_TEXTENCODING_UTF8); + OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8()); + OString postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + langTag; if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end()) { |