summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Kaganski <mike.kaganski@collabora.com> 2023-03-04 12:46:56 +0300
committer: Xisco Fauli <xiscofauli@libreoffice.org> 2023-03-08 19:25:03 +0000
commit: 3b17f67f35a771c75e683f0671b9b9d2a010ffd4 (patch)
tree: b37688f8d931eb9de52debfa5946228a7238159e
parent: 2860a771f93e43b95250beee78351a58bffba1ff (diff)
tdf#153617: percent-encode the text sent to LanguageTool API.
Change-Id: I0bb55c70f5602444440fca6e3c13b3d75418e49d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148236
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148299
-rw-r--r-- lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx 20
1 files changed, 17 insertions, 3 deletions
diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
index 3d9336f446da..4fa88ac0118f 100644
--- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
@@ -41,6 +41,7 @@
#include <tools/long.hxx>
#include <com/sun/star/uno/Any.hxx>
#include <comphelper/propertyvalue.hxx>
+#include <rtl/uri.hxx>
using namespace osl;
using namespace com::sun::star;
@@ -73,6 +74,20 @@ Sequence<PropertyValue> lcl_GetLineColorPropertyFromErrorId(const std::string& r
Sequence<PropertyValue> aProperties{ comphelper::makePropertyValue("LineColor", aColor) };
return aProperties;
}
+
+OString encodeTextForLanguageTool(const OUString& text)
+{
+ // Percent-encodes the UTF-8 form of the text for the "text=" field of the LanguageTool POST data.
+ // Conservative on purpose (only ASCII letters and digits stay as-is): https://languagetool.org/http-api/
+ // has no clear description of what needs encoding (or how), and its "Try it out!" demo handles
+ // characters inconsistently: some are percent-encoded as UTF-8, like %D0%90 (for Cyrillic А); some
+ // become entities like &#33; (for exclamation mark !); others become \u0027 (for apostrophe ').
+ static constexpr auto myCharClass
+ = rtl::createUriCharClass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+ return OUStringToOString(
+ rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, RTL_TEXTENCODING_UTF8),
+ RTL_TEXTENCODING_ASCII_US);
+}
}
LanguageToolGrammarChecker::LanguageToolGrammarChecker()
@@ -215,9 +230,8 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading(
xRes.nBehindEndOfSentencePosition
= std::min(xRes.nStartOfNextSentencePosition, aText.getLength());
- OUString langTag(aLocale.Language + "-" + aLocale.Country);
- OString postData = OUStringToOString(Concat2View("text=" + aText + "&language=" + langTag),
- RTL_TEXTENCODING_UTF8);
+ OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8());
+ OString postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + langTag;
if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end())
{