summaryrefslogtreecommitdiff
path: root/lingucomponent
diff options
context:
space:
mode:
author Mike Kaganski <mike.kaganski@collabora.com> 2023-03-04 12:46:56 +0300
committer Mike Kaganski <mike.kaganski@collabora.com> 2023-03-04 14:18:51 +0000
commit 43dcdfae40c9c37032ed5e92cd0634feb53b706d (patch)
tree 42ed727fb0eba5cb11e0b70286fb3761a88ae713 /lingucomponent
parent bbfb49433a04f7ce882c2614f460f0f07860532c (diff)
tdf#153617: percent-encode the text sent to LanguageTool API.
Change-Id: I0bb55c70f5602444440fca6e3c13b3d75418e49d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148236
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'lingucomponent')
-rw-r--r-- lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx 26
1 files changed, 20 insertions, 6 deletions
diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
index 0856be3f741d..a153e7ac5ef0 100644
--- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
@@ -43,6 +43,7 @@
#include <comphelper/propertyvalue.hxx>
#include <unotools/lingucfg.hxx>
#include <osl/mutex.hxx>
+#include <rtl/uri.hxx>
using namespace osl;
using namespace com::sun::star;
@@ -77,6 +78,20 @@ Sequence<PropertyValue> lcl_GetLineColorPropertyFromErrorId(const std::string& r
Sequence<PropertyValue> aProperties{ comphelper::makePropertyValue("LineColor", aColor) };
return aProperties;
}
+
+OString encodeTextForLanguageTool(const OUString& text) // Percent-encodes text for use in the request payload.
+{
+ // Let's be a bit conservative. I can't find a good description of what needs encoding (and in
+ // which way) at https://languagetool.org/http-api/; the "Try it out!" function shows that
+ // different cases are handled differently by the demo: some percent-encode the UTF-8
+ // representation, like %D0%90 (for cyrillic А); some turn into entities like &#33; (for
+ // exclamation mark !); others turn into things like \u0027 (for apostrophe ').
+ static constexpr auto myCharClass // Only ASCII digits and letters are listed, i.e. left unescaped.
+ = rtl::createUriCharClass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+ return OUStringToOString( // The encoded string is narrowed to an OString as US-ASCII.
+ rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, RTL_TEXTENCODING_UTF8), // Escapes use UTF-8; NOTE(review): presumably Strict yields an empty string on unmappable input - confirm in rtl/uri.h.
+ RTL_TEXTENCODING_ASCII_US);
+} // NOTE(review): the Duden branch of doProofreading puts this encoded text into a JSON "text" field - confirm that service expects percent-encoding.
}
LanguageToolGrammarChecker::LanguageToolGrammarChecker()
@@ -224,14 +239,14 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading(
xRes.nBehindEndOfSentencePosition
= std::min(xRes.nStartOfNextSentencePosition, aText.getLength());
- OUString langTag(aLocale.Language + "-" + aLocale.Country);
- OString postData;
+ OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8());
+ OString postData = encodeTextForLanguageTool(aText);
if (rLanguageOpts.getRestProtocol() == sDuden)
{
std::stringstream aStream;
boost::property_tree::ptree aTree;
- aTree.put("text-language", langTag.toUtf8().getStr());
- aTree.put("text", aText.toUtf8().getStr());
+ aTree.put("text-language", langTag.getStr());
+ aTree.put("text", postData.getStr());
aTree.put("hyphenation", false);
aTree.put("spellchecking-level", 3);
aTree.put("correction-proposals", true);
@@ -240,8 +255,7 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading(
}
else
{
- postData = OUStringToOString(Concat2View("text=" + aText + "&language=" + langTag),
- RTL_TEXTENCODING_UTF8);
+ postData = "text=" + postData + "&language=" + langTag;
}
if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end())