summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Kaganski <mike.kaganski@collabora.com> 2020-06-18 16:03:38 +0300
committer Adolfo Jayme Barrientos <fitojb@ubuntu.com> 2020-06-25 13:49:40 +0200
commit dc7b1a09001638d5d6737e9bdf394f3a571ef044 (patch)
tree ddc6cc812d5bc4fd04288a42682f43a955305970
parent 86ed93261137e24c679baed137cc4559eb5376e4 (diff)
tdf#133589 Numbertext: Use Win32API to avoid std::codecvt_utf8 bug
(cherry picked from commit 21a59b59d9a40ca32d91b05e62ffcd9aef8fd324 and commit ad792bccc0adfdd4be75f99170a92312924d371d)
Change-Id: I45c85db44c3dfd92e0929f66c8c95cb309c91e05
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/96894
Tested-by: Jenkins
Reviewed-by: Adolfo Jayme Barrientos <fitojb@ubuntu.com>
-rw-r--r-- external/libnumbertext/MSVCNonBMPBug.patch1 69
-rw-r--r-- external/libnumbertext/UnpackedTarball_libnumbertext.mk 4
-rw-r--r-- lingucomponent/source/numbertext/numbertext.cxx 17
-rw-r--r-- sw/qa/extras/uiwriter/uiwriter.cxx 2
4 files changed, 86 insertions, 6 deletions
diff --git a/external/libnumbertext/MSVCNonBMPBug.patch1 b/external/libnumbertext/MSVCNonBMPBug.patch1
new file mode 100644
index 000000000000..8ced22165bcc
--- /dev/null
+++ b/external/libnumbertext/MSVCNonBMPBug.patch1
@@ -0,0 +1,69 @@
+MSVC's std::codecvt_utf8 has a bug converting non-BMP codepoints like U+10CFA.
+Use MultiByteToWideChar/WideCharToMultiByte instead on Windows.
+
+diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx
+index 5f05b48579af..eb83e59f366f 100755
+--- a/src/Numbertext.cxx
++++ b/src/Numbertext.cxx
+@@ -7,6 +7,10 @@
+ #include <sstream>
+ #include <fstream>
+
++#ifdef _WIN32
++#include <Windows.h>
++#endif
++
+ #include "Numbertext.hxx"
+
+ #ifdef NUMBERTEXT_BOOST
+@@ -22,6 +26,14 @@
+
+ bool readfile(const std::string& filename, std::wstring& result)
+ {
++#ifdef _WIN32
++ std::ifstream ifs(filename);
++ if (ifs.fail())
++ return false;
++ std::stringstream ss;
++ ss << ifs.rdbuf();
++ result = Numbertext::string2wstring(ss.str());
++#else
+ std::wifstream wif(filename);
+ if (wif.fail())
+ return false;
+@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result)
+ std::wstringstream wss;
+ wss << wif.rdbuf();
+ result = wss.str();
++#endif
+ return true;
+ }
+
+@@ -99,7 +112,12 @@
+
+ std::wstring Numbertext::string2wstring(const std::string& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++ int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
++ std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]);
++ MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize);
++ return wstr.get();
++#elif !defined NUMBERTEXT_BOOST
+ typedef std::codecvt_utf8<wchar_t> convert_type;
+ std::wstring_convert<convert_type, wchar_t> converter;
+ return converter.from_bytes( s );
+@@ -110,7 +128,12 @@
+
+ std::string Numbertext::wstring2string(const std::wstring& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++ int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr);
++ std::unique_ptr<char[]> str(new char[nSize]);
++ WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr);
++ return str.get();
++#elif !defined NUMBERTEXT_BOOST
+ typedef std::codecvt_utf8<wchar_t> convert_type;
+ std::wstring_convert<convert_type, wchar_t> converter;
+ return converter.to_bytes( s );
diff --git a/external/libnumbertext/UnpackedTarball_libnumbertext.mk b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
index 1969dcf7d08d..d2efd2f852c3 100644
--- a/external/libnumbertext/UnpackedTarball_libnumbertext.mk
+++ b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
@@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext))
$(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1))
+$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \
+ external/libnumbertext/MSVCNonBMPBug.patch1 \
+))
+
# vim: set noet sw=4 ts=4:
diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx
index bc13530d5250..556dc2ea7a90 100644
--- a/lingucomponent/source/numbertext/numbertext.cxx
+++ b/lingucomponent/source/numbertext/numbertext.cxx
@@ -22,6 +22,7 @@
#include <osl/file.hxx>
#include <tools/debug.hxx>
+#include <o3tl/char16_t2wchar_t.hxx>
#include <sal/config.h>
#include <cppuhelper/factory.hxx>
@@ -144,12 +145,20 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo
if (!aCountry.isEmpty())
aCode += "-" + aCountry;
OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US));
+#if defined(_WIN32)
+ std::wstring sResult(o3tl::toW(rText.getStr()));
+#else
OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
- std::wstring aResult = Numbertext::string2wstring(aInput.getStr());
- bool result = m_aNumberText.numbertext(aResult, aLangCode.getStr());
+ std::wstring sResult = Numbertext::string2wstring(aInput.getStr());
+#endif
+ bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr());
DBG_ASSERT(result, "numbertext: false");
- OString aResult2(Numbertext::wstring2string(aResult).c_str());
- return OUString::fromUtf8(aResult2);
+#if defined(_WIN32)
+ OUString aResult(o3tl::toU(sResult.c_str()));
+#else
+ OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
+#endif
+ return aResult;
#else
return rText;
#endif
diff --git a/sw/qa/extras/uiwriter/uiwriter.cxx b/sw/qa/extras/uiwriter/uiwriter.cxx
index 66137c9af662..2b8490b09135 100644
--- a/sw/qa/extras/uiwriter/uiwriter.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter.cxx
@@ -7285,7 +7285,6 @@ void SwUiWriterTest::testTdf128860()
#if ENABLE_LIBNUMBERTEXT
void SwUiWriterTest::testTdf133589()
{
-#if !defined(_WIN32)
// Hungarian test document with right-to-left paragraph setting
SwDoc* pDoc = createDoc("tdf133589.fodt");
SwWrtShell* pWrtShell = pDoc->GetDocShell()->GetWrtShell();
@@ -7317,7 +7316,6 @@ void SwUiWriterTest::testTdf133589()
nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
sReplaced += u"𐳺𐳺𐳿𐳼𐳼 ";
CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
-#endif
}
#endif