summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--external/libnumbertext/MSVCNonBMPBug.patch169
-rw-r--r--external/libnumbertext/UnpackedTarball_libnumbertext.mk4
-rw-r--r--lingucomponent/source/numbertext/numbertext.cxx24
3 files changed, 81 insertions, 16 deletions
diff --git a/external/libnumbertext/MSVCNonBMPBug.patch1 b/external/libnumbertext/MSVCNonBMPBug.patch1
new file mode 100644
index 000000000000..8ced22165bcc
--- /dev/null
+++ b/external/libnumbertext/MSVCNonBMPBug.patch1
@@ -0,0 +1,69 @@
+MSVC's std::codecvt_utf8 has a bug converting non-BMP codepoints like U+10CFA.
+Use MultiByteToWideChar/WideCharToMultiByte instead on Windows.
+
+diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx
+index 5f05b48579af..eb83e59f366f 100755
+--- a/src/Numbertext.cxx
++++ b/src/Numbertext.cxx
+@@ -7,6 +7,10 @@
+ #include <sstream>
+ #include <fstream>
+
++#ifdef _WIN32
++#include <Windows.h>
++#endif
++
+ #include "Numbertext.hxx"
+
+ #ifdef NUMBERTEXT_BOOST
+@@ -22,6 +26,14 @@
+
+ bool readfile(const std::string& filename, std::wstring& result)
+ {
++#ifdef _WIN32
++ std::ifstream ifs(filename);
++ if (ifs.fail())
++ return false;
++ std::stringstream ss;
++ ss << ifs.rdbuf();
++ result = Numbertext::string2wstring(ss.str());
++#else
+ std::wifstream wif(filename);
+ if (wif.fail())
+ return false;
+@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result)
+ std::wstringstream wss;
+ wss << wif.rdbuf();
+ result = wss.str();
++#endif
+ return true;
+ }
+
+@@ -99,7 +112,12 @@
+
+ std::wstring Numbertext::string2wstring(const std::string& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++ int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
++ std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]);
++ MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize);
++ return wstr.get();
++#elif !defined NUMBERTEXT_BOOST
+ typedef std::codecvt_utf8<wchar_t> convert_type;
+ std::wstring_convert<convert_type, wchar_t> converter;
+ return converter.from_bytes( s );
+@@ -110,7 +128,12 @@
+
+ std::string Numbertext::wstring2string(const std::wstring& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++ int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr);
++ std::unique_ptr<char[]> str(new char[nSize]);
++ WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr);
++ return str.get();
++#elif !defined NUMBERTEXT_BOOST
+ typedef std::codecvt_utf8<wchar_t> convert_type;
+ std::wstring_convert<convert_type, wchar_t> converter;
+ return converter.to_bytes( s );
diff --git a/external/libnumbertext/UnpackedTarball_libnumbertext.mk b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
index 1969dcf7d08d..d2efd2f852c3 100644
--- a/external/libnumbertext/UnpackedTarball_libnumbertext.mk
+++ b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
@@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext))
$(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1))
+$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \
+ external/libnumbertext/MSVCNonBMPBug.patch1 \
+))
+
# vim: set noet sw=4 ts=4:
diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx
index 89f5432624bf..34e7694601a3 100644
--- a/lingucomponent/source/numbertext/numbertext.cxx
+++ b/lingucomponent/source/numbertext/numbertext.cxx
@@ -21,7 +21,7 @@
#include <osl/file.hxx>
#include <tools/debug.hxx>
-#include <rtl/ustrbuf.hxx>
+#include <o3tl/char16_t2wchar_t.hxx>
#include <sal/config.h>
#include <cppuhelper/factory.hxx>
@@ -132,26 +132,18 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo
if (!aCountry.isEmpty())
aCode += "-" + aCountry;
OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US));
+#if defined(_WIN32)
+ std::wstring sResult(o3tl::toW(rText.getStr()));
+#else
OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
std::wstring sResult = Numbertext::string2wstring(aInput.getStr());
+#endif
bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr());
DBG_ASSERT(result, "numbertext: false");
- OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
#if defined(_WIN32)
- // workaround to fix non-BMP Unicode characters resulted by wstring limitation
- if (!aScript.isEmpty() && aScript == "Hung")
- {
- OUStringBuffer aFix;
- for (int i = 0; i < aResult.getLength(); ++i)
- {
- sal_Unicode c = aResult[i];
- if (0x0C80 <= c && c <= 0x0CFF)
- aFix.append(sal_Unicode(0xD803)).append(sal_Unicode(c + 0xD000));
- else
- aFix.append(c);
- }
- aResult = aFix.makeStringAndClear();
- }
+ OUString aResult(o3tl::toU(sResult.c_str()));
+#else
+ OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
#endif
return aResult;
}