diff options
-rw-r--r-- | external/libnumbertext/MSVCNonBMPBug.patch1 | 69 | ||||
-rw-r--r-- | external/libnumbertext/UnpackedTarball_libnumbertext.mk | 4 | ||||
-rw-r--r-- | lingucomponent/source/numbertext/numbertext.cxx | 24 |
3 files changed, 81 insertions, 16 deletions
diff --git a/external/libnumbertext/MSVCNonBMPBug.patch1 b/external/libnumbertext/MSVCNonBMPBug.patch1 new file mode 100644 index 000000000000..8ced22165bcc --- /dev/null +++ b/external/libnumbertext/MSVCNonBMPBug.patch1 @@ -0,0 +1,69 @@ +MSVC's std::codecvt_utf8 has a bug converting non-BMP codepoints like U+10CFA. +Use MultiByteToWideChar/WideCharToMultiByte instead on Windows. + +diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx +index 5f05b48579af..eb83e59f366f 100755 +--- a/src/Numbertext.cxx ++++ b/src/Numbertext.cxx +@@ -7,6 +7,10 @@ + #include <sstream> + #include <fstream> + ++#ifdef _WIN32 ++#include <Windows.h> ++#endif ++ + #include "Numbertext.hxx" + + #ifdef NUMBERTEXT_BOOST +@@ -22,6 +26,14 @@ + + bool readfile(const std::string& filename, std::wstring& result) + { ++#ifdef _WIN32 ++ std::ifstream ifs(filename); ++ if (ifs.fail()) ++ return false; ++ std::stringstream ss; ++ ss << ifs.rdbuf(); ++ result = Numbertext::string2wstring(ss.str()); ++#else + std::wifstream wif(filename); + if (wif.fail()) + return false; +@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result) + std::wstringstream wss; + wss << wif.rdbuf(); + result = wss.str(); ++#endif + return true; + } + +@@ -99,7 +112,12 @@ + + std::wstring Numbertext::string2wstring(const std::string& s) + { +-#ifndef NUMBERTEXT_BOOST ++#ifdef _WIN32 ++ int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0); ++ std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]); ++ MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize); ++ return wstr.get(); ++#elif !defined NUMBERTEXT_BOOST + typedef std::codecvt_utf8<wchar_t> convert_type; + std::wstring_convert<convert_type, wchar_t> converter; + return converter.from_bytes( s ); +@@ -110,7 +128,12 @@ + + std::string Numbertext::wstring2string(const std::wstring& s) + { +-#ifndef NUMBERTEXT_BOOST ++#ifdef _WIN32 ++ int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr); ++ std::unique_ptr<char[]> str(new char[nSize]); ++ WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr); ++ return str.get(); ++#elif !defined NUMBERTEXT_BOOST + typedef std::codecvt_utf8<wchar_t> convert_type; + std::wstring_convert<convert_type, wchar_t> converter; + return converter.to_bytes( s ); diff --git a/external/libnumbertext/UnpackedTarball_libnumbertext.mk b/external/libnumbertext/UnpackedTarball_libnumbertext.mk index 1969dcf7d08d..d2efd2f852c3 100644 --- a/external/libnumbertext/UnpackedTarball_libnumbertext.mk +++ b/external/libnumbertext/UnpackedTarball_libnumbertext.mk @@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext)) $(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1)) +$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \ + external/libnumbertext/MSVCNonBMPBug.patch1 \ +)) + # vim: set noet sw=4 ts=4: diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx index 89f5432624bf..34e7694601a3 100644 --- a/lingucomponent/source/numbertext/numbertext.cxx +++ b/lingucomponent/source/numbertext/numbertext.cxx @@ -21,7 +21,7 @@ #include <osl/file.hxx> #include <tools/debug.hxx> -#include <rtl/ustrbuf.hxx> +#include <o3tl/char16_t2wchar_t.hxx> #include <sal/config.h> #include <cppuhelper/factory.hxx> @@ -132,26 +132,18 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo if (!aCountry.isEmpty()) aCode += "-" + aCountry; OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US)); +#if defined(_WIN32) + std::wstring sResult(o3tl::toW(rText.getStr())); +#else OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8)); std::wstring sResult = Numbertext::string2wstring(aInput.getStr()); +#endif bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr()); DBG_ASSERT(result, "numbertext: false"); - OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str()); #if defined(_WIN32) - // workaround to fix non-BMP Unicode characters resulted by wstring limitation - if (!aScript.isEmpty() && aScript == "Hung") - { - OUStringBuffer aFix; - for (int i = 0; i < aResult.getLength(); ++i) - { - sal_Unicode c = aResult[i]; - if (0x0C80 <= c && c <= 0x0CFF) - aFix.append(sal_Unicode(0xD803)).append(sal_Unicode(c + 0xD000)); - else - aFix.append(c); - } - aResult = aFix.makeStringAndClear(); - } + OUString aResult(o3tl::toU(sResult.c_str())); +#else + OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str()); #endif return aResult; } |