diff options
-rw-r--r-- | emfio/source/reader/mtftools.cxx | 2 | ||||
-rw-r--r-- | filter/source/graphicfilter/idxf/dxfreprd.cxx | 4 | ||||
-rw-r--r-- | include/unotools/wincodepage.hxx | 5 | ||||
-rw-r--r-- | unotools/source/misc/wincodepage.cxx | 192 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfdispatchvalue.cxx | 5 |
5 files changed, 95 insertions, 113 deletions
diff --git a/emfio/source/reader/mtftools.cxx b/emfio/source/reader/mtftools.cxx index cc2fddcf4796..01c091be598c 100644 --- a/emfio/source/reader/mtftools.cxx +++ b/emfio/source/reader/mtftools.cxx @@ -185,7 +185,7 @@ namespace emfio || (rFont.alfFaceName == "MT Extra")) eCharSet = RTL_TEXTENCODING_SYMBOL; else if ((rFont.lfCharSet == DEFAULT_CHARSET) || (rFont.lfCharSet == OEM_CHARSET)) - eCharSet = utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr(), + eCharSet = utl_getWinTextEncodingFromLangStr(getLODefaultLanguage(), rFont.lfCharSet == OEM_CHARSET); else eCharSet = rtl_getTextEncodingFromWindowsCharset( rFont.lfCharSet ); diff --git a/filter/source/graphicfilter/idxf/dxfreprd.cxx b/filter/source/graphicfilter/idxf/dxfreprd.cxx index 26ec1077ae79..8e4c9bc73560 100644 --- a/filter/source/graphicfilter/idxf/dxfreprd.cxx +++ b/filter/source/graphicfilter/idxf/dxfreprd.cxx @@ -229,7 +229,7 @@ void DXFRepresentation::ReadHeader(DXFGroupReader & rDGR) // only if the encoding is not set yet // e.g. by previous $DWGCODEPAGE if (!isTextEncodingSet()) - setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr(), true)); + setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage(), true)); } else if (rDGR.GetS() >= "AC1021") setTextEncoding(RTL_TEXTENCODING_UTF8); @@ -239,7 +239,7 @@ void DXFRepresentation::ReadHeader(DXFGroupReader & rDGR) // only if the encoding is not set yet // e.g. by previous $DWGCODEPAGE if (!isTextEncodingSet()) - setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr())); + setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage())); } } else if (rDGR.GetS() == "$DWGCODEPAGE") diff --git a/include/unotools/wincodepage.hxx b/include/unotools/wincodepage.hxx index f3a6aee97ad3..4ec808e5c90f 100644 --- a/include/unotools/wincodepage.hxx +++ b/include/unotools/wincodepage.hxx @@ -12,11 +12,12 @@ #include <unotools/unotoolsdllapi.h> #include <rtl/textenc.h> +#include <rtl/ustring.hxx> /** Map from an ISO-639 language code (and optionally ISO-3166 country/region code) to a text encoding of corresponding Windows ANSI or OEM codepage. -@param pLanguage +@param sLanguage Any language-country string. Must not be null. @param bOEM @@ -28,7 +29,7 @@ If no mapping is found, RTL_TEXTENCODING_IBM_850 is returned when bOEM is true, RTL_TEXTENCODING_MS_1252 otherwise. */ UNOTOOLS_DLLPUBLIC rtl_TextEncoding utl_getWinTextEncodingFromLangStr( - const char* pLanguage, bool bOEM = false); + const OUString& sLanguage, bool bOEM = false); #endif // INCLUDED_UNOTOOLS_WINCODEPAGE_HXX diff --git a/unotools/source/misc/wincodepage.cxx b/unotools/source/misc/wincodepage.cxx index 5a5df8b3ed2e..1fefabecb049 100644 --- a/unotools/source/misc/wincodepage.cxx +++ b/unotools/source/misc/wincodepage.cxx @@ -13,60 +13,54 @@ namespace{ -// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756 -rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const char* pLanguage) +struct LangEncodingDef { - auto nLangLen = rtl_str_getLength(pLanguage); + const OUStringLiteral msLangStr; + rtl_TextEncoding meTextEncoding; +}; - struct LangEncodingDef - { - const char* mpLangStr; - decltype(nLangLen) mnLangStrLen; - rtl_TextEncoding meTextEncoding; - }; - static LangEncodingDef const aLanguageTab[] = +// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756 +rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const OUString& sLanguage) +{ + static constexpr LangEncodingDef aLanguageTab[] = { - { "en", 2, RTL_TEXTENCODING_MS_1252 }, // Most used -> first in list - { "th", 2, RTL_TEXTENCODING_MS_874 }, - { "ja", 2, RTL_TEXTENCODING_MS_932 }, - { "zh-cn", 5, RTL_TEXTENCODING_MS_936 }, // Chinese (simplified) - must go before "zh" - { "ko", 2, RTL_TEXTENCODING_MS_949 }, - { "zh", 2, RTL_TEXTENCODING_MS_950 }, // Chinese (traditional) - { "bs", 2, RTL_TEXTENCODING_MS_1250 }, - { "cs", 2, RTL_TEXTENCODING_MS_1250 }, - { "hr", 2, RTL_TEXTENCODING_MS_1250 }, - { "hu", 2, RTL_TEXTENCODING_MS_1250 }, - { "pl", 2, RTL_TEXTENCODING_MS_1250 }, - { "ro", 2, RTL_TEXTENCODING_MS_1250 }, - { "sk", 2, RTL_TEXTENCODING_MS_1250 }, - { "sl", 2, RTL_TEXTENCODING_MS_1250 }, -// { "sr", 2, RTL_TEXTENCODING_MS_1250 }, - { "sq", 2, RTL_TEXTENCODING_MS_1250 }, - { "be", 2, RTL_TEXTENCODING_MS_1251 }, - { "bg", 2, RTL_TEXTENCODING_MS_1251 }, - { "mk", 2, RTL_TEXTENCODING_MS_1251 }, - { "ru", 2, RTL_TEXTENCODING_MS_1251 }, - { "sr", 2, RTL_TEXTENCODING_MS_1251 }, - { "uk", 2, RTL_TEXTENCODING_MS_1251 }, - { "es", 2, RTL_TEXTENCODING_MS_1252 }, - { "el", 2, RTL_TEXTENCODING_MS_1253 }, - { "tr", 2, RTL_TEXTENCODING_MS_1254 }, - { "he", 2, RTL_TEXTENCODING_MS_1255 }, - { "ar", 2, RTL_TEXTENCODING_MS_1256 }, - { "et", 2, RTL_TEXTENCODING_MS_1257 }, - { "lt", 2, RTL_TEXTENCODING_MS_1257 }, - { "lv", 2, RTL_TEXTENCODING_MS_1257 }, - { "vi", 2, RTL_TEXTENCODING_MS_1258 }, + { "en", RTL_TEXTENCODING_MS_1252 }, // Most used -> first in list + { "th", RTL_TEXTENCODING_MS_874 }, + { "ja", RTL_TEXTENCODING_MS_932 }, + { "zh-cn", RTL_TEXTENCODING_MS_936 }, // Chinese (simplified) - must go before "zh" + { "ko", RTL_TEXTENCODING_MS_949 }, + { "zh", RTL_TEXTENCODING_MS_950 }, // Chinese (traditional) + { "bs", RTL_TEXTENCODING_MS_1250 }, + { "cs", RTL_TEXTENCODING_MS_1250 }, + { "hr", RTL_TEXTENCODING_MS_1250 }, + { "hu", RTL_TEXTENCODING_MS_1250 }, + { "pl", RTL_TEXTENCODING_MS_1250 }, + { "ro", RTL_TEXTENCODING_MS_1250 }, + { "sk", RTL_TEXTENCODING_MS_1250 }, + { "sl", RTL_TEXTENCODING_MS_1250 }, +// { "sr", RTL_TEXTENCODING_MS_1250 }, + { "sq", RTL_TEXTENCODING_MS_1250 }, + { "be", RTL_TEXTENCODING_MS_1251 }, + { "bg", RTL_TEXTENCODING_MS_1251 }, + { "mk", RTL_TEXTENCODING_MS_1251 }, + { "ru", RTL_TEXTENCODING_MS_1251 }, + { "sr", RTL_TEXTENCODING_MS_1251 }, + { "uk", RTL_TEXTENCODING_MS_1251 }, + { "es", RTL_TEXTENCODING_MS_1252 }, + { "el", RTL_TEXTENCODING_MS_1253 }, + { "tr", RTL_TEXTENCODING_MS_1254 }, + { "he", RTL_TEXTENCODING_MS_1255 }, + { "ar", RTL_TEXTENCODING_MS_1256 }, + { "et", RTL_TEXTENCODING_MS_1257 }, + { "lt", RTL_TEXTENCODING_MS_1257 }, + { "lv", RTL_TEXTENCODING_MS_1257 }, + { "vi", RTL_TEXTENCODING_MS_1258 }, }; for (auto& def : aLanguageTab) { - if (rtl_str_shortenedCompareIgnoreAsciiCase_WithLength(pLanguage, nLangLen, - def.mpLangStr, def.mnLangStrLen, - def.mnLangStrLen) == 0) - { + if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr)) return def.meTextEncoding; - } } return RTL_TEXTENCODING_MS_1252; @@ -76,69 +70,57 @@ rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const char* pLanguage) // See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756 // See http://shapelib.maptools.org/codepage.html -rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const char* pLanguage) +rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const OUString& sLanguage) { - auto nLangLen = rtl_str_getLength(pLanguage); - - struct LangEncodingDef - { - const char* mpLangStr; - decltype(nLangLen) mnLangStrLen; - rtl_TextEncoding meTextEncoding; - }; - static LangEncodingDef const aLanguageTab[] = + static constexpr LangEncodingDef aLanguageTab[] = { - { "de", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "en-us", 5, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "fi", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "fr-ca", 5, RTL_TEXTENCODING_IBM_863 }, // OEM French Canadian; French Canadian (DOS) - { "fr", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "it", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "nl", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "sv", 2, RTL_TEXTENCODING_IBM_437 }, // OEM United States - { "el", 2, RTL_TEXTENCODING_IBM_737 }, // OEM Greek (formerly 437G); Greek (DOS) - { "et", 2, RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) - { "lt", 2, RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) - { "lv", 2, RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) - { "en", 2, RTL_TEXTENCODING_IBM_850 }, // OEM Multilingual Latin 1; Western European (DOS) - { "bs", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "cs", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "hr", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "hu", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "pl", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "ro", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "sk", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "sl", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) -// { "sr", 2, RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) - { "bg", 2, RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) - { "mk", 2, RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) - { "sr", 2, RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) - { "tr", 2, RTL_TEXTENCODING_IBM_857 }, // OEM Turkish; Turkish (DOS) - { "pt", 2, RTL_TEXTENCODING_IBM_860 }, // OEM Portuguese; Portuguese (DOS) - { "is", 2, RTL_TEXTENCODING_IBM_861 }, // OEM Icelandic; Icelandic (DOS) - { "he", 2, RTL_TEXTENCODING_IBM_862 }, // OEM Hebrew; Hebrew (DOS) - { "ar", 2, RTL_TEXTENCODING_IBM_864 }, // OEM Arabic; Arabic (864) - { "da", 2, RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) - { "nn", 2, RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) - { "be", 2, RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) - { "ru", 2, RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) - { "uk", 2, RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) - { "th", 2, RTL_TEXTENCODING_MS_874 }, // ANSI/OEM Thai (ISO 8859-11); Thai (Windows) - { "ja", 2, RTL_TEXTENCODING_MS_932 }, // ANSI/OEM Japanese; Japanese (Shift-JIS) - { "zh-cn", 5, RTL_TEXTENCODING_MS_936 }, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) - { "ko", 2, RTL_TEXTENCODING_MS_949 }, // ANSI/OEM Korean (Unified Hangul Code) - { "zh", 2, RTL_TEXTENCODING_MS_950 }, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) - { "vi", 2, RTL_TEXTENCODING_MS_1258 }, // ANSI/OEM Vietnamese; Vietnamese (Windows) + { "de", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "en-us", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "fi", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "fr-ca", RTL_TEXTENCODING_IBM_863 }, // OEM French Canadian; French Canadian (DOS) + { "fr", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "it", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "nl", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "sv", RTL_TEXTENCODING_IBM_437 }, // OEM United States + { "el", RTL_TEXTENCODING_IBM_737 }, // OEM Greek (formerly 437G); Greek (DOS) + { "et", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) + { "lt", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) + { "lv", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) + { "en", RTL_TEXTENCODING_IBM_850 }, // OEM Multilingual Latin 1; Western European (DOS) + { "bs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "cs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "hr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "hu", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "pl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "ro", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "sk", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "sl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) +// { "sr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) + { "bg", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) + { "mk", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) + { "sr", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) + { "tr", RTL_TEXTENCODING_IBM_857 }, // OEM Turkish; Turkish (DOS) + { "pt", RTL_TEXTENCODING_IBM_860 }, // OEM Portuguese; Portuguese (DOS) + { "is", RTL_TEXTENCODING_IBM_861 }, // OEM Icelandic; Icelandic (DOS) + { "he", RTL_TEXTENCODING_IBM_862 }, // OEM Hebrew; Hebrew (DOS) + { "ar", RTL_TEXTENCODING_IBM_864 }, // OEM Arabic; Arabic (864) + { "da", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) + { "nn", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) + { "be", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) + { "ru", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) + { "uk", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) + { "th", RTL_TEXTENCODING_MS_874 }, // ANSI/OEM Thai (ISO 8859-11); Thai (Windows) + { "ja", RTL_TEXTENCODING_MS_932 }, // ANSI/OEM Japanese; Japanese (Shift-JIS) + { "zh-cn", RTL_TEXTENCODING_MS_936 }, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + { "ko", RTL_TEXTENCODING_MS_949 }, // ANSI/OEM Korean (Unified Hangul Code) + { "zh", RTL_TEXTENCODING_MS_950 }, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + { "vi", RTL_TEXTENCODING_MS_1258 }, // ANSI/OEM Vietnamese; Vietnamese (Windows) }; for (auto& def : aLanguageTab) { - if (rtl_str_shortenedCompareIgnoreAsciiCase_WithLength(pLanguage, nLangLen, - def.mpLangStr, def.mnLangStrLen, - def.mnLangStrLen) == 0) - { + if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr)) return def.meTextEncoding; - } } return RTL_TEXTENCODING_IBM_850; @@ -146,11 +128,11 @@ rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const char* pLanguage) } // namespace -rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const char* pLanguage, bool bOEM) +rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const OUString& sLanguage, bool bOEM) { return bOEM ? - impl_getWinTextEncodingFromLangStrOEM(pLanguage) : - impl_getWinTextEncodingFromLangStrANSI(pLanguage); + impl_getWinTextEncodingFromLangStrOEM(sLanguage) : + impl_getWinTextEncodingFromLangStrANSI(sLanguage); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtfdispatchvalue.cxx b/writerfilter/source/rtftok/rtfdispatchvalue.cxx index 990bfad976ae..e847e58f0797 100644 --- a/writerfilter/source/rtftok/rtfdispatchvalue.cxx +++ b/writerfilter/source/rtftok/rtfdispatchvalue.cxx @@ -841,9 +841,8 @@ RTFError RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) case RTF_CPG: { rtl_TextEncoding nEncoding - = (nParam == 0) - ? utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr()) - : rtl_getTextEncodingFromWindowsCodePage(nParam); + = (nParam == 0) ? utl_getWinTextEncodingFromLangStr(getLODefaultLanguage()) + : rtl_getTextEncodingFromWindowsCodePage(nParam); if (nKeyword == RTF_ANSICPG) m_aDefaultState.setCurrentEncoding(nEncoding); else |