diff options
Diffstat (limited to 'writerfilter')
-rw-r--r-- | writerfilter/source/rtftok/rtfcharsets.cxx | 9 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfcharsets.hxx | 13 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfdocumentimpl.cxx | 39 |
3 files changed, 59 insertions, 2 deletions
diff --git a/writerfilter/source/rtftok/rtfcharsets.cxx b/writerfilter/source/rtftok/rtfcharsets.cxx index f7a03ee17dc9..14d27b5f2deb 100644 --- a/writerfilter/source/rtftok/rtfcharsets.cxx +++ b/writerfilter/source/rtftok/rtfcharsets.cxx @@ -9,6 +9,7 @@ #include "rtfcharsets.hxx" #include <array> +#include <rtl/textenc.h> namespace writerfilter::rtftok { @@ -50,6 +51,14 @@ RTFEncoding const aRTFEncodings[] = { int nRTFEncodings = std::size(aRTFEncodings); +RTFFontNameSuffix const aRTFFontNameSuffixes[] = { + { "Baltic", RTL_TEXTENCODING_MS_1257 }, { "CE", RTL_TEXTENCODING_MS_1250 }, + { "Cyr", RTL_TEXTENCODING_MS_1251 }, { "Greek", RTL_TEXTENCODING_MS_1253 }, + { "Tur", RTL_TEXTENCODING_MS_1254 }, { "(Hebrew)", RTL_TEXTENCODING_MS_1255 }, + { "(Arabic)", RTL_TEXTENCODING_MS_1256 }, { "(Vietnamese)", RTL_TEXTENCODING_MS_1258 }, + { "", RTL_TEXTENCODING_DONTKNOW } // End of array +}; + } // namespace writerfilter::rtftok /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtfcharsets.hxx b/writerfilter/source/rtftok/rtfcharsets.hxx index 865a9310289e..826dea271f6b 100644 --- a/writerfilter/source/rtftok/rtfcharsets.hxx +++ b/writerfilter/source/rtftok/rtfcharsets.hxx @@ -19,6 +19,19 @@ struct RTFEncoding }; extern RTFEncoding const aRTFEncodings[]; extern int nRTFEncodings; + +/// Font name can contain special suffixes used +/// to determine encoding for given font table entry +/// For example "Arial CE" is "Arial" with CP1250 encoding +/// List of these suffixes is not official and was detected in an empirical +/// way thus may be inexact and incomplete. 
+struct RTFFontNameSuffix +{ + const char* suffix; + int codepage; +}; +extern RTFFontNameSuffix const aRTFFontNameSuffixes[]; + } // namespace writerfilter::rtftok /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index da53e2df1f24..5a19ccebb20a 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -50,6 +50,7 @@ #include "rtfskipdestination.hxx" #include "rtftokenizer.hxx" #include "rtflookahead.hxx" +#include "rtfcharsets.hxx" using namespace com::sun::star; @@ -1370,14 +1371,48 @@ void RTFDocumentImpl::text(OUString& rString) case Destination::FONTTABLE: case Destination::FONTENTRY: { - m_aFontNames[m_nCurrentFontIndex] = aName; + // Old documents can contain no encoding information in fontinfo, + // but there can be font name suffixes: Arial CE is not a special + // font, it is ordinary Arial, but used with cp 1250 encoding. + // Moreover these suffixes have priority over \cpgN and \fcharsetN + // in MS Word. 
+ OUString aFontSuffix; + OUString aNameNoSuffix(aName); + sal_Int32 nLastSpace = aName.lastIndexOf(' '); + if (nLastSpace >= 0) + { + aFontSuffix = aName.copy(nLastSpace + 1); + aNameNoSuffix = aName.copy(0, nLastSpace); + sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW; + for (int i = 0; + aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++) + { + if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix)) + { + nEncoding = aRTFFontNameSuffixes[i].codepage; + break; + } + } + if (nEncoding > RTL_TEXTENCODING_DONTKNOW) + { + m_nCurrentEncoding = nEncoding; + m_aStates.top().setCurrentEncoding(m_nCurrentEncoding); + } + else + { + // Unknown suffix: looks like it is just a part of font name, restore it + aNameNoSuffix = aName; + } + } + + m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix; if (m_nCurrentEncoding >= 0) { m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding; m_nCurrentEncoding = -1; } m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name, - new RTFValue(aName)); + new RTFValue(aNameNoSuffix)); writerfilter::Reference<Properties>::Pointer_t const pProp( new RTFReferenceProperties(m_aStates.top().getTableAttributes(), |