summaryrefslogtreecommitdiff
path: root/writerfilter
diff options
context:
space:
mode:
Diffstat (limited to 'writerfilter')
-rw-r--r--writerfilter/source/rtftok/rtfcharsets.cxx9
-rw-r--r--writerfilter/source/rtftok/rtfcharsets.hxx13
-rw-r--r--writerfilter/source/rtftok/rtfdocumentimpl.cxx39
3 files changed, 59 insertions, 2 deletions
diff --git a/writerfilter/source/rtftok/rtfcharsets.cxx b/writerfilter/source/rtftok/rtfcharsets.cxx
index f7a03ee17dc9..14d27b5f2deb 100644
--- a/writerfilter/source/rtftok/rtfcharsets.cxx
+++ b/writerfilter/source/rtftok/rtfcharsets.cxx
@@ -9,6 +9,7 @@
#include "rtfcharsets.hxx"
#include <array>
+#include <rtl/textenc.h>
namespace writerfilter::rtftok
{
@@ -50,6 +51,14 @@ RTFEncoding const aRTFEncodings[] = {
int nRTFEncodings = std::size(aRTFEncodings);
+RTFFontNameSuffix const aRTFFontNameSuffixes[] = {
+ { "Baltic", RTL_TEXTENCODING_MS_1257 }, { "CE", RTL_TEXTENCODING_MS_1250 },
+ { "Cyr", RTL_TEXTENCODING_MS_1251 }, { "Greek", RTL_TEXTENCODING_MS_1253 },
+ { "Tur", RTL_TEXTENCODING_MS_1254 }, { "(Hebrew)", RTL_TEXTENCODING_MS_1255 },
+ { "(Arabic)", RTL_TEXTENCODING_MS_1256 }, { "(Vietnamese)", RTL_TEXTENCODING_MS_1258 },
+ { "", RTL_TEXTENCODING_DONTKNOW } // Sentinel entry: marks the end of the array
+};
+
} // namespace writerfilter::rtftok
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/rtftok/rtfcharsets.hxx b/writerfilter/source/rtftok/rtfcharsets.hxx
index 865a9310289e..826dea271f6b 100644
--- a/writerfilter/source/rtftok/rtfcharsets.hxx
+++ b/writerfilter/source/rtftok/rtfcharsets.hxx
@@ -19,6 +19,19 @@ struct RTFEncoding
};
extern RTFEncoding const aRTFEncodings[];
extern int nRTFEncodings;
+
+/// Font names can carry special suffixes that
+/// determine the encoding of the given font table entry.
+/// For example, "Arial CE" is "Arial" with the CP1250 encoding.
+/// The list of these suffixes is not official; it was determined
+/// empirically and thus may be inexact and incomplete.
+struct RTFFontNameSuffix
+{
+ const char* suffix; ///< Suffix text as it appears in the font name, e.g. "CE"
+ int codepage; ///< RTL_TEXTENCODING_* value associated with the suffix
+};
+extern RTFFontNameSuffix const aRTFFontNameSuffixes[];
+
} // namespace writerfilter::rtftok
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index da53e2df1f24..5a19ccebb20a 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -50,6 +50,7 @@
#include "rtfskipdestination.hxx"
#include "rtftokenizer.hxx"
#include "rtflookahead.hxx"
+#include "rtfcharsets.hxx"
using namespace com::sun::star;
@@ -1370,14 +1371,48 @@ void RTFDocumentImpl::text(OUString& rString)
case Destination::FONTTABLE:
case Destination::FONTENTRY:
{
- m_aFontNames[m_nCurrentFontIndex] = aName;
+ // Old documents can contain no encoding information in fontinfo,
+ // but there can be font name suffixes: Arial CE is not a special
+ // font, it is ordinary Arial, but used with the cp1250 encoding.
+ // Moreover, these suffixes have priority over \cpgN and \fcharsetN
+ // in MS Word.
+ OUString aFontSuffix;
+ OUString aNameNoSuffix(aName);
+ sal_Int32 nLastSpace = aName.lastIndexOf(' ');
+ if (nLastSpace >= 0)
+ {
+ aFontSuffix = aName.copy(nLastSpace + 1);
+ aNameNoSuffix = aName.copy(0, nLastSpace);
+ sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
+ for (int i = 0;
+ aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++)
+ {
+ if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
+ {
+ nEncoding = aRTFFontNameSuffixes[i].codepage;
+ break;
+ }
+ }
+ if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
+ {
+ m_nCurrentEncoding = nEncoding;
+ m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
+ }
+ else
+ {
+ // Unknown suffix: looks like it is just a part of font name, restore it
+ aNameNoSuffix = aName;
+ }
+ }
+
+ m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
if (m_nCurrentEncoding >= 0)
{
m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding;
m_nCurrentEncoding = -1;
}
m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
- new RTFValue(aName));
+ new RTFValue(aNameNoSuffix));
writerfilter::Reference<Properties>::Pointer_t const pProp(
new RTFReferenceProperties(m_aStates.top().getTableAttributes(),