summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- sw/qa/extras/rtfexport/data/tdf95706.rtf 36
-rw-r--r-- sw/qa/extras/rtfexport/rtfexport4.cxx 61
-rw-r--r-- writerfilter/source/rtftok/rtfcharsets.cxx 9
-rw-r--r-- writerfilter/source/rtftok/rtfcharsets.hxx 13
-rw-r--r-- writerfilter/source/rtftok/rtfdocumentimpl.cxx 39
5 files changed, 156 insertions, 2 deletions
diff --git a/sw/qa/extras/rtfexport/data/tdf95706.rtf b/sw/qa/extras/rtfexport/data/tdf95706.rtf
new file mode 100644
index 000000000000..64c97930441d
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/tdf95706.rtf
@@ -0,0 +1,36 @@
+{\rtf\ansi
+{\fonttbl
+{\f1 Arial Baltic;}
+{\f2 Arial CE;}
+{\f3 Arial Cyr;}
+{\f4 Arial Greek;}
+{\f5 Arial Tur;}
+{\f6 Arial (Hebrew);}
+{\f7 Arial (Arabic);}
+{\f8 Arial (Vietnamese);}
+{\f9 Arial BlaBlaBla;}
+
+{\f10\cpg1253\fcharset161 Arial;}
+{\f11\fcharset161 Arial;}
+{\f12\cpg1253 Arial;}
+{\f13\cpg1253\fcharset161 Arial Baltic;}
+{\f14 Arial Baltic;\cpg1253\fcharset161}
+
+}
+\pard Font name suffixes:\par
+\pard\f1\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f2\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f3\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f4\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f5\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f6\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f7\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f8\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f9\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard Font entry charset values:\par
+\pard\f10\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f11\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f12\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f13\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f14\fs26 \'c0\'c1\'c2\'c3\'c4\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport4.cxx b/sw/qa/extras/rtfexport/rtfexport4.cxx
index 8f81b1565695..2d036538d6fe 100644
--- a/sw/qa/extras/rtfexport/rtfexport4.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport4.cxx
@@ -492,6 +492,67 @@ CPPUNIT_TEST_FIXTURE(Test, testClearingBreak)
verify();
}
+DECLARE_RTFEXPORT_TEST(testTdf95706, "tdf95706.rtf") // font name suffixes ("Arial CE", ...) must select the encoding and be dropped from the font name
+{
+ uno::Reference<text::XTextRange> xRun2
+ = getRun(getParagraph(2), 1, u"\u0104\u012e\u0100\u0106\u00c4"); // f1 "Arial Baltic"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun2, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun3
+ = getRun(getParagraph(3), 1, u"\u0154\u00c1\u00c2\u0102\u00c4"); // f2 "Arial CE"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun3, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun4
+ = getRun(getParagraph(4), 1, u"\u0410\u0411\u0412\u0413\u0414"); // f3 "Arial Cyr"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun4, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun5
+ = getRun(getParagraph(5), 1, u"\u0390\u0391\u0392\u0393\u0394"); // f4 "Arial Greek"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun5, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun6
+ = getRun(getParagraph(6), 1, u"\u00c0\u00c1\u00c2\u00c3\u00c4"); // f5 "Arial Tur"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun6, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun7
+ = getRun(getParagraph(7), 1, u"\u05b0\u05b1\u05b2\u05b3\u05b4"); // f6 "Arial (Hebrew)"
+ // Do not check the font for Hebrew: it can be substituted by something able to handle these chars
+ //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun7, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun8
+ = getRun(getParagraph(8), 1, u"\u06c1\u0621\u0622\u0623\u0624"); // f7 "Arial (Arabic)"
+ // Do not check the font for Arabic: it can be substituted by something able to handle these chars
+ //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun8, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun9
+ = getRun(getParagraph(9), 1, u"\u00c0\u00c1\u00c2\u0102\u00c4"); // f8 "Arial (Vietnamese)"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun9, "CharFontName"));
+
+ // Ensure a font name with an unknown suffix is kept as is. No need to check the content: in this case it can vary depending on the locale
+ uno::Reference<text::XTextRange> xRun10 = getRun(getParagraph(10), 1); // f9 "Arial BlaBlaBla"
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial BlaBlaBla"),
+ getProperty<OUString>(xRun10, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun12
+ = getRun(getParagraph(12), 1, u"\u0390\u0391\u0392\u0393\u0394"); // f10: "Arial" with cpg1253 and fcharset161
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun12, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun13
+ = getRun(getParagraph(13), 1, u"\u0390\u0391\u0392\u0393\u0394"); // f11: "Arial" with fcharset161 only
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun13, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun14 = getRun(getParagraph(14), 1); // f12: "Arial" with cpg1253 only; run text is not checked here
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun14, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun15
+ = getRun(getParagraph(15), 1, u"\u0104\u012e\u0100\u0106\u00c4"); // f13: "Arial Baltic" with cpg1253 and fcharset161; same text as f1 is expected
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun15, "CharFontName"));
+
+ uno::Reference<text::XTextRange> xRun16
+ = getRun(getParagraph(16), 1, u"\u0104\u012e\u0100\u0106\u00c4"); // f14: "Arial Baltic" with cpg1253 and fcharset161 placed after the name; same text as f1 is expected
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun16, "CharFontName"));
+}
+
DECLARE_RTFEXPORT_TEST(testTdf111851, "tdf111851.rtf")
{
uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), uno::UNO_QUERY);
diff --git a/writerfilter/source/rtftok/rtfcharsets.cxx b/writerfilter/source/rtftok/rtfcharsets.cxx
index f7a03ee17dc9..14d27b5f2deb 100644
--- a/writerfilter/source/rtftok/rtfcharsets.cxx
+++ b/writerfilter/source/rtftok/rtfcharsets.cxx
@@ -9,6 +9,7 @@
#include "rtfcharsets.hxx"
#include <array>
+#include <rtl/textenc.h>
namespace writerfilter::rtftok
{
@@ -50,6 +51,14 @@ RTFEncoding const aRTFEncodings[] = {
int nRTFEncodings = std::size(aRTFEncodings);
+RTFFontNameSuffix const aRTFFontNameSuffixes[] = { // font name suffix -> RTL_TEXTENCODING_* value
+ { "Baltic", RTL_TEXTENCODING_MS_1257 }, { "CE", RTL_TEXTENCODING_MS_1250 },
+ { "Cyr", RTL_TEXTENCODING_MS_1251 }, { "Greek", RTL_TEXTENCODING_MS_1253 },
+ { "Tur", RTL_TEXTENCODING_MS_1254 }, { "(Hebrew)", RTL_TEXTENCODING_MS_1255 },
+ { "(Arabic)", RTL_TEXTENCODING_MS_1256 }, { "(Vietnamese)", RTL_TEXTENCODING_MS_1258 },
+ { "", RTL_TEXTENCODING_DONTKNOW } // Sentinel entry: RTL_TEXTENCODING_DONTKNOW marks the end of the array
+};
+
} // namespace writerfilter::rtftok
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/rtftok/rtfcharsets.hxx b/writerfilter/source/rtftok/rtfcharsets.hxx
index 865a9310289e..826dea271f6b 100644
--- a/writerfilter/source/rtftok/rtfcharsets.hxx
+++ b/writerfilter/source/rtftok/rtfcharsets.hxx
@@ -19,6 +19,19 @@ struct RTFEncoding
};
extern RTFEncoding const aRTFEncodings[];
extern int nRTFEncodings;
+
+/// A font name can carry a special suffix that is used
+/// to determine the encoding of the given font table entry.
+/// For example, "Arial CE" is "Arial" with the CP1250 encoding.
+/// The list of these suffixes is not official: it was determined
+/// empirically and thus may be inexact and incomplete.
+struct RTFFontNameSuffix
+{
+ const char* suffix; ///< Suffix text as it appears in the font name, e.g. "CE"
+ int codepage; ///< Encoding selected by the suffix, stored as an RTL_TEXTENCODING_* value
+};
+extern RTFFontNameSuffix const aRTFFontNameSuffixes[]; ///< Ends with an entry whose codepage is RTL_TEXTENCODING_DONTKNOW
+
} // namespace writerfilter::rtftok
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index da53e2df1f24..5a19ccebb20a 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -50,6 +50,7 @@
#include "rtfskipdestination.hxx"
#include "rtftokenizer.hxx"
#include "rtflookahead.hxx"
+#include "rtfcharsets.hxx"
using namespace com::sun::star;
@@ -1370,14 +1371,48 @@ void RTFDocumentImpl::text(OUString& rString)
case Destination::FONTTABLE:
case Destination::FONTENTRY:
{
- m_aFontNames[m_nCurrentFontIndex] = aName;
+ // Old documents may carry no encoding information in the font info,
+ // but the font name can have a suffix: "Arial CE" is not a special
+ // font, it is ordinary Arial used with the cp1250 encoding.
+ // Moreover, these suffixes take priority over \cpgN and \fcharsetN
+ // in MS Word.
+ OUString aFontSuffix;
+ OUString aNameNoSuffix(aName);
+ sal_Int32 nLastSpace = aName.lastIndexOf(' ');
+ if (nLastSpace >= 0)
+ {
+ aFontSuffix = aName.copy(nLastSpace + 1);
+ aNameNoSuffix = aName.copy(0, nLastSpace);
+ sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
+ for (int i = 0;
+ aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++)
+ {
+ if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
+ {
+ nEncoding = aRTFFontNameSuffixes[i].codepage;
+ break;
+ }
+ }
+ if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
+ {
+ m_nCurrentEncoding = nEncoding;
+ m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
+ }
+ else
+ {
+ // Unknown suffix: looks like it is just a part of font name, restore it
+ aNameNoSuffix = aName;
+ }
+ }
+
+ m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
if (m_nCurrentEncoding >= 0)
{
m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding;
m_nCurrentEncoding = -1;
}
m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
- new RTFValue(aName));
+ new RTFValue(aNameNoSuffix));
writerfilter::Reference<Properties>::Pointer_t const pProp(
new RTFReferenceProperties(m_aStates.top().getTableAttributes(),