diff options
-rw-r--r-- | sw/qa/extras/rtfexport/data/tdf95706.rtf | 36 | ||||
-rw-r--r-- | sw/qa/extras/rtfexport/rtfexport4.cxx | 61 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfcharsets.cxx | 9 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfcharsets.hxx | 13 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfdocumentimpl.cxx | 39 |
5 files changed, 156 insertions, 2 deletions
diff --git a/sw/qa/extras/rtfexport/data/tdf95706.rtf b/sw/qa/extras/rtfexport/data/tdf95706.rtf new file mode 100644 index 000000000000..64c97930441d --- /dev/null +++ b/sw/qa/extras/rtfexport/data/tdf95706.rtf @@ -0,0 +1,36 @@ +{\rtf\ansi
+{\fonttbl
+{\f1 Arial Baltic;}
+{\f2 Arial CE;}
+{\f3 Arial Cyr;}
+{\f4 Arial Greek;}
+{\f5 Arial Tur;}
+{\f6 Arial (Hebrew);}
+{\f7 Arial (Arabic);}
+{\f8 Arial (Vietnamese);}
+{\f9 Arial BlaBlaBla;}
+
+{\f10\cpg1253\fcharset161 Arial;}
+{\f11\fcharset161 Arial;}
+{\f12\cpg1253 Arial;}
+{\f13\cpg1253\fcharset161 Arial Baltic;}
+{\f14 Arial Baltic;\cpg1253\fcharset161}
+
+}
+\pard Font name suffixes:\par
+\pard\f1\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f2\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f3\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f4\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f5\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f6\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f7\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f8\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f9\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard Font entry charset values:\par
+\pard\f10\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f11\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f12\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f13\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f14\fs26 \'c0\'c1\'c2\'c3\'c4\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport4.cxx b/sw/qa/extras/rtfexport/rtfexport4.cxx index 8f81b1565695..2d036538d6fe 100644 --- a/sw/qa/extras/rtfexport/rtfexport4.cxx +++ b/sw/qa/extras/rtfexport/rtfexport4.cxx @@ -492,6 +492,67 @@ CPPUNIT_TEST_FIXTURE(Test, testClearingBreak) verify(); } +DECLARE_RTFEXPORT_TEST(testTdf95706, "tdf95706.rtf") +{ + uno::Reference<text::XTextRange> xRun2 + = getRun(getParagraph(2), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun2, "CharFontName")); + + uno::Reference<text::XTextRange> xRun3 + = getRun(getParagraph(3), 1, u"\u0154\u00c1\u00c2\u0102\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun3, "CharFontName")); + + uno::Reference<text::XTextRange> xRun4 + = getRun(getParagraph(4), 1, u"\u0410\u0411\u0412\u0413\u0414"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun4, "CharFontName")); + + uno::Reference<text::XTextRange> xRun5 + = getRun(getParagraph(5), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun5, "CharFontName")); + + uno::Reference<text::XTextRange> xRun6 + = getRun(getParagraph(6), 1, u"\u00c0\u00c1\u00c2\u00c3\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun6, "CharFontName")); + + uno::Reference<text::XTextRange> xRun7 + = getRun(getParagraph(7), 1, u"\u05b0\u05b1\u05b2\u05b3\u05b4"); + // Do not check font for Hebrew: it can be substituted by something able to handle these characters + //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun7, "CharFontName")); + + uno::Reference<text::XTextRange> xRun8 + = getRun(getParagraph(8), 1, u"\u06c1\u0621\u0622\u0623\u0624"); + // Do not check font for Arabic: it can be substituted by something able to handle these characters + //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun8, "CharFontName")); + + uno::Reference<text::XTextRange> xRun9 + = 
getRun(getParagraph(9), 1, u"\u00c0\u00c1\u00c2\u0102\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun9, "CharFontName")); + + // Ensure strange font remains strange. There is no reason to check the content: in this case it can vary by locale + uno::Reference<text::XTextRange> xRun10 = getRun(getParagraph(10), 1); + CPPUNIT_ASSERT_EQUAL(OUString("Arial BlaBlaBla"), + getProperty<OUString>(xRun10, "CharFontName")); + + uno::Reference<text::XTextRange> xRun12 + = getRun(getParagraph(12), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun12, "CharFontName")); + + uno::Reference<text::XTextRange> xRun13 + = getRun(getParagraph(13), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun13, "CharFontName")); + + uno::Reference<text::XTextRange> xRun14 = getRun(getParagraph(14), 1); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun14, "CharFontName")); + + uno::Reference<text::XTextRange> xRun15 + = getRun(getParagraph(15), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun15, "CharFontName")); + + uno::Reference<text::XTextRange> xRun16 + = getRun(getParagraph(16), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun16, "CharFontName")); +} + DECLARE_RTFEXPORT_TEST(testTdf111851, "tdf111851.rtf") { uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), uno::UNO_QUERY); diff --git a/writerfilter/source/rtftok/rtfcharsets.cxx b/writerfilter/source/rtftok/rtfcharsets.cxx index f7a03ee17dc9..14d27b5f2deb 100644 --- a/writerfilter/source/rtftok/rtfcharsets.cxx +++ b/writerfilter/source/rtftok/rtfcharsets.cxx @@ -9,6 +9,7 @@ #include "rtfcharsets.hxx" #include <array> +#include <rtl/textenc.h> namespace writerfilter::rtftok { @@ -50,6 +51,14 @@ RTFEncoding const aRTFEncodings[] = { int nRTFEncodings = 
std::size(aRTFEncodings); +RTFFontNameSuffix const aRTFFontNameSuffixes[] = { + { "Baltic", RTL_TEXTENCODING_MS_1257 }, { "CE", RTL_TEXTENCODING_MS_1250 }, + { "Cyr", RTL_TEXTENCODING_MS_1251 }, { "Greek", RTL_TEXTENCODING_MS_1253 }, + { "Tur", RTL_TEXTENCODING_MS_1254 }, { "(Hebrew)", RTL_TEXTENCODING_MS_1255 }, + { "(Arabic)", RTL_TEXTENCODING_MS_1256 }, { "(Vietnamese)", RTL_TEXTENCODING_MS_1258 }, + { "", RTL_TEXTENCODING_DONTKNOW } // End-of-array sentinel +}; + } // namespace writerfilter::rtftok /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtfcharsets.hxx b/writerfilter/source/rtftok/rtfcharsets.hxx index 865a9310289e..826dea271f6b 100644 --- a/writerfilter/source/rtftok/rtfcharsets.hxx +++ b/writerfilter/source/rtftok/rtfcharsets.hxx @@ -19,6 +19,19 @@ struct RTFEncoding }; extern RTFEncoding const aRTFEncodings[]; extern int nRTFEncodings; + +/// A font name can contain special suffixes used +/// to determine the encoding for a given font table entry. +/// For example, "Arial CE" is "Arial" with the CP1250 encoding. +/// The list of these suffixes is not official and was determined in an empirical +/// way, thus it may be inexact and incomplete. 
+struct RTFFontNameSuffix +{ + const char* suffix; + int codepage; +}; +extern RTFFontNameSuffix const aRTFFontNameSuffixes[]; + } // namespace writerfilter::rtftok /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index da53e2df1f24..5a19ccebb20a 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -50,6 +50,7 @@ #include "rtfskipdestination.hxx" #include "rtftokenizer.hxx" #include "rtflookahead.hxx" +#include "rtfcharsets.hxx" using namespace com::sun::star; @@ -1370,14 +1371,48 @@ void RTFDocumentImpl::text(OUString& rString) case Destination::FONTTABLE: case Destination::FONTENTRY: { - m_aFontNames[m_nCurrentFontIndex] = aName; + // Old documents can contain no encoding information in fontinfo, + // but there can be font name suffixes: Arial CE is not a special + // font, it is ordinary Arial, but used with the cp1250 encoding. + // Moreover, these suffixes have priority over \cpgN and \fcharsetN + // in MS Word. 
+ OUString aFontSuffix; + OUString aNameNoSuffix(aName); + sal_Int32 nLastSpace = aName.lastIndexOf(' '); + if (nLastSpace >= 0) + { + aFontSuffix = aName.copy(nLastSpace + 1); + aNameNoSuffix = aName.copy(0, nLastSpace); + sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW; + for (int i = 0; + aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++) + { + if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix)) + { + nEncoding = aRTFFontNameSuffixes[i].codepage; + break; + } + } + if (nEncoding > RTL_TEXTENCODING_DONTKNOW) + { + m_nCurrentEncoding = nEncoding; + m_aStates.top().setCurrentEncoding(m_nCurrentEncoding); + } + else + { + // Unknown suffix: it looks like it is just a part of the font name, so restore it + aNameNoSuffix = aName; + } + } + + m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix; if (m_nCurrentEncoding >= 0) { m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding; m_nCurrentEncoding = -1; } m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name, - new RTFValue(aName)); + new RTFValue(aNameNoSuffix)); writerfilter::Reference<Properties>::Pointer_t const pProp( new RTFReferenceProperties(m_aStates.top().getTableAttributes(), |