diff options
author | Vasily Melenchuk <vasily.melenchuk@cib.de> | 2022-04-05 19:13:05 +0300 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.com> | 2022-04-07 14:29:04 +0200 |
commit | 965313b9efc761c70aacf6e3ebee60ffa2b1d5dd (patch) | |
tree | b9b2bb0d66303485b1db0aa4a2f5aa8be8b597fe /sw/qa | |
parent | 751c6e25a3998845325c9b107163fc23a85b3367 (diff) |
tdf#95706: RTF import: Use fontname suffixes to detect encoding
Font names like "Arial CE" or "Times New Roman Cyr" do not denote separate
fonts: they are the regular Arial, Times New Roman, etc., and the
suffix can be used to detect the encoding used for the RTF text.
Most interestingly, in MS Word these suffixes take priority over the explicit charset keywords:
{\f34\cpg1253\fcharset161 Arial Baltic;} will be decoded as cp1257
and not as cp1253.
This looks like a compatibility quirk dating back to the dark ages.
Change-Id: Ife8e781d5d04c3f6a8c11fcf604357c74bf33055
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132584
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Diffstat (limited to 'sw/qa')
-rw-r--r-- | sw/qa/extras/rtfexport/data/tdf95706.rtf | 36 | ||||
-rw-r--r-- | sw/qa/extras/rtfexport/rtfexport4.cxx | 61 |
2 files changed, 97 insertions, 0 deletions
diff --git a/sw/qa/extras/rtfexport/data/tdf95706.rtf b/sw/qa/extras/rtfexport/data/tdf95706.rtf new file mode 100644 index 000000000000..64c97930441d --- /dev/null +++ b/sw/qa/extras/rtfexport/data/tdf95706.rtf @@ -0,0 +1,36 @@ +{\rtf\ansi
+{\fonttbl
+{\f1 Arial Baltic;}
+{\f2 Arial CE;}
+{\f3 Arial Cyr;}
+{\f4 Arial Greek;}
+{\f5 Arial Tur;}
+{\f6 Arial (Hebrew);}
+{\f7 Arial (Arabic);}
+{\f8 Arial (Vietnamese);}
+{\f9 Arial BlaBlaBla;}
+
+{\f10\cpg1253\fcharset161 Arial;}
+{\f11\fcharset161 Arial;}
+{\f12\cpg1253 Arial;}
+{\f13\cpg1253\fcharset161 Arial Baltic;}
+{\f14 Arial Baltic;\cpg1253\fcharset161}
+
+}
+\pard Font name suffixes:\par
+\pard\f1\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f2\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f3\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f4\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f5\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f6\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f7\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f8\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f9\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard Font entry charset values:\par
+\pard\f10\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f11\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f12\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f13\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f14\fs26 \'c0\'c1\'c2\'c3\'c4\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport4.cxx b/sw/qa/extras/rtfexport/rtfexport4.cxx index 8f81b1565695..2d036538d6fe 100644 --- a/sw/qa/extras/rtfexport/rtfexport4.cxx +++ b/sw/qa/extras/rtfexport/rtfexport4.cxx @@ -492,6 +492,67 @@ CPPUNIT_TEST_FIXTURE(Test, testClearingBreak) verify(); } +DECLARE_RTFEXPORT_TEST(testTdf95706, "tdf95706.rtf") +{ + uno::Reference<text::XTextRange> xRun2 + = getRun(getParagraph(2), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun2, "CharFontName")); + + uno::Reference<text::XTextRange> xRun3 + = getRun(getParagraph(3), 1, u"\u0154\u00c1\u00c2\u0102\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun3, "CharFontName")); + + uno::Reference<text::XTextRange> xRun4 + = getRun(getParagraph(4), 1, u"\u0410\u0411\u0412\u0413\u0414"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun4, "CharFontName")); + + uno::Reference<text::XTextRange> xRun5 + = getRun(getParagraph(5), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun5, "CharFontName")); + + uno::Reference<text::XTextRange> xRun6 + = getRun(getParagraph(6), 1, u"\u00c0\u00c1\u00c2\u00c3\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun6, "CharFontName")); + + uno::Reference<text::XTextRange> xRun7 + = getRun(getParagraph(7), 1, u"\u05b0\u05b1\u05b2\u05b3\u05b4"); + // Do not check font for Hebrew: it can be substituted by smth able to handle these chars + //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun7, "CharFontName")); + + uno::Reference<text::XTextRange> xRun8 + = getRun(getParagraph(8), 1, u"\u06c1\u0621\u0622\u0623\u0624"); + // Do not check font for Arabic: it can be substituted by smth able to handle these chars + //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun8, "CharFontName")); + + uno::Reference<text::XTextRange> xRun9 + = 
getRun(getParagraph(9), 1, u"\u00c0\u00c1\u00c2\u0102\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun9, "CharFontName")); + + // Ensure strange font remains strange. No reason to check content: in this case it can vary on locale + uno::Reference<text::XTextRange> xRun10 = getRun(getParagraph(10), 1); + CPPUNIT_ASSERT_EQUAL(OUString("Arial BlaBlaBla"), + getProperty<OUString>(xRun10, "CharFontName")); + + uno::Reference<text::XTextRange> xRun12 + = getRun(getParagraph(12), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun12, "CharFontName")); + + uno::Reference<text::XTextRange> xRun13 + = getRun(getParagraph(13), 1, u"\u0390\u0391\u0392\u0393\u0394"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun13, "CharFontName")); + + uno::Reference<text::XTextRange> xRun14 = getRun(getParagraph(14), 1); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun14, "CharFontName")); + + uno::Reference<text::XTextRange> xRun15 + = getRun(getParagraph(15), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun15, "CharFontName")); + + uno::Reference<text::XTextRange> xRun16 + = getRun(getParagraph(16), 1, u"\u0104\u012e\u0100\u0106\u00c4"); + CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun16, "CharFontName")); +} + DECLARE_RTFEXPORT_TEST(testTdf111851, "tdf111851.rtf") { uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), uno::UNO_QUERY); |