summaryrefslogtreecommitdiff
path: root/sw/qa
diff options
context:
space:
mode:
author Vasily Melenchuk <vasily.melenchuk@cib.de> 2022-04-05 19:13:05 +0300
committer Miklos Vajna <vmiklos@collabora.com> 2022-04-07 14:29:04 +0200
commit 965313b9efc761c70aacf6e3ebee60ffa2b1d5dd (patch)
tree b9b2bb0d66303485b1db0aa4a2f5aa8be8b597fe /sw/qa
parent 751c6e25a3998845325c9b107163fc23a85b3367 (diff)
tdf#95706: RTF import: Use fontname suffixes to detect encoding
Font names like "Arial CE" or "Times New Roman Cyr" are not special fonts: they are the classical Arial, Times New Roman, and so on. Their suffixes can be used to detect the encoding used for RTF text. Most interesting: for MS Word these suffixes have priority, so {\f34\cpg1253\fcharset161 Arial Baltic;} will use cp1257 and not cp1253. This looks like a compatibility issue inherited from the dark ages.
Change-Id: Ife8e781d5d04c3f6a8c11fcf604357c74bf33055
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132584
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Diffstat (limited to 'sw/qa')
-rw-r--r-- sw/qa/extras/rtfexport/data/tdf95706.rtf 36
-rw-r--r-- sw/qa/extras/rtfexport/rtfexport4.cxx 61
2 files changed, 97 insertions, 0 deletions
diff --git a/sw/qa/extras/rtfexport/data/tdf95706.rtf b/sw/qa/extras/rtfexport/data/tdf95706.rtf
new file mode 100644
index 000000000000..64c97930441d
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/tdf95706.rtf
@@ -0,0 +1,36 @@
+{\rtf\ansi
+{\fonttbl
+{\f1 Arial Baltic;}
+{\f2 Arial CE;}
+{\f3 Arial Cyr;}
+{\f4 Arial Greek;}
+{\f5 Arial Tur;}
+{\f6 Arial (Hebrew);}
+{\f7 Arial (Arabic);}
+{\f8 Arial (Vietnamese);}
+{\f9 Arial BlaBlaBla;}
+
+{\f10\cpg1253\fcharset161 Arial;}
+{\f11\fcharset161 Arial;}
+{\f12\cpg1253 Arial;}
+{\f13\cpg1253\fcharset161 Arial Baltic;}
+{\f14 Arial Baltic;\cpg1253\fcharset161}
+
+}
+\pard Font name suffixes:\par
+\pard\f1\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f2\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f3\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f4\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f5\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f6\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f7\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f8\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f9\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard Font entry charset values:\par
+\pard\f10\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f11\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f12\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f13\fs26 \'c0\'c1\'c2\'c3\'c4\par
+\pard\f14\fs26 \'c0\'c1\'c2\'c3\'c4\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport4.cxx b/sw/qa/extras/rtfexport/rtfexport4.cxx
index 8f81b1565695..2d036538d6fe 100644
--- a/sw/qa/extras/rtfexport/rtfexport4.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport4.cxx
@@ -492,6 +492,67 @@ CPPUNIT_TEST_FIXTURE(Test, testClearingBreak)
verify();
}
+DECLARE_RTFEXPORT_TEST(testTdf95706, "tdf95706.rtf")
+{ // Paragraph 2: "Arial Baltic" (each checked paragraph holds bytes 0xC0..0xC4 in one font).
+ uno::Reference<text::XTextRange> xRun2
+ = getRun(getParagraph(2), 1, u"\u0104\u012e\u0100\u0106\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun2, "CharFontName"));
+ // Paragraph 3: "Arial CE".
+ uno::Reference<text::XTextRange> xRun3
+ = getRun(getParagraph(3), 1, u"\u0154\u00c1\u00c2\u0102\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun3, "CharFontName"));
+ // Paragraph 4: "Arial Cyr".
+ uno::Reference<text::XTextRange> xRun4
+ = getRun(getParagraph(4), 1, u"\u0410\u0411\u0412\u0413\u0414");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun4, "CharFontName"));
+ // Paragraph 5: "Arial Greek".
+ uno::Reference<text::XTextRange> xRun5
+ = getRun(getParagraph(5), 1, u"\u0390\u0391\u0392\u0393\u0394");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun5, "CharFontName"));
+ // Paragraph 6: "Arial Tur".
+ uno::Reference<text::XTextRange> xRun6
+ = getRun(getParagraph(6), 1, u"\u00c0\u00c1\u00c2\u00c3\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun6, "CharFontName"));
+ // Paragraph 7: "Arial (Hebrew)"; only the decoded text is checked.
+ uno::Reference<text::XTextRange> xRun7
+ = getRun(getParagraph(7), 1, u"\u05b0\u05b1\u05b2\u05b3\u05b4");
+ // Do not check the font for Hebrew: it can be substituted by something able to handle these chars
+ //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun7, "CharFontName"));
+ // Paragraph 8: "Arial (Arabic)"; only the decoded text is checked.
+ uno::Reference<text::XTextRange> xRun8
+ = getRun(getParagraph(8), 1, u"\u06c1\u0621\u0622\u0623\u0624");
+ // Do not check the font for Arabic: it can be substituted by something able to handle these chars
+ //CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun8, "CharFontName"));
+ // Paragraph 9: "Arial (Vietnamese)".
+ uno::Reference<text::XTextRange> xRun9
+ = getRun(getParagraph(9), 1, u"\u00c0\u00c1\u00c2\u0102\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun9, "CharFontName"));
+
+ // Paragraph 10: an unknown suffix must stay in the font name. Content is not checked: it can vary by locale
+ uno::Reference<text::XTextRange> xRun10 = getRun(getParagraph(10), 1);
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial BlaBlaBla"),
+ getProperty<OUString>(xRun10, "CharFontName"));
+ // Paragraph 12: plain "Arial" whose font table entry carries cpg1253 and fcharset161.
+ uno::Reference<text::XTextRange> xRun12
+ = getRun(getParagraph(12), 1, u"\u0390\u0391\u0392\u0393\u0394");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun12, "CharFontName"));
+ // Paragraph 13: plain "Arial" with fcharset161 only.
+ uno::Reference<text::XTextRange> xRun13
+ = getRun(getParagraph(13), 1, u"\u0390\u0391\u0392\u0393\u0394");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun13, "CharFontName"));
+ // Paragraph 14: plain "Arial" with cpg1253 only; just the font name is checked, not the text.
+ uno::Reference<text::XTextRange> xRun14 = getRun(getParagraph(14), 1);
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun14, "CharFontName"));
+ // Paragraph 15: "Arial Baltic" with cpg1253 and fcharset161; same text expected as paragraph 2.
+ uno::Reference<text::XTextRange> xRun15
+ = getRun(getParagraph(15), 1, u"\u0104\u012e\u0100\u0106\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun15, "CharFontName"));
+ // Paragraph 16: "Arial Baltic;" followed by cpg1253 and fcharset161; same text as paragraph 2.
+ uno::Reference<text::XTextRange> xRun16
+ = getRun(getParagraph(16), 1, u"\u0104\u012e\u0100\u0106\u00c4");
+ CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun16, "CharFontName"));
+}
+
DECLARE_RTFEXPORT_TEST(testTdf111851, "tdf111851.rtf")
{
uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), uno::UNO_QUERY);