diff options
author | Miklos Vajna <vmiklos@collabora.co.uk> | 2017-12-15 17:24:41 +0100 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.co.uk> | 2017-12-15 23:50:49 +0100 |
commit | aad9c6da5154a89c6ef02214d1122d4b444eea23 (patch) | |
tree | 0caf34a8536e00bc5a5b2d1f2300d814635e238d | |
parent | 15d134b4f57e66faa8bcf538a08db98dc9204c54 (diff) |
sw HTML export: add a filter option to produce XHTML
Add initial support for writing XHTML markup as part of the HTML filter.
This already emits valid XHTML for hello world documents.
Time spent in ODT-load + export + close for 100 hello world inputs:
16032 ms -> 9957 ms (62% of original).
Change-Id: I51a0a20985958fbc817c196d3a966e55dcb3f13f
Reviewed-on: https://gerrit.libreoffice.org/46567
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
-rw-r--r-- | include/svtools/htmlkywd.hxx | 4 | ||||
-rw-r--r-- | sw/qa/extras/htmlexport/data/hello.html | 8 | ||||
-rw-r--r-- | sw/qa/extras/htmlexport/htmlexport.cxx | 17 | ||||
-rw-r--r-- | sw/source/filter/html/wrthtml.cxx | 15 | ||||
-rw-r--r-- | sw/source/filter/html/wrthtml.hxx | 2 |
5 files changed, 43 insertions, 3 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx index 4cc24949fe2d..37a77d938a51 100644 --- a/include/svtools/htmlkywd.hxx +++ b/include/svtools/htmlkywd.hxx @@ -24,6 +24,9 @@ #define OOO_STRING_SVTOOLS_HTML_doctype40 \ "HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"" +#define OOO_STRING_SVTOOLS_XHTML_doctype11 \ + "html PUBLIC \"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN\" " \ + "\"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd\"" // these are only switched on #define OOO_STRING_SVTOOLS_HTML_area "area" @@ -507,6 +510,7 @@ #define OOO_STRING_SVTOOLS_HTML_O_format "format" #define OOO_STRING_SVTOOLS_HTML_O_frame "frame" #define OOO_STRING_SVTOOLS_HTML_O_lang "lang" +#define OOO_STRING_SVTOOLS_XHTML_O_lang "xml:lang" #define OOO_STRING_SVTOOLS_HTML_O_method "method" #define OOO_STRING_SVTOOLS_HTML_O_rel "rel" #define OOO_STRING_SVTOOLS_HTML_O_rev "rev" diff --git a/sw/qa/extras/htmlexport/data/hello.html b/sw/qa/extras/htmlexport/data/hello.html new file mode 100644 index 000000000000..bc4180d17bf7 --- /dev/null +++ b/sw/qa/extras/htmlexport/data/hello.html @@ -0,0 +1,8 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html> + <head> + <title>Title of document</title> + </head> + <body>hello world</body> +</html> diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index 1ea03e880de7..74ffc3818685 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -48,6 +48,8 @@ private: setFilterOptions("SkipImages"); else if (getTestName().indexOf("EmbedImages") != -1) setFilterOptions("EmbedImages"); + else if (getTestName().indexOf("XHTML") != -1) + setFilterOptions("XHTML"); else setFilterOptions(""); @@ -306,6 +308,21 @@ DECLARE_HTMLEXPORT_TEST(testEmbedImagesEnabled, "textAndImage.docx") assertXPath(pDoc, "/html/body/p/img", "src", 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAAA3ZAAAN2QHmodeGAAAFyUlEQVR4nO1Za2gcVRQ+szuzO9ndZtfEktY2tVorVdNKxRpQweqPmh/B1w8JhCL4R4RYiPhCfKFEfwX/2VYULJIqJFDqg4KC1EaoEoyGhFJClBgbN+a9z9ndeXnOzJ3szCbbQuY2i5ADh5udmXvP953z3cdMRNM04f9sYq0B+LVNArW2TQJXM0EQdmKTQL+Mi4V2PWJwJ4CgBUkKdImi9F48HpGikTptfj4l19fLX2cyxeeQyALPeNwJxOOx/r17djz6+afdsX233QBg5CCXWYS3es62n/js4mXkdyeSmOMVjysBURQ7djVvbRv87p2oLJUAtHmLQDSUg963W8MhMSscPzXWh48e4RaT10BkKJkXens6orKYRvBZC7zlut2+231r6MOTI4exCgmswjKPmFwJFIvFu+5pqbczr7vB22QkMwd7d8vK2Hj+AD5+gUdMrgRCUnD+yt9T8ZubGsrAHSKMTHK2SDGTvGJyJaBp2g/9Z4abH7j7YMjJupvA0Mgi5BVDxUcneMXkSiCTVV/75ItLTx1qCTR2PpbwEJiYTMHjXamcUjSfMTmeIH0TwAm5H5s96JPou3N57f3n3xh99URfuL79IVmOR1UYGlXUL89phqrCabuLcAjbRvQCcjlfMwII5PCWaPib1ntv0S4O/Skn4jHhjn27FMPQ4d/ZJePjgWzJNExBDCW0gwciWp0ceLpQLHaMjM3UNTeZhcU0CLGI8FE2b75SEwJhSWx/ueuR6JvdD8Kx18+Yl/4oCN8PHEP9KyibAoC50krO76Hf/4EjnUkYHYDY+F8A93XCszhUbQiQSUHcsPQFJINz00TX5hhodCJS2WpLIAbBDARAkMN+o/OYxEYeQS0y0Gz3rQTtJmQs+0ftMp8EDFsm+qLdmvhbn7cBr8o+k5Se5oOcGZ8K6E4FBHb+8ejfS0jLcYBdNn4SInBm0CuhSiIkMc7fEPxLiIDqKgMZYgQqdA86F7Brmf8KmCQhzV6BKLvWHGBy2QDzEKAtEpsgu04eoMvsNqVRY62OO6hhXbXAsieIhL60AbDLZhFA3AQa6w8xsN9hybegy+wZAkspzaAvk2OfTFhaIcds4z+SiQiEABLwrejb0ZvQ8VBvESJiDkgigbPQIjGLfoU9U1NzZCKBnW0iEkEPu+6RUWqJCAGuQ4+CXaXIxsJdbQSSdE27iyMTEjG+jVsSIrBEziFCz1IViOBOzYD6C8OgPvEwSIPDYDYkvJJK4nx+qReMvg9W+oOEo2UVEEbGAcYmrMwovgjQZEQZlRgJhwBVIs6c5EQZp2yHGDHygK7Dzz/9Bvtbj8I2/Ft5EVv34HO4PZw97w24rdFqCvcfhZIQgEJOgSd9EUDwjoQINA1/I5QrILN7QZc7RtnWMjk4zn634QGtzT04ZhoQYEDH2gYD5esBAUo5xYz7Ab5CAGx9Oxp3iDQwAtJV+jp9qlo2b7cpnPYNXri0Ygex+r53OJKQiYPReypJiDROmqQDC8khAXYVnNXIWSeFa4F3E1hKewmwQUIYt+D39dLaB9ggJRyQCBQYGZoLtKzS8uosq4Eq46wJwiGwnFmzj8rj3dizE7MJXQS7EhQ+BTaRm8CeHzSZSVZO9t0VWWVZtr5UIUDzz1jZ0XkQsBDZWdFwcOfokAV79yVJUTVirn4kLQnKc8lLoHoFqJI70BcwDj2lrZdI1cPcGrKir8oRKFeAwEcZKfJVExJXKGuiVyFwO/o0+gx6ipby9UjqmqdRlhmSVRGDpKEsF2dnpi/NdPxIVfZdTlukiECw4hYBpaQYbBzn0MifgCeqnSF3EIVViGQwfbIfSl/9CCoDA5PT1vjJ3lOw/fQ5KwmmqtESav39K7rKXF/vhPb9PoBxVUaiZ2YBBtGdsxQZZfmX5AK0oFtAw
c76FPbj8nLM5dMiy14aiXwLthyc5dZgm9UUjzhrGddvo4yIDtfzHbLCNv9LWWvbJFBr2yRQa/sP25LGjrtpN08AAAAASUVORK5CYII="); } +DECLARE_HTMLEXPORT_TEST(testXHTML, "hello.html") +{ + OString aExpected("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"); + SvStream* pStream = maTempFile.GetStream(StreamMode::READ); + CPPUNIT_ASSERT(pStream); + OString aActual(read_uInt8s_ToOString(*pStream, aExpected.getLength())); + // This was HTML, not XHTML. + CPPUNIT_ASSERT_EQUAL(aExpected, aActual); + + htmlDocPtr pDoc = parseHtml(maTempFile); + CPPUNIT_ASSERT(pDoc); + // This was lang, not xml:lang. + assertXPath(pDoc, "/html/body", "xml:lang", "en-US"); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index 1c1a215fd064..fb27971cf060 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -182,6 +182,8 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium) { mbEmbedImages = true; } + else if (sFilterOptions == "XHTML") + mbXHTML = true; } ErrCode SwHTMLWriter::WriteStream() @@ -931,7 +933,10 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs ) OStringBuffer sOut; if (!mbSkipHeaderFooter) { - sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40); + if (mbXHTML) + sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_XHTML_doctype11); + else + sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40); HTMLOutFuncs::Out_AsciiTag( Strm(), sOut.makeStringAndClear().getStr() ); // build prelude @@ -1276,8 +1281,12 @@ void SwHTMLWriter::OutLanguage( LanguageType nLang ) if( LANGUAGE_DONTKNOW != nLang ) { OStringBuffer sOut; - sOut.append(' ').append(OOO_STRING_SVTOOLS_HTML_O_lang) - .append("=\""); + sOut.append(' '); + if (mbXHTML) + sOut.append(OOO_STRING_SVTOOLS_XHTML_O_lang); + else + sOut.append(OOO_STRING_SVTOOLS_HTML_O_lang); + sOut.append("=\""); 
Strm().WriteCharPtr( sOut.makeStringAndClear().getStr() ); HTMLOutFuncs::Out_String( Strm(), LanguageTag::convertToBcp47(nLang), m_eDestEnc, &m_aNonConvertableCharacters ).WriteChar( '"' ); diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index bacea748f300..60171e6dfea4 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -387,6 +387,8 @@ public: /// If HTML header and footer should be written as well, or just the content itself. bool mbSkipHeaderFooter : 1; bool mbEmbedImages : 1; + /// If XHTML markup should be written instead of HTML. + bool mbXHTML = false; #define sCSS2_P_CLASS_leaders "leaders" bool m_bCfgPrintLayout : 1; // PrintLayout option for TOC dot leaders |