diff options
author | Miklos Vajna <vmiklos@collabora.com> | 2022-10-25 15:55:34 +0200 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.com> | 2022-10-25 18:15:47 +0200 |
commit | b38730ae0ae92ca49b84a45853c2ed098ee9064f (patch) | |
tree | a08c26370a2b73fe6c56b395bffb0b197a956789 /include | |
parent | d55358c7c31e2e9f124ee90d78eba2db3f1af756 (diff) |
sw html import: fix handling of CDATA
When the HTML contained markup like <![CDATA[...]]>, we simply
ignored it during import, even though e.g. the ODT import handles it
correctly.
The reason for this is that the svtools/ HTMLParser had code to parse
<!-- ... --> style comments, but not CDATA sections.
Fix the problem by introducing a new HtmlTokenId::CDATA, producing
matching token content in HTMLParser::GetNextToken_(), and finally
mapping it to normal text on the Writer side.
Note that HtmlTokenId doesn't allow non-on-off tokens past ONOFF_START,
nor does it allow inserting a single token before ONOFF_START (that
would make ONOFF_START odd, which breaks getOnToken()), so for now just
add a second, dummy token to avoid breakage.
Change-Id: I605c3c21dc11986fda5d93d36148788a638e97b4
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141813
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Tested-by: Jenkins
Diffstat (limited to 'include')
-rw-r--r-- | include/svtools/htmlkywd.hxx | 1 | ||||
-rw-r--r-- | include/svtools/htmltokn.h | 2 |
2 files changed, 3 insertions, 0 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx index 5d6b7e629fe7..9a84cddd37bf 100644 --- a/include/svtools/htmlkywd.hxx +++ b/include/svtools/htmlkywd.hxx @@ -32,6 +32,7 @@ #define OOO_STRING_SVTOOLS_HTML_base "base" #define OOO_STRING_SVTOOLS_HTML_comment "!--" #define OOO_STRING_SVTOOLS_HTML_doctype "!DOCTYPE" +#define OOO_STRING_SVTOOLS_HTML_cdata "![cdata[" #define OOO_STRING_SVTOOLS_HTML_embed "embed" #define OOO_STRING_SVTOOLS_HTML_horzrule "hr" #define OOO_STRING_SVTOOLS_HTML_image "img" diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h index bfa1f14d6812..9dca8a8f3ea7 100644 --- a/include/svtools/htmltokn.h +++ b/include/svtools/htmltokn.h @@ -58,6 +58,8 @@ enum class HtmlTokenId : sal_Int16 AREA, // Netscape 2.0 BASE, // HTML 3.0 COMMENT, + CDATA, + DUMMY, // so ONOFF_START is even DOCTYPE, EMBED, // Netscape 2.0 ignore </EMBED> HORZRULE, // ignore </HR> |