summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Miklos Vajna <vmiklos@collabora.com> 2022-10-25 15:55:34 +0200
committer Miklos Vajna <vmiklos@collabora.com> 2022-10-25 18:15:47 +0200
commit b38730ae0ae92ca49b84a45853c2ed098ee9064f (patch)
tree a08c26370a2b73fe6c56b395bffb0b197a956789 /include
parent d55358c7c31e2e9f124ee90d78eba2db3f1af756 (diff)
sw html import: fix handling of CDATA
In case the HTML contained markup like <![CDATA[...]]>, we simply ignored it during import, even though e.g. the ODT import handles that correctly. The reason for this is that the svtools/ HTMLParser had code to parse <!-- ... --> style comments, but not CDATA. Fix the problem by introducing a new HtmlTokenId::CDATA, producing matching token content in HTMLParser::GetNextToken_(), and finally mapping it to normal text on the Writer side. Note that HtmlTokenId doesn't allow non-on-off tokens past ONOFF_START, nor does it allow inserting a single token before ONOFF_START (that breaks getOnToken()), so for now just add a second, dummy token to avoid breakage. Change-Id: I605c3c21dc11986fda5d93d36148788a638e97b4 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141813 Reviewed-by: Miklos Vajna <vmiklos@collabora.com> Tested-by: Jenkins
Diffstat (limited to 'include')
-rw-r--r-- include/svtools/htmlkywd.hxx 1
-rw-r--r-- include/svtools/htmltokn.h 2
2 files changed, 3 insertions, 0 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 5d6b7e629fe7..9a84cddd37bf 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -32,6 +32,7 @@
#define OOO_STRING_SVTOOLS_HTML_base "base"
#define OOO_STRING_SVTOOLS_HTML_comment "!--"
#define OOO_STRING_SVTOOLS_HTML_doctype "!DOCTYPE"
+#define OOO_STRING_SVTOOLS_HTML_cdata "![cdata["
#define OOO_STRING_SVTOOLS_HTML_embed "embed"
#define OOO_STRING_SVTOOLS_HTML_horzrule "hr"
#define OOO_STRING_SVTOOLS_HTML_image "img"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index bfa1f14d6812..9dca8a8f3ea7 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -58,6 +58,8 @@ enum class HtmlTokenId : sal_Int16
AREA, // Netscape 2.0
BASE, // HTML 3.0
COMMENT,
+ CDATA,
+ DUMMY, // so ONOFF_START is even
DOCTYPE,
EMBED, // Netscape 2.0 ignore </EMBED>
HORZRULE, // ignore </HR>