diff options
author | Michael Stahl <Michael.Stahl@cib.de> | 2019-10-29 15:54:41 +0100 |
---|---|---|
committer | Michael Stahl <michael.stahl@allotropia.de> | 2021-01-25 14:42:02 +0100 |
commit | 6d3c50331eb1cabf1aa0c71100fb7b5fdbe681d7 (patch) | |
tree | 555e5a50783f79ab09cbe0428f09d1c3fd13d1f9 | |
parent | fc032ed3eb05bfba1dcd07bce566346b3f6f2fb9 (diff) |
writerfilter: rtftok: filter control characters
... in RTFDocumentImpl::checkUnicode(); see ooo86460-1.xls [sic]
for an example.
There is another caller of text() in rtfdispatchdestination.cxx:311, but
it turns out that the buffered text was created by text() in the first
place.
This shouldn't be a problem for DOCX, because XML 1.0 doesn't allow the
bad control characters anyway, so the SAX parser should report an error
in that case.
Reviewed-on: https://gerrit.libreoffice.org/81697
Reviewed-by: Michael Stahl <michael.stahl@cib.de>
Tested-by: Michael Stahl <michael.stahl@cib.de>
(cherry picked from commit a6516c76c01b92f7d35bfb352b63af7de42b5707)
Change-Id: Ice45e1c3c8c7db668a4cfb8364e42addea1777ce
-rw-r--r-- | writerfilter/source/rtftok/rtfdocumentimpl.cxx | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 4094dc97dacf..fc6773203cf2 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -26,6 +26,7 @@
 #include <tools/datetimeutils.hxx>
 #include <comphelper/classids.hxx>
 #include <comphelper/embeddedobjectcontainer.hxx>
+#include <svl/lngmisc.hxx>
 #include <sfx2/sfxbasemodel.hxx>
 #include <sfx2/classificationhelper.hxx>
 #include <oox/mathml/import.hxx>
@@ -3413,11 +3414,34 @@ bool RTFDocumentImpl::getSkipUnknown() { return m_bSkipUnknown; }
 
 void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown) { m_bSkipUnknown = bSkipUnknown; }
 
+static auto FilterControlChars(Destination const destination, OUString const& rString) -> OUString
+{
+    if (destination == Destination::LEVELNUMBERS || destination == Destination::LEVELTEXT)
+    { // control characters are magic here!
+        return rString;
+    }
+    OUStringBuffer buf(rString.getLength());
+    for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+    {
+        sal_Unicode const ch(rString[i]);
+        if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch == '\t')
+        {
+            buf.append(ch);
+        }
+        else
+        {
+            SAL_INFO("writerfilter.rtf", "filtering control character");
+        }
+    }
+    return buf.makeStringAndClear();
+}
+
 void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
 {
     if (bUnicode && !m_aUnicodeBuffer.isEmpty())
     {
         OUString aString = m_aUnicodeBuffer.makeStringAndClear();
+        aString = FilterControlChars(m_aStates.top().eDestination, aString);
         text(aString);
     }
     if (bHex && !m_aHexBuffer.isEmpty())
@@ -3427,6 +3451,7 @@ void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
             && m_aStates.top().nCurrentEncoding == RTL_TEXTENCODING_SYMBOL)
             nEncoding = RTL_TEXTENCODING_MS_1252;
         OUString aString = OStringToOUString(m_aHexBuffer.makeStringAndClear(), nEncoding);
+        aString = FilterControlChars(m_aStates.top().eDestination, aString);
         text(aString);
     }
 }