summary | refs | log | tree | commit | diff
path: root/svtools
diff options
context:
space:
mode:
author Eike Rathke <erack@redhat.com> 2013-05-16 23:53:30 +0200
committer Andras Timar <andras.timar@collabora.com> 2013-11-05 10:46:38 +0100
commit 910f231d5793e9462d1ac2e0079284dc61e06d38 (patch)
tree 5c234386669b2539464f1450f025c2ce4788236c /svtools
parent 106da44a9a22e4270aa63992ab2b2fd706b825b7 (diff)
resolved fdo#56772 keep track of HTML ON/OFF tokens
Regression introduced with 11cbcb8b08b540b144a5df744e9fba0b6ba8144a followed by 56d6589368c2e88cffec0c2e518f7c90863eeae6.

Deep inside svtools/source/svhtml/parhtml.cxx, HTMLParser::_GetNextToken() generated only an HTML_TABLEDATA_OFF for <td .../> without a matching HTML_TABLEDATA_ON (actually the same for all <XXX/> ON/OFF tokens). This confuses a state machine that doesn't keep track of such unexpected closures and also expects all attributes of an element at an ON token. Only the parser knows this is actually one token, but it needs to generate separate ON/OFF tokens.

The bugs mentioned in the original code and commits are still fixed with this change:
https://bugs.freedesktop.org/show_bug.cgi?id=34666
https://bugs.freedesktop.org/show_bug.cgi?id=36080
https://bugs.freedesktop.org/show_bug.cgi?id=36390

Change-Id: I2b3190d297a35ee3dfda95f9a4841f7c53ed4a92
(cherry picked from commit bb7360ca9929e9b395b3c903f460c9ed5efdce4d)
Reviewed-on: https://gerrit.libreoffice.org/3925
Reviewed-by: Fridrich Strba <fridrich@documentfoundation.org>
Tested-by: Fridrich Strba <fridrich@documentfoundation.org>
Diffstat (limited to 'svtools')
-rw-r--r-- svtools/inc/svtools/parhtml.hxx 2
-rw-r--r-- svtools/source/svhtml/parhtml.cxx 22
2 files changed, 20 insertions, 4 deletions
diff --git a/svtools/inc/svtools/parhtml.hxx b/svtools/inc/svtools/parhtml.hxx
index f8c0c61729f8..fdfeeab9e4ee 100644
--- a/svtools/inc/svtools/parhtml.hxx
+++ b/svtools/inc/svtools/parhtml.hxx
@@ -144,6 +144,8 @@ private:
sal_uInt32 nPre_LinePos; // Pos in der Line im PRE-Tag
+ int mnPendingOffToken; ///< OFF token pending for a <XX.../> ON/OFF ON token
+
String aEndToken;
protected:
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 68232e5db2a4..91bae59e65c8 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -297,7 +297,8 @@ HTMLParser::HTMLParser( SvStream& rIn, bool bReadNewDoc ) :
bEndTokenFound(false),
bPre_IgnoreNewPara(false),
bReadNextChar(false),
- bReadComment(false)
+ bReadComment(false),
+ mnPendingOffToken(0)
{
//#i76649, default to UTF-8 for HTML unless we know differently
SetSrcEncoding(RTL_TEXTENCODING_UTF8);
@@ -1057,6 +1058,15 @@ int HTMLParser::_GetNextToken()
int nRet = 0;
sSaveToken.Erase();
+ if (mnPendingOffToken)
+ {
+ // HTML_<TOKEN>_OFF generated for HTML_<TOKEN>_ON
+ nRet = mnPendingOffToken;
+ mnPendingOffToken = 0;
+ aToken.Erase();
+ return nRet;
+ }
+
// Delete options
if (!maOptions.empty())
maOptions.clear();
@@ -1204,10 +1214,14 @@ int HTMLParser::_GetNextToken()
ScanText( '>' );
// fdo#34666 fdo#36080 fdo#36390: closing "/>"?:
- // return HTML_<TOKEN>_OFF instead of HTML_<TOKEN>_ON
+ // generate pending HTML_<TOKEN>_OFF for HTML_<TOKEN>_ON
+ // Do not convert this to a single HTML_<TOKEN>_OFF
+ // which led to fdo#56772.
if ((HTML_TOKEN_ONOFF & nRet) && (aToken.Len() >= 1) &&
- ('/' == aToken.GetChar(aToken.Len()-1))) {
- ++nRet; // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF;
+ ('/' == aToken.GetChar(aToken.Len()-1)))
+ {
+ mnPendingOffToken = nRet + 1; // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF
+ aToken.Erase( aToken.Len()-1, 1); // remove trailing '/'
}
if( sal_Unicode(EOF) == nNextCh && rInput.IsEof() )
{