diff options
author | Miklos Vajna <vmiklos@collabora.co.uk> | 2017-12-13 09:49:41 +0100 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.co.uk> | 2017-12-13 14:44:15 +0100 |
commit | 4af729f31c64c09c76ea8bcfa5067092571b92de (patch) | |
tree | 555e540d0d7564218fc9f31486a59e8119e3cd50 /filter/source/textfilterdetect | |
parent | 1d6b85f85925c3b4d2d2bb8eaf237b10bb8f7d60 (diff) |
tdf#114428 filter: recognize XHTML with XML declaration as HTML
The problem was the additional
<?xml version="1.0" encoding="utf-8"?>
XML declaration before the usual
<!DOCTYPE html ...
line; just ignore that declaration.
Change-Id: I294aae5504b40b42f76da00fef645d0d89009da9
Reviewed-on: https://gerrit.libreoffice.org/46324
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'filter/source/textfilterdetect')
-rw-r--r-- | filter/source/textfilterdetect/filterdetect.cxx | 24 |
1 file changed, 21 insertions, 3 deletions
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index d2f8fb0bd54c..2e3e08028683 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -58,6 +58,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
     // Now check whether the stream begins with a known HTML tag.
     enum DetectPhase { BeforeTag, TagOpened, InTagName };
     DetectPhase dp = BeforeTag;
+    /// BeforeDeclaration -> ? -> DeclarationOpened -> > -> BeforeDeclaration.
+    enum DeclarationPhase
+    {
+        BeforeDeclaration,
+        DeclarationOpened
+    };
+    DeclarationPhase eDeclaration = BeforeDeclaration;
 
     const char* pHeader = sHeader.getStr();
     const int nLength = sHeader.getLength();
@@ -66,7 +73,8 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
     for ( i = 0; i < nLength; ++i, ++pHeader )
     {
         char c = *pHeader;
-        if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
+        if ((c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f')
+            && eDeclaration == BeforeDeclaration)
         {
             if ( dp == TagOpened )
                 return false; // Invalid: Should start with a tag name
@@ -84,6 +92,11 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
         {
             if ( dp == InTagName )
                 break; // End of tag name reached
+            else if (eDeclaration == DeclarationOpened)
+            {
+                dp = BeforeTag;
+                eDeclaration = BeforeDeclaration;
+            }
             else
                 return false; // Invalid: Empty tag or before '<'
         }
@@ -100,8 +113,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
                 return false; // Invalid: Should start with a tag
             else if ( dp == TagOpened )
             {
-                nStartOfTagIndex = i;
-                dp = InTagName;
+                if (c == '?' && eDeclaration == BeforeDeclaration)
+                    eDeclaration = DeclarationOpened;
+                else if (eDeclaration == BeforeDeclaration)
+                {
+                    nStartOfTagIndex = i;
+                    dp = InTagName;
+                }
             }
         }
     }