summary | refs | log | tree | commit | diff
path: root/filter/source
diff options
context:
space:
mode:
author Miklos Vajna <vmiklos@collabora.co.uk> 2017-12-13 09:49:41 +0100
committer Caolán McNamara <caolanm@redhat.com> 2018-01-27 21:03:41 +0100
commit 6aa65f7664fe0dbe8c9d4ba7f320ef216e928780 (patch)
tree 37309a725d654203608903bcf07c21269d6551c5 /filter/source
parent f937a432c2351852e8b237c6e11dd9e43a2b28c9 (diff)
tdf#114428 filter: recognize XHTML with XML declaration as HTML
The problem was the additional <?xml version="1.0" encoding="utf-8"?> XML declaration before the usual <!DOCTYPE html ... line; just ignore it.

Change-Id: I294aae5504b40b42f76da00fef645d0d89009da9
Reviewed-on: https://gerrit.libreoffice.org/46324
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
(cherry picked from commit 4af729f31c64c09c76ea8bcfa5067092571b92de)
Reviewed-on: https://gerrit.libreoffice.org/47587
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
Diffstat (limited to 'filter/source')
-rw-r--r-- filter/source/textfilterdetect/filterdetect.cxx 24
1 files changed, 21 insertions, 3 deletions
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index ee93d28ddbed..3228ca53f62f 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -58,6 +58,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
// Now check whether the stream begins with a known HTML tag.
enum DetectPhase { BeforeTag, TagOpened, InTagName };
DetectPhase dp = BeforeTag;
+ /// BeforeDeclaration -> ? -> DeclarationOpened -> > -> BeforeDeclaration.
+ enum DeclarationPhase
+ {
+ BeforeDeclaration,
+ DeclarationOpened
+ };
+ DeclarationPhase eDeclaration = BeforeDeclaration;
const char* pHeader = sHeader.getStr();
const int nLength = sHeader.getLength();
@@ -66,7 +73,8 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
for ( i = 0; i < nLength; ++i, ++pHeader )
{
char c = *pHeader;
- if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
+ if ((c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f')
+ && eDeclaration == BeforeDeclaration)
{
if ( dp == TagOpened )
return false; // Invalid: Should start with a tag name
@@ -84,6 +92,11 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
{
if ( dp == InTagName )
break; // End of tag name reached
+ else if (eDeclaration == DeclarationOpened)
+ {
+ dp = BeforeTag;
+ eDeclaration = BeforeDeclaration;
+ }
else
return false; // Invalid: Empty tag or before '<'
}
@@ -100,8 +113,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
return false; // Invalid: Should start with a tag
else if ( dp == TagOpened )
{
- nStartOfTagIndex = i;
- dp = InTagName;
+ if (c == '?' && eDeclaration == BeforeDeclaration)
+ eDeclaration = DeclarationOpened;
+ else if (eDeclaration == BeforeDeclaration)
+ {
+ nStartOfTagIndex = i;
+ dp = InTagName;
+ }
}
}
}