summary | refs | log | tree | commit | diff
path: root/filter/source/textfilterdetect
diff options
context:
space:
mode:
author: Miklos Vajna <vmiklos@collabora.co.uk> 2017-12-13 09:49:41 +0100
committer: Miklos Vajna <vmiklos@collabora.co.uk> 2017-12-13 14:44:15 +0100
commit: 4af729f31c64c09c76ea8bcfa5067092571b92de (patch)
tree: 555e540d0d7564218fc9f31486a59e8119e3cd50 /filter/source/textfilterdetect
parent: 1d6b85f85925c3b4d2d2bb8eaf237b10bb8f7d60 (diff)
tdf#114428 filter: recognize XHTML with XML declaration as HTML
The problem was the additional <?xml version="1.0" encoding="utf-8"?> XML declaration before the usual <!DOCTYPE html ... line; just ignore it.

Change-Id: I294aae5504b40b42f76da00fef645d0d89009da9
Reviewed-on: https://gerrit.libreoffice.org/46324
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'filter/source/textfilterdetect')
-rw-r--r-- filter/source/textfilterdetect/filterdetect.cxx 24
1 file changed, 21 insertions, 3 deletions
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index d2f8fb0bd54c..2e3e08028683 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -58,6 +58,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
// Now check whether the stream begins with a known HTML tag.
enum DetectPhase { BeforeTag, TagOpened, InTagName };
DetectPhase dp = BeforeTag;
+ /// BeforeDeclaration -> ? -> DeclarationOpened -> > -> BeforeDeclaration.
+ enum DeclarationPhase
+ {
+ BeforeDeclaration,
+ DeclarationOpened
+ };
+ DeclarationPhase eDeclaration = BeforeDeclaration;
const char* pHeader = sHeader.getStr();
const int nLength = sHeader.getLength();
@@ -66,7 +73,8 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
for ( i = 0; i < nLength; ++i, ++pHeader )
{
char c = *pHeader;
- if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
+ if ((c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f')
+ && eDeclaration == BeforeDeclaration)
{
if ( dp == TagOpened )
return false; // Invalid: Should start with a tag name
@@ -84,6 +92,11 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
{
if ( dp == InTagName )
break; // End of tag name reached
+ else if (eDeclaration == DeclarationOpened)
+ {
+ dp = BeforeTag;
+ eDeclaration = BeforeDeclaration;
+ }
else
return false; // Invalid: Empty tag or before '<'
}
@@ -100,8 +113,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
return false; // Invalid: Should start with a tag
else if ( dp == TagOpened )
{
- nStartOfTagIndex = i;
- dp = InTagName;
+ if (c == '?' && eDeclaration == BeforeDeclaration)
+ eDeclaration = DeclarationOpened;
+ else if (eDeclaration == BeforeDeclaration)
+ {
+ nStartOfTagIndex = i;
+ dp = InTagName;
+ }
}
}
}