summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKohei Yoshida <kohei.yoshida@gmail.com>2018-03-03 09:17:40 -0500
committerKohei Yoshida <kohei.yoshida@gmail.com>2018-03-03 09:23:47 -0500
commit01087bff5cb26c045f90692c2d3b4deab9071a53 (patch)
tree04e7e2fa8f3a4368eadb5b378bec982651a86484
parentef2e27538e335583ef3ff85c4bc4f512efc72eb5 (diff)
downloadorcus-0.13.tar.gz
Be lenient on XML stream with leading whitespace. orcus-0.13
This fixes #52. In the future we should make this configurable, either build-time or run-time. (cherry picked from commit 230e43033178cba61ac3e476de8ab858708e2591)
-rw-r--r--src/parser/sax_parser_base.cpp8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp
index bb53e417..d8a52054 100644
--- a/src/parser/sax_parser_base.cpp
+++ b/src/parser/sax_parser_base.cpp
@@ -134,7 +134,7 @@ cell_buffer& parser_base::get_cell_buffer()
void parser_base::blank()
{
- char c = cur_char();
+ char c = cur_char_checked();
while (is_blank(c))
{
next();
@@ -181,6 +181,12 @@ void parser_base::skip_bom()
// Stream too short to have a byte order mark.
return;
+ if (is_blank(cur_char()))
+ // Allow leading whitespace in the XML stream.
+ // TODO : Make this configurable since strictly speaking such an XML
+ // stream is invalid.
+ return;
+
// 0xef 0xbb 0xbf is the UTF-8 byte order mark
unsigned char c = static_cast<unsigned char>(cur_char());
if (c != '<')