diff options
author | Mark Hung <marklh9@gmail.com> | 2015-12-27 00:46:49 +0800 |
---|---|---|
committer | Mark Hung <marklh9@gmail.com> | 2016-02-13 08:05:09 +0000 |
commit | 4647e778993250b8c9431e2890750916fb986ecc (patch) | |
tree | 99d285ec6a33aeca2d9df32d30d2aea801066a37 /sw/qa/extras/htmlexport | |
parent | 3596613153289dae204b5abdc7446b303021f597 (diff) |
tdf#81129 Support reading non-BMP characters in HTML documents.
1. Allow a numeric character entity (&#nnnn;) to exceed 0xffff in HTMLParser::ScanText().
2. Return a character as sal_uInt32 (UTF-32) instead of sal_Unicode (UTF-16)
from SvParser::GetNextChar().
Conflicts:
sw/qa/extras/htmlexport/htmlexport.cxx
Change-Id: Ida455040970fae800f0f11471b27f53461fb78e4
Reviewed-on: https://gerrit.libreoffice.org/21152
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Mark Hung <marklh9@gmail.com>
Diffstat (limited to 'sw/qa/extras/htmlexport')
-rw-r--r-- | sw/qa/extras/htmlexport/data/extb.html | 10 | ||||
-rw-r--r-- | sw/qa/extras/htmlexport/htmlexport.cxx | 13 |
2 files changed, 23 insertions, 0 deletions
diff --git a/sw/qa/extras/htmlexport/data/extb.html b/sw/qa/extras/htmlexport/data/extb.html new file mode 100644 index 000000000000..be73feadf89d --- /dev/null +++ b/sw/qa/extras/htmlexport/data/extb.html @@ -0,0 +1,10 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="UTF-8"/> +</head> +<body> +<p>𤭢</p> +<p>&#x24b62;</p> +</body> +</html> diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index f951a0a57006..69b6b7db6c54 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -272,6 +272,19 @@ DECLARE_HTMLEXPORT_TEST(testTdf83890, "tdf83890.odt") assertXPath(pDoc, "/html/body/ol[2]/ol", "start", "2"); } +DECLARE_HTMLEXPORT_TEST(testExtbChars, "extb.html") +{ + sal_uInt32 nCh = 0x24b62; + OUString aExpected( &nCh, 1); + // Assert that UTF8 encoded non-BMP Unicode character is correct + uno::Reference<text::XTextRange> xTextRange1 = getRun(getParagraph(1), 1); + CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange1->getString()); + + // Assert that non-BMP Unicode in character entity format is correct + uno::Reference<text::XTextRange> xTextRange2 = getRun(getParagraph(2), 1); + CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange2->getString()); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |