diff options
author | Markus Mohrhard <markus.mohrhard@googlemail.com> | 2017-08-14 13:39:50 +0200 |
---|---|---|
committer | Markus Mohrhard <markus.mohrhard@googlemail.com> | 2017-08-14 22:08:55 +0200 |
commit | 9f9f0fafe0549f4073b1fcc493f1614c2f3631e0 (patch) | |
tree | 8351c7588504721b6f5ae7a78002654941c8d4bb /sc/source | |
parent | 16b3a46ce0c94f7dfba8d391dd48ee82ba0f1d0f (diff) |
external data: handle child elements of td correctly in html provider
Change-Id: Id4451c13931eaf52adebca13fd237ba6d73a880c
Reviewed-on: https://gerrit.libreoffice.org/41145
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Markus Mohrhard <markus.mohrhard@googlemail.com>
Diffstat (limited to 'sc/source')
-rw-r--r-- | sc/source/ui/dataprovider/htmldataprovider.cxx | 63 |
1 files changed, 51 insertions, 12 deletions
```diff
diff --git a/sc/source/ui/dataprovider/htmldataprovider.cxx b/sc/source/ui/dataprovider/htmldataprovider.cxx
index c73efee0260c..4353d59e0364 100644
--- a/sc/source/ui/dataprovider/htmldataprovider.cxx
+++ b/sc/source/ui/dataprovider/htmldataprovider.cxx
@@ -55,29 +55,65 @@ OString toString(const xmlChar* pStr)
     return OString(reinterpret_cast<const char*>(pStr), xmlStrlen(pStr));
 }
 
+OUString trim_string(const OUString& aStr)
+{
+    OUString aOldString;
+    OUString aString = aStr;
+    do
+    {
+        aOldString = aString;
+        aString = comphelper::string::strip(aString, ' ');
+        aString = comphelper::string::strip(aString, '\n');
+        aString = comphelper::string::strip(aString, '\r');
+        aString = comphelper::string::strip(aString, '\t');
+    }
+    while (aOldString != aString);
+
+    return aString;
+}
+
+OUString get_node_str(xmlNodePtr pNode)
+{
+    OUStringBuffer aStr;
+    for (xmlNodePtr cur_node = pNode->children; cur_node; cur_node = cur_node->next)
+    {
+        if (cur_node->type == XML_TEXT_NODE)
+        {
+            OUString aString = OStringToOUString(toString(cur_node->content), RTL_TEXTENCODING_UTF8);
+            aStr.append(trim_string(aString));
+        }
+        else if (cur_node->type == XML_ELEMENT_NODE)
+        {
+            aStr.append(get_node_str(cur_node));
+        }
+    }
+
+    return aStr.makeStringAndClear();
+}
+
 }
 
 void HTMLFetchThread::handleCell(xmlNodePtr pCellNode, SCROW nRow, SCCOL nCol)
 {
+    OUStringBuffer aStr;
     for (xmlNodePtr cur_node = pCellNode->children; cur_node; cur_node = cur_node->next)
     {
         if (cur_node->type == XML_TEXT_NODE)
         {
             OUString aString = OStringToOUString(toString(cur_node->content), RTL_TEXTENCODING_UTF8);
-            OUString aOldString;
-            do
-            {
-                aOldString = aString;
-                aString = comphelper::string::strip(aString, ' ');
-                aString = comphelper::string::strip(aString, '\n');
-                aString = comphelper::string::strip(aString, '\r');
-                aString = comphelper::string::strip(aString, '\t');
-            }
-            while (aOldString != aString);
-
-            mrDocument.SetString(nCol, nRow, 0, aString);
+            aStr.append(trim_string(aString));
+        }
+        else if (cur_node->type == XML_ELEMENT_NODE)
+        {
+            aStr.append(get_node_str(cur_node));
         }
     }
+
+    if (!aStr.isEmpty())
+    {
+        OUString aCellStr = aStr.makeStringAndClear();
+        mrDocument.SetString(nCol, nRow, 0, aCellStr);
+    }
 }
 
 void HTMLFetchThread::handleRow(xmlNodePtr pRowNode, SCROW nRow)
@@ -140,6 +176,9 @@ void HTMLFetchThread::execute()
     OStringBuffer aBuffer(64000);
     std::unique_ptr<SvStream> pStream = DataProvider::FetchStreamFromURL(maURL, aBuffer);
 
+    if (aBuffer.isEmpty())
+        return;
+
     htmlDocPtr pHtmlPtr = htmlParseDoc(reinterpret_cast<xmlChar*>(const_cast<char*>(aBuffer.getStr())), nullptr);
     OString aID = OUStringToOString(maID, RTL_TEXTENCODING_UTF8);
 
```