diff options
Diffstat (limited to 'svtools/source/svhtml/parhtml.cxx')
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 59 |
1 files changed, 33 insertions, 26 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index 7e8ac63fc61e..d94a24632779 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -377,9 +377,14 @@ namespace { constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; } +constexpr bool HTML_ISSPACE(sal_uInt32 c) +{ + return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c; +} + } -HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) +HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak) { OUStringBuffer sTmpBuffer( MAX_LEN ); bool bContinue = true; @@ -705,37 +710,39 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) { break; } - nNextCh = ' '; + if (!m_bPreserveSpaces) + nNextCh = ' '; [[fallthrough]]; case ' ': - sTmpBuffer.appendUtf32( nNextCh ); - if( '>'!=cBreak && (!bReadListing && !bReadXMP && - !bReadPRE && !bReadTextArea) ) + if (!m_bPreserveSpaces) { - // Reduce sequences of Blanks/Tabs/CR/LF to a single blank - do { - nNextCh = GetNextChar(); - if( sal_Unicode(EOF) == nNextCh && rInput.eof() ) + sTmpBuffer.appendUtf32(nNextCh); + if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea)) + { + // Reduce sequences of Blanks/Tabs/CR/LF to a single blank + do { - if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 ) + nNextCh = GetNextChar(); + if (sal_Unicode(EOF) == nNextCh && rInput.eof()) { - // Have seen s.th. aside from blanks? - aToken.append( sTmpBuffer ); - sTmpBuffer.setLength(0); - return HtmlTokenId::TEXTTOKEN; + if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1) + { + // Have seen s.th. aside from blanks? + aToken.append(sTmpBuffer); + sTmpBuffer.setLength(0); + return HtmlTokenId::TEXTTOKEN; + } + else + // Only read blanks: no text must be returned + // and GetNextToken_ has to read until EOF + return HtmlTokenId::NONE; } - else - // Only read blanks: no text must be returned - // and GetNextToken_ has to read until EOF - return HtmlTokenId::NONE; - } - } while ( ' ' == nNextCh || '\t' == nNextCh || - '\r' == nNextCh || '\n' == nNextCh || - '\x0b' == nNextCh ); - bNextCh = false; + } while (HTML_ISSPACE(nNextCh)); + bNextCh = false; + } + break; } - break; - + [[fallthrough]]; default: bEqSignFound = false; if (nNextCh == cBreak && !cQuote) @@ -743,7 +750,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) else { do { - if (!linguistic::IsControlChar(nNextCh)) + if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh)) { // All remaining characters make their way into the text. sTmpBuffer.appendUtf32( nNextCh ); |