summaryrefslogtreecommitdiff
path: root/svtools/source/svhtml/parhtml.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'svtools/source/svhtml/parhtml.cxx')
-rw-r--r--svtools/source/svhtml/parhtml.cxx59
1 files changed, 33 insertions, 26 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 7e8ac63fc61e..d94a24632779 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -377,9 +377,14 @@ namespace {
constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; }
+constexpr bool HTML_ISSPACE(sal_uInt32 c)
+{
+ return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c;
+}
+
}
-HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
+HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak)
{
OUStringBuffer sTmpBuffer( MAX_LEN );
bool bContinue = true;
@@ -705,37 +710,39 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
{
break;
}
- nNextCh = ' ';
+ if (!m_bPreserveSpaces)
+ nNextCh = ' ';
[[fallthrough]];
case ' ':
- sTmpBuffer.appendUtf32( nNextCh );
- if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
- !bReadPRE && !bReadTextArea) )
+ if (!m_bPreserveSpaces)
{
- // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
- do {
- nNextCh = GetNextChar();
- if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+ sTmpBuffer.appendUtf32(nNextCh);
+ if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea))
+ {
+ // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
+ do
{
- if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
+ nNextCh = GetNextChar();
+ if (sal_Unicode(EOF) == nNextCh && rInput.eof())
{
- // Have seen s.th. aside from blanks?
- aToken.append( sTmpBuffer );
- sTmpBuffer.setLength(0);
- return HtmlTokenId::TEXTTOKEN;
+ if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1)
+ {
+ // Have seen s.th. aside from blanks?
+ aToken.append(sTmpBuffer);
+ sTmpBuffer.setLength(0);
+ return HtmlTokenId::TEXTTOKEN;
+ }
+ else
+ // Only read blanks: no text must be returned
+ // and GetNextToken_ has to read until EOF
+ return HtmlTokenId::NONE;
}
- else
- // Only read blanks: no text must be returned
- // and GetNextToken_ has to read until EOF
- return HtmlTokenId::NONE;
- }
- } while ( ' ' == nNextCh || '\t' == nNextCh ||
- '\r' == nNextCh || '\n' == nNextCh ||
- '\x0b' == nNextCh );
- bNextCh = false;
+ } while (HTML_ISSPACE(nNextCh));
+ bNextCh = false;
+ }
+ break;
}
- break;
-
+ [[fallthrough]];
default:
bEqSignFound = false;
if (nNextCh == cBreak && !cQuote)
@@ -743,7 +750,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
else
{
do {
- if (!linguistic::IsControlChar(nNextCh))
+ if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh))
{
// All remaining characters make their way into the text.
sTmpBuffer.appendUtf32( nNextCh );