1 files changed, 33 insertions, 26 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 7e8ac63fc61e..d94a24632779 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -377,9 +377,14 @@ namespace {
 
 constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; }
 
+constexpr bool HTML_ISSPACE(sal_uInt32 c)
+{
+    return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c;
+}
+
 }
 
-HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
+HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak)
 {
     OUStringBuffer sTmpBuffer( MAX_LEN );
     bool bContinue = true;
@@ -705,37 +710,39 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
             {
                 break;
             }
-            nNextCh = ' ';
+            if (!m_bPreserveSpaces)
+                nNextCh = ' ';
             [[fallthrough]];
         case ' ':
-            sTmpBuffer.appendUtf32( nNextCh );
-            if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
-                                !bReadPRE && !bReadTextArea) )
+            if (!m_bPreserveSpaces)
             {
-                // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
-                do {
-                    nNextCh = GetNextChar();
-                    if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+                sTmpBuffer.appendUtf32(nNextCh);
+                if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea))
+                {
+                    // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
+                    do
                     {
-                        if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
+                        nNextCh = GetNextChar();
+                        if (sal_Unicode(EOF) == nNextCh && rInput.eof())
                         {
-                            // Have seen s.th. aside from blanks?
-                            aToken.append( sTmpBuffer );
-                            sTmpBuffer.setLength(0);
-                            return HtmlTokenId::TEXTTOKEN;
+                            if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1)
+                            {
+                                // Have seen s.th. aside from blanks?
+                                aToken.append(sTmpBuffer);
+                                sTmpBuffer.setLength(0);
+                                return HtmlTokenId::TEXTTOKEN;
+                            }
+                            else
+                                // Only read blanks: no text must be returned
+                                // and GetNextToken_ has to read until EOF
+                                return HtmlTokenId::NONE;
                         }
-                        else
-                            // Only read blanks: no text must be returned
-                            // and GetNextToken_ has to read until EOF
-                            return HtmlTokenId::NONE;
-                    }
-                } while ( ' ' == nNextCh || '\t' == nNextCh ||
-                          '\r' == nNextCh || '\n' == nNextCh ||
-                          '\x0b' == nNextCh );
-                bNextCh = false;
+                    } while (HTML_ISSPACE(nNextCh));
+                    bNextCh = false;
+                }
+                break;
             }
-            break;
-
+            [[fallthrough]];
         default:
             bEqSignFound = false;
             if (nNextCh == cBreak && !cQuote)
@@ -743,7 +750,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
             else
             {
                 do {
-                    if (!linguistic::IsControlChar(nNextCh))
+                    if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh))
                     {
                     // All remaining characters make their way into the text.
                         sTmpBuffer.appendUtf32( nNextCh );