diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2023-10-23 19:52:14 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2023-10-23 22:10:06 +0200 |
commit | 926826e40955175a8c115472e0d2f6c7f2f1a453 (patch) | |
tree | ba914ce7dcb96fa5f8ddb36a1b40e8d5bc1bb805 /svtools | |
parent | bae0736bf0ec54828766c3d903e2a27458643395 (diff) |
Implement PreserveSpaces boolean HTML/ReqIF export filter option
This option changes how HTML/ReqIF export handles paragraphs with
leading/trailing spaces, or multiple sequential spaces. Normally
export may insert newlines every ~256 characters, in place of
normal space characters; this relies on default processing of
spaces, where leading/trailing spaces are trimmed, and runs of
spaces are reduced to a single space.
When PreserveSpaces is true, HTML/ReqIF export takes care to not
alter spaces inside paragraphs. For that, it checks if paragraphs
contain sequences of spaces that normally would be reduced; and
for those paragraphs, it adds "white-space: pre-wrap" to style
(in HTML), or 'xml:space="preserve"' attribute (in ReqIF).
Import of the 'xml:space' attribute and the "white-space: pre-wrap" style
is implemented; when a paragraph has either of these, it keeps the spaces
read from HTML/ReqIF intact.
Import does not currently support this attribute/style in elements
other than 'p'.
Change-Id: I62dba5eaf313b965bf37d8fa5e3f5bbb8f5e8357
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158362
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'svtools')
-rw-r--r-- | svtools/source/svhtml/htmlkywd.cxx | 1 | ||||
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 59 |
2 files changed, 34 insertions, 26 deletions
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx index 5f81b3e3ca30..d1b0ea2ee03e 100644 --- a/svtools/source/svhtml/htmlkywd.cxx +++ b/svtools/source/svhtml/htmlkywd.cxx @@ -599,6 +599,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = { {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valign), HtmlOptionId::VALIGN}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valuetype), HtmlOptionId::VALUETYPE}, {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_wrap), HtmlOptionId::WRAP}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_XHTML_O_xml_space), HtmlOptionId::XML_SPACE}, // Attributes with script code value {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onblur), HtmlOptionId::ONBLUR}, // JavaScript diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index 7e8ac63fc61e..d94a24632779 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -377,9 +377,14 @@ namespace { constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; } +constexpr bool HTML_ISSPACE(sal_uInt32 c) +{ + return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c; +} + } -HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) +HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak) { OUStringBuffer sTmpBuffer( MAX_LEN ); bool bContinue = true; @@ -705,37 +710,39 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) { break; } - nNextCh = ' '; + if (!m_bPreserveSpaces) + nNextCh = ' '; [[fallthrough]]; case ' ': - sTmpBuffer.appendUtf32( nNextCh ); - if( '>'!=cBreak && (!bReadListing && !bReadXMP && - !bReadPRE && !bReadTextArea) ) + if (!m_bPreserveSpaces) { - // Reduce sequences of Blanks/Tabs/CR/LF to a single blank - do { - nNextCh = GetNextChar(); - if( sal_Unicode(EOF) == nNextCh && rInput.eof() ) + sTmpBuffer.appendUtf32(nNextCh); + if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea)) + { + // Reduce sequences 
of Blanks/Tabs/CR/LF to a single blank + do { - if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 ) + nNextCh = GetNextChar(); + if (sal_Unicode(EOF) == nNextCh && rInput.eof()) { - // Have seen s.th. aside from blanks? - aToken.append( sTmpBuffer ); - sTmpBuffer.setLength(0); - return HtmlTokenId::TEXTTOKEN; + if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1) + { + // Have seen s.th. aside from blanks? + aToken.append(sTmpBuffer); + sTmpBuffer.setLength(0); + return HtmlTokenId::TEXTTOKEN; + } + else + // Only read blanks: no text must be returned + // and GetNextToken_ has to read until EOF + return HtmlTokenId::NONE; } - else - // Only read blanks: no text must be returned - // and GetNextToken_ has to read until EOF - return HtmlTokenId::NONE; - } - } while ( ' ' == nNextCh || '\t' == nNextCh || - '\r' == nNextCh || '\n' == nNextCh || - '\x0b' == nNextCh ); - bNextCh = false; + } while (HTML_ISSPACE(nNextCh)); + bNextCh = false; + } + break; } - break; - + [[fallthrough]]; default: bEqSignFound = false; if (nNextCh == cBreak && !cQuote) @@ -743,7 +750,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) else { do { - if (!linguistic::IsControlChar(nNextCh)) + if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh)) { // All remaining characters make their way into the text. sTmpBuffer.appendUtf32( nNextCh ); |