summary | refs | log | tree | commit | diff
path: root/svtools
diff options
context:
space:
mode:
author Mike Kaganski <mike.kaganski@collabora.com> 2023-10-23 19:52:14 +0300
committer Mike Kaganski <mike.kaganski@collabora.com> 2023-10-23 22:10:06 +0200
commit 926826e40955175a8c115472e0d2f6c7f2f1a453 (patch)
tree ba914ce7dcb96fa5f8ddb36a1b40e8d5bc1bb805 /svtools
parent bae0736bf0ec54828766c3d903e2a27458643395 (diff)
Implement PreserveSpaces boolean HTML/ReqIF export filter option
This option changes how HTML/ReqIF export handles paragraphs with leading/trailing spaces, or multiple sequential spaces. Normally export may insert newlines every ~256 characters, in place of normal space characters; this relies on default processing of spaces, where leading/trailing spaces are trimmed, and runs of spaces are reduced to a single space. When PreserveSpaces is true, HTML/ReqIF export takes care to not alter spaces inside paragraphs. For that, it checks if paragraphs contain sequences of spaces that normally would be reduced; and for those paragraphs, it adds "white-space: pre-wrap" to style (in HTML), or 'xml:space="preserve"' attribute (in ReqIF). Import of 'xml:space' attribute and "white-space: pre-wrap" style is implemented; when paragraph has these, it keeps the spaces read from HTML/ReqIF intact. Import does not currently support this attribute/style in elements other than 'p'. Change-Id: I62dba5eaf313b965bf37d8fa5e3f5bbb8f5e8357 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158362 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'svtools')
-rw-r--r-- svtools/source/svhtml/htmlkywd.cxx 1
-rw-r--r-- svtools/source/svhtml/parhtml.cxx 59
2 files changed, 34 insertions, 26 deletions
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index 5f81b3e3ca30..d1b0ea2ee03e 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -599,6 +599,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valign), HtmlOptionId::VALIGN},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valuetype), HtmlOptionId::VALUETYPE},
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_wrap), HtmlOptionId::WRAP},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_XHTML_O_xml_space), HtmlOptionId::XML_SPACE},
// Attributes with script code value
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onblur), HtmlOptionId::ONBLUR}, // JavaScript
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 7e8ac63fc61e..d94a24632779 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -377,9 +377,14 @@ namespace {
constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; }
+constexpr bool HTML_ISSPACE(sal_uInt32 c) // true when c is one of the whitespace code points that the space-run reduction loop in ScanText skips over
+{
+ return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c; // space, tab, CR, LF, vertical tab (0x0b); form feed is not in the set
+}
+
}
-HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
+HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak)
{
OUStringBuffer sTmpBuffer( MAX_LEN );
bool bContinue = true;
@@ -705,37 +710,39 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
{
break;
}
- nNextCh = ' ';
+ if (!m_bPreserveSpaces)
+ nNextCh = ' ';
[[fallthrough]];
case ' ':
- sTmpBuffer.appendUtf32( nNextCh );
- if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
- !bReadPRE && !bReadTextArea) )
+ if (!m_bPreserveSpaces)
{
- // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
- do {
- nNextCh = GetNextChar();
- if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+ sTmpBuffer.appendUtf32(nNextCh);
+ if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea))
+ {
+ // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
+ do
{
- if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
+ nNextCh = GetNextChar();
+ if (sal_Unicode(EOF) == nNextCh && rInput.eof())
{
- // Have seen s.th. aside from blanks?
- aToken.append( sTmpBuffer );
- sTmpBuffer.setLength(0);
- return HtmlTokenId::TEXTTOKEN;
+ if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1)
+ {
+ // Have seen s.th. aside from blanks?
+ aToken.append(sTmpBuffer);
+ sTmpBuffer.setLength(0);
+ return HtmlTokenId::TEXTTOKEN;
+ }
+ else
+ // Only read blanks: no text must be returned
+ // and GetNextToken_ has to read until EOF
+ return HtmlTokenId::NONE;
}
- else
- // Only read blanks: no text must be returned
- // and GetNextToken_ has to read until EOF
- return HtmlTokenId::NONE;
- }
- } while ( ' ' == nNextCh || '\t' == nNextCh ||
- '\r' == nNextCh || '\n' == nNextCh ||
- '\x0b' == nNextCh );
- bNextCh = false;
+ } while (HTML_ISSPACE(nNextCh));
+ bNextCh = false;
+ }
+ break;
}
- break;
-
+ [[fallthrough]];
default:
bEqSignFound = false;
if (nNextCh == cBreak && !cQuote)
@@ -743,7 +750,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
else
{
do {
- if (!linguistic::IsControlChar(nNextCh))
+ if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh))
{
// All remaining characters make their way into the text.
sTmpBuffer.appendUtf32( nNextCh );