tdf#155496: Export list header to (X)HTML using 'display: block' style

This re-implements commit 013a4f1f5c9ea5fb511568c53a7e76d1b365a65d (sw XHTML export: fix handling of list labels, 2021-05-13): instead of keeping headers (ODF 'text:list-header' elements) out of lists, it adds a 'style="display: block"' attribute to the respective list items. This makes sure that the items use proper list indentation, and produces correct markup.

Change-Id: I900e4aebbe562830dc2ce5400e3e33b38c2f2faa
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152280
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
author: Mike Kaganski <mike.kaganski@collabora.com> 2023-05-25 19:41:32 +0300
committer: Miklos Vajna <vmiklos@collabora.com> 2023-05-26 09:34:22 +0200
commit: 7d87770986fdfc43dd5d4b514f68026ff6ededcf (patch)
tree: e79dd1235a554a4e3c466c180d2f236775422bd5 /sw
parent: 16a8943f7dbf338377fe486cc75116b89742c7b0 (diff)
5 files changed, 89 insertions, 99 deletions
diff --git a/sw/qa/extras/htmlexport/data/listItemSubheader.fodt b/sw/qa/extras/htmlexport/data/listItemSubheader.fodt
new file mode 100644
index 000000000000..4c68ce887650
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/listItemSubheader.fodt
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" office:version="1.3" office:mimetype="application/vnd.oasis.opendocument.text">
+ <office:body>
+  <office:text>
+   <text:p/>
+   <text:list>
+    <text:list-item>
+     <text:p>list 1 item 1</text:p>
+     <text:list>
+      <text:list-header>
+       <text:p>list 1 item 1 sub-header</text:p>
+      </text:list-header>
+     </text:list>
+    </text:list-item>
+   </text:list>
+   <text:p/>
+   <text:p>text</text:p>
+   <text:list text:continue-numbering="true">
+    <text:list-item>
+     <text:list>
+      <text:list-header>
+       <text:p>list 2 sub-header</text:p>
+      </text:list-header>
+     </text:list>
+    </text:list-item>
+   </text:list>
+   <text:p/>
+  </office:text>
+ </office:body>
+</office:document>
+\ No newline at end of file
diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx
index 710c4c0dc45c..105b19c7ea8d 100644
--- a/sw/qa/extras/htmlexport/htmlexport.cxx
+++ b/sw/qa/extras/htmlexport/htmlexport.cxx
@@ -1552,10 +1552,13 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testListHeading)
     CPPUNIT_ASSERT(pXmlDoc);
 
     // Without the accompanying fix in place, this test would have failed:
-    // - expected: <div><p>...</p></div>
+    // - expected: <div><ol><li style="display: block"><p>...</p></li></ol></div>
     // - actual  : <div><ol><p>...</p></li></ol></div>
     // because a </li> but no <li> is not well-formed and <ol> with a non-li children is invalid.
-    assertXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p", "list header");
+    OUString aContent
+        = getXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/"
+                                   "reqif-xhtml:li[@style='display: block']/reqif-xhtml:p");
+    CPPUNIT_ASSERT_EQUAL(OUString("list header"), aContent.trim());
 }
 
 CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testPartiallyNumberedList)
@@ -2151,11 +2154,10 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testListsHeading)
     xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream);
     CPPUNIT_ASSERT(pXmlDoc);
 
-    // Without the accompanying fix in place, this test would have failed with:
-    // - In <>, XPath '/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p' not found
-    // Because the headers of list 1 were inside <div><ol>, not directly under <div>.
-    assertXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p",
-                       "list 1, header 1");
+    OUString aContent
+        = getXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/"
+                                   "reqif-xhtml:li[@style='display: block']/reqif-xhtml:p");
+    CPPUNIT_ASSERT_EQUAL(OUString("list 1, header 1"), aContent.trim());
 }
 
 CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testOleEmfPreviewToHtml)
@@ -2611,6 +2613,38 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTdf155387)
         "l3");
 }
 
+CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTdf155496)
+{
+    createSwDoc("listItemSubheader.fodt");
+    ExportToReqif();
+
+    SvMemoryStream aStream;
+    WrapReqifFromTempFile(aStream);
+    xmlDocUniquePtr pDoc = parseXmlStream(&aStream);
+    // Without the fix in place, this would fail (presumably: the export was not parseable XML)
+    CPPUNIT_ASSERT(pDoc);
+
+    // The export contains two top-level lists
+    assertXPath(pDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul", 2);
+    // The first list has a single top-level item
+    assertXPath(pDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]/reqif-xhtml:li");
+    // That item has one top-level paragraph
+    assertXPath(pDoc,
+                "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]/reqif-xhtml:li/reqif-xhtml:p");
+    // That item also has one sublist
+    assertXPath(
+        pDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]/reqif-xhtml:li/reqif-xhtml:ul");
+    // The sublist has one item (the list header from the test document)
+    assertXPath(pDoc,
+                "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]/reqif-xhtml:li/reqif-xhtml:ul/"
+                "reqif-xhtml:li");
+    // Check the header's text; the expected string has an embedded newline and two tabs
+    OUString aContent = getXPathContent(
+        pDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]/reqif-xhtml:li/reqif-xhtml:ul/"
+              "reqif-xhtml:li/reqif-xhtml:p");
+    CPPUNIT_ASSERT_EQUAL(OUString("list 1 item 1\n\t\tsub-header"), aContent.trim());
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx
index 37b460eb0869..62460cc2be90 100644
--- a/sw/source/filter/html/htmlatr.cxx
+++ b/sw/source/filter/html/htmlatr.cxx
@@ -688,12 +688,11 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
     if( nNewDefListLvl != rWrt.m_nDefListLvl )
         rWrt.OutAndSetDefList( nNewDefListLvl );
 
-    bool bAtLeastOneNumbered = false;
     // if necessary, start a bulleted or numbered list
     if( rInfo.bInNumberBulletList )
     {
         OSL_ENSURE( !rWrt.m_nDefListLvl, "DL cannot be inside OL!" );
-        OutHTML_NumberBulletListStart( rWrt, aNumInfo, bAtLeastOneNumbered );
+        OutHTML_NumberBulletListStart( rWrt, aNumInfo );
 
         if( bNumbered )
         {
@@ -763,18 +762,26 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
 
     // if necessary, start a new list item
     bool bNumberedForListItem = bNumbered;
-    if (!bNumberedForListItem && rWrt.mbXHTML && bAtLeastOneNumbered)
+    if (!bNumberedForListItem)
     {
-        // OutHTML_NumberBulletListEnd() will end a list item if at least one text node is numbered
-        // in the list, so open the list item with the same condition here.
-        bNumberedForListItem = true;
+        // Open a list item also for the leading unnumbered nodes (= list headers in ODF
+        // terminology); to do that, detect if this unnumbered node is the first in this list
+        const auto& rPrevListInfo = rWrt.GetNumInfo();
+        if (rPrevListInfo.GetNumRule() != aNumInfo.GetNumRule() || aNumInfo.IsRestart(rPrevListInfo)
+            || rPrevListInfo.GetDepth() < aNumInfo.GetDepth())
+            bNumberedForListItem = true;
     }
     if( rInfo.bInNumberBulletList && bNumberedForListItem )
     {
         HtmlWriter html(rWrt.Strm(), rWrt.maNamespace);
         html.prettyPrint(rWrt.m_bPrettyPrint);
         html.start(OOO_STRING_SVTOOLS_HTML_li);
-        if( USHRT_MAX != nNumStart )
+        if (!bNumbered)
+        {
+            // Handles list headers (<text:list-header> ODF element)
+            html.attribute(OOO_STRING_SVTOOLS_HTML_O_style, "display: block");
+        }
+        else if (USHRT_MAX != nNumStart)
             html.attribute(OOO_STRING_SVTOOLS_HTML_O_value, OString::number(nNumStart));
         // Finish the opening element, but don't close it.
         html.characters("");
diff --git a/sw/source/filter/html/htmlnumwriter.cxx b/sw/source/filter/html/htmlnumwriter.cxx
index 8ca8c08de5ee..98b581700224 100644
--- a/sw/source/filter/html/htmlnumwriter.cxx
+++ b/sw/source/filter/html/htmlnumwriter.cxx
@@ -84,8 +84,7 @@ void SwHTMLWriter::SetNextNumInfo( std::unique_ptr<SwHTMLNumRuleInfo> pNxt )
 }
 
 SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
-                                 const SwHTMLNumRuleInfo& rInfo,
-                                 bool& rAtLeastOneNumbered )
+                                 const SwHTMLNumRuleInfo& rInfo )
 {
     SwHTMLNumRuleInfo& rPrevInfo = rWrt.GetNumInfo();
     bool bSameRule = rPrevInfo.GetNumRule() == rInfo.GetNumRule();
@@ -95,43 +94,6 @@ SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
         return rWrt;
     }
 
-    if (rWrt.mbXHTML && !rInfo.IsNumbered())
-    {
-        // If the list only consists of non-numbered text nodes, then don't start the list.
-        bool bAtLeastOneNumbered = false;
-        SwNodeOffset nPos = rWrt.m_pCurrentPam->GetPoint()->GetNodeIndex() + 1;
-        SwNumRule* pNumRule = nullptr;
-        while (true)
-        {
-            const SwNode* pNode = rWrt.m_pDoc->GetNodes()[nPos];
-            if (!pNode->IsTextNode())
-            {
-                break;
-            }
-
-            const SwTextNode* pTextNode = pNode->GetTextNode();
-            if (!pTextNode->GetNumRule() || (pNumRule && pTextNode->GetNumRule() != pNumRule))
-            {
-                // Node is not in the same numbering as the previous one.
-                break;
-            }
-
-            pNumRule = pTextNode->GetNumRule();
-            if (pTextNode->IsNumbered())
-            {
-                bAtLeastOneNumbered = true;
-                break;
-            }
-            ++nPos;
-        }
-
-        rAtLeastOneNumbered = bAtLeastOneNumbered;
-        if (!bAtLeastOneNumbered)
-        {
-            return rWrt;
-        }
-    }
-
     bool bStartValue = false;
     if( !bSameRule && rInfo.GetDepth() )
     {
@@ -322,42 +284,8 @@ SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
     bool bListEnd = !bSameRule || rNextInfo.GetDepth() < rInfo.GetDepth() || rNextInfo.IsRestart(rInfo);
     bool bNextIsSubitem = !bListEnd && rNextInfo.GetDepth() > rInfo.GetDepth();
 
-    std::optional<bool> oAtLeastOneNumbered;
-    if (!rInfo.IsNumbered())
-    {
-        oAtLeastOneNumbered = false;
-        SwNodeOffset nPos = rWrt.m_pCurrentPam->GetPoint()->GetNodeIndex() - 1;
-        SwNumRule* pNumRule = nullptr;
-        while (true)
-        {
-            const SwNode* pNode = rWrt.m_pDoc->GetNodes()[nPos];
-            if (!pNode->IsTextNode())
-            {
-                break;
-            }
-
-            const SwTextNode* pTextNode = pNode->GetTextNode();
-            if (!pTextNode->GetNumRule() || (pNumRule && pTextNode->GetNumRule() != pNumRule))
-            {
-                // Node is not in the same numbering as the next one.
-                break;
-            }
-
-            pNumRule = pTextNode->GetNumRule();
-            if (pTextNode->IsNumbered())
-            {
-                oAtLeastOneNumbered = true;
-                break;
-            }
-            --nPos;
-        }
-    }
-
-    // The list is numbered if the previous text node is numbered or any other previous text
-    // node is numbered.
-    bool bPrevIsNumbered = rInfo.IsNumbered() || *oAtLeastOneNumbered;
     // XHTML </li> for the list item content, if there is an open <li>.
-    if ((bListEnd && bPrevIsNumbered) || (!bListEnd && !bNextIsSubitem && rNextInfo.IsNumbered()))
+    if (bListEnd || (!bNextIsSubitem && rNextInfo.IsNumbered()))
     {
         HTMLOutFuncs::Out_AsciiTag(
             rWrt.Strm(), Concat2View(rWrt.GetNamespace() + OOO_STRING_SVTOOLS_HTML_li),
@@ -369,15 +297,6 @@ SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
         return rWrt;
     }
 
-    if (rWrt.mbXHTML && !rInfo.IsNumbered())
-    {
-        // If the list only consisted of non-numbered text nodes, then don't end the list.
-        if (!*oAtLeastOneNumbered)
-        {
-            return rWrt;
-        }
-    }
-
     OSL_ENSURE( rWrt.m_nLastParaToken == HtmlTokenId::NONE,
                 "<PRE> was not closed before </OL>." );
     sal_uInt16 nNextDepth =
diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx
index 801b90363b72..00d837259b94 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -716,8 +716,7 @@ SwHTMLWriter& OutCSS1_NumberBulletListStyleOpt( SwHTMLWriter& rWrt, const SwNumR
                                     sal_uInt8 nLevel );
 
 SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
-                                 const SwHTMLNumRuleInfo& rInfo,
-                                 bool& rAtLeastOneNumbered );
+                                 const SwHTMLNumRuleInfo& rInfo );
 SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
                                const SwHTMLNumRuleInfo& rNextInfo );
author	Mike Kaganski <mike.kaganski@collabora.com>	2023-05-25 19:41:32 +0300
committer	Miklos Vajna <vmiklos@collabora.com>	2023-05-26 09:34:22 +0200
commit	7d87770986fdfc43dd5d4b514f68026ff6ededcf (patch)
tree	e79dd1235a554a4e3c466c180d2f236775422bd5 /sw
parent	16a8943f7dbf338377fe486cc75116b89742c7b0 (diff)