From 9eacbf7874320d44b79966cac8fa59c52a191715 Mon Sep 17 00:00:00 2001 From: Michael Stahl Date: Fri, 30 Jun 2023 14:56:48 +0200 Subject: tdf#152231 vcl: PDF/UA export: content of comments in the margin ... is also Artifact. The problem is that ImpEditEngine::Paint() always produces tags, it can't easily check if it's inside a layout environment that is an Artifact. But it looks like PDFWriterImpl::beginStructureElement() already wants to handle and suppress a structure element inside a NonStruct, except that it calls endStructureElementMCSeq() before checkEmitStructure(), so no structure marked content sequence is produced, but the content is outside the Artifact marked content sequence, and possibly a second Artifact marked content sequence will be opened once the structure element is finished. (btw, it's allowed to nest marked content sequences in PDF, but nesting a structure element inside an Artifact appears dubious) Change-Id: Ib3eef9c611e28242854c712ce974fd8cfecd8ac2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153810 Tested-by: Jenkins Reviewed-by: Michael Stahl --- vcl/inc/pdf/pdfwriter_impl.hxx | 3 +- vcl/qa/cppunit/pdfexport/data/tdf152231.fodt | 208 +++++++++++++++++++++++++++ vcl/qa/cppunit/pdfexport/pdfexport.cxx | 110 ++++++++++++++ vcl/source/gdi/pdfwriter_impl.cxx | 13 +- 4 files changed, 327 insertions(+), 7 deletions(-) create mode 100644 vcl/qa/cppunit/pdfexport/data/tdf152231.fodt (limited to 'vcl') diff --git a/vcl/inc/pdf/pdfwriter_impl.hxx b/vcl/inc/pdf/pdfwriter_impl.hxx index bad2d387511c..a9438be8fc13 100644 --- a/vcl/inc/pdf/pdfwriter_impl.hxx +++ b/vcl/inc/pdf/pdfwriter_impl.hxx @@ -1062,7 +1062,8 @@ i12626 void endPage(); void beginStructureElementMCSeq(); - void endStructureElementMCSeq(); + enum class EndMode { Default, OnlyStruct }; + void endStructureElementMCSeq(EndMode = EndMode::Default); /** checks whether a non struct element lies in the ancestor hierarchy of the current structure element diff --git 
a/vcl/qa/cppunit/pdfexport/data/tdf152231.fodt b/vcl/qa/cppunit/pdfexport/data/tdf152231.fodt new file mode 100644 index 000000000000..26b5d329b2aa --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf152231.fodt @@ -0,0 +1,208 @@ + + + Gabor Kelemen LO2022-11-22T00:38:49.8080000002022-11-22T01:02:17.524000000Gabor Kelemen LOPT23M25S11LibreOfficeDev/24.2.0.0.alpha0$Linux_X86_64 LibreOffice_project/3d1f9c90605623e5c7e7dd2d28f87aaa45fb9c86aaa2023-06-30T17:07:42.601941685PDF files + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + He heard quiet steps behind him. + That didn't bode well. + Who could be following him this late at night and in this deadbeat part of town? + And at this particular moment, just after he pulled off the big time and was making off with the greenbacks. + Was there another crook who'd had the same idea, and was now watching him and waiting for a chance to grab the fruit of his labor? + Or did the steps behind him mean that one of many law officers in town was on to him and just waiting to pounce and snap those cuffs on his wrists? + + Gabor Kelemen LO + 2022-11-22T00:56:44.315000000 + GK + This is a comment + He nervously looked all around. + Suddenly he saw the alley. + Like lightning he darted off to the left and disappeared between the two warehouses almost falling + over the trash can lying in the middle of the sidewalk. + He tried to nervously tap his way along in the inky + darkness and suddenly stiffened: it was a dead-end, he would have to go back the way he had come. The steps got louder and louder, he saw the black outline of a figure coming around the corner. Is this the end of the line? 
he thought pressing himself back against the wall trying to make himself invisible in the dark, was all that planning and energy wasted? He was dripping with sweat now, cold and wet, he could smell the fear coming off his clothes. Suddenly next to him, with a barely noticeable squeak, a door swung quietly to and fro in the night's breeze. Could this be the haven he'd prayed for? Slowly he slid toward the door, pressing himself more and more into the wall, into the dark, away from his enemy. Would this door save his hide? + + + \ No newline at end of file diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index e19f989ce9b9..120185eb6d14 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -3204,6 +3204,116 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139736) CPPUNIT_ASSERT(nArtifacts >= 3); } +CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf152231) +{ + aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); + + // Enable PDF/UA + uno::Sequence aFilterData( + comphelper::InitPropertySequence({ { "PDFUACompliance", uno::Any(true) }, + { "ExportNotesInMargin", uno::Any(true) }, + { "SelectPdfVersion", uno::Any(sal_Int32(17)) } })); + aMediaDescriptor["FilterData"] <<= aFilterData; + saveAsPDF(u"tdf152231.fodt"); + + vcl::filter::PDFDocument aDocument; + SvFileStream aStream(maTempFile.GetURL(), StreamMode::READ); + CPPUNIT_ASSERT(aDocument.Read(aStream)); + + std::vector aPages = aDocument.GetPages(); + CPPUNIT_ASSERT_EQUAL(static_cast(1), aPages.size()); + + vcl::filter::PDFObjectElement* pContents = aPages[0]->LookupObject("Contents"); + CPPUNIT_ASSERT(pContents); + vcl::filter::PDFStreamElement* pStream = pContents->GetStream(); + CPPUNIT_ASSERT(pStream); + SvMemoryStream& rObjectStream = pStream->GetMemory(); + // Uncompress it. 
+ SvMemoryStream aUncompressed; + ZCodec aZCodec; + aZCodec.BeginCompression(); + rObjectStream.Seek(0); + aZCodec.Decompress(rObjectStream, aUncompressed); + CPPUNIT_ASSERT(aZCodec.EndCompression()); + + auto pStart = static_cast(aUncompressed.GetData()); + const char* const pEnd = pStart + aUncompressed.GetSize(); + + enum + { + Default, + Artifact, + Tagged + } state + = Default; + + auto nLine(0); + auto nTagged(0); + auto nArtifacts(0); + while (true) + { + ++nLine; + auto const pLine = ::std::find(pStart, pEnd, '\n'); + if (pLine == pEnd) + { + break; + } + std::string_view const line(pStart, pLine - pStart); + pStart = pLine + 1; + if (!line.empty() && line[0] != '%') + { + ::std::cerr << nLine << ": " << line << "\n"; + if (line == "/Artifact BMC") + { + CPPUNIT_ASSERT_EQUAL_MESSAGE("unexpected nesting", Default, state); + state = Artifact; + ++nArtifacts; + } + else if (o3tl::starts_with(line, "/Standard< 1) // first line is expected "0.1 w" + { + CPPUNIT_ASSERT_MESSAGE("unexpected content outside MCS", state != Default); + } + } + } + CPPUNIT_ASSERT_EQUAL_MESSAGE("unclosed MCS", Default, state); + CPPUNIT_ASSERT(nTagged >= 12); // text in body + // 1 annotation + CPPUNIT_ASSERT(nArtifacts >= 1); + + auto nPara(0); + for (const auto& rDocElement : aDocument.GetElements()) + { + auto pObject = dynamic_cast(rDocElement.get()); + if (!pObject) + continue; + auto pType = dynamic_cast(pObject->Lookup("Type")); + if (pType && pType->GetValue() == "StructElem") + { + auto pS = dynamic_cast(pObject->Lookup("S")); + if (pS && pS->GetValue() == "Standard") + { + ++nPara; + auto pKids = dynamic_cast(pObject->Lookup("K")); + CPPUNIT_ASSERT(pKids); + // one problem was that some StructElem were missing kids + CPPUNIT_ASSERT(!pKids->GetElements().empty()); + } + } + } + CPPUNIT_ASSERT_EQUAL(static_cast(12), nPara); +} + CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf149140) { aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); diff --git 
a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 2f83c5cb6ef8..81b2bc3dbae0 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -10738,12 +10738,13 @@ void PDFWriterImpl::beginStructureElementMCSeq() } } -void PDFWriterImpl::endStructureElementMCSeq() +void PDFWriterImpl::endStructureElementMCSeq(EndMode const endMode) { - if( m_nCurrentStructElement > 0 && // StructTreeRoot - ( m_bEmitStructure || m_aStructure[ m_nCurrentStructElement ].m_eType == PDFWriter::NonStructElement ) && - m_aStructure[ m_nCurrentStructElement ].m_bOpenMCSeq // must have an opened MC sequence - ) + if (m_nCurrentStructElement > 0 // not StructTreeRoot + && (m_bEmitStructure + || (endMode != EndMode::OnlyStruct + && m_aStructure[m_nCurrentStructElement].m_eType == PDFWriter::NonStructElement)) + && m_aStructure[m_nCurrentStructElement].m_bOpenMCSeq) { writeBuffer( "EMC\n" ); m_aStructure[ m_nCurrentStructElement ].m_bOpenMCSeq = false; @@ -10779,7 +10780,7 @@ sal_Int32 PDFWriterImpl::beginStructureElement( PDFWriter::StructElement eType, return -1; // close eventual current MC sequence - endStructureElementMCSeq(); + endStructureElementMCSeq(EndMode::OnlyStruct); if( m_nCurrentStructElement == 0 && eType != PDFWriter::Document && eType != PDFWriter::NonStructElement ) -- cgit