diff options
author | Jonathan Clark <jonathan@libreoffice.org> | 2024-12-05 20:49:03 -0700 |
---|---|---|
committer | Adolfo Jayme Barrientos <fitojb@ubuntu.com> | 2024-12-07 20:38:18 +0100 |
commit | 80e07ed70c11a5c3ab7c355bb1e75278d8e6bdf3 (patch) | |
tree | 3f2278cbac8cebfb0a567a70f3c2b2ed4d761665 | |
parent | 1221b01653240a515fc8a88c70ae2e06fc5ef57b (diff) |
tdf#164106 Fix reordered glyph positioning with split grapheme clusters
Due to formatting, grapheme clusters can be split across
multiple layouts. Layouts containing split grapheme clusters are created
by laying out the complete string and extracting only the necessary
glyphs based on their source codepoint index.
This approach is good enough for most diacritic cases, but it cannot
handle certain substitution cases where glyphs with advances would be
interleaved with other layouts, because sub-layouts must be contiguous.
This change introduces code to disable grapheme cluster splitting in
the cases that cannot be handled correctly.
Change-Id: I122abbf9c3f8a5efa4c72ad47991d0ad9ff8a8c0
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177927
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/178043
Reviewed-by: Adolfo Jayme Barrientos <fitojb@ubuntu.com>
-rw-r--r-- | vcl/qa/cppunit/pdfexport/data/tdf164106.fodt | 133 | ||||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 50 | ||||
-rw-r--r-- | vcl/source/gdi/CommonSalLayout.cxx | 84 |
3 files changed, 263 insertions, 4 deletions
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt new file mode 100644 index 000000000000..6d3866b43af0 --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt @@ -0,0 +1,133 @@ +<?xml version='1.0' encoding='UTF-8'?> +<office:document xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" 
xmlns:officeooo="http://openoffice.org/2009/office" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" office:version="1.4" office:mimetype="application/vnd.oasis.opendocument.text"> + <office:meta><meta:creation-date>2024-11-30T17:13:03</meta:creation-date><meta:initial-creator>your servant</meta:initial-creator><dc:language>en-US</dc:language><dc:date>2024-12-05T06:45:52.650400638</dc:date><meta:editing-cycles>6</meta:editing-cycles><meta:editing-duration>PT4M37S</meta:editing-duration><meta:generator>LibreOfficeDev/25.8.0.0.alpha0$Linux_X86_64 LibreOffice_project/5f4d5a012865d717040012eb0f698a725b82d4cc</meta:generator><meta:document-statistic meta:table-count="0" meta:image-count="0" meta:object-count="0" meta:page-count="1" meta:paragraph-count="2" meta:word-count="2" meta:character-count="16" meta:non-whitespace-character-count="16"/><meta:user-defined meta:name="AppVersion">15.0000</meta:user-defined><meta:template xlink:type="simple" xlink:actuate="onRequest" xlink:title="Normal" xlink:href=""/></office:meta> + <office:font-face-decls> + <style:font-face style:name="Arial" svg:font-family="Arial" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="NSimSun" svg:font-family="NSimSun" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Noto Sans" svg:font-family="'Noto Sans'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Tahoma1" svg:font-family="Tahoma" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman" svg:font-family="'Times New Roman'" 
style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman1" svg:font-family="'Times New Roman'" style:font-family-generic="system" style:font-pitch="variable"/> + </office:font-face-decls> + <office:styles> + <style:default-style style:family="graphic"> + <style:graphic-properties svg:stroke-color="#3465a4" draw:fill-color="#729fcf" fo:wrap-option="no-wrap" draw:shadow-offset-x="0.1181in" draw:shadow-offset-y="0.1181in" draw:start-line-spacing-horizontal="0.1114in" draw:start-line-spacing-vertical="0.1114in" draw:end-line-spacing-horizontal="0.1114in" draw:end-line-spacing-vertical="0.1114in" style:writing-mode="lr-tb" style:flow-with-text="false"/> + <style:paragraph-properties style:text-autospace="ideograph-alpha" style:line-break="strict" loext:tab-stop-distance="0in" style:font-independent-line-spacing="false"> + <style:tab-stops/> + </style:paragraph-properties> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN"/> + </style:default-style> + <style:default-style style:family="paragraph"> + <style:paragraph-properties fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:text-autospace="ideograph-alpha" style:punctuation-wrap="hanging" style:line-break="strict" style:tab-stop-distance="0.4925in" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" 
style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/> + </style:default-style> + <style:default-style style:family="table"> + <style:table-properties table:border-model="collapsing"/> + </style:default-style> + <style:default-style style:family="table-row"> + <style:table-row-properties fo:keep-together="auto"/> + </style:default-style> + <style:style style:name="LO-normal" style:family="paragraph"> + <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0in" style:contextual-spacing="false" fo:text-align="start" style:justify-single-word="false" fo:orphans="2" fo:widows="2" fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Times New Roman" fo:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable" fo:font-size="12pt" fo:language="fr" fo:country="CA" style:letter-kerning="false" style:font-name-asian="Times New Roman1" style:font-family-asian="'Times New Roman'" style:font-family-generic-asian="system" style:font-pitch-asian="variable" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Times New Roman1" style:font-family-complex="'Times New Roman'" style:font-family-generic-complex="system" style:font-pitch-complex="variable" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" 
loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/> + </style:style> + <text:outline-style style:name="Outline"> + <text:outline-level-style text:level="1" loext:num-list-format="%1%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="2" loext:num-list-format="%2%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="3" loext:num-list-format="%3%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="4" loext:num-list-format="%4%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="5" loext:num-list-format="%5%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="6" loext:num-list-format="%6%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment 
text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="7" loext:num-list-format="%7%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="8" loext:num-list-format="%8%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="9" loext:num-list-format="%9%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="10" loext:num-list-format="%10%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + </text:outline-style> + <text:notes-configuration text:note-class="footnote" style:num-format="1" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="document"/> + <text:notes-configuration text:note-class="endnote" style:num-format="i" text:start-value="0"/> + <text:linenumbering-configuration text:number-lines="false" text:offset="0.1965in" style:num-format="1" text:number-position="left" text:increment="5"/> + <style:default-page-layout> + <style:page-layout-properties style:writing-mode="lr-tb" style:layout-grid-standard-mode="true"/> + </style:default-page-layout> + </office:styles> + 
<office:automatic-styles> + <style:style style:name="P1" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="P2" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties fo:color="#0000ff" loext:opacity="100%" style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="T1" style:family="text"> + <style:text-properties fo:color="#00ff00" loext:opacity="100%"/> + </style:style> + <style:style style:name="T2" style:family="text"> + <style:text-properties fo:color="#0000ff" loext:opacity="100%"/> + </style:style> + <style:page-layout style:name="pm1"> + <style:page-layout-properties fo:page-width="8.5in" fo:page-height="11in" style:num-format="1" style:print-orientation="portrait" fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in" style:writing-mode="lr-tb" style:layout-grid-color="#c0c0c0" style:layout-grid-lines="136" style:layout-grid-base-height="0.0693in" style:layout-grid-ruby-height="0in" style:layout-grid-mode="none" style:layout-grid-ruby-below="false" style:layout-grid-print="false" style:layout-grid-display="false" style:layout-grid-base-width="0.1665in" style:layout-grid-snap-to="true" style:footnote-max-height="0in" loext:margin-gutter="0in"> + <style:footnote-sep style:width="0.0071in" style:distance-before-sep="0.0398in" style:distance-after-sep="0.0398in" style:line-style="solid" style:adjustment="left" style:rel-width="25%" style:color="#000000"/> + </style:page-layout-properties> + <style:header-style/> + <style:footer-style/> + </style:page-layout> + <style:style style:name="dp1" style:family="drawing-page"> + <style:drawing-page-properties draw:background-size="full"/> + </style:style> + </office:automatic-styles> + <office:master-styles> + <style:master-page style:name="Standard" style:page-layout-name="pm1" 
draw:style-name="dp1"/> + </office:master-styles> + <office:body> + <office:text> + <text:sequence-decls> + <text:sequence-decl text:display-outline-level="0" text:name="Illustration"/> + <text:sequence-decl text:display-outline-level="0" text:name="Table"/> + <text:sequence-decl text:display-outline-level="0" text:name="Text"/> + <text:sequence-decl text:display-outline-level="0" text:name="Drawing"/> + <text:sequence-decl text:display-outline-level="0" text:name="Figure"/> + </text:sequence-decls> + <text:p text:style-name="P1"><text:span text:style-name="T1">वीथीर्</text:span><text:span text:style-name="T2">भजनमार्गान्</text:span></text:p> + <text:p text:style-name="P2">वीथीर्भजनमार्गान्</text:p> + </office:text> + </office:body> +</office:document> diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index 541bb5f8009e..e7a997b401e2 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -5719,6 +5719,56 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf162750SmallCapsLigature) CPPUNIT_ASSERT_EQUAL(u"FI"_ustr, aText.at(2).trim()); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf164106SplitReorderedClusters) +{ + aMediaDescriptor[u"FilterName"_ustr] <<= u"writer_pdf_Export"_ustr; + saveAsPDF(u"tdf164106.fodt"); + + auto pPdfDocument = parsePDFExport(); + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + + auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0); + CPPUNIT_ASSERT(pPdfPage); + auto pTextPage = pPdfPage->getTextPage(); + CPPUNIT_ASSERT(pTextPage); + + int nPageObjectCount = pPdfPage->getObjectCount(); + + CPPUNIT_ASSERT_EQUAL(14, nPageObjectCount); + + std::vector<OUString> aText; + std::vector<basegfx::B2DRectangle> aRect; + + for (int i = 0; i < nPageObjectCount; ++i) + { + auto pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + 
aText.push_back(pPageObject->getText(pTextPage)); + aRect.push_back(pPageObject->getBounds()); + } + } + + CPPUNIT_ASSERT_EQUAL(size_t(14), aText.size()); + + auto fnCompareIndices = [&](size_t nSplit, size_t nCombined) { + CPPUNIT_ASSERT_EQUAL(aText.at(nSplit).trim(), aText.at(nCombined).trim()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMinX(), aRect.at(nCombined).getMinX(), + /*delta*/ 0.2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMaxX(), aRect.at(nCombined).getMaxX(), + /*delta*/ 0.2); + }; + + fnCompareIndices(0, 7); + fnCompareIndices(1, 8); + fnCompareIndices(2, 9); + fnCompareIndices(3, 10); + fnCompareIndices(4, 11); + fnCompareIndices(5, 12); + fnCompareIndices(6, 13); +} + } // end anonymous namespace CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/vcl/source/gdi/CommonSalLayout.cxx b/vcl/source/gdi/CommonSalLayout.cxx index 115870dc9cf6..61b71f5ea7c4 100644 --- a/vcl/source/gdi/CommonSalLayout.cxx +++ b/vcl/source/gdi/CommonSalLayout.cxx @@ -150,6 +150,14 @@ public: return nClusterId; } + void Reset() + { + for (auto& rElement : m_aGlyphs) + { + rElement.second.m_bUsed = false; + } + } + void ShapeSubRun(const sal_Unicode* pStr, const int nLength, const SubRun& aSubRun, hb_font_t* pHbFont, const std::vector<hb_feature_t>& maFeatures, hb_language_t oHbLanguage) @@ -591,6 +599,73 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, nullptr); hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, nullptr); + // tdf#164106: Grapheme clusters can be split across multiple layouts. To do this, + // the complete string is laid out, and only the necessary glyphs are extracted. + // These sub-layouts are positioned side-by-side to form the complete text. + // This approach is good enough for most diacritic cases, but it cannot handle cases + // where a glyph with an advance is reordered into a different sub-layout. 
+ bool bStartClusterOutOfOrder = false; + bool bEndClusterOutOfOrder = false; + { + double nNormalAdvance = 0.0; + double nStartAdvance = 0.0; + double nEndAdvance = 0.0; + + auto fnHandleGlyph = [&](int i) + { + int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; + int32_t nCluster = pHbGlyphInfos[i].cluster; + auto nOrigCharPos = stClusterMapper.RemapGlyph(nCluster, nGlyphIndex); + + double nAdvance = 0.0; + if (aSubRun.maDirection == HB_DIRECTION_TTB) + { + nAdvance = -pHbPositions[i].y_advance; + } + else + { + nAdvance = pHbPositions[i].x_advance; + } + + nNormalAdvance += nAdvance; + + if (nOrigCharPos < rArgs.mnDrawMinCharPos) + { + nStartAdvance += nAdvance; + if (nStartAdvance != nNormalAdvance) + { + bStartClusterOutOfOrder = true; + } + } + + if (nOrigCharPos < rArgs.mnDrawEndCharPos) + { + nEndAdvance += nAdvance; + if (nEndAdvance != nNormalAdvance) + { + bEndClusterOutOfOrder = true; + } + } + }; + + if (bRightToLeft) + { + for (int i = nRunGlyphCount - 1; i >= 0; --i) + { + fnHandleGlyph(i); + } + } + else + { + for (int i = 0; i < nRunGlyphCount; ++i) + { + fnHandleGlyph(i); + } + } + + stClusterMapper.Reset(); + } + for (int i = 0; i < nRunGlyphCount; ++i) { int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; int32_t nCharPos = pHbGlyphInfos[i].cluster; @@ -731,14 +806,15 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay const GlyphItem aGI(nCharPos, nCharCount, nGlyphIndex, aNewPos, nGlyphFlags, nAdvance, nXOffset, nYOffset, nOrigCharPos); - if (aGI.origCharPos() >= rArgs.mnDrawMinCharPos - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + auto nLowerBound = (bStartClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos()); + auto nUpperBound = (bEndClusterOutOfOrder ? 
aGI.charPos() : aGI.origCharPos()); + if (nLowerBound >= rArgs.mnDrawMinCharPos && nUpperBound < rArgs.mnDrawEndCharPos) { m_GlyphItems.push_back(aGI); } - if (aGI.origCharPos() >= rArgs.mnDrawOriginCluster - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + if (nLowerBound >= rArgs.mnDrawOriginCluster + && nUpperBound < rArgs.mnDrawEndCharPos) { aCurrPos.adjustX(nAdvance); } |