diff options
author | Kevin Suo <suokunlong@126.com> | 2022-11-20 00:10:14 +0800 |
---|---|---|
committer | Noel Grandin <noel.grandin@collabora.co.uk> | 2022-11-25 08:44:02 +0100 |
commit | 3a2f0e4772e7b4646dd518b33aeafb6fd7025179 (patch) | |
tree | b23fad780fbd5307d5276ef15b4ae8050de22432 /sdext/source | |
parent | bfee63e502f341f0bd039189f2d7492529c4340a (diff) |
Resolves tdf#152083: Ligatures are incorrectly reversed in Draw pdf import
For the string "بسم الله الرحمن الرحیم", the xpdfimport generates the following drawChar tokens:
drawChar 438.500000 723.300000 446.744000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 م
drawChar 446.696000 723.300000 450.908000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ی
drawChar 450.896000 723.300000 458.804000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ح
drawChar 458.792000 723.300000 463.784000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ر
drawChar 463.688000 723.300000 467.048000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ل
drawChar 467.072000 723.300000 469.964000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ا
drawChar 469.964000 723.300000 473.708000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000
drawChar 473.756000 723.300000 482.780000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ن
drawChar 482.756000 723.300000 490.028000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 م
drawChar 490.040000 723.300000 497.948000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ح
drawChar 497.936000 723.300000 502.928000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ر
drawChar 502.928000 723.300000 506.288000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ل
drawChar 506.312000 723.300000 509.204000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ا
drawChar 509.108000 723.300000 512.852000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000
drawChar 512.900000 723.300000 527.216000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 الله
drawChar 527.204000 723.300000 530.948000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000
drawChar 530.996000 723.300000 539.240000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 م
drawChar 539.288000 723.300000 552.020000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 س
drawChar 551.888000 723.300000 555.236000 723.300000 1.000000 0.000000 0.000000 1.000000 12.000000 ب
Previously, all of the above tokens were combined into a single text frame in DrawXmlOptimizer::optimizeTextElements,
and then the text in that text frame was reversed (commit 69e9925ded584113e52f84ef0ed7c224079fa061,
further improved by commit 50d73574b6c3d71f9a539c895a15d6fcda22390b).
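The problem is that the single token "الله" is already in the correct order. Thus, when it is reversed
together with the other tokens, its own character order becomes wrong. Fix this by pre-reversing the text of each token before it is appended, so that the later reversal of the combined text restores its correct order.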
The space char needs special treatment, as observed in tdf104597_textrun.pdf.
Change-Id: If0bd716cc2d68820436d0e3f0f161ffb9cb4397a
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/142978
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Diffstat (limited to 'sdext/source')
-rw-r--r-- | sdext/source/pdfimport/test/tests.cxx | 11 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/drawtreevisiting.cxx | 46 |
2 files changed, 49 insertions, 8 deletions
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx index c199c2cc4547..7c10c85b29a5 100644 --- a/sdext/source/pdfimport/test/tests.cxx +++ b/sdext/source/pdfimport/test/tests.cxx @@ -798,15 +798,14 @@ namespace // Test for امُ عَلَيْكَ OString xpath = "string(//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span)"; OUString sContent = getXPathContent(pXmlDoc, xpath); - CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم َعَلْيَك"), sContent.replaceAll("\n\n", " ").replaceAll("\n", "")); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"امُ عَلَيَْك"), sContent.replaceAll("\n\n", " ").replaceAll("\n", "")); - // Test for ٱلسََّل . It appears in the 3rd frame, i.e. after the امُ عَلَيْكَ which is in the 2nd frame (from left to right) + // Test for ٱلسََّل . It appears in the 3rd frame, i.e. after the امُ عَلَيَْك which is in the 2nd frame (from left to right) // thus these two frames together appear as ٱلسََّل امُ عَلَيْكَ in Draw. // FIXME: Should be ٱلسَّلَامُ عَلَيْكَ (i.e. the two text frames should be merged into one so that the ل and the ا will show as لَا rather than ل ا) - // Note: this is commented due to ٱلسََّل is currently shown as ٱلَّسَل and will be fixed in a separate commit. 
- //xpath = "string(//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span)"; - //sContent = getXPathContent(pXmlDoc, xpath); - //CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلسََّل"), sContent.replaceAll("\n\n", " ").replaceAll("\n", "")); + xpath = "string(//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span)"; + sContent = getXPathContent(pXmlDoc, xpath); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلسََّل"), sContent.replaceAll("\n\n", " ").replaceAll("\n", "")); // Test for "LibreOffice RTL" xpath = "string(//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span)"; diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.cxx b/sdext/source/pdfimport/tree/drawtreevisiting.cxx index ead2fd432452..2356ddc254c4 100644 --- a/sdext/source/pdfimport/tree/drawtreevisiting.cxx +++ b/sdext/source/pdfimport/tree/drawtreevisiting.cxx @@ -706,8 +706,50 @@ void DrawXmlOptimizer::optimizeTextElements(Element& rParent) ) { pCur->updateGeometryWith( pNext ); - // append text to current element - pCur->Text.append( pNext->Text ); + if (pPara->bRtl) + { + // Tdf#152083: If RTL, reverse the text in pNext so that its correct order is + // restored when the combined text is reversed in DrawXmlEmitter::visit. + OUString tempStr; + bool bNeedReverse=false; + str = pNext->Text.toString(); + for (sal_Int32 i=0; i < str.getLength(); i++) + { + if (str[i] == u' ') + { // Space char (e.g. the space as in " م") needs special treatment. + // First, append the space char to pCur. + pCur->Text.append(OUStringChar(str[i])); + // Then, check whether the tmpStr needs reverse, if so then reverse and append. 
+ if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + tempStr = u""; + } + bNeedReverse = false; + } + else + { + tempStr += OUStringChar(str[i]); + bNeedReverse = true; + } + } + // Do the last append + if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + } + else + { + pCur->Text.append(tempStr); + } + } + else + { + // append text to current element directly without reverse + pCur->Text.append( pNext->Text ); + } str = pCur->Text.toString(); for(int i=0; i< str.getLength(); i++) |