diff options
author | Hossein <hossein@libreoffice.org> | 2022-08-31 13:13:14 +0200 |
---|---|---|
committer | Hossein <hossein@libreoffice.org> | 2022-08-31 21:17:19 +0200 |
commit | e8c0a88d640b32129eae6c9042fd908c2892c7e1 (patch) | |
tree | dc55d05f22e804697c37d228a29e6e176a8a5b60 /vcl/qa | |
parent | ead23d945fb7bc794996c82287282bc385296ada (diff) |
tdf#139627 Test justified Arabic/Persian text to avoid gaps/big overlaps
With the patches 3901e02..62ff105 from Khaled, many problems with the
justified Arabic/Persian text which were related to Kashida are fixed.
Here, we add a test for tdf#139627 which converts the example file to
the PDF format, then checks the width and the position of the characters
in the output to make sure:
* The characters are present in the PDF file in the correct order
* The characters are joined together
* The diacritic mark is positioned correctly
* The overlap of the tatweel character with its neighbours is not more than
10% of the width of the first character
The sample odt file uses the "Noto Sans Arabic" font, which is available via
LibreOffice on all platforms.
One may run the test with:
make CPPUNIT_TEST_NAME=testTdf139627 -sr CppunitTest_vcl_pdfexport
Change-Id: I7a826a1b43ee842978decb0cf9a5e2a3b7219982
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138328
Tested-by: Jenkins
Reviewed-by: Hossein <hossein@libreoffice.org>
Diffstat (limited to 'vcl/qa')
-rw-r--r-- | vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt | bin | 0 -> 12875 bytes | |||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/pdfexport.cxx | 84 |
2 files changed, 84 insertions, 0 deletions
diff --git a/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt Binary files differnew file mode 100644 index 000000000000..6ca6ad196546 --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index 057773c3ddaf..4b25202bb2d4 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -3535,6 +3535,90 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testBitmapScaledown) } } // end anonymous namespace +// Regression test for tdf#139627: exports justified-arabic-kashida.odt to PDF and checks the text, enumeration order and X extents of its four text objects. +CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139627) +{ + aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); + saveAsPDF(u"justified-arabic-kashida.odt"); + std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = parseExport(); + CPPUNIT_ASSERT(pPdfDocument); + + // The document has one page. + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(/*nIndex=*/0); + CPPUNIT_ASSERT(pPdfPage); + + // 7 or 8 objects, 4 text, others are path + int nPageObjectCount = pPdfPage->getObjectCount(); + CPPUNIT_ASSERT_GREATEREQUAL(7, nPageObjectCount); + + // 4 text objects, "رم" (reh+mim), then "ِ" (kasreh), tatweel, and "ج" (jeh) + OUString sText[4]; + + /* With "Noto Sans Arabic" font, these are the X ranges on Linux: + 0: ( 61.75 - 218.35) + 1: (479.70 - 520.02) + 2: (209.40 - 457.08) + 3: (447.80 - 546.62) + */ + basegfx::B2DRectangle aRect[4]; + + std::unique_ptr<vcl::pdf::PDFiumTextPage> pTextPage = pPdfPage->getTextPage(); + std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject; + + int nTextObjectCount = 0; + for (int i = 0; i < nPageObjectCount; ++i) + { + pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + // NOTE(review): sText and aRect hold 4 entries, and the count is only asserted after this loop, so a fifth text object would be stored past the end of both arrays. + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + sText[nTextObjectCount] = 
pPageObject->getText(pTextPage); + aRect[nTextObjectCount] = pPageObject->getBounds(); + ++nTextObjectCount; + } + } + CPPUNIT_ASSERT_EQUAL(4, nTextObjectCount); + + // Text: جِـرم (which means "mass" in Persian) + // Rendered as (left to right): "reh + mim" - "tatweel" - "kasreh" - "jeh" + int rehmim = 0, kasreh = 1, tatweel = 2, jeh = 3; + + // Bad rendering can cause tatweel enumerated before kasreh + // This can be the end of journey, but let's accept this for now + if (sText[2].equals(u"ِ")) + { + tatweel = 1; + kasreh = 2; + } + + CPPUNIT_ASSERT_EQUAL(OUString(u"رم"), sText[rehmim].trim()); + CPPUNIT_ASSERT_EQUAL(OUString(u"ِ"), sText[kasreh].trim()); + CPPUNIT_ASSERT_EQUAL(OUString(u""), sText[tatweel].trim()); + CPPUNIT_ASSERT_EQUAL(OUString(u"ج"), sText[jeh].trim()); + + // "Kasreh" should be within "jeh" character + CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[kasreh].getMinX()); + CPPUNIT_ASSERT_LESS(aRect[jeh].getMaxX(), aRect[kasreh].getMaxX()); + + // "Tatweel" should cover "jeh" and "reh"+"mim" to avoid gap + // Checking right gap + CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[tatweel].getMaxX()); + // Checking left gap + // Kashida fails to reach to rehmim before the series of patches starting + // with 3901e029bd39575f700e69a73818565d62226a23. The visible symptom is + // a gap on the left of Kashida. + // CPPUNIT_ASSERT_LESS(aRect[rehmim].getMaxX(), aRect[tatweel].getMinX()); + + // The overlap of Kashida and surrounding characters is ~9% of the width + // of the "jeh" character, while using "Noto Sans Arabic" font in this + // specific example. + // We set the hard limit of 10% here. + CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[jeh].getMinX() - aRect[tatweel].getMaxX()) + / aRect[jeh].getWidth()); + CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[rehmim].getMaxX() - aRect[tatweel].getMinX()) + / aRect[jeh].getWidth()); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |