From e8c0a88d640b32129eae6c9042fd908c2892c7e1 Mon Sep 17 00:00:00 2001 From: Hossein Date: Wed, 31 Aug 2022 13:13:14 +0200 Subject: tdf#139627 Test justified Arabic/Persian text to avoid gaps/big overlaps With the patches 3901e02..62ff105 from Khaled, many problems with the justified Arabic/Persian text which were related to Kashida are fixed. Here, we add a test for tdf#139627 which converts the example file to the PDF format, then checks the width and the position of the characters in the output to make sure: * The characters are present in the PDF file in the correct order * The characters are joined together * The diacritic mark is positioned correctly * The overlapping of the tatweel character is not more than 10% of the first character Sample odt file uses "Noto Sans Arabic" font, which is available via LibreOffice on all platforms. One may run the test with: make CPPUNIT_TEST_NAME=testTdf139627 -sr CppunitTest_vcl_pdfexport Change-Id: I7a826a1b43ee842978decb0cf9a5e2a3b7219982 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138328 Tested-by: Jenkins Reviewed-by: Hossein --- .../pdfexport/data/justified-arabic-kashida.odt | Bin 0 -> 12875 bytes vcl/qa/cppunit/pdfexport/pdfexport.cxx | 84 +++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt (limited to 'vcl/qa') diff --git a/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt new file mode 100644 index 000000000000..6ca6ad196546 Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt differ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index 057773c3ddaf..4b25202bb2d4 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -3535,6 +3535,90 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testBitmapScaledown) } } // end anonymous namespace 
+CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139627) +{ + aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); + saveAsPDF(u"justified-arabic-kashida.odt"); + std::unique_ptr pPdfDocument = parseExport(); + CPPUNIT_ASSERT(pPdfDocument); + + // The document has one page. + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + std::unique_ptr pPdfPage = pPdfDocument->openPage(/*nIndex=*/0); + CPPUNIT_ASSERT(pPdfPage); + + // 7 or 8 objects, 4 text, others are path + int nPageObjectCount = pPdfPage->getObjectCount(); + CPPUNIT_ASSERT_GREATEREQUAL(7, nPageObjectCount); + + // 4 text objects, "رم" (reh+mim), then "ِ" (kasreh), tatweel, and "ج" (jeh) + OUString sText[4]; + + /* With "Noto Sans Arabic" font, these are the X ranges on Linux: + 0: ( 61.75 - 218.35) + 1: (479.70 - 520.02) + 2: (209.40 - 457.08) + 3: (447.80 - 546.62) + */ + basegfx::B2DRectangle aRect[4]; + + std::unique_ptr pTextPage = pPdfPage->getTextPage(); + std::unique_ptr pPageObject; + + int nTextObjectCount = 0; + for (int i = 0; i < nPageObjectCount; ++i) + { + pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + sText[nTextObjectCount] = pPageObject->getText(pTextPage); + aRect[nTextObjectCount] = pPageObject->getBounds(); + ++nTextObjectCount; + } + } + CPPUNIT_ASSERT_EQUAL(4, nTextObjectCount); + + // Text: جِـرم (which means "mass" in Persian) + // Rendered as (left to right): "reh + mim" - "tahtweel" - "kasreh" - "jeh" + int rehmim = 0, kasreh = 1, tatweel = 2, jeh = 3; + + // Bad rendering can cause tatweel enumerated before kasreh + // This can be the end of journey, but let's accept this for now + if (sText[2].equals(u"ِ")) + { + tatweel = 1; + kasreh = 2; + } + + CPPUNIT_ASSERT_EQUAL(OUString(u"رم"), sText[rehmim].trim()); + CPPUNIT_ASSERT_EQUAL(OUString(u"ِ"), sText[kasreh].trim()); + CPPUNIT_ASSERT_EQUAL(OUString(u""), sText[tatweel].trim()); + 
CPPUNIT_ASSERT_EQUAL(OUString(u"ج"), sText[jeh].trim()); + + // "Kasreh" should be within "jeh" character + CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[kasreh].getMinX()); + CPPUNIT_ASSERT_LESS(aRect[jeh].getMaxX(), aRect[kasreh].getMaxX()); + + // "Tatweel" should cover "jeh" and "reh"+"mim" to avoid gap + // Checking right gap + CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[tatweel].getMaxX()); + // Checking left gap + // Kashida fails to reach to rehmim before the series of patches starting + // with 3901e029bd39575f700e69a73818565d62226a23. The visible sypotom is + // a gap in the left of Kashida. + // CPPUNIT_ASSERT_LESS(aRect[rehmim].getMaxX(), aRect[tatweel].getMinX()); + + // Overlappings of Kashida and surrounding characters is ~9% of the width + // of the "jeh" character, while using "Noto Arabic Sans" font in this + // specific example. + // We set the hard limit of 10% here. + CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[jeh].getMinX() - aRect[tatweel].getMaxX()) + / aRect[jeh].getWidth()); + CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[rehmim].getMaxX() - aRect[tatweel].getMinX()) + / aRect[jeh].getWidth()); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit