summaryrefslogtreecommitdiff
path: root/vcl/qa
diff options
context:
space:
mode:
authorHossein <hossein@libreoffice.org>2022-08-31 13:13:14 +0200
committerHossein <hossein@libreoffice.org>2022-08-31 21:17:19 +0200
commite8c0a88d640b32129eae6c9042fd908c2892c7e1 (patch)
treedc55d05f22e804697c37d228a29e6e176a8a5b60 /vcl/qa
parentead23d945fb7bc794996c82287282bc385296ada (diff)
tdf#139627 Test justified Arabic/Persian text to avoid gaps/big overlaps
With the patches 3901e02..62ff105 from Khaled, many problems with justified Arabic/Persian text that were related to Kashida are fixed.

Here, we add a test for tdf#139627 which converts the example file to the PDF format, then checks the width and the position of the characters in the output to make sure that:

* The characters are present in the PDF file in the correct order
* The characters are joined together
* The diacritic mark is positioned correctly
* The overlap of the tatweel character is not more than 10% of the first character

The sample odt file uses the "Noto Sans Arabic" font, which is available via LibreOffice on all platforms.

One may run the test with:

make CPPUNIT_TEST_NAME=testTdf139627 -sr CppunitTest_vcl_pdfexport

Change-Id: I7a826a1b43ee842978decb0cf9a5e2a3b7219982
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138328
Tested-by: Jenkins
Reviewed-by: Hossein <hossein@libreoffice.org>
Diffstat (limited to 'vcl/qa')
-rw-r--r--vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odtbin0 -> 12875 bytes
-rw-r--r--vcl/qa/cppunit/pdfexport/pdfexport.cxx84
2 files changed, 84 insertions, 0 deletions
diff --git a/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
new file mode 100644
index 000000000000..6ca6ad196546
--- /dev/null
+++ b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
Binary files differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 057773c3ddaf..4b25202bb2d4 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -3535,6 +3535,90 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testBitmapScaledown)
}
} // end anonymous namespace
+// Regression test for tdf#139627: exports justified-arabic-kashida.odt to PDF
+// and inspects the text and the horizontal bounds of the text objects on the
+// single page, to make sure that the kashida (tatweel) leaves no gap on its
+// right side and that its overlap with the neighbouring glyphs stays below
+// 10% of the width of the "jeh" glyph.
+CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139627)
+{
+    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+    saveAsPDF(u"justified-arabic-kashida.odt");
+    std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = parseExport();
+    CPPUNIT_ASSERT(pPdfDocument);
+
+    // The document has one page.
+    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
+    std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(/*nIndex=*/0);
+    CPPUNIT_ASSERT(pPdfPage);
+
+    // 7 or 8 objects, 4 text, others are path
+    const int nPageObjectCount = pPdfPage->getObjectCount();
+    CPPUNIT_ASSERT_GREATEREQUAL(7, nPageObjectCount);
+
+    // 4 text objects, "رم" (reh+mim), then "ِ" (kasreh), tatweel, and "ج" (jeh)
+    constexpr int nExpectedTextObjects = 4;
+    OUString sText[nExpectedTextObjects];
+
+    /* With "Noto Sans Arabic" font, these are the X ranges on Linux:
+        0: ( 61.75 - 218.35)
+        1: (479.70 - 520.02)
+        2: (209.40 - 457.08)
+        3: (447.80 - 546.62)
+    */
+    basegfx::B2DRectangle aRect[nExpectedTextObjects];
+
+    std::unique_ptr<vcl::pdf::PDFiumTextPage> pTextPage = pPdfPage->getTextPage();
+
+    int nTextObjectCount = 0;
+    for (int i = 0; i < nPageObjectCount; ++i)
+    {
+        std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject = pPdfPage->getObject(i);
+        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
+        if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
+        {
+            // Fail cleanly instead of writing past the end of sText/aRect when
+            // the page unexpectedly contains more text objects than expected.
+            CPPUNIT_ASSERT_LESS(nExpectedTextObjects, nTextObjectCount);
+            sText[nTextObjectCount] = pPageObject->getText(pTextPage);
+            aRect[nTextObjectCount] = pPageObject->getBounds();
+            ++nTextObjectCount;
+        }
+    }
+    CPPUNIT_ASSERT_EQUAL(nExpectedTextObjects, nTextObjectCount);
+
+    // Text: جِـرم (which means "mass" in Persian)
+    // Rendered as (left to right): "reh + mim" - "tatweel" - "kasreh" - "jeh"
+    const int rehmim = 0;
+    const int jeh = 3;
+    int kasreh = 1;
+    int tatweel = 2;
+
+    // Bad rendering can cause tatweel enumerated before kasreh
+    // This can be the end of journey, but let's accept this for now
+    if (sText[2].equals(u"ِ"))
+    {
+        tatweel = 1;
+        kasreh = 2;
+    }
+
+    CPPUNIT_ASSERT_EQUAL(OUString(u"رم"), sText[rehmim].trim());
+    CPPUNIT_ASSERT_EQUAL(OUString(u"ِ"), sText[kasreh].trim());
+    CPPUNIT_ASSERT_EQUAL(OUString(u""), sText[tatweel].trim());
+    CPPUNIT_ASSERT_EQUAL(OUString(u"ج"), sText[jeh].trim());
+
+    // "Kasreh" should be within "jeh" character
+    CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[kasreh].getMinX());
+    CPPUNIT_ASSERT_LESS(aRect[jeh].getMaxX(), aRect[kasreh].getMaxX());
+
+    // "Tatweel" should cover "jeh" and "reh"+"mim" to avoid gap
+    // Checking right gap
+    CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[tatweel].getMaxX());
+    // Checking left gap
+    // Kashida fails to reach to rehmim before the series of patches starting
+    // with 3901e029bd39575f700e69a73818565d62226a23. The visible symptom is
+    // a gap in the left of Kashida.
+    // CPPUNIT_ASSERT_LESS(aRect[rehmim].getMaxX(), aRect[tatweel].getMinX());
+
+    // Overlappings of Kashida and surrounding characters is ~9% of the width
+    // of the "jeh" character, while using "Noto Sans Arabic" font in this
+    // specific example.
+    // We set the hard limit of 10% here.
+    CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[jeh].getMinX() - aRect[tatweel].getMaxX())
+                                 / aRect[jeh].getWidth());
+    CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[rehmim].getMaxX() - aRect[tatweel].getMinX())
+                                 / aRect[jeh].getWidth());
+}
+
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */