summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--editeng/source/editeng/impedit3.cxx239
-rw-r--r--i18nutil/CppunitTest_i18nutil_kashida.mk24
-rw-r--r--i18nutil/Library_i18nutil.mk1
-rw-r--r--i18nutil/Module_i18nutil.mk4
-rw-r--r--i18nutil/qa/cppunit/test_kashida.cxx58
-rw-r--r--i18nutil/source/utility/kashida.cxx286
-rw-r--r--include/i18nutil/kashida.hxx24
-rw-r--r--sw/source/core/text/porlay.cxx269
8 files changed, 409 insertions, 496 deletions
diff --git a/editeng/source/editeng/impedit3.cxx b/editeng/source/editeng/impedit3.cxx
index 6f7d1e7ac928..b961393bb24b 100644
--- a/editeng/source/editeng/impedit3.cxx
+++ b/editeng/source/editeng/impedit3.cxx
@@ -68,6 +68,7 @@
#include <com/sun/star/i18n/InputSequenceChecker.hpp>
#include <vcl/pdfextoutdevdata.hxx>
#include <i18nlangtag/mslangid.hxx>
+#include <i18nutil/kashida.hxx>
#include <comphelper/processfactory.hxx>
#include <comphelper/lok.hxx>
@@ -232,93 +233,6 @@ static void lcl_DrawRedLines( OutputDevice& rOutDev,
}
}
-// For Kashidas from sw/source/core/text/porlay.cxx
-
-#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g )
-#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
-#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
-#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
-#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
-#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
-#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
-#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
-#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
-#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
-#define isRehChar(c) IS_JOINING_GROUP((c), REH)
-#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
-#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
-#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
-#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
-
-// Beh and characters that behave like Beh in medial form.
-static bool isBehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_BEH:
- case U_JG_NOON:
- case U_JG_AFRICAN_NOON:
- case U_JG_NYA:
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_BURUSHASKI_YEH_BARREE:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-// Yeh and characters that behave like Yeh in final form.
-static bool isYehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_YEH_BARREE:
- case U_JG_BURUSHASKI_YEH_BARREE:
- case U_JG_YEH_WITH_TAIL:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-static bool isTransparentChar ( sal_Unicode cCh )
-{
- return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT;
-}
-
-static bool lcl_IsLigature( sal_Unicode cCh, sal_Unicode cNextCh )
-{
- // Lam + Alef
- return ( isLamChar ( cCh ) && isAlefChar ( cNextCh ));
-}
-
-static bool lcl_ConnectToPrev( sal_Unicode cCh, sal_Unicode cPrevCh )
-{
- const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
- bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
-
- // check for ligatures cPrevChar + cChar
- if ( bRet )
- bRet = ! lcl_IsLigature( cPrevCh, cCh );
-
- return bRet;
-}
-
-
-
void ImpEditEngine::UpdateViews( EditView* pCurView )
{
if ( !IsUpdateLayout() || IsFormatting() || maInvalidRect.IsEmpty() )
@@ -2317,9 +2231,6 @@ void ImpEditEngine::ImpAdjustBlocks(ParaPortion& rParaPortion, EditLine& rLine,
{
EditPaM aPaM( pNode, nChar+1 );
sal_uInt16 nScript = GetI18NScriptType(aPaM);
- // Arabic script is handled above, but if no Kashida positions are found, use blanks.
- if (nKashidas)
- continue;
if ( pNode->GetChar(nChar) == ' ' )
{
@@ -2460,154 +2371,12 @@ void ImpEditEngine::ImpFindKashidas(ContentNode* pNode, sal_Int32 nStart, sal_In
// restore selection for proper iteration at the end of the function
aWordSel.Max().SetIndex( nSavPos );
- sal_Int32 nIdx = 0, nPrevIdx = 0;
- sal_Int32 nKashidaPos = -1;
- sal_Unicode cCh, cPrevCh = 0;
-
- int nPriorityLevel = 7; // 0..6 = level found
- // 7 not found
-
- sal_Int32 nWordLen = aWord.getLength();
+ auto stKashidaPos = i18nutil::GetWordKashidaPosition(aWord);
- // ignore trailing vowel chars
- while( nWordLen && isTransparentChar( aWord[ nWordLen - 1 ] ))
- --nWordLen;
-
- while ( nIdx < nWordLen )
+ if (stKashidaPos.has_value())
{
- cCh = aWord[ nIdx ];
-
- // 1. Priority:
- // after user inserted kashida
- if ( 0x640 == cCh )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nIdx;
- nPriorityLevel = 0;
- }
-
- // 2. Priority:
- // after a Seen or Sad
- if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
- {
- if( isSeenOrSadChar( cCh )
- && (aWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nIdx;
- nPriorityLevel = 1;
- }
- }
-
- // 3. Priority:
- // before final form of Teh Marbuta, Heh, Dal
- if ( nPriorityLevel >= 2 && nIdx > 0 )
- {
- if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining)
- isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word
- ( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word
- {
-
- SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
- nPriorityLevel = 2;
- }
- }
- }
-
- // 4. Priority:
- // before final form of Alef, Tah, Lam, Kaf or Gaf
- if ( nPriorityLevel >= 3 && nIdx > 0 )
- {
- if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word
- (( isLamChar ( cCh ) || // Lam,
- isTahChar ( cCh ) || // Tah,
- isKafChar ( cCh ) || // Kaf (all dual joining)
- isGafChar ( cCh ) )
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
- nPriorityLevel = 3;
- }
- }
- }
-
- // 5. Priority:
- // before medial Beh-like
- if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 )
- {
- if ( isBehChar ( cCh ) )
- {
- // check if next character is Reh or Yeh-like
- sal_Unicode cNextCh = aWord[ nIdx + 1 ];
- if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh ))
- {
- SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
- nPriorityLevel = 4;
- }
- }
- }
- }
+ sal_Int32 nKashidaPos = aWordSel.Min().GetIndex() + stKashidaPos->nIndex;
- // 6. Priority:
- // before the final form of Waw, Ain, Qaf and Feh
- if ( nPriorityLevel >= 5 && nIdx > 0 )
- {
- if ( isWawChar ( cCh ) || // Wav (right joining)
- // final form may appear in the middle of word
- (( isAinChar ( cCh ) || // Ain (dual joining)
- isQafChar ( cCh ) || // Qaf (dual joining)
- isFehChar ( cCh ) ) // Feh (dual joining)
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
- nPriorityLevel = 5;
- }
- }
- }
-
- // other connecting possibilities
- if ( nPriorityLevel >= 6 && nIdx > 0 )
- {
- // Reh, Zain
- if ( isRehChar ( cCh ) )
- {
- SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
- nPriorityLevel = 6;
- }
- }
- }
-
- // Do not consider vowel marks when checking if a character
- // can be connected to previous character.
- if ( !isTransparentChar ( cCh) )
- {
- cPrevCh = cCh;
- nPrevIdx = nIdx;
- }
-
- ++nIdx;
- } // end of current word
-
- if (nKashidaPos >= 0)
- {
SeekCursor(pNode, nKashidaPos + 1, aTmpFont);
aTmpFont.SetPhysFont(*GetRefDevice());
diff --git a/i18nutil/CppunitTest_i18nutil_kashida.mk b/i18nutil/CppunitTest_i18nutil_kashida.mk
new file mode 100644
index 000000000000..4920f0a79a54
--- /dev/null
+++ b/i18nutil/CppunitTest_i18nutil_kashida.mk
@@ -0,0 +1,24 @@
+# (stray template heading "For makefiles:" neutralised; it is not part of the build description)
+
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t; fill-column: 100 -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,i18nutil_kashida))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,i18nutil_kashida,\
+ i18nutil/qa/cppunit/test_kashida \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,i18nutil_kashida,\
+ i18nutil \
+ sal \
+ test \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/i18nutil/Library_i18nutil.mk b/i18nutil/Library_i18nutil.mk
index fd6518a4173f..264c9c9f969e 100644
--- a/i18nutil/Library_i18nutil.mk
+++ b/i18nutil/Library_i18nutil.mk
@@ -44,6 +44,7 @@ $(eval $(call gb_Library_use_libraries,i18nutil,\
$(eval $(call gb_Library_add_exception_objects,i18nutil,\
i18nutil/source/utility/casefolding \
+ i18nutil/source/utility/kashida \
i18nutil/source/utility/oneToOneMapping \
i18nutil/source/utility/paper \
i18nutil/source/utility/scripttypedetector \
diff --git a/i18nutil/Module_i18nutil.mk b/i18nutil/Module_i18nutil.mk
index 9b543dfdc359..bb8ef7056c5c 100644
--- a/i18nutil/Module_i18nutil.mk
+++ b/i18nutil/Module_i18nutil.mk
@@ -12,4 +12,8 @@ $(eval $(call gb_Module_add_targets,i18nutil,\
Library_i18nutil \
))
+$(eval $(call gb_Module_add_check_targets,i18nutil,\
+ CppunitTest_i18nutil_kashida \
+))
+
# vim: set noet sw=4:
diff --git a/i18nutil/qa/cppunit/test_kashida.cxx b/i18nutil/qa/cppunit/test_kashida.cxx
new file mode 100644
index 000000000000..99ce2a1a969c
--- /dev/null
+++ b/i18nutil/qa/cppunit/test_kashida.cxx
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/types.h>
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+#include <i18nutil/kashida.hxx>
+
+using namespace i18nutil;
+
+namespace
+{
+// CppUnit fixture holding the characterization test for
+// i18nutil::GetWordKashidaPosition().
+class KashidaTest : public CppUnit::TestFixture
+{
+public:
+ // Pins the result of GetWordKashidaPosition() for a fixed list of words.
+ void testCharacteristic();
+
+ // Registers testCharacteristic as the only test of this suite.
+ CPPUNIT_TEST_SUITE(KashidaTest);
+ CPPUNIT_TEST(testCharacteristic);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+// Each assertion records what GetWordKashidaPosition() returns for one sample
+// word: either no candidate at all (has_value() is false) or the nIndex of the
+// selected candidate, counted in UTF-16 code units from the start of the word.
+void KashidaTest::testCharacteristic()
+{
+ // Characteristic tests for kashida candidate selection.
+ // Uses words from sample documents.
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"متن"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"با"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"کشیده"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"برای"_ustr).value().nIndex);
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"چینش"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"بهتر"_ustr).value().nIndex);
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ببببب"_ustr).has_value());
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپپپپ"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تطویل"_ustr).value().nIndex);
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپ"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تطوی"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تحویل"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تشویل"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تمثیل"_ustr).value().nIndex);
+}
+
+// Makes the suite known to the CppUnit test registry.
+CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/i18nutil/source/utility/kashida.cxx b/i18nutil/source/utility/kashida.cxx
new file mode 100644
index 000000000000..dbf2b818abf1
--- /dev/null
+++ b/i18nutil/source/utility/kashida.cxx
@@ -0,0 +1,286 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <i18nutil/kashida.hxx>
+#include <i18nutil/unicode.hxx>
+#include <sal/log.hxx>
+
+namespace
+{
+/*
+ https://www.khtt.net/en/page/1821/the-big-kashida-secret
+
+ the rules of priorities that govern the addition of kashidas in Arabic text
+ made ... for ... Explorer 5.5 browser.
+
+ The kashida justification is based on a connection priority scheme that
+ decides where kashidas are put automatically.
+
+ This is how the software decides on kashida-inserting priorities:
+ 1. First it looks for characters with the highest priority in each word,
+ which means kashida-extensions will only be used in one position in each
+ word. Not more.
+ 2. The kashida will be connected to the character with the highest priority.
+ 3. If kashida connection opportunities are found with an equal level of
+ priority in one word, the kashida will be placed towards the end of the
+ word.
+
+ The priority list of characters and the positioning is as follows:
+ 1. after a kashida that is manually placed in the text by the user,
+ 2. after a Seen or Sad (initial and medial form),
+ 3. before the final form of Taa Marbutah, Haa, Dal,
+ 4. before the final form of Alef, Tah, Lam, Kaf and Gaf,
+ 5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah,
+ 6. before the final form of Waw, Ain, Qaf and Fa,
+ 7. before the final form of other characters that can be connected.
+*/
+
+// Joining-group predicates.
+//
+// These are real functions rather than function-like macros: a macro is not
+// confined by the surrounding anonymous namespace and performs no type checking
+// on its argument. The names and call syntax are the same as before, so the
+// callers further down are unaffected.
+
+// True if ICU reports eGroup as the UCHAR_JOINING_GROUP property value of cCh.
+bool isJoiningGroup(sal_Unicode cCh, UJoiningGroup eGroup)
+{
+    // Cast keeps the comparison signed-vs-signed regardless of the enum's underlying type.
+    return u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP) == static_cast<int32_t>(eGroup);
+}
+
+bool isAinChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_AIN); }
+bool isAlefChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_ALEF); }
+bool isDalChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_DAL); }
+bool isFehChar(sal_Unicode cCh)
+{
+    return isJoiningGroup(cCh, U_JG_FEH) || isJoiningGroup(cCh, U_JG_AFRICAN_FEH);
+}
+bool isGafChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_GAF); }
+bool isHehChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_HEH); }
+bool isKafChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_KAF); }
+bool isLamChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_LAM); }
+bool isQafChar(sal_Unicode cCh)
+{
+    return isJoiningGroup(cCh, U_JG_QAF) || isJoiningGroup(cCh, U_JG_AFRICAN_QAF);
+}
+bool isRehChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_REH); }
+bool isTahChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_TAH); }
+bool isTehMarbutaChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_TEH_MARBUTA); }
+bool isWawChar(sal_Unicode cCh) { return isJoiningGroup(cCh, U_JG_WAW); }
+bool isSeenOrSadChar(sal_Unicode cCh)
+{
+    return isJoiningGroup(cCh, U_JG_SAD) || isJoiningGroup(cCh, U_JG_SEEN);
+}
+
+// True for Beh and for the joining groups that take a Beh-like shape in medial form.
+bool isBehChar(sal_Unicode cCh)
+{
+    switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
+    {
+        case U_JG_BEH:
+        case U_JG_NOON:
+        case U_JG_AFRICAN_NOON:
+        case U_JG_NYA:
+        case U_JG_YEH:
+        case U_JG_FARSI_YEH:
+        case U_JG_BURUSHASKI_YEH_BARREE:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// True for Yeh and for the joining groups that take a Yeh-like shape in final form.
+bool isYehChar(sal_Unicode cCh)
+{
+    switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
+    {
+        case U_JG_YEH:
+        case U_JG_FARSI_YEH:
+        case U_JG_YEH_BARREE:
+        case U_JG_BURUSHASKI_YEH_BARREE:
+        case U_JG_YEH_WITH_TAIL:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// True if the joining type of cCh is "transparent"; the caller treats such
+// characters as vowel marks that are skipped when tracking the previous character.
+bool isTransparentChar(sal_Unicode cCh)
+{
+    const int32_t nJoiningType = u_getIntPropertyValue(cCh, UCHAR_JOINING_TYPE);
+    return nJoiningType == U_JT_TRANSPARENT;
+}
+
+// Checks whether cCh followed by cNextCh builds a ligature (used for Kashidas).
+// The only pair recognised is Lam + Alef.
+bool isLigature(sal_Unicode cCh, sal_Unicode cNextCh)
+{
+    if (!isLamChar(cCh))
+        return false;
+
+    return isAlefChar(cNextCh);
+}
+
+// Checks whether cCh can be connected to cPrevCh (used for Kashidas).
+// The connection is rejected when cPrevCh is right-joining or non-joining,
+// and also when cPrevCh + cCh form a ligature.
+bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
+{
+    const int32_t nPrevJoiningType = u_getIntPropertyValue(cPrevCh, UCHAR_JOINING_TYPE);
+    if (nPrevJoiningType == U_JT_RIGHT_JOINING || nPrevJoiningType == U_JT_NON_JOINING)
+        return false;
+
+    // a ligature of cPrevCh + cCh leaves no room for a kashida
+    return !isLigature(cPrevCh, cCh);
+}
+}
+
+// Selects at most one kashida candidate for rWord.
+//
+// The word is scanned from start to end, with trailing transparent characters
+// excluded. Every character is tested against the priority rules below; a
+// candidate replaces the current one when its level is equal or better, so
+// among equal-priority candidates the one nearest the end of the word wins.
+//
+// The returned nIndex is relative to the start of rWord. For a user-inserted
+// kashida or a Seen/Sad it is the index of that character itself; for all other
+// rules it is the index of the preceding non-transparent character.
+// Returns std::nullopt when no rule matched.
+std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const OUString& rWord)
+{
+    // 0..6 = level of the best candidate so far, 7 = nothing found yet
+    int nBestLevel = 7;
+    sal_Int32 nCandidate = -1;
+
+    // most recent non-transparent character and its index
+    sal_Unicode cBaseCh = 0;
+    sal_Int32 nBaseIdx = 0;
+
+    // ignore trailing vowel chars
+    sal_Int32 nLen = rWord.getLength();
+    while (nLen > 0 && isTransparentChar(rWord[nLen - 1]))
+        --nLen;
+
+    for (sal_Int32 nPos = 0; nPos < nLen; ++nPos)
+    {
+        const sal_Unicode cCur = rWord[nPos];
+        const bool bHasPrev = nPos > 0;
+        const bool bIsLast = (nPos == nLen - 1);
+
+        // Shared tail of all "before the final form of ..." rules: if cCur is
+        // connectable to the previous character, the candidate becomes the
+        // position of that previous character at the given level.
+        auto takeSlotBeforeCurrent = [&](int nLevel) {
+            SAL_WARN_IF(0 == cBaseCh, "i18n", "No previous character");
+            if (CanConnectToPrev(cCur, cBaseCh))
+            {
+                nCandidate = nBaseIdx;
+                nBestLevel = nLevel;
+            }
+        };
+
+        // 1. Priority:
+        // after user inserted kashida
+        if (cCur == 0x640)
+        {
+            nCandidate = nPos;
+            nBestLevel = 0;
+        }
+
+        // 2. Priority:
+        // after a Seen or Sad that is not the last character
+        if (nBestLevel >= 1 && !bIsLast && isSeenOrSadChar(cCur)
+            && rWord[nPos + 1] != 0x200C) // #i98410#: prevent ZWNJ expansion
+        {
+            nCandidate = nPos;
+            nBestLevel = 1;
+        }
+
+        // 3. Priority:
+        // before final form of Teh Marbuta, Heh, Dal
+        //   Teh Marbuta, Dal (right joining): final form may appear in the middle of word
+        //   Heh (dual joining): only at end of word
+        if (nBestLevel >= 2 && bHasPrev
+            && (isTehMarbutaChar(cCur) || isDalChar(cCur) || (isHehChar(cCur) && bIsLast)))
+        {
+            takeSlotBeforeCurrent(2);
+        }
+
+        // 4. Priority:
+        // before final form of Alef, Tah, Lam, Kaf or Gaf
+        //   Alef (right joining): final form may appear in the middle of word
+        //   Lam, Tah, Kaf, Gaf: only at end of word
+        if (nBestLevel >= 3 && bHasPrev
+            && (isAlefChar(cCur)
+                || ((isLamChar(cCur) || isTahChar(cCur) || isKafChar(cCur) || isGafChar(cCur))
+                    && bIsLast)))
+        {
+            takeSlotBeforeCurrent(3);
+        }
+
+        // 5. Priority:
+        // before medial Beh-like that is followed by Reh or a Yeh-like
+        if (nBestLevel >= 4 && bHasPrev && !bIsLast && isBehChar(cCur))
+        {
+            const sal_Unicode cFollowing = rWord[nPos + 1];
+            if (isRehChar(cFollowing) || isYehChar(cFollowing))
+                takeSlotBeforeCurrent(4);
+        }
+
+        // 6. Priority:
+        // before the final form of Waw, Ain, Qaf and Feh
+        //   Waw (right joining): final form may appear in the middle of word
+        //   Ain, Qaf, Feh (all dual joining): only at end of word
+        if (nBestLevel >= 5 && bHasPrev
+            && (isWawChar(cCur)
+                || ((isAinChar(cCur) || isQafChar(cCur) || isFehChar(cCur)) && bIsLast)))
+        {
+            takeSlotBeforeCurrent(5);
+        }
+
+        // other connecting possibilities: Reh, Zain
+        if (nBestLevel >= 6 && bHasPrev && isRehChar(cCur))
+        {
+            takeSlotBeforeCurrent(6);
+        }
+
+        // Do not consider vowel marks when checking if a character
+        // can be connected to previous character.
+        if (!isTransparentChar(cCur))
+        {
+            cBaseCh = cCur;
+            nBaseIdx = nPos;
+        }
+    } // end of current word
+
+    if (nCandidate == -1)
+        return std::nullopt;
+
+    return KashidaPosition{ nCandidate };
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/include/i18nutil/kashida.hxx b/include/i18nutil/kashida.hxx
new file mode 100644
index 000000000000..54797143143c
--- /dev/null
+++ b/include/i18nutil/kashida.hxx
@@ -0,0 +1,24 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+// Include guard: without it a second inclusion of this header in one
+// translation unit redefines struct KashidaPosition.
+#pragma once
+
+#include <i18nutil/i18nutildllapi.h>
+#include <rtl/ustring.hxx>
+#include <optional>
+
+namespace i18nutil
+{
+/** Location of a kashida insertion candidate inside a single word. */
+struct KashidaPosition
+{
+    /// Index into the word that was passed to GetWordKashidaPosition(),
+    /// counted in UTF-16 code units from the start of that word.
+    sal_Int32 nIndex;
+};
+
+/** Selects the kashida insertion candidate for one word.
+
+    @param rWord
+    The word to examine.
+
+    @return
+    The selected candidate, or an empty optional if the word offers none.
+ */
+I18NUTIL_DLLPUBLIC std::optional<KashidaPosition> GetWordKashidaPosition(const OUString& rWord);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 923d5286c458..8574f6d31d12 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -79,124 +79,12 @@
#include <unicode/ubidi.h>
#include <i18nutil/scripttypedetector.hxx>
#include <i18nutil/unicode.hxx>
+#include <i18nutil/kashida.hxx>
#include <unotxdoc.hxx>
using namespace ::com::sun::star;
using namespace i18n::ScriptType;
-/*
- https://www.khtt.net/en/page/1821/the-big-kashida-secret
-
- the rules of priorities that govern the addition of kashidas in Arabic text
- made ... for ... Explorer 5.5 browser.
-
- The kashida justification is based on a connection priority scheme that
- decides where kashidas are put automatically.
-
- This is how the software decides on kashida-inserting priorities:
- 1. First it looks for characters with the highest priority in each word,
- which means kashida-extensions will only been used in one position in each
- word. Not more.
- 2. The kashida will be connected to the character with the highest priority.
- 3. If kashida connection opportunities are found with an equal level of
- priority in one word, the kashida will be placed towards the end of the
- word.
-
- The priority list of characters and the positioning is as follows:
- 1. after a kashida that is manually placed in the text by the user,
- 2. after a Seen or Sad (initial and medial form),
- 3. before the final form of Taa Marbutah, Haa, Dal,
- 4. before the final form of Alef, Tah Lam, Kaf and Gaf,
- 5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah,
- 6. before the final form of Waw, Ain, Qaf and Fa,
- 7. before the final form of other characters that can be connected.
-*/
-
-#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g )
-#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
-#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
-#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
-#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
-#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
-#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
-#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
-#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
-#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
-#define isRehChar(c) IS_JOINING_GROUP((c), REH)
-#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
-#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
-#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
-#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
-
-// Beh and characters that behave like Beh in medial form.
-static bool isBehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_BEH:
- case U_JG_NOON:
- case U_JG_AFRICAN_NOON:
- case U_JG_NYA:
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_BURUSHASKI_YEH_BARREE:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-// Yeh and characters that behave like Yeh in final form.
-static bool isYehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_YEH_BARREE:
- case U_JG_BURUSHASKI_YEH_BARREE:
- case U_JG_YEH_WITH_TAIL:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-static bool isTransparentChar ( sal_Unicode cCh )
-{
- return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT;
-}
-
-// Checks if cCh + cNectCh builds a ligature (used for Kashidas)
-static bool lcl_IsLigature( sal_Unicode cCh, sal_Unicode cNextCh )
-{
- // Lam + Alef
- return ( isLamChar ( cCh ) && isAlefChar ( cNextCh ));
-}
-
-// Checks if cCh is connectable to cPrevCh (used for Kashidas)
-static bool lcl_ConnectToPrev( sal_Unicode cCh, sal_Unicode cPrevCh )
-{
- const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
- bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
-
- // check for ligatures cPrevChar + cChar
- if( bRet )
- bRet = !lcl_IsLigature( cPrevCh, cCh );
-
- return bRet;
-}
-
static bool lcl_HasStrongLTR ( std::u16string_view rText, sal_Int32 nStart, sal_Int32 nEnd )
{
for( sal_Int32 nCharIdx = nStart; nCharIdx < nEnd; ++nCharIdx )
@@ -1618,157 +1506,16 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
while ( aScanner.NextWord() )
{
const OUString& rWord = aScanner.GetWord();
+ auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord);
- sal_Int32 nIdx = 0, nPrevIdx = 0;
- sal_Int32 nKashidaPos = -1;
- sal_Unicode cCh, cPrevCh = 0;
-
- int nPriorityLevel = 7; // 0..6 = level found
- // 7 not found
-
- sal_Int32 nWordLen = rWord.getLength();
-
- // ignore trailing vowel chars
- while( nWordLen && isTransparentChar( rWord[ nWordLen - 1 ] ))
- --nWordLen;
-
- while (nIdx < nWordLen)
+ if (stKashidaPos.has_value())
{
- cCh = rWord[ nIdx ];
-
- // 1. Priority:
- // after user inserted kashida
- if ( 0x640 == cCh )
- {
- nKashidaPos = aScanner.GetBegin() + nIdx;
- nPriorityLevel = 0;
- }
-
- // 2. Priority:
- // after a Seen or Sad
- if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
- {
- if( isSeenOrSadChar( cCh )
- && (rWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion
- {
- nKashidaPos = aScanner.GetBegin() + nIdx;
- nPriorityLevel = 1;
- }
- }
-
- // 3. Priority:
- // before final form of Teh Marbuta, Heh, Dal
- if ( nPriorityLevel >= 2 && nIdx > 0 )
- {
- if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining)
- isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word
- ( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word
- {
-
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 2;
- }
- }
- }
-
- // 4. Priority:
- // before final form of Alef, Tah, Lam, Kaf or Gaf
- if ( nPriorityLevel >= 3 && nIdx > 0 )
- {
- if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word
- (( isLamChar ( cCh ) || // Lam,
- isTahChar ( cCh ) || // Tah,
- isKafChar ( cCh ) || // Kaf (all dual joining)
- isGafChar ( cCh ) )
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 3;
- }
- }
- }
-
- // 5. Priority:
- // before medial Beh-like
- if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 )
- {
- if ( isBehChar ( cCh ) )
- {
- // check if next character is Reh or Yeh-like
- sal_Unicode cNextCh = rWord[ nIdx + 1 ];
- if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh ))
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 4;
- }
- }
- }
+ // Only populate kashida positions for the invalidated tail
+ TextFrameIndex nNewKashidaPos{aScanner.GetBegin() + stKashidaPos->nIndex};
+ if(nNewKashidaPos >= nLastKashida) {
+ m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos);
+ nCntKash++;
}
-
- // 6. Priority:
- // before the final form of Waw, Ain, Qaf and Feh
- if ( nPriorityLevel >= 5 && nIdx > 0 )
- {
- if ( isWawChar ( cCh ) || // Wav (right joining)
- // final form may appear in the middle of word
- (( isAinChar ( cCh ) || // Ain (dual joining)
- isQafChar ( cCh ) || // Qaf (dual joining)
- isFehChar ( cCh ) ) // Feh (dual joining)
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 5;
- }
- }
- }
-
- // other connecting possibilities
- if ( nPriorityLevel >= 6 && nIdx > 0 )
- {
- // Reh, Zain
- if ( isRehChar ( cCh ) )
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 6;
- }
- }
- }
-
- // Do not consider vowel marks when checking if a character
- // can be connected to previous character.
- if ( !isTransparentChar ( cCh) )
- {
- cPrevCh = cCh;
- nPrevIdx = nIdx;
- }
-
- ++nIdx;
- } // end of current word
-
- if ( -1 != nKashidaPos )
- {
- m_Kashida.insert(m_Kashida.begin() + nCntKash, TextFrameIndex(nKashidaPos));
- nCntKash++;
}
} // end of kashida search
}