summary | refs | log | tree | commit | diff
path: root/sw/source
diff options
context:
space:
mode:
author: Jonathan Clark <jonathan@libreoffice.org> 2024-09-23 15:26:45 -0600
committer: Jonathan Clark <jonathan@libreoffice.org> 2024-09-25 01:25:07 +0200
commit: fe4687ed174c54f2eb25f8088bf3fb6cb4858175 (patch)
tree: fe64da5a021f47d573da539d2df414ce6035e646 /sw/source
parent: 2fc1034de4fd23d810593533b70ff674b0ccd706 (diff)
tdf#163105 Consolidated duplicated kashida justification code
The kashida candidate position selection logic was copied-and-pasted from Writer into Edit Engine. This change consolidates the shared code into a library. This change also adds some minimal characteristic tests, which previously did not exist.

Change-Id: I2bfbfa79858347803474b754566436f3e74d1a54
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173883
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
Tested-by: Jenkins
Diffstat (limited to 'sw/source')
-rw-r--r-- sw/source/core/text/porlay.cxx 269
1 files changed, 8 insertions, 261 deletions
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 923d5286c458..8574f6d31d12 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -79,124 +79,12 @@
#include <unicode/ubidi.h>
#include <i18nutil/scripttypedetector.hxx>
#include <i18nutil/unicode.hxx>
+#include <i18nutil/kashida.hxx>
#include <unotxdoc.hxx>
using namespace ::com::sun::star;
using namespace i18n::ScriptType;
-/*
- https://www.khtt.net/en/page/1821/the-big-kashida-secret
-
- the rules of priorities that govern the addition of kashidas in Arabic text
- made ... for ... Explorer 5.5 browser.
-
- The kashida justification is based on a connection priority scheme that
- decides where kashidas are put automatically.
-
- This is how the software decides on kashida-inserting priorities:
- 1. First it looks for characters with the highest priority in each word,
- which means kashida-extensions will only been used in one position in each
- word. Not more.
- 2. The kashida will be connected to the character with the highest priority.
- 3. If kashida connection opportunities are found with an equal level of
- priority in one word, the kashida will be placed towards the end of the
- word.
-
- The priority list of characters and the positioning is as follows:
- 1. after a kashida that is manually placed in the text by the user,
- 2. after a Seen or Sad (initial and medial form),
- 3. before the final form of Taa Marbutah, Haa, Dal,
- 4. before the final form of Alef, Tah Lam, Kaf and Gaf,
- 5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah,
- 6. before the final form of Waw, Ain, Qaf and Fa,
- 7. before the final form of other characters that can be connected.
-*/
-
-#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g )
-#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
-#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
-#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
-#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
-#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
-#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
-#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
-#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
-#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
-#define isRehChar(c) IS_JOINING_GROUP((c), REH)
-#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
-#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
-#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
-#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
-
-// Beh and characters that behave like Beh in medial form.
-static bool isBehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_BEH:
- case U_JG_NOON:
- case U_JG_AFRICAN_NOON:
- case U_JG_NYA:
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_BURUSHASKI_YEH_BARREE:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-// Yeh and characters that behave like Yeh in final form.
-static bool isYehChar(sal_Unicode cCh)
-{
- bool bRet = false;
- switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
- {
- case U_JG_YEH:
- case U_JG_FARSI_YEH:
- case U_JG_YEH_BARREE:
- case U_JG_BURUSHASKI_YEH_BARREE:
- case U_JG_YEH_WITH_TAIL:
- bRet = true;
- break;
- default:
- bRet = false;
- break;
- }
-
- return bRet;
-}
-
-static bool isTransparentChar ( sal_Unicode cCh )
-{
- return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT;
-}
-
-// Checks if cCh + cNectCh builds a ligature (used for Kashidas)
-static bool lcl_IsLigature( sal_Unicode cCh, sal_Unicode cNextCh )
-{
- // Lam + Alef
- return ( isLamChar ( cCh ) && isAlefChar ( cNextCh ));
-}
-
-// Checks if cCh is connectable to cPrevCh (used for Kashidas)
-static bool lcl_ConnectToPrev( sal_Unicode cCh, sal_Unicode cPrevCh )
-{
- const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
- bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
-
- // check for ligatures cPrevChar + cChar
- if( bRet )
- bRet = !lcl_IsLigature( cPrevCh, cCh );
-
- return bRet;
-}
-
static bool lcl_HasStrongLTR ( std::u16string_view rText, sal_Int32 nStart, sal_Int32 nEnd )
{
for( sal_Int32 nCharIdx = nStart; nCharIdx < nEnd; ++nCharIdx )
@@ -1618,157 +1506,16 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
while ( aScanner.NextWord() )
{
const OUString& rWord = aScanner.GetWord();
+ auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord);
- sal_Int32 nIdx = 0, nPrevIdx = 0;
- sal_Int32 nKashidaPos = -1;
- sal_Unicode cCh, cPrevCh = 0;
-
- int nPriorityLevel = 7; // 0..6 = level found
- // 7 not found
-
- sal_Int32 nWordLen = rWord.getLength();
-
- // ignore trailing vowel chars
- while( nWordLen && isTransparentChar( rWord[ nWordLen - 1 ] ))
- --nWordLen;
-
- while (nIdx < nWordLen)
+ if (stKashidaPos.has_value())
{
- cCh = rWord[ nIdx ];
-
- // 1. Priority:
- // after user inserted kashida
- if ( 0x640 == cCh )
- {
- nKashidaPos = aScanner.GetBegin() + nIdx;
- nPriorityLevel = 0;
- }
-
- // 2. Priority:
- // after a Seen or Sad
- if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
- {
- if( isSeenOrSadChar( cCh )
- && (rWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion
- {
- nKashidaPos = aScanner.GetBegin() + nIdx;
- nPriorityLevel = 1;
- }
- }
-
- // 3. Priority:
- // before final form of Teh Marbuta, Heh, Dal
- if ( nPriorityLevel >= 2 && nIdx > 0 )
- {
- if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining)
- isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word
- ( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word
- {
-
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 2;
- }
- }
- }
-
- // 4. Priority:
- // before final form of Alef, Tah, Lam, Kaf or Gaf
- if ( nPriorityLevel >= 3 && nIdx > 0 )
- {
- if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word
- (( isLamChar ( cCh ) || // Lam,
- isTahChar ( cCh ) || // Tah,
- isKafChar ( cCh ) || // Kaf (all dual joining)
- isGafChar ( cCh ) )
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 3;
- }
- }
- }
-
- // 5. Priority:
- // before medial Beh-like
- if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 )
- {
- if ( isBehChar ( cCh ) )
- {
- // check if next character is Reh or Yeh-like
- sal_Unicode cNextCh = rWord[ nIdx + 1 ];
- if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh ))
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 4;
- }
- }
- }
+ // Only populate kashida positions for the invalidated tail
+ TextFrameIndex nNewKashidaPos{aScanner.GetBegin() + stKashidaPos->nIndex};
+ if(nNewKashidaPos >= nLastKashida) {
+ m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos);
+ nCntKash++;
}
-
- // 6. Priority:
- // before the final form of Waw, Ain, Qaf and Feh
- if ( nPriorityLevel >= 5 && nIdx > 0 )
- {
- if ( isWawChar ( cCh ) || // Wav (right joining)
- // final form may appear in the middle of word
- (( isAinChar ( cCh ) || // Ain (dual joining)
- isQafChar ( cCh ) || // Qaf (dual joining)
- isFehChar ( cCh ) ) // Feh (dual joining)
- && nIdx == nWordLen - 1)) // only at end of word
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 5;
- }
- }
- }
-
- // other connecting possibilities
- if ( nPriorityLevel >= 6 && nIdx > 0 )
- {
- // Reh, Zain
- if ( isRehChar ( cCh ) )
- {
- SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
- // check if character is connectable to previous character,
- if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
- {
- nKashidaPos = aScanner.GetBegin() + nPrevIdx;
- nPriorityLevel = 6;
- }
- }
- }
-
- // Do not consider vowel marks when checking if a character
- // can be connected to previous character.
- if ( !isTransparentChar ( cCh) )
- {
- cPrevCh = cCh;
- nPrevIdx = nIdx;
- }
-
- ++nIdx;
- } // end of current word
-
- if ( -1 != nKashidaPos )
- {
- m_Kashida.insert(m_Kashida.begin() + nCntKash, TextFrameIndex(nKashidaPos));
- nCntKash++;
}
} // end of kashida search
}