diff options
author | Jonathan Clark <jonathan@libreoffice.org> | 2024-09-23 15:26:45 -0600 |
---|---|---|
committer | Jonathan Clark <jonathan@libreoffice.org> | 2024-09-25 01:25:07 +0200 |
commit | fe4687ed174c54f2eb25f8088bf3fb6cb4858175 (patch) | |
tree | fe64da5a021f47d573da539d2df414ce6035e646 /sw/source | |
parent | 2fc1034de4fd23d810593533b70ff674b0ccd706 (diff) |
tdf#163105 Consolidated duplicated kashida justification code
The kashida candidate position selection logic was copied-and-pasted
from Writer into Edit Engine. This change consolidates the shared code
into a library. This change also adds some minimal characterization tests;
no tests for this logic existed previously.
Change-Id: I2bfbfa79858347803474b754566436f3e74d1a54
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173883
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
Tested-by: Jenkins
Diffstat (limited to 'sw/source')
-rw-r--r-- | sw/source/core/text/porlay.cxx | 269 |
1 file changed, 8 insertions, 261 deletions
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx index 923d5286c458..8574f6d31d12 100644 --- a/sw/source/core/text/porlay.cxx +++ b/sw/source/core/text/porlay.cxx @@ -79,124 +79,12 @@ #include <unicode/ubidi.h> #include <i18nutil/scripttypedetector.hxx> #include <i18nutil/unicode.hxx> +#include <i18nutil/kashida.hxx> #include <unotxdoc.hxx> using namespace ::com::sun::star; using namespace i18n::ScriptType; -/* - https://www.khtt.net/en/page/1821/the-big-kashida-secret - - the rules of priorities that govern the addition of kashidas in Arabic text - made ... for ... Explorer 5.5 browser. - - The kashida justification is based on a connection priority scheme that - decides where kashidas are put automatically. - - This is how the software decides on kashida-inserting priorities: - 1. First it looks for characters with the highest priority in each word, - which means kashida-extensions will only been used in one position in each - word. Not more. - 2. The kashida will be connected to the character with the highest priority. - 3. If kashida connection opportunities are found with an equal level of - priority in one word, the kashida will be placed towards the end of the - word. - - The priority list of characters and the positioning is as follows: - 1. after a kashida that is manually placed in the text by the user, - 2. after a Seen or Sad (initial and medial form), - 3. before the final form of Taa Marbutah, Haa, Dal, - 4. before the final form of Alef, Tah Lam, Kaf and Gaf, - 5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah, - 6. before the final form of Waw, Ain, Qaf and Fa, - 7. before the final form of other characters that can be connected. 
-*/ - -#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g ) -#define isAinChar(c) IS_JOINING_GROUP((c), AIN) -#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF) -#define isDalChar(c) IS_JOINING_GROUP((c), DAL) -#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH)) -#define isGafChar(c) IS_JOINING_GROUP((c), GAF) -#define isHehChar(c) IS_JOINING_GROUP((c), HEH) -#define isKafChar(c) IS_JOINING_GROUP((c), KAF) -#define isLamChar(c) IS_JOINING_GROUP((c), LAM) -#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF)) -#define isRehChar(c) IS_JOINING_GROUP((c), REH) -#define isTahChar(c) IS_JOINING_GROUP((c), TAH) -#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA) -#define isWawChar(c) IS_JOINING_GROUP((c), WAW) -#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN)) - -// Beh and characters that behave like Beh in medial form. -static bool isBehChar(sal_Unicode cCh) -{ - bool bRet = false; - switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP)) - { - case U_JG_BEH: - case U_JG_NOON: - case U_JG_AFRICAN_NOON: - case U_JG_NYA: - case U_JG_YEH: - case U_JG_FARSI_YEH: - case U_JG_BURUSHASKI_YEH_BARREE: - bRet = true; - break; - default: - bRet = false; - break; - } - - return bRet; -} - -// Yeh and characters that behave like Yeh in final form. 
-static bool isYehChar(sal_Unicode cCh) -{ - bool bRet = false; - switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP)) - { - case U_JG_YEH: - case U_JG_FARSI_YEH: - case U_JG_YEH_BARREE: - case U_JG_BURUSHASKI_YEH_BARREE: - case U_JG_YEH_WITH_TAIL: - bRet = true; - break; - default: - bRet = false; - break; - } - - return bRet; -} - -static bool isTransparentChar ( sal_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT; -} - -// Checks if cCh + cNectCh builds a ligature (used for Kashidas) -static bool lcl_IsLigature( sal_Unicode cCh, sal_Unicode cNextCh ) -{ - // Lam + Alef - return ( isLamChar ( cCh ) && isAlefChar ( cNextCh )); -} - -// Checks if cCh is connectable to cPrevCh (used for Kashidas) -static bool lcl_ConnectToPrev( sal_Unicode cCh, sal_Unicode cPrevCh ) -{ - const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE ); - bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING; - - // check for ligatures cPrevChar + cChar - if( bRet ) - bRet = !lcl_IsLigature( cPrevCh, cCh ); - - return bRet; -} - static bool lcl_HasStrongLTR ( std::u16string_view rText, sal_Int32 nStart, sal_Int32 nEnd ) { for( sal_Int32 nCharIdx = nStart; nCharIdx < nEnd; ++nCharIdx ) @@ -1618,157 +1506,16 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode, while ( aScanner.NextWord() ) { const OUString& rWord = aScanner.GetWord(); + auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord); - sal_Int32 nIdx = 0, nPrevIdx = 0; - sal_Int32 nKashidaPos = -1; - sal_Unicode cCh, cPrevCh = 0; - - int nPriorityLevel = 7; // 0..6 = level found - // 7 not found - - sal_Int32 nWordLen = rWord.getLength(); - - // ignore trailing vowel chars - while( nWordLen && isTransparentChar( rWord[ nWordLen - 1 ] )) - --nWordLen; - - while (nIdx < nWordLen) + if (stKashidaPos.has_value()) { - cCh = rWord[ nIdx ]; - - // 1. 
Priority: - // after user inserted kashida - if ( 0x640 == cCh ) - { - nKashidaPos = aScanner.GetBegin() + nIdx; - nPriorityLevel = 0; - } - - // 2. Priority: - // after a Seen or Sad - if (nPriorityLevel >= 1 && nIdx < nWordLen - 1) - { - if( isSeenOrSadChar( cCh ) - && (rWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion - { - nKashidaPos = aScanner.GetBegin() + nIdx; - nPriorityLevel = 1; - } - } - - // 3. Priority: - // before final form of Teh Marbuta, Heh, Dal - if ( nPriorityLevel >= 2 && nIdx > 0 ) - { - if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining) - isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word - ( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word - { - - SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" ); - // check if character is connectable to previous character, - if ( lcl_ConnectToPrev( cCh, cPrevCh ) ) - { - nKashidaPos = aScanner.GetBegin() + nPrevIdx; - nPriorityLevel = 2; - } - } - } - - // 4. Priority: - // before final form of Alef, Tah, Lam, Kaf or Gaf - if ( nPriorityLevel >= 3 && nIdx > 0 ) - { - if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word - (( isLamChar ( cCh ) || // Lam, - isTahChar ( cCh ) || // Tah, - isKafChar ( cCh ) || // Kaf (all dual joining) - isGafChar ( cCh ) ) - && nIdx == nWordLen - 1)) // only at end of word - { - SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" ); - // check if character is connectable to previous character, - if ( lcl_ConnectToPrev( cCh, cPrevCh ) ) - { - nKashidaPos = aScanner.GetBegin() + nPrevIdx; - nPriorityLevel = 3; - } - } - } - - // 5. 
Priority: - // before medial Beh-like - if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 ) - { - if ( isBehChar ( cCh ) ) - { - // check if next character is Reh or Yeh-like - sal_Unicode cNextCh = rWord[ nIdx + 1 ]; - if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh )) - { - SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" ); - // check if character is connectable to previous character, - if ( lcl_ConnectToPrev( cCh, cPrevCh ) ) - { - nKashidaPos = aScanner.GetBegin() + nPrevIdx; - nPriorityLevel = 4; - } - } - } + // Only populate kashida positions for the invalidated tail + TextFrameIndex nNewKashidaPos{aScanner.GetBegin() + stKashidaPos->nIndex}; + if(nNewKashidaPos >= nLastKashida) { + m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos); + nCntKash++; } - - // 6. Priority: - // before the final form of Waw, Ain, Qaf and Feh - if ( nPriorityLevel >= 5 && nIdx > 0 ) - { - if ( isWawChar ( cCh ) || // Wav (right joining) - // final form may appear in the middle of word - (( isAinChar ( cCh ) || // Ain (dual joining) - isQafChar ( cCh ) || // Qaf (dual joining) - isFehChar ( cCh ) ) // Feh (dual joining) - && nIdx == nWordLen - 1)) // only at end of word - { - SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" ); - // check if character is connectable to previous character, - if ( lcl_ConnectToPrev( cCh, cPrevCh ) ) - { - nKashidaPos = aScanner.GetBegin() + nPrevIdx; - nPriorityLevel = 5; - } - } - } - - // other connecting possibilities - if ( nPriorityLevel >= 6 && nIdx > 0 ) - { - // Reh, Zain - if ( isRehChar ( cCh ) ) - { - SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" ); - // check if character is connectable to previous character, - if ( lcl_ConnectToPrev( cCh, cPrevCh ) ) - { - nKashidaPos = aScanner.GetBegin() + nPrevIdx; - nPriorityLevel = 6; - } - } - } - - // Do not consider vowel marks when checking if a character - // can be connected to previous character. 
- if ( !isTransparentChar ( cCh) ) - { - cPrevCh = cCh; - nPrevIdx = nIdx; - } - - ++nIdx; - } // end of current word - - if ( -1 != nKashidaPos ) - { - m_Kashida.insert(m_Kashida.begin() + nCntKash, TextFrameIndex(nKashidaPos)); - nCntKash++; } } // end of kashida search } |