From e63923b0334ae381e0fcc576a6b6e08a62e657cf Mon Sep 17 00:00:00 2001 From: László Németh Date: Thu, 30 Jan 2014 14:56:30 +0100 Subject: fdo#44314 non-standard hyphenation at soft hyphens + with pers. dic. Change-Id: I25e7c13036c6ce1948cc33d45901ef69a258fb03 --- linguistic/source/dicimp.cxx | 29 ++++++++++++++++-- linguistic/source/hyphdsp.cxx | 70 +++++++++++++++++++++++++++++++++---------- linguistic/source/misc.cxx | 4 ++- 3 files changed, 83 insertions(+), 20 deletions(-) (limited to 'linguistic') diff --git a/linguistic/source/dicimp.cxx b/linguistic/source/dicimp.cxx index f50c942b2e81..1306253e0596 100644 --- a/linguistic/source/dicimp.cxx +++ b/linguistic/source/dicimp.cxx @@ -550,24 +550,37 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1, } const sal_Unicode cIgnChar = '='; + const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. Schif[f]fahrt, Zuc[1k]ker + const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde) sal_Int32 nIdx1 = 0, nIdx2 = 0, nNumIgnChar1 = 0, nNumIgnChar2 = 0; + bool IgnState; sal_Int32 nDiff = 0; sal_Unicode cChar1 = '\0'; sal_Unicode cChar2 = '\0'; do { // skip chars to be ignored - while (nIdx1 < nLen1 && (cChar1 = aWord1[ nIdx1 ]) == cIgnChar) + IgnState = false; + while (nIdx1 < nLen1 && ((cChar1 = aWord1[ nIdx1 ]) == cIgnChar || cChar1 == cIgnBeg || IgnState )) { + if ( cChar1 == cIgnBeg ) + IgnState = true; + else if (cChar1 == cIgnEnd) + IgnState = false; nIdx1++; nNumIgnChar1++; } - while (nIdx2 < nLen2 && (cChar2 = aWord2[ nIdx2 ]) == cIgnChar) + IgnState = false; + while (nIdx2 < nLen2 && ((cChar2 = aWord2[ nIdx2 ]) == cIgnChar || cChar2 == cIgnBeg || IgnState )) { + if ( cChar2 == cIgnBeg ) + IgnState = true; + else if (cChar2 == cIgnEnd) + IgnState = false; nIdx2++; nNumIgnChar2++; } @@ -590,15 +603,25 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1, // shorter one // count remaining IgnChars + IgnState = false; while (nIdx1 < nLen1 ) { - if 
(aWord1[ nIdx1++ ] == cIgnChar) + if (aWord1[ nIdx1 ] == cIgnBeg) + IgnState = true; + if (IgnState || aWord1[ nIdx1++ ] == cIgnChar) nNumIgnChar1++; + if (aWord1[ nIdx1] == cIgnEnd) + IgnState = false; } + IgnState = false; while (nIdx2 < nLen2 ) { + if (aWord1[ nIdx2 ] == cIgnBeg) + IgnState = true; if (aWord2[ nIdx2++ ] == cIgnChar) nNumIgnChar2++; + if (aWord1[ nIdx1] == cIgnEnd) + IgnState = false; } nRes = ((sal_Int32) nLen1 - nNumIgnChar1) - ((sal_Int32) nLen2 - nNumIgnChar2); diff --git a/linguistic/source/hyphdsp.cxx b/linguistic/source/hyphdsp.cxx index 815312bad55a..ecc9ec2dad5f 100644 --- a/linguistic/source/hyphdsp.cxx +++ b/linguistic/source/hyphdsp.cxx @@ -79,18 +79,23 @@ Reference HyphenatorDispatcher::buildHyphWord( sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" - if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=') + if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { sal_Int16 nHyphenationPos = -1; + sal_Int32 nHyphenPos = -1; + sal_Int16 nOrigHyphPos = -1; OUStringBuffer aTmp( nTextLen ); sal_Bool bSkip = sal_False; + sal_Bool bSkip2 = sal_False; sal_Int32 nHyphIdx = -1; sal_Int32 nLeading = 0; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; - if (cTmp != '=') + if (cTmp == '[' || cTmp == ']') + bSkip2 = !bSkip2; + if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); nLeading++; @@ -101,8 +106,10 @@ Reference HyphenatorDispatcher::buildHyphWord( { if (!bSkip && nHyphIdx >= 0) { - if (nLeading <= nMaxLeading) + if (nLeading <= nMaxLeading) { nHyphenationPos = (sal_Int16) nHyphIdx; + nOrigHyphPos = i; + } } bSkip = sal_True; //! 
multiple '=' should count as one only } @@ -110,24 +117,23 @@ Reference HyphenatorDispatcher::buildHyphWord( if (nHyphenationPos > 0) { - aText = aTmp.makeStringAndClear(); #if OSL_DEBUG_LEVEL > 1 { - if (aText != rOrigWord) + if (aTmp.toString() != rOrigWord) { // both words should only differ by a having a trailing '.' // character or not... OUString aShorter, aLonger; - if (aText.getLength() <= rOrigWord.getLength()) + if (aTmp.getLength() <= rOrigWord.getLength()) { - aShorter = aText; + aShorter = aTmp.toString(); aLonger = rOrigWord; } else { aShorter = rOrigWord; - aLonger = aText; + aLonger = aTmp.toString(); } sal_Int32 nS = aShorter.getLength(); sal_Int32 nL = aLonger.getLength(); @@ -139,12 +145,33 @@ Reference HyphenatorDispatcher::buildHyphWord( } } #endif - //! take care of #i22591# - aText = rOrigWord; + if (aText[ nOrigHyphPos ] == '[') // alternative hyphenation + { + sal_Int16 split = 0; + sal_Unicode c = aText [ nOrigHyphPos + 1 ]; + sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos ); + if ('0' <= c && c <= '9') + { + split = c - '0'; + nOrigHyphPos++; + } + if (endhyphpat > -1) + { + OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) ); + aTmp2.append( aText.copy( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) ); + nHyphenPos = aTmp2.getLength(); + aTmp2.append( aTmp.copy( nHyphenationPos + 1 ) ); + //! take care of #i22591# + if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.') + aTmp2.append( '.' ); + aText = aTmp2.makeStringAndClear(); + } + } + if (nHyphenPos == -1) + aText = rOrigWord; - DBG_ASSERT( aText == rOrigWord, "failed to " ); - xRes = new HyphenatedWord( aText, nLang, nHyphenationPos, - aText, nHyphenationPos ); + xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos, + aText, (nHyphenPos > -1) ? 
nHyphenPos - 1 : nHyphenationPos); } } } @@ -167,7 +194,7 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens( sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" - if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=') + if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { // sequence to hold hyphenation positions Sequence< sal_Int16 > aHyphPos( nTextLen ); @@ -176,11 +203,14 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens( OUStringBuffer aTmp( nTextLen ); sal_Bool bSkip = sal_False; + sal_Bool bSkip2 = sal_False; sal_Int32 nHyphIdx = -1; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; - if (cTmp != '=') + if (cTmp == '[' || cTmp == ']') + bSkip2 = !bSkip2; + if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); bSkip = sal_False; @@ -426,7 +456,15 @@ Reference< XHyphenatedWord > SAL_CALL if (xEntry.is()) { - //! alternative spellings not yet supported by dictionaries + // FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported, + // because Writer has got a layout problem here. 
+ // First, we allow only one extra character before the hyphen to avoid missing the right break point: + for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily extrachar < 2 instead of extrachar <= 2 + { + xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1 + extrachar); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } } else { diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx index 6ed0510d241c..737f4ae2ba33 100644 --- a/linguistic/source/misc.cxx +++ b/linguistic/source/misc.cxx @@ -262,9 +262,11 @@ static sal_Bool lcl_HasHyphInfo( const uno::Reference &xEntry sal_Bool bRes = sal_False; if (xEntry.is()) { - // there has to be (at least one) '=' denoting a hyphenation position + // there has to be (at least one) '=' or '[' denoting a hyphenation position // and it must not be before any character of the word sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' ); + if (nIdx == -1) + nIdx = xEntry->getDictionaryWord().indexOf( '[' ); bRes = nIdx != -1 && nIdx != 0; } return bRes; -- cgit