diff options
author | László Németh <nemeth@numbertext.org> | 2014-01-30 14:56:30 +0100 |
---|---|---|
committer | László Németh <nemeth@numbertext.org> | 2014-01-30 14:56:30 +0100 |
commit | e63923b0334ae381e0fcc576a6b6e08a62e657cf (patch) | |
tree | fd281a8df85ef40df228fcf8ef75bdb4067400e3 | |
parent | a1aa702861694fb114403b3f2746a33eb315ed87 (diff) |
fdo#44314 non-standard hyphenation at soft hyphens, and with personal dictionaries
Change-Id: I25e7c13036c6ce1948cc33d45901ef69a258fb03
-rw-r--r-- | cui/source/options/optdict.cxx | 19 |
-rw-r--r-- | lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx | 28 |
-rw-r--r-- | linguistic/source/dicimp.cxx | 29 |
-rw-r--r-- | linguistic/source/hyphdsp.cxx | 70 |
-rw-r--r-- | linguistic/source/misc.cxx | 4 |
5 files changed, 118 insertions, 32 deletions
diff --git a/cui/source/options/optdict.cxx b/cui/source/options/optdict.cxx index ba626c4958ab..af009878db2c 100644 --- a/cui/source/options/optdict.cxx +++ b/cui/source/options/optdict.cxx @@ -54,6 +54,23 @@ static long nStaticTabs[]= static OUString getNormDicEntry_Impl(const OUString &rText) { OUString aTmp(comphelper::string::stripEnd(rText, '.')); + // non-standard hyphenation + if (aTmp.indexOf('[') > -1) + { + OUStringBuffer aTmp2 ( aTmp.getLength() ); + sal_Bool bSkip = sal_False; + for (sal_Int32 i = 0; i < aTmp.getLength(); i++) + { + sal_Unicode cTmp = aTmp[i]; + if (cTmp == '[') + bSkip = sal_True; + else if (!bSkip) + aTmp2.append( cTmp ); + else if (cTmp == ']') + bSkip = sal_False; + } + aTmp = aTmp2.makeStringAndClear(); + } return comphelper::string::remove(aTmp, '='); } @@ -68,7 +85,7 @@ static CDE_RESULT cmpDicEntry_Impl( const OUString &rText1, const OUString &rTex eRes = CDE_EQUAL; else { // similar = equal up to trailing '.' and hyphenation positions - // marked with '=' + // marked with '=' and '[' + alternative spelling pattern + ']' if (getNormDicEntry_Impl( rText1 ) == getNormDicEntry_Impl( rText2 )) eRes = CDE_SIMILAR; } diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx index 64fe545fcd21..1cb79963b41e 100644 --- a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -508,16 +508,22 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( - const OUString& /*aWord*/, - const ::com::sun::star::lang::Locale& /*aLocale*/, - sal_Int16 /*nIndex*/, - const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) + const OUString& aWord, + const ::com::sun::star::lang::Locale& aLocale, + sal_Int16 nIndex, + const ::com::sun::star::beans::PropertyValues& aProperties ) 
throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) { - /* alternative spelling isn't supported by tex dictionaries */ - /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */ - /* TASK: implement queryAlternativeSpelling() */ - return NULL; + // FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported, + // because Writer has got a layout problem here. + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily i < 2 instead of i <= 2 + { + Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } + return NULL; } Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, @@ -658,7 +664,7 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const for ( i = 0; i < encWord.getLength(); i++) { - if (hyphens[i]&1 && (!rep || !rep[i])) + if (hyphens[i]&1) nHyphCount++; } @@ -670,8 +676,8 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const for (i = 0; i < nWord.getLength(); i++) { hyphenatedWordBuffer.append(aWord[i]); - // hyphenation position (not alternative) - if (hyphens[i]&1 && (!rep || !rep[i])) + // hyphenation position + if (hyphens[i]&1) { pPos[nHyphCount] = i; hyphenatedWordBuffer.append('='); diff --git a/linguistic/source/dicimp.cxx b/linguistic/source/dicimp.cxx index f50c942b2e81..1306253e0596 100644 --- a/linguistic/source/dicimp.cxx +++ b/linguistic/source/dicimp.cxx @@ -550,24 +550,37 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1, } const sal_Unicode cIgnChar = '='; + const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. 
Schif[f]fahrt, Zuc[1k]ker + const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde) sal_Int32 nIdx1 = 0, nIdx2 = 0, nNumIgnChar1 = 0, nNumIgnChar2 = 0; + bool IgnState; sal_Int32 nDiff = 0; sal_Unicode cChar1 = '\0'; sal_Unicode cChar2 = '\0'; do { // skip chars to be ignored - while (nIdx1 < nLen1 && (cChar1 = aWord1[ nIdx1 ]) == cIgnChar) + IgnState = false; + while (nIdx1 < nLen1 && ((cChar1 = aWord1[ nIdx1 ]) == cIgnChar || cChar1 == cIgnBeg || IgnState )) { + if ( cChar1 == cIgnBeg ) + IgnState = true; + else if (cChar1 == cIgnEnd) + IgnState = false; nIdx1++; nNumIgnChar1++; } - while (nIdx2 < nLen2 && (cChar2 = aWord2[ nIdx2 ]) == cIgnChar) + IgnState = false; + while (nIdx2 < nLen2 && ((cChar2 = aWord2[ nIdx2 ]) == cIgnChar || cChar2 == cIgnBeg || IgnState )) { + if ( cChar2 == cIgnBeg ) + IgnState = true; + else if (cChar2 == cIgnEnd) + IgnState = false; nIdx2++; nNumIgnChar2++; } @@ -590,15 +603,25 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1, // shorter one // count remaining IgnChars + IgnState = false; while (nIdx1 < nLen1 ) { - if (aWord1[ nIdx1++ ] == cIgnChar) + if (aWord1[ nIdx1 ] == cIgnBeg) + IgnState = true; + if (IgnState || aWord1[ nIdx1++ ] == cIgnChar) nNumIgnChar1++; + if (aWord1[ nIdx1] == cIgnEnd) + IgnState = false; } + IgnState = false; while (nIdx2 < nLen2 ) { + if (aWord1[ nIdx2 ] == cIgnBeg) + IgnState = true; if (aWord2[ nIdx2++ ] == cIgnChar) nNumIgnChar2++; + if (aWord1[ nIdx1] == cIgnEnd) + IgnState = false; } nRes = ((sal_Int32) nLen1 - nNumIgnChar1) - ((sal_Int32) nLen2 - nNumIgnChar2); diff --git a/linguistic/source/hyphdsp.cxx b/linguistic/source/hyphdsp.cxx index 815312bad55a..ecc9ec2dad5f 100644 --- a/linguistic/source/hyphdsp.cxx +++ b/linguistic/source/hyphdsp.cxx @@ -79,18 +79,23 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord( sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" - if (nTextLen > 0 && 
aText[ nTextLen - 1 ] != '=') + if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { sal_Int16 nHyphenationPos = -1; + sal_Int32 nHyphenPos = -1; + sal_Int16 nOrigHyphPos = -1; OUStringBuffer aTmp( nTextLen ); sal_Bool bSkip = sal_False; + sal_Bool bSkip2 = sal_False; sal_Int32 nHyphIdx = -1; sal_Int32 nLeading = 0; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; - if (cTmp != '=') + if (cTmp == '[' || cTmp == ']') + bSkip2 = !bSkip2; + if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); nLeading++; @@ -101,8 +106,10 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord( { if (!bSkip && nHyphIdx >= 0) { - if (nLeading <= nMaxLeading) + if (nLeading <= nMaxLeading) { nHyphenationPos = (sal_Int16) nHyphIdx; + nOrigHyphPos = i; + } } bSkip = sal_True; //! multiple '=' should count as one only } @@ -110,24 +117,23 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord( if (nHyphenationPos > 0) { - aText = aTmp.makeStringAndClear(); #if OSL_DEBUG_LEVEL > 1 { - if (aText != rOrigWord) + if (aTmp.toString() != rOrigWord) { // both words should only differ by a having a trailing '.' // character or not... OUString aShorter, aLonger; - if (aText.getLength() <= rOrigWord.getLength()) + if (aTmp.getLength() <= rOrigWord.getLength()) { - aShorter = aText; + aShorter = aTmp.toString(); aLonger = rOrigWord; } else { aShorter = rOrigWord; - aLonger = aText; + aLonger = aTmp.toString(); } sal_Int32 nS = aShorter.getLength(); sal_Int32 nL = aLonger.getLength(); @@ -139,12 +145,33 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord( } } #endif - //! 
take care of #i22591# - aText = rOrigWord; + if (aText[ nOrigHyphPos ] == '[') // alternative hyphenation + { + sal_Int16 split = 0; + sal_Unicode c = aText [ nOrigHyphPos + 1 ]; + sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos ); + if ('0' <= c && c <= '9') + { + split = c - '0'; + nOrigHyphPos++; + } + if (endhyphpat > -1) + { + OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) ); + aTmp2.append( aText.copy( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) ); + nHyphenPos = aTmp2.getLength(); + aTmp2.append( aTmp.copy( nHyphenationPos + 1 ) ); + //! take care of #i22591# + if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.') + aTmp2.append( '.' ); + aText = aTmp2.makeStringAndClear(); + } + } + if (nHyphenPos == -1) + aText = rOrigWord; - DBG_ASSERT( aText == rOrigWord, "failed to " ); - xRes = new HyphenatedWord( aText, nLang, nHyphenationPos, - aText, nHyphenationPos ); + xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos, + aText, (nHyphenPos > -1) ? nHyphenPos - 1 : nHyphenationPos); } } } @@ -167,7 +194,7 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens( sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" - if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=') + if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { // sequence to hold hyphenation positions Sequence< sal_Int16 > aHyphPos( nTextLen ); @@ -176,11 +203,14 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens( OUStringBuffer aTmp( nTextLen ); sal_Bool bSkip = sal_False; + sal_Bool bSkip2 = sal_False; sal_Int32 nHyphIdx = -1; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; - if (cTmp != '=') + if (cTmp == '[' || cTmp == ']') + bSkip2 = !bSkip2; + if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); bSkip = sal_False; @@ -426,7 +456,15 @@ Reference< XHyphenatedWord > SAL_CALL if (xEntry.is()) { - //! 
alternative spellings not yet supported by dictionaries + // FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported, + // because Writer has got a layout problem here. + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily i < 2 instead of i <= 2 + { + xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1 + extrachar); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } } else { diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx index 6ed0510d241c..737f4ae2ba33 100644 --- a/linguistic/source/misc.cxx +++ b/linguistic/source/misc.cxx @@ -262,9 +262,11 @@ static sal_Bool lcl_HasHyphInfo( const uno::Reference<XDictionaryEntry> &xEntry sal_Bool bRes = sal_False; if (xEntry.is()) { - // there has to be (at least one) '=' denoting a hyphenation position + // there has to be (at least one) '=' or '[' denoting a hyphenation position // and it must not be before any character of the word sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' ); + if (nIdx == -1) + nIdx = xEntry->getDictionaryWord().indexOf( '[' ); bRes = nIdx != -1 && nIdx != 0; } return bRes; |