/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #if OSL_DEBUG_LEVEL > 0 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hyphdsp.hxx" #include #include #include "lngsvcmgr.hxx" using namespace osl; using namespace com::sun::star; using namespace com::sun::star::beans; using namespace com::sun::star::lang; using namespace com::sun::star::uno; using namespace com::sun::star::linguistic2; using namespace linguistic; HyphenatorDispatcher::HyphenatorDispatcher( LngSvcMgr &rLngSvcMgr ) : rMgr (rLngSvcMgr) { } HyphenatorDispatcher::~HyphenatorDispatcher() { ClearSvcList(); } void HyphenatorDispatcher::ClearSvcList() { // release memory for each table entry HyphSvcByLangMap_t().swap(aSvcMap); } Reference HyphenatorDispatcher::buildHyphWord( const OUString& rOrigWord, const Reference &xEntry, LanguageType nLang, sal_Int16 nMaxLeading ) { MutexGuard aGuard( GetLinguMutex() ); Reference< XHyphenatedWord > xRes; if (xEntry.is()) { OUString aText( xEntry->getDictionaryWord() ); sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { sal_Int16 nHyphenationPos = -1; sal_Int16 nOrigHyphPos = -1; OUStringBuffer aTmp( nTextLen ); bool bSkip = false; bool bSkip2 = false; sal_Int32 nHyphIdx = -1; sal_Int32 nLeading = 0; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; if (cTmp == '[' || cTmp == ']') bSkip2 = !bSkip2; if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); nLeading++; bSkip = false; nHyphIdx++; } else { if (!bSkip && nHyphIdx >= 0) { if (nLeading <= nMaxLeading) { nHyphenationPos = static_cast(nHyphIdx); nOrigHyphPos = i; } } bSkip = true; //! multiple '=' should count as one only } } if (nHyphenationPos > 0) { #if OSL_DEBUG_LEVEL > 0 { if (std::u16string_view(aTmp) != rOrigWord) { // both words should only differ by a having a trailing '.' // character or not... std::u16string_view aShorter(aTmp), aLonger(rOrigWord); if (aTmp.getLength() > rOrigWord.getLength()) std::swap(aShorter, aLonger); sal_Int32 nS = aShorter.size(); sal_Int32 nL = aLonger.size(); if (nS > 0 && nL > 0) { assert( ((nS + 1 == nL) && aLonger[nL-1] == '.') && "HyphenatorDispatcher::buildHyphWord: unexpected difference between words!" ); } } } #endif sal_Int32 nHyphenPos = -1; if (aText[ nOrigHyphPos ] == '[') // alternative hyphenation { sal_Int16 split = 0; sal_Unicode c = aText [ nOrigHyphPos + 1 ]; sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos ); if ('0' <= c && c <= '9') { split = c - '0'; nOrigHyphPos++; } if (endhyphpat > -1) { OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) ); aTmp2.append( aText.subView( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) ); nHyphenPos = aTmp2.getLength(); aTmp2.append( aTmp.subView( nHyphenationPos + 1 ) ); //! take care of #i22591# if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.') aTmp2.append( '.' ); aText = aTmp2.makeStringAndClear(); } } if (nHyphenPos == -1) aText = rOrigWord; xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos, aText, (nHyphenPos > -1) ? nHyphenPos - 1 : nHyphenationPos); } } } return xRes; } Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens( const Reference< XDictionaryEntry > &xEntry, LanguageType nLanguage ) { MutexGuard aGuard( GetLinguMutex() ); Reference xRes; if (xEntry.is()) { // text with hyphenation info OUString aText( xEntry->getDictionaryWord() ); sal_Int32 nTextLen = aText.getLength(); // trailing '=' means "hyphenation should not be possible" if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[') { // sequence to hold hyphenation positions Sequence< sal_Int16 > aHyphPos( nTextLen ); sal_Int16 *pPos = aHyphPos.getArray(); sal_Int32 nHyphCount = 0; OUStringBuffer aTmp( nTextLen ); bool bSkip = false; bool bSkip2 = false; sal_Int32 nHyphIdx = -1; for (sal_Int32 i = 0; i < nTextLen; i++) { sal_Unicode cTmp = aText[i]; if (cTmp == '[' || cTmp == ']') bSkip2 = !bSkip2; if (cTmp != '=' && !bSkip2 && cTmp != ']') { aTmp.append( cTmp ); bSkip = false; nHyphIdx++; } else { if (!bSkip && nHyphIdx >= 0) pPos[ nHyphCount++ ] = static_cast(nHyphIdx); bSkip = true; //! multiple '=' should count as one only } } // ignore (multiple) trailing '=' if (bSkip && nHyphIdx >= 0) { nHyphCount--; } DBG_ASSERT( nHyphCount >= 0, "lng : invalid hyphenation count"); if (nHyphCount > 0) { aHyphPos.realloc( nHyphCount ); xRes = new PossibleHyphens( aTmp.makeStringAndClear(), nLanguage, aText, aHyphPos ); } } } return xRes; } Sequence< Locale > SAL_CALL HyphenatorDispatcher::getLocales() { MutexGuard aGuard( GetLinguMutex() ); std::vector aLocales; aLocales.reserve(aSvcMap.size()); std::transform(aSvcMap.begin(), aSvcMap.end(), std::back_inserter(aLocales), [](HyphSvcByLangMap_t::const_reference elem) { return LanguageTag::convertToLocale(elem.first); }); return comphelper::containerToSequence(aLocales); } sal_Bool SAL_CALL HyphenatorDispatcher::hasLocale(const Locale& rLocale) { MutexGuard aGuard( GetLinguMutex() ); HyphSvcByLangMap_t::const_iterator aIt( aSvcMap.find( LinguLocaleToLanguage( rLocale ) ) ); return aIt != aSvcMap.end(); } Reference< XHyphenatedWord > SAL_CALL HyphenatorDispatcher::hyphenate( const OUString& rWord, const Locale& rLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< ::css::beans::PropertyValue >& rProperties ) { MutexGuard aGuard( GetLinguMutex() ); Reference< XHyphenatedWord > xRes; sal_Int32 nWordLen = rWord.getLength(); LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); if (LinguIsUnspecified(nLanguage) || !nWordLen || nMaxLeading == 0 || nMaxLeading == nWordLen) return xRes; // search for entry with that language HyphSvcByLangMap_t::iterator aIt( aSvcMap.find( nLanguage ) ); LangSvcEntries_Hyph *pEntry = aIt != aSvcMap.end() ? aIt->second.get() : nullptr; bool bWordModified = false; if (!pEntry || (nMaxLeading < 0 || nMaxLeading > nWordLen)) { return nullptr; } else { OUString aChkWord( rWord ); // replace typographical apostroph by ascii apostroph OUString aSingleQuote( GetLocaleDataWrapper( nLanguage ).getQuotationMarkEnd() ); DBG_ASSERT( 1 == aSingleQuote.getLength(), "unexpected length of quotation mark" ); if (!aSingleQuote.isEmpty()) aChkWord = aChkWord.replace( aSingleQuote[0], '\'' ); bWordModified |= RemoveHyphens( aChkWord ); if (IsIgnoreControlChars( rProperties, GetPropSet() )) bWordModified |= RemoveControlChars( aChkWord ); sal_Int16 nChkMaxLeading = static_cast(GetPosInWordToCheck( rWord, nMaxLeading )); // check for results from (positive) dictionaries which have precedence! Reference< XDictionaryEntry > xEntry; if (GetDicList().is() && IsUseDicList( rProperties, GetPropSet() )) { xEntry = GetDicList()->queryDictionaryEntry( aChkWord, rLocale, true, false ); } if (xEntry.is()) { //! because queryDictionaryEntry (in the end DictionaryNeo::getEntry) //! does not distinguish between "XYZ" and "XYZ." in order to avoid //! to require them as different entry we have to supply the //! original word here as well so it can be used in th result //! otherwise a strange effect may occur (see #i22591#) xRes = buildHyphWord( rWord, xEntry, nLanguage, nChkMaxLeading ); } else { sal_Int32 nLen = pEntry->aSvcImplNames.hasElements() ? 1 : 0; DBG_ASSERT( pEntry->nLastTriedSvcIndex < nLen, "lng : index out of range"); sal_Int32 i = 0; Reference< XHyphenator > xHyph; if (pEntry->aSvcRefs.hasElements()) xHyph = pEntry->aSvcRefs[0]; // try already instantiated service if (i <= pEntry->nLastTriedSvcIndex) { if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->hyphenate( aChkWord, rLocale, nChkMaxLeading, rProperties ); ++i; } else if (pEntry->nLastTriedSvcIndex < nLen - 1) // instantiate services and try it { Reference< XHyphenator > *pRef = pEntry->aSvcRefs.getArray(); Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() ); // build service initialization argument Sequence< Any > aArgs(2); aArgs.getArray()[0] <<= GetPropSet(); // create specific service via it's implementation name try { xHyph = Reference< XHyphenator >( xContext->getServiceManager()->createInstanceWithArgumentsAndContext( pEntry->aSvcImplNames[0], aArgs, xContext ), UNO_QUERY ); } catch (uno::Exception &) { SAL_WARN( "linguistic", "createInstanceWithArguments failed" ); } pRef [i] = xHyph; Reference< XLinguServiceEventBroadcaster > xBroadcaster( xHyph, UNO_QUERY ); if (xBroadcaster.is()) rMgr.AddLngSvcEvtBroadcaster( xBroadcaster ); if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->hyphenate( aChkWord, rLocale, nChkMaxLeading, rProperties ); pEntry->nLastTriedSvcIndex = static_cast(i); ++i; // if language is not supported by the services // remove it from the list. if (xHyph.is() && !xHyph->hasLocale( rLocale )) aSvcMap.erase( nLanguage ); } } // if (xEntry.is()) } if (bWordModified && xRes.is()) xRes = RebuildHyphensAndControlChars( rWord, xRes ); if (xRes.is() && xRes->getWord() != rWord) { xRes = new HyphenatedWord( rWord, nLanguage, xRes->getHyphenationPos(), xRes->getHyphenatedWord(), xRes->getHyphenPos() ); } return xRes; } Reference< XHyphenatedWord > SAL_CALL HyphenatorDispatcher::queryAlternativeSpelling( const OUString& rWord, const Locale& rLocale, sal_Int16 nIndex, const css::uno::Sequence< ::css::beans::PropertyValue >& rProperties ) { MutexGuard aGuard( GetLinguMutex() ); Reference< XHyphenatedWord > xRes; sal_Int32 nWordLen = rWord.getLength(); LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); if (LinguIsUnspecified(nLanguage) || !nWordLen) return xRes; // search for entry with that language HyphSvcByLangMap_t::iterator aIt( aSvcMap.find( nLanguage ) ); LangSvcEntries_Hyph *pEntry = aIt != aSvcMap.end() ? aIt->second.get() : nullptr; bool bWordModified = false; if (!pEntry || 0 > nIndex || nIndex > nWordLen - 2) { return nullptr; } else { OUString aChkWord( rWord ); // replace typographical apostroph by ascii apostroph OUString aSingleQuote( GetLocaleDataWrapper( nLanguage ).getQuotationMarkEnd() ); DBG_ASSERT( 1 == aSingleQuote.getLength(), "unexpected length of quotation mark" ); if (!aSingleQuote.isEmpty()) aChkWord = aChkWord.replace( aSingleQuote[0], '\'' ); bWordModified |= RemoveHyphens( aChkWord ); if (IsIgnoreControlChars( rProperties, GetPropSet() )) bWordModified |= RemoveControlChars( aChkWord ); sal_Int16 nChkIndex = static_cast(GetPosInWordToCheck( rWord, nIndex )); // check for results from (positive) dictionaries which have precedence! Reference< XDictionaryEntry > xEntry; if (GetDicList().is() && IsUseDicList( rProperties, GetPropSet() )) { xEntry = GetDicList()->queryDictionaryEntry( aChkWord, rLocale, true, false ); } if (xEntry.is()) { xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1); if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) return xRes; } else { sal_Int32 nLen = pEntry->aSvcImplNames.hasElements() ? 1 : 0; DBG_ASSERT( pEntry->nLastTriedSvcIndex < nLen, "lng : index out of range"); sal_Int32 i = 0; Reference< XHyphenator > xHyph; if (pEntry->aSvcRefs.hasElements()) xHyph = pEntry->aSvcRefs[0]; // try already instantiated service if (i <= pEntry->nLastTriedSvcIndex) { if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->queryAlternativeSpelling( aChkWord, rLocale, nChkIndex, rProperties ); ++i; } else if (pEntry->nLastTriedSvcIndex < nLen - 1) // instantiate services and try it { Reference< XHyphenator > *pRef = pEntry->aSvcRefs.getArray(); Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() ); // build service initialization argument Sequence< Any > aArgs(2); aArgs.getArray()[0] <<= GetPropSet(); // create specific service via it's implementation name try { xHyph = Reference< XHyphenator >( xContext->getServiceManager()->createInstanceWithArgumentsAndContext( pEntry->aSvcImplNames[0], aArgs, xContext ), UNO_QUERY ); } catch (uno::Exception &) { SAL_WARN( "linguistic", "createInstanceWithArguments failed" ); } pRef [i] = xHyph; Reference< XLinguServiceEventBroadcaster > xBroadcaster( xHyph, UNO_QUERY ); if (xBroadcaster.is()) rMgr.AddLngSvcEvtBroadcaster( xBroadcaster ); if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->queryAlternativeSpelling( aChkWord, rLocale, nChkIndex, rProperties ); pEntry->nLastTriedSvcIndex = static_cast(i); ++i; // if language is not supported by the services // remove it from the list. if (xHyph.is() && !xHyph->hasLocale( rLocale )) aSvcMap.erase( nLanguage ); } } // if (xEntry.is()) } if (bWordModified && xRes.is()) xRes = RebuildHyphensAndControlChars( rWord, xRes ); if (xRes.is() && xRes->getWord() != rWord) { xRes = new HyphenatedWord( rWord, nLanguage, xRes->getHyphenationPos(), xRes->getHyphenatedWord(), xRes->getHyphenPos() ); } return xRes; } Reference< XPossibleHyphens > SAL_CALL HyphenatorDispatcher::createPossibleHyphens( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< ::css::beans::PropertyValue >& rProperties ) { MutexGuard aGuard( GetLinguMutex() ); Reference< XPossibleHyphens > xRes; LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); if (LinguIsUnspecified(nLanguage) || rWord.isEmpty()) return xRes; // search for entry with that language HyphSvcByLangMap_t::iterator aIt( aSvcMap.find( nLanguage ) ); LangSvcEntries_Hyph *pEntry = aIt != aSvcMap.end() ? aIt->second.get() : nullptr; if (pEntry) { OUString aChkWord( rWord ); // replace typographical apostroph by ascii apostroph OUString aSingleQuote( GetLocaleDataWrapper( nLanguage ).getQuotationMarkEnd() ); DBG_ASSERT( 1 == aSingleQuote.getLength(), "unexpected length of quotation mark" ); if (!aSingleQuote.isEmpty()) aChkWord = aChkWord.replace( aSingleQuote[0], '\'' ); RemoveHyphens( aChkWord ); if (IsIgnoreControlChars( rProperties, GetPropSet() )) RemoveControlChars( aChkWord ); // check for results from (positive) dictionaries which have precedence! Reference< XDictionaryEntry > xEntry; if (GetDicList().is() && IsUseDicList( rProperties, GetPropSet() )) { xEntry = GetDicList()->queryDictionaryEntry( aChkWord, rLocale, true, false ); } if (xEntry.is()) { xRes = buildPossHyphens( xEntry, nLanguage ); } else { sal_Int32 nLen = pEntry->aSvcImplNames.hasElements() ? 1 : 0; DBG_ASSERT( pEntry->nLastTriedSvcIndex < nLen, "lng : index out of range"); sal_Int32 i = 0; Reference< XHyphenator > xHyph; if (pEntry->aSvcRefs.hasElements()) xHyph = pEntry->aSvcRefs[0]; // try already instantiated service if (i <= pEntry->nLastTriedSvcIndex) { if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->createPossibleHyphens( aChkWord, rLocale, rProperties ); ++i; } else if (pEntry->nLastTriedSvcIndex < nLen - 1) // instantiate services and try it { Reference< XHyphenator > *pRef = pEntry->aSvcRefs.getArray(); Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() ); // build service initialization argument Sequence< Any > aArgs(2); aArgs.getArray()[0] <<= GetPropSet(); // create specific service via it's implementation name try { xHyph.set( xContext->getServiceManager()->createInstanceWithArgumentsAndContext( pEntry->aSvcImplNames[0], aArgs, xContext ), UNO_QUERY ); } catch (uno::Exception &) { SAL_WARN( "linguistic", "createWithArguments failed" ); } pRef [i] = xHyph; Reference< XLinguServiceEventBroadcaster > xBroadcaster( xHyph, UNO_QUERY ); if (xBroadcaster.is()) rMgr.AddLngSvcEvtBroadcaster( xBroadcaster ); if (xHyph.is() && xHyph->hasLocale( rLocale )) xRes = xHyph->createPossibleHyphens( aChkWord, rLocale, rProperties ); pEntry->nLastTriedSvcIndex = static_cast(i); ++i; // if language is not supported by the services // remove it from the list. if (xHyph.is() && !xHyph->hasLocale( rLocale )) aSvcMap.erase( nLanguage ); } } // if (xEntry.is()) } if (xRes.is() && xRes->getWord() != rWord) { xRes = new PossibleHyphens( rWord, nLanguage, xRes->getPossibleHyphens(), xRes->getHyphenationPositions() ); } return xRes; } void HyphenatorDispatcher::SetServiceList( const Locale &rLocale, const Sequence< OUString > &rSvcImplNames ) { MutexGuard aGuard( GetLinguMutex() ); LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); if (!rSvcImplNames.hasElements()) // remove entry aSvcMap.erase( nLanguage ); else { // modify/add entry LangSvcEntries_Hyph *pEntry = aSvcMap[ nLanguage ].get(); if (pEntry) { pEntry->Clear(); pEntry->aSvcImplNames = rSvcImplNames; pEntry->aSvcImplNames.realloc(1); pEntry->aSvcRefs = Sequence< Reference < XHyphenator > > ( 1 ); } else { auto pTmpEntry = std::make_shared( rSvcImplNames[0] ); pTmpEntry->aSvcRefs = Sequence< Reference < XHyphenator > >( 1 ); aSvcMap[ nLanguage ] = std::move(pTmpEntry); } } } Sequence< OUString > HyphenatorDispatcher::GetServiceList( const Locale &rLocale ) const { MutexGuard aGuard( GetLinguMutex() ); Sequence< OUString > aRes; // search for entry with that language and use data from that LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); const HyphSvcByLangMap_t::const_iterator aIt( aSvcMap.find( nLanguage ) ); const LangSvcEntries_Hyph *pEntry = aIt != aSvcMap.end() ? aIt->second.get() : nullptr; if (pEntry) { aRes = pEntry->aSvcImplNames; if (aRes.hasElements()) aRes.realloc(1); } return aRes; } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */