/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sspellimp.hxx" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace utl; using namespace osl; using namespace com::sun::star; using namespace com::sun::star::beans; using namespace com::sun::star::lang; using namespace com::sun::star::uno; using namespace com::sun::star::linguistic2; using namespace linguistic; // XML-header of SPELLML queries #if !defined SPELL_XML constexpr OUStringLiteral SPELL_XML = u""; #endif // only available in hunspell >= 1.5 #if !defined MAXWORDLEN #define MAXWORDLEN 176 #endif SpellChecker::SpellChecker() : m_aEvtListeners(GetLinguMutex()), m_bDisposing(false) { } SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc) : m_aDName(std::move(i_DName)) , m_aDLoc(std::move(i_DLoc)) , m_aDEnc(i_DEnc) { } SpellChecker::~SpellChecker() { if (m_pPropHelper) { m_pPropHelper->RemoveAsPropListener(); } } PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl() { if (!m_pPropHelper) { Reference< XLinguProperties > xPropSet = GetLinguProperties(); m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast(this), xPropSet ) ); m_pPropHelper->AddAsPropListener(); //! after a reference is established } return *m_pPropHelper; } Sequence< Locale > SAL_CALL SpellChecker::getLocales() { MutexGuard aGuard( GetLinguMutex() ); // this routine should return the locales supported by the installed // dictionaries. if (m_DictItems.empty()) { SvtLinguConfig aLinguCfg; // get list of extension dictionaries-to-use // (or better speaking: the list of dictionaries using the // new configuration entries). std::vector< SvtLinguConfigDictionaryEntry > aDics; uno::Sequence< OUString > aFormatList; aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers", "org.openoffice.lingu.MySpellSpellChecker", aFormatList ); for (auto const& format : std::as_const(aFormatList)) { std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( aLinguCfg.GetActiveDictionariesByFormat(format) ); aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); } //!! for compatibility with old dictionaries (the ones not using extensions //!! or new configuration entries, but still using the dictionary.lst file) //!! Get the list of old style spell checking dictionaries to use... std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( GetOldStyleDics( "DICT" ) ); // to prefer dictionaries with configuration entries we will only // use those old style dictionaries that add a language that // is not yet supported by the list of new style dictionaries MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); if (!aDics.empty()) { uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory()); uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY); // get supported locales from the dictionaries-to-use... std::set aLocaleNamesSet; for (auto const& dict : aDics) { const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); uno::Sequence< OUString > aLocations( dict.aLocations ); SAL_WARN_IF( aLocaleNames.hasElements() && !aLocations.hasElements(), "lingucomponent", "no locations"); if (aLocations.hasElements()) { if (xAccess.is() && xAccess->exists(aLocations[0])) { for (auto const& locale : aLocaleNames) { if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale)) continue; aLocaleNamesSet.insert(locale); } } else { SAL_WARN( "lingucomponent", "missing <" << aLocations[0] << ">"); } } } // ... and add them to the resulting sequence m_aSuppLocales.realloc( aLocaleNamesSet.size() ); std::transform( aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(), [](auto const& localeName) { return LanguageTag::convertToLocale(localeName); }); //! For each dictionary and each locale we need a separate entry. //! If this results in more than one dictionary per locale than (for now) //! it is undefined which dictionary gets used. //! In the future the implementation should support using several dictionaries //! for one locale. sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0), [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) { return nSum + dict.aLocaleNames.getLength(); }); // add dictionary information m_DictItems.reserve(nDictSize); for (auto const& dict : aDics) { if (dict.aLocaleNames.hasElements() && dict.aLocations.hasElements()) { const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); // currently only one language per dictionary is supported in the actual implementation... // Thus here we work-around this by adding the same dictionary several times. // Once for each of its supported locales. for (auto const& localeName : aLocaleNames) { // also both files have to be in the same directory and the // file names must only differ in the extension (.aff/.dic). // Thus we use the first location only and strip the extension part. OUString aLocation = dict.aLocations[0]; sal_Int32 nPos = aLocation.lastIndexOf( '.' ); aLocation = aLocation.copy( 0, nPos ); m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW); } } } DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" ); } else { // no dictionary found so register no dictionaries m_aSuppLocales.realloc(0); } } return m_aSuppLocales; } sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale) { MutexGuard aGuard( GetLinguMutex() ); bool bRes = false; if (!m_aSuppLocales.hasElements()) getLocales(); for (auto const& suppLocale : std::as_const(m_aSuppLocales)) { if (rLocale == suppLocale) { bRes = true; break; } } return bRes; } sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale, int& rInfo) { if (rWord.getLength() > MAXWORDLEN) return -1; Hunspell * pMS = nullptr; rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; // initialize a myspell object for each dictionary once // (note: mutex is held higher up in isValid) sal_Int16 nRes = -1; // first handle smart quotes both single and double OUStringBuffer rBuf(rWord); sal_Int32 n = rBuf.getLength(); sal_Unicode c; sal_Int32 extrachar = 0; for (sal_Int32 ix=0; ix < n; ix++) { c = rBuf[ix]; if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; else if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries // set ICONV and IGNORE aff file options, if needed.) else if ((c == 0x200C) || (c == 0x200D) || ((c >= 0xFB00) && (c <= 0xFB04))) extrachar = 1; } OUString nWord(rBuf.makeStringAndClear()); if (n) { for (auto& currDict : m_DictItems) { pMS = nullptr; eEnc = RTL_TEXTENCODING_DONTKNOW; if (rLocale == currDict.m_aDLoc) { if (!currDict.m_pDict) { OUString dicpath = currDict.m_aDName + ".dic"; OUString affpath = currDict.m_aDName + ".aff"; OUString dict; OUString aff; osl::FileBase::getSystemPathFromFileURL(dicpath,dict); osl::FileBase::getSystemPathFromFileURL(affpath,aff); #if defined(_WIN32) // workaround for Windows specific problem that the // path length in calls to 'fopen' is limited to somewhat // about 120+ characters which will usually be exceed when // using dictionaries as extensions. (Hunspell waits UTF-8 encoded // path with \\?\ long path prefix.) OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8)); OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8)); #else OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding())); OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding())); #endif currDict.m_pDict = std::make_unique(aTmpaff.getStr(),aTmpdict.getStr()); #if defined(H_DEPRECATED) currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str()); #else currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding()); #endif } pMS = currDict.m_pDict.get(); eEnc = currDict.m_aDEnc; } if (pMS) { // we don't want to work with a default text encoding since following incorrect // results may occur only for specific text and thus may be hard to notice. // Thus better always make a clean exit here if the text encoding is in question. // Hopefully something not working at all will raise proper attention quickly. ;-) DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); if (eEnc == RTL_TEXTENCODING_DONTKNOW) return -1; OString aWrd(OU2ENC(nWord,eEnc)); #if defined(H_DEPRECATED) bool bVal = pMS->spell(std::string(aWrd), &rInfo); #else bool bVal = pMS->spell(aWrd.getStr(), &rInfo) != 0; #endif if (!bVal) { if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) { OUStringBuffer aBuf(nWord); n = aBuf.getLength(); for (sal_Int32 ix=n-1; ix >= 0; ix--) { switch (aBuf[ix]) { case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break; case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break; case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break; case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break; case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break; case 0x200C: case 0x200D: aBuf.remove(ix, 1); break; } } OUString aWord(aBuf.makeStringAndClear()); OString bWrd(OU2ENC(aWord, eEnc)); #if defined(H_DEPRECATED) bVal = pMS->spell(std::string(bWrd), &rInfo); #else bVal = pMS->spell(bWrd.getStr(), &rInfo) != 0; #endif if (bVal) return -1; } nRes = SpellFailure::SPELLING_ERROR; } else { return -1; } pMS = nullptr; } } } return nRes; } sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) { MutexGuard aGuard( GetLinguMutex() ); if (rLocale == Locale() || rWord.isEmpty()) return true; if (!hasLocale( rLocale )) return true; // return sal_False to process SPELLML requests (they are longer than the header) if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false; // Get property values to be used. // These are be the default values set in the SN_LINGU_PROPERTIES // PropertySet which are overridden by the supplied ones from the // last argument. // You'll probably like to use a simpler solution than the provided // one using the PropertyHelper_Spell. PropertyHelper_Spelling& rHelper = GetPropHelper(); rHelper.SetTmpPropVals( rProperties ); int nInfo = 0; sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo ); if (nFailure != -1 && !rWord.match(SPELL_XML, 0)) { LanguageType nLang = LinguLocaleToLanguage( rLocale ); // postprocess result for errors that should be ignored const bool bIgnoreError = (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) || (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) || (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR); if (bIgnoreError) nFailure = -1; } //#define SPELL_COMPOUND 1 << 0 // valid word, but it's a rule-based compound word if ( nFailure == -1 && (nInfo & SPELL_COMPOUND) ) { bool bHasHyphen = rWord.indexOf('-') > -1; if ( (bHasHyphen && !rHelper.IsSpellHyphenatedCompound()) || (!bHasHyphen && !rHelper.IsSpellClosedCompound()) ) { return false; } } return (nFailure == -1); } Reference< XSpellAlternatives > SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) { // Retrieves the return values for the 'spell' function call in case // of a misspelled word. // Especially it may give a list of suggested (correct) words: Reference< XSpellAlternatives > xRes; // note: mutex is held by higher up by spell which covers both Hunspell* pMS = nullptr; rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; // first handle smart quotes (single and double) OUStringBuffer rBuf(rWord); sal_Int32 n = rBuf.getLength(); sal_Unicode c; for (sal_Int32 ix=0; ix < n; ix++) { c = rBuf[ix]; if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; } OUString nWord(rBuf.makeStringAndClear()); if (n) { LanguageType nLang = LinguLocaleToLanguage( rLocale ); int numsug = 0; Sequence< OUString > aStr( 0 ); for (const auto& currDict : m_DictItems) { pMS = nullptr; eEnc = RTL_TEXTENCODING_DONTKNOW; if (rLocale == currDict.m_aDLoc) { pMS = currDict.m_pDict.get(); eEnc = currDict.m_aDEnc; } if (pMS) { OString aWrd(OU2ENC(nWord,eEnc)); #if defined(H_DEPRECATED) std::vector suglst = pMS->suggest(std::string(aWrd)); if (!suglst.empty()) { aStr.realloc(numsug + suglst.size()); OUString *pStr = aStr.getArray(); for (size_t ii = 0; ii < suglst.size(); ++ii) { OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc); pStr[numsug + ii] = cvtwrd; } numsug += suglst.size(); } #else char ** suglst = nullptr; int count = pMS->suggest(&suglst, aWrd.getStr()); if (count) { aStr.realloc( numsug + count ); OUString *pStr = aStr.getArray(); for (int ii=0; ii < count; ++ii) { OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc); pStr[numsug + ii] = cvtwrd; } numsug += count; } pMS->free_list(&suglst, count); #endif } } // now return an empty alternative for no suggestions or the list of alternatives if some found xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr ); return xRes; } return xRes; } Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) { MutexGuard aGuard( GetLinguMutex() ); if (rLocale == Locale() || rWord.isEmpty()) return nullptr; if (!hasLocale( rLocale )) return nullptr; Reference< XSpellAlternatives > xAlt; if (!isValid( rWord, rLocale, rProperties )) { xAlt = GetProposals( rWord, rLocale ); } return xAlt; } sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) { MutexGuard aGuard( GetLinguMutex() ); bool bRes = false; if (!m_bDisposing && rxLstnr.is()) { bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); } return bRes; } sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) { MutexGuard aGuard( GetLinguMutex() ); bool bRes = false; if (!m_bDisposing && rxLstnr.is()) { bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); } return bRes; } OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale) { std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); return Translate::get(STR_DESCRIPTION_HUNSPELL, loc); } void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments ) { MutexGuard aGuard( GetLinguMutex() ); if (m_pPropHelper) return; sal_Int32 nLen = rArguments.getLength(); if (2 == nLen) { Reference< XLinguProperties > xPropSet; rArguments.getConstArray()[0] >>= xPropSet; // rArguments.getConstArray()[1] >>= xDicList; //! Pointer allows for access of the non-UNO functions. //! And the reference to the UNO-functions while increasing //! the ref-count and will implicitly free the memory //! when the object is no longer used. m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast(this), xPropSet ) ); m_pPropHelper->AddAsPropListener(); //! after a reference is established } else { OSL_FAIL( "wrong number of arguments in sequence" ); } } void SAL_CALL SpellChecker::dispose() { MutexGuard aGuard( GetLinguMutex() ); if (!m_bDisposing) { m_bDisposing = true; EventObject aEvtObj( static_cast(this) ); m_aEvtListeners.disposeAndClear( aEvtObj ); if (m_pPropHelper) { m_pPropHelper->RemoveAsPropListener(); m_pPropHelper.reset(); } } } void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) { MutexGuard aGuard( GetLinguMutex() ); if (!m_bDisposing && rxListener.is()) m_aEvtListeners.addInterface( rxListener ); } void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) { MutexGuard aGuard( GetLinguMutex() ); if (!m_bDisposing && rxListener.is()) m_aEvtListeners.removeInterface( rxListener ); } // Service specific part OUString SAL_CALL SpellChecker::getImplementationName() { return "org.openoffice.lingu.MySpellSpellChecker"; } sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName ) { return cppu::supportsService(this, ServiceName); } Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames() { return { SN_SPELLCHECKER }; } extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* lingucomponent_SpellChecker_get_implementation( css::uno::XComponentContext* , css::uno::Sequence const&) { return cppu::acquire(new SpellChecker()); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */