/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gciterator.hxx" using namespace linguistic; using namespace ::com::sun::star; // white space list: obtained from the fonts.config.txt of a Linux system. const sal_Unicode aWhiteSpaces[] = { 0x0020, /* SPACE */ 0x00a0, /* NO-BREAK SPACE */ 0x00ad, /* SOFT HYPHEN */ 0x115f, /* HANGUL CHOSEONG FILLER */ 0x1160, /* HANGUL JUNGSEONG FILLER */ 0x1680, /* OGHAM SPACE MARK */ 0x2000, /* EN QUAD */ 0x2001, /* EM QUAD */ 0x2002, /* EN SPACE */ 0x2003, /* EM SPACE */ 0x2004, /* THREE-PER-EM SPACE */ 0x2005, /* FOUR-PER-EM SPACE */ 0x2006, /* SIX-PER-EM SPACE */ 0x2007, /* FIGURE SPACE */ 0x2008, /* PUNCTUATION SPACE */ 0x2009, /* THIN SPACE */ 0x200a, /* HAIR SPACE */ 0x200b, /* ZERO WIDTH SPACE */ 0x200c, /* ZERO WIDTH NON-JOINER */ 0x200d, /* ZERO WIDTH JOINER */ 0x200e, /* LEFT-TO-RIGHT MARK */ 0x200f, /* RIGHT-TO-LEFT MARK */ 0x2028, /* LINE SEPARATOR */ 0x2029, /* PARAGRAPH SEPARATOR */ 0x202a, /* LEFT-TO-RIGHT EMBEDDING */ 0x202b, /* RIGHT-TO-LEFT EMBEDDING */ 0x202c, /* POP DIRECTIONAL FORMATTING */ 0x202d, /* LEFT-TO-RIGHT OVERRIDE */ 0x202e, /* RIGHT-TO-LEFT OVERRIDE */ 0x202f, /* NARROW NO-BREAK SPACE */ 0x205f, /* MEDIUM MATHEMATICAL SPACE */ 0x2060, /* WORD JOINER */ 0x2061, /* FUNCTION APPLICATION */ 0x2062, /* INVISIBLE TIMES */ 0x2063, /* INVISIBLE SEPARATOR */ 0x206A, /* INHIBIT SYMMETRIC SWAPPING */ 0x206B, /* ACTIVATE SYMMETRIC SWAPPING */ 0x206C, /* INHIBIT ARABIC FORM SHAPING */ 0x206D, /* ACTIVATE ARABIC FORM SHAPING */ 0x206E, /* NATIONAL DIGIT SHAPES */ 0x206F, /* NOMINAL DIGIT SHAPES */ 0x3000, /* IDEOGRAPHIC SPACE */ 0x3164, /* HANGUL FILLER */ 0xfeff, /* ZERO WIDTH NO-BREAK SPACE */ 0xffa0, /* HALFWIDTH HANGUL FILLER */ 0xfff9, /* INTERLINEAR ANNOTATION ANCHOR */ 0xfffa, /* INTERLINEAR ANNOTATION SEPARATOR */ 0xfffb /* INTERLINEAR ANNOTATION TERMINATOR */ }; // Information about reason for proofreading (ProofInfo) const sal_Int32 PROOFINFO_GET_PROOFRESULT = 1; const sal_Int32 PROOFINFO_MARK_PARAGRAPH = 2; const int nWhiteSpaces = SAL_N_ELEMENTS( aWhiteSpaces ); static bool lcl_IsWhiteSpace( sal_Unicode cChar ) { bool bFound = false; for (int i = 0; i < nWhiteSpaces && !bFound; ++i) { if (cChar == aWhiteSpaces[i]) bFound = true; } return bFound; } static sal_Int32 lcl_SkipWhiteSpaces( const OUString &rText, sal_Int32 nStartPos ) { // note having nStartPos point right behind the string is OK since that one // is a correct end-of-sentence position to be returned from a grammar checker... const sal_Int32 nLen = rText.getLength(); bool bIllegalArgument = false; if (nStartPos < 0) { bIllegalArgument = true; nStartPos = 0; } if (nStartPos > nLen) { bIllegalArgument = true; nStartPos = nLen; } if (bIllegalArgument) { SAL_WARN( "linguistic", "lcl_SkipWhiteSpaces: illegal arguments" ); } sal_Int32 nRes = nStartPos; if (0 <= nStartPos && nStartPos < nLen) { const sal_Unicode* const pEnd = rText.getStr() + nLen; const sal_Unicode *pText = rText.getStr() + nStartPos; while (pText != pEnd && lcl_IsWhiteSpace(*pText)) ++pText; nRes = pText - rText.getStr(); } DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_SkipWhiteSpaces return value out of range" ); return nRes; } static sal_Int32 lcl_BacktraceWhiteSpaces( const OUString &rText, sal_Int32 nStartPos ) { // note: having nStartPos point right behind the string is OK since that one // is a correct end-of-sentence position to be returned from a grammar checker... const sal_Int32 nLen = rText.getLength(); bool bIllegalArgument = false; if (nStartPos < 0) { bIllegalArgument = true; nStartPos = 0; } if (nStartPos > nLen) { bIllegalArgument = true; nStartPos = nLen; } if (bIllegalArgument) { SAL_WARN( "linguistic", "lcl_BacktraceWhiteSpaces: illegal arguments" ); } sal_Int32 nRes = nStartPos; sal_Int32 nPosBefore = nStartPos - 1; const sal_Unicode *pStart = rText.getStr(); if (0 <= nPosBefore && nPosBefore < nLen && lcl_IsWhiteSpace( pStart[ nPosBefore ] )) { nStartPos = nPosBefore; const sal_Unicode *pText = rText.getStr() + nStartPos; while (pText > pStart && lcl_IsWhiteSpace( *pText )) --pText; // now add 1 since we want to point to the first char after the last char in the sentence... nRes = pText - pStart + 1; } DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_BacktraceWhiteSpaces return value out of range" ); return nRes; } extern "C" { static void lcl_workerfunc (void * gci) { osl_setThreadName("GrammarCheckingIterator"); static_cast(gci)->DequeueAndCheck(); } } static lang::Locale lcl_GetPrimaryLanguageOfSentence( const uno::Reference< text::XFlatParagraph >& xFlatPara, sal_Int32 nStartIndex ) { //get the language of the first word return xFlatPara->getLanguageOfText( nStartIndex, 1 ); } LngXStringKeyMap::LngXStringKeyMap() {} void SAL_CALL LngXStringKeyMap::insertValue(const OUString& aKey, const css::uno::Any& aValue) { std::map::const_iterator aIter = maMap.find(aKey); if (aIter != maMap.end()) throw css::container::ElementExistException(); maMap[aKey] = aValue; } css::uno::Any SAL_CALL LngXStringKeyMap::getValue(const OUString& aKey) { std::map::const_iterator aIter = maMap.find(aKey); if (aIter == maMap.end()) throw css::container::NoSuchElementException(); return (*aIter).second; } sal_Bool SAL_CALL LngXStringKeyMap::hasValue(const OUString& aKey) { return maMap.find(aKey) != maMap.end(); } ::sal_Int32 SAL_CALL LngXStringKeyMap::getCount() { return maMap.size(); } OUString SAL_CALL LngXStringKeyMap::getKeyByIndex(::sal_Int32 nIndex) { if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size()) throw css::lang::IndexOutOfBoundsException(); return OUString(); } css::uno::Any SAL_CALL LngXStringKeyMap::getValueByIndex(::sal_Int32 nIndex) { if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size()) throw css::lang::IndexOutOfBoundsException(); return css::uno::Any(); } osl::Mutex& GrammarCheckingIterator::MyMutex() { static osl::Mutex SINGLETON; return SINGLETON; } GrammarCheckingIterator::GrammarCheckingIterator() : m_bEnd( false ), m_bGCServicesChecked( false ), m_nDocIdCounter( 0 ), m_thread(nullptr), m_aEventListeners( MyMutex() ), m_aNotifyListeners( MyMutex() ) { } GrammarCheckingIterator::~GrammarCheckingIterator() { TerminateThread(); } void GrammarCheckingIterator::TerminateThread() { oslThread t; { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); t = m_thread; m_thread = nullptr; m_bEnd = true; m_aWakeUpThread.set(); } if (t != nullptr) { osl_joinWithThread(t); osl_destroyThread(t); } // After m_bEnd was used to flag lcl_workerfunc to quit, now // reset it so lcl_workerfunc could be relaunched later. { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); m_bEnd = false; } } bool GrammarCheckingIterator::joinThreads() { TerminateThread(); return true; } sal_Int32 GrammarCheckingIterator::NextDocId() { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); m_nDocIdCounter += 1; return m_nDocIdCounter; } OUString GrammarCheckingIterator::GetOrCreateDocId( const uno::Reference< lang::XComponent > &xComponent ) { // internal method; will always be called with locked mutex OUString aRes; if (xComponent.is()) { if (m_aDocIdMap.find( xComponent.get() ) != m_aDocIdMap.end()) { // return already existing entry aRes = m_aDocIdMap[ xComponent.get() ]; } else // add new entry { sal_Int32 nRes = NextDocId(); aRes = OUString::number( nRes ); m_aDocIdMap[ xComponent.get() ] = aRes; xComponent->addEventListener( this ); } } return aRes; } void GrammarCheckingIterator::AddEntry( const uno::Reference< text::XFlatParagraphIterator >& xFlatParaIterator, const uno::Reference< text::XFlatParagraph >& xFlatPara, const OUString & rDocId, sal_Int32 nStartIndex, bool bAutomatic ) { // we may not need/have a xFlatParaIterator (e.g. if checkGrammarAtPos was called) // but we always need a xFlatPara... if (!xFlatPara.is()) return; FPEntry aNewFPEntry; aNewFPEntry.m_xParaIterator = xFlatParaIterator; aNewFPEntry.m_xPara = xFlatPara; aNewFPEntry.m_aDocId = rDocId; aNewFPEntry.m_nStartIndex = nStartIndex; aNewFPEntry.m_bAutomatic = bAutomatic; // add new entry to the end of this queue ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); if (!m_thread) m_thread = osl_createThread( lcl_workerfunc, this ); m_aFPEntriesQueue.push_back( aNewFPEntry ); // wake up the thread in order to do grammar checking m_aWakeUpThread.set(); } void GrammarCheckingIterator::ProcessResult( const linguistic2::ProofreadingResult &rRes, const uno::Reference< text::XFlatParagraphIterator > &rxFlatParagraphIterator, bool bIsAutomaticChecking ) { DBG_ASSERT( rRes.xFlatParagraph.is(), "xFlatParagraph is missing" ); //no guard necessary as no members are used bool bContinueWithNextPara = false; if (!rRes.xFlatParagraph.is() || rRes.xFlatParagraph->isModified()) { // if paragraph was modified/deleted meanwhile continue with the next one... bContinueWithNextPara = true; } else // paragraph is still unchanged... { // mark found errors... sal_Int32 nTextLen = rRes.aText.getLength(); bool bBoundariesOk = 0 <= rRes.nStartOfSentencePosition && rRes.nStartOfSentencePosition <= nTextLen && 0 <= rRes.nBehindEndOfSentencePosition && rRes.nBehindEndOfSentencePosition <= nTextLen && 0 <= rRes.nStartOfNextSentencePosition && rRes.nStartOfNextSentencePosition <= nTextLen && rRes.nStartOfSentencePosition <= rRes.nBehindEndOfSentencePosition && rRes.nBehindEndOfSentencePosition <= rRes.nStartOfNextSentencePosition; DBG_ASSERT( bBoundariesOk, "inconsistent sentence boundaries" ); uno::Reference< text::XMultiTextMarkup > xMulti( rRes.xFlatParagraph, uno::UNO_QUERY ); if (xMulti.is()) // use new API for markups { try { // length = number of found errors + 1 sentence markup sal_Int32 nErrors = rRes.aErrors.getLength(); uno::Sequence< text::TextMarkupDescriptor > aDescriptors( nErrors + 1 ); text::TextMarkupDescriptor * pDescriptors = aDescriptors.getArray(); uno::Reference< linguistic2::XDictionary > xIgnoreAll = ::GetIgnoreAllList(); sal_Int32 ignoredCount = 0; // at pos 0 .. nErrors-1 -> all grammar errors for (const linguistic2::SingleProofreadingError &rError : rRes.aErrors) { OUString word(rRes.aText.subView(rError.nErrorStart, rError.nErrorLength)); bool ignored = xIgnoreAll.is() && xIgnoreAll->getEntry(word).is(); if (!ignored) { text::TextMarkupDescriptor &rDesc = *pDescriptors++; rDesc.nType = rError.nErrorType; rDesc.nOffset = rError.nErrorStart; rDesc.nLength = rError.nErrorLength; // the proofreader may return SPELLING but right now our core // does only handle PROOFREADING if the result is from the proofreader... // (later on we may wish to color spelling errors found by the proofreader // differently for example. But no special handling right now. if (rDesc.nType == text::TextMarkupType::SPELLCHECK) rDesc.nType = text::TextMarkupType::PROOFREADING; uno::Reference< container::XStringKeyMap > xKeyMap(new LngXStringKeyMap()); for( const beans::PropertyValue& rProperty : rError.aProperties ) { if ( rProperty.Name == "LineColor" ) { xKeyMap->insertValue(rProperty.Name, rProperty.Value); rDesc.xMarkupInfoContainer = xKeyMap; } else if ( rProperty.Name == "LineType" ) { xKeyMap->insertValue(rProperty.Name, rProperty.Value); rDesc.xMarkupInfoContainer = xKeyMap; } } } else ignoredCount++; } if (ignoredCount != 0) { aDescriptors.realloc(aDescriptors.getLength() - ignoredCount); pDescriptors = aDescriptors.getArray(); pDescriptors += aDescriptors.getLength() - 1; } // at pos nErrors -> sentence markup // nSentenceLength: includes the white-spaces following the sentence end... const sal_Int32 nSentenceLength = rRes.nStartOfNextSentencePosition - rRes.nStartOfSentencePosition; pDescriptors->nType = text::TextMarkupType::SENTENCE; pDescriptors->nOffset = rRes.nStartOfSentencePosition; pDescriptors->nLength = nSentenceLength; xMulti->commitMultiTextMarkup( aDescriptors ) ; } catch (lang::IllegalArgumentException &) { TOOLS_WARN_EXCEPTION( "linguistic", "commitMultiTextMarkup" ); } } // other sentences left to be checked in this paragraph? if (rRes.nStartOfNextSentencePosition < rRes.aText.getLength()) { AddEntry( rxFlatParagraphIterator, rRes.xFlatParagraph, rRes.aDocumentIdentifier, rRes.nStartOfNextSentencePosition, bIsAutomaticChecking ); } else // current paragraph finished { // set "already checked" flag for the current flat paragraph if (rRes.xFlatParagraph.is()) rRes.xFlatParagraph->setChecked( text::TextMarkupType::PROOFREADING, true ); bContinueWithNextPara = true; } } if (bContinueWithNextPara) { // we need to continue with the next paragraph if (rxFlatParagraphIterator.is()) AddEntry(rxFlatParagraphIterator, rxFlatParagraphIterator->getNextPara(), rRes.aDocumentIdentifier, 0, bIsAutomaticChecking); } } std::pair> GrammarCheckingIterator::getServiceForLocale(const lang::Locale& rLocale) const { if (!rLocale.Language.isEmpty()) { const OUString sBcp47 = LanguageTag::convertToBcp47(rLocale, false); GCImplNames_t::const_iterator aLangIt(m_aGCImplNamesByLang.find(sBcp47)); if (aLangIt != m_aGCImplNamesByLang.end()) return { aLangIt->second, {} }; for (const auto& sFallbackBcp47 : LanguageTag(rLocale).getFallbackStrings(false)) { aLangIt = m_aGCImplNamesByLang.find(sFallbackBcp47); if (aLangIt != m_aGCImplNamesByLang.end()) return { aLangIt->second, sFallbackBcp47 }; } } return {}; } uno::Reference< linguistic2::XProofreader > GrammarCheckingIterator::GetGrammarChecker( lang::Locale &rLocale ) { uno::Reference< linguistic2::XProofreader > xRes; // ---- THREAD SAFE START ---- ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); // check supported locales for each grammarchecker if not already done if (!m_bGCServicesChecked) { GetConfiguredGCSvcs_Impl(); m_bGCServicesChecked = true; } if (const auto& [aSvcImplName, oFallbackBcp47] = getServiceForLocale(rLocale); !aSvcImplName.isEmpty()) // matching configured language found? { if (oFallbackBcp47) rLocale = LanguageTag::convertToLocale(*oFallbackBcp47, false); GCReferences_t::const_iterator aImplNameIt( m_aGCReferencesByService.find( aSvcImplName ) ); if (aImplNameIt != m_aGCReferencesByService.end()) // matching impl name found? { xRes = aImplNameIt->second; } else // the service is to be instantiated here for the first time... { try { uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() ); uno::Reference< linguistic2::XProofreader > xGC( xContext->getServiceManager()->createInstanceWithContext(aSvcImplName, xContext), uno::UNO_QUERY_THROW ); uno::Reference< linguistic2::XSupportedLocales > xSuppLoc( xGC, uno::UNO_QUERY_THROW ); if (xSuppLoc->hasLocale( rLocale )) { m_aGCReferencesByService[ aSvcImplName ] = xGC; xRes = xGC; uno::Reference< linguistic2::XLinguServiceEventBroadcaster > xBC( xGC, uno::UNO_QUERY ); if (xBC.is()) xBC->addLinguServiceEventListener( this ); } else { SAL_WARN( "linguistic", "grammar checker does not support required locale" ); } } catch (uno::Exception &) { SAL_WARN( "linguistic", "instantiating grammar checker failed" ); } } } else // not found - quite normal { SAL_INFO("linguistic", "No grammar checker found for \"" << LanguageTag::convertToBcp47(rLocale, false) << "\""); } // ---- THREAD SAFE END ---- return xRes; } static uno::Sequence lcl_makeProperties(uno::Reference const& xFlatPara, sal_Int32 nProofInfo) { uno::Reference const xProps( xFlatPara, uno::UNO_QUERY_THROW); css::uno::Any a (nProofInfo); return comphelper::InitPropertySequence({ { "FieldPositions", xProps->getPropertyValue(u"FieldPositions"_ustr) }, { "FootnotePositions", xProps->getPropertyValue(u"FootnotePositions"_ustr) }, { "SortedTextId", xProps->getPropertyValue(u"SortedTextId"_ustr) }, { "DocumentElementsCount", xProps->getPropertyValue(u"DocumentElementsCount"_ustr) }, { "ProofInfo", a } }); } void GrammarCheckingIterator::DequeueAndCheck() { for (;;) { // ---- THREAD SAFE START ---- bool bQueueEmpty = false; { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); if (m_bEnd) { break; } bQueueEmpty = m_aFPEntriesQueue.empty(); } // ---- THREAD SAFE END ---- if (!bQueueEmpty) { uno::Reference< text::XFlatParagraphIterator > xFPIterator; uno::Reference< text::XFlatParagraph > xFlatPara; FPEntry aFPEntryItem; OUString aCurDocId; // ---- THREAD SAFE START ---- { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); aFPEntryItem = m_aFPEntriesQueue.front(); xFPIterator = aFPEntryItem.m_xParaIterator; xFlatPara = aFPEntryItem.m_xPara; m_aCurCheckedDocId = aFPEntryItem.m_aDocId; aCurDocId = m_aCurCheckedDocId; m_aFPEntriesQueue.pop_front(); } // ---- THREAD SAFE END ---- if (xFlatPara.is() && xFPIterator.is()) { try { OUString aCurTxt( xFlatPara->getText() ); lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, aFPEntryItem.m_nStartIndex ); const bool bModified = xFlatPara->isModified(); if (!bModified) { linguistic2::ProofreadingResult aRes; // ---- THREAD SAFE START ---- { osl::ClearableMutexGuard aGuard(MyMutex()); sal_Int32 nStartPos = aFPEntryItem.m_nStartIndex; sal_Int32 nSuggestedEnd = GetSuggestedEndOfSentence(aCurTxt, nStartPos, aCurLocale); DBG_ASSERT((nSuggestedEnd == 0 && aCurTxt.isEmpty()) || nSuggestedEnd > nStartPos, "nSuggestedEndOfSentencePos calculation failed?"); uno::Reference xGC = GetGrammarChecker(aCurLocale); if (xGC.is()) { aGuard.clear(); uno::Sequence const aProps( lcl_makeProperties(xFlatPara, PROOFINFO_MARK_PARAGRAPH)); aRes = xGC->doProofreading(aCurDocId, aCurTxt, aCurLocale, nStartPos, nSuggestedEnd, aProps); //!! work-around to prevent looping if the grammar checker //!! failed to properly identify the sentence end if (aRes.nBehindEndOfSentencePosition <= nStartPos && aRes.nBehindEndOfSentencePosition != nSuggestedEnd) { SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!"); aRes.nBehindEndOfSentencePosition = nSuggestedEnd; } aRes.xFlatParagraph = xFlatPara; aRes.nStartOfSentencePosition = nStartPos; } else { // no grammar checker -> no error // but we need to provide the data below in order to continue with the next sentence aRes.aDocumentIdentifier = aCurDocId; aRes.xFlatParagraph = xFlatPara; aRes.aText = aCurTxt; aRes.aLocale = std::move(aCurLocale); aRes.nStartOfSentencePosition = nStartPos; aRes.nBehindEndOfSentencePosition = nSuggestedEnd; } aRes.nStartOfNextSentencePosition = lcl_SkipWhiteSpaces(aCurTxt, aRes.nBehindEndOfSentencePosition); aRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( aCurTxt, aRes.nStartOfNextSentencePosition); //guard has to be cleared as ProcessResult calls out of this class } // ---- THREAD SAFE END ---- ProcessResult( aRes, xFPIterator, aFPEntryItem.m_bAutomatic ); } else { // the paragraph changed meanwhile... (and maybe is still edited) // thus we simply continue to ask for the next to be checked. uno::Reference< text::XFlatParagraph > xFlatParaNext( xFPIterator->getNextPara() ); AddEntry( xFPIterator, xFlatParaNext, aCurDocId, 0, aFPEntryItem.m_bAutomatic ); } } catch (css::uno::Exception &) { TOOLS_WARN_EXCEPTION("linguistic", "GrammarCheckingIterator::DequeueAndCheck ignoring"); } } // ---- THREAD SAFE START ---- { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); m_aCurCheckedDocId.clear(); } // ---- THREAD SAFE END ---- } else { // ---- THREAD SAFE START ---- { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); if (m_bEnd) { break; } // Check queue state again if (m_aFPEntriesQueue.empty()) m_aWakeUpThread.reset(); } // ---- THREAD SAFE END ---- //if the queue is empty // IMPORTANT: Don't call condition.wait() with locked // mutex. Otherwise you would keep out other threads // to add entries to the queue! A condition is thread- // safe implemented. m_aWakeUpThread.wait(); } } } void SAL_CALL GrammarCheckingIterator::startProofreading( const uno::Reference< ::uno::XInterface > & xDoc, const uno::Reference< text::XFlatParagraphIteratorProvider > & xIteratorProvider ) { // get paragraph to start checking with const bool bAutomatic = true; uno::Reference xFPIterator = xIteratorProvider->getFlatParagraphIterator( text::TextMarkupType::PROOFREADING, bAutomatic ); uno::Reference< text::XFlatParagraph > xPara( xFPIterator.is()? xFPIterator->getFirstPara() : nullptr ); uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); // ---- THREAD SAFE START ---- ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); if (xPara.is() && xComponent.is()) { OUString aDocId = GetOrCreateDocId( xComponent ); // create new entry and add it to queue AddEntry( xFPIterator, xPara, aDocId, 0, bAutomatic ); } // ---- THREAD SAFE END ---- } linguistic2::ProofreadingResult SAL_CALL GrammarCheckingIterator::checkSentenceAtPosition( const uno::Reference< uno::XInterface >& xDoc, const uno::Reference< text::XFlatParagraph >& xFlatPara, const OUString& rText, const lang::Locale&, sal_Int32 nStartOfSentencePos, sal_Int32 nSuggestedEndOfSentencePos, sal_Int32 nErrorPosInPara ) { // for the context menu... uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); const bool bDoCheck = (xFlatPara.is() && xComponent.is() && ( nErrorPosInPara < 0 || nErrorPosInPara < rText.getLength())); if (!bDoCheck) return linguistic2::ProofreadingResult(); // iterate through paragraph until we find the sentence we are interested in linguistic2::ProofreadingResult aTmpRes; sal_Int32 nStartPos = nStartOfSentencePos >= 0 ? nStartOfSentencePos : 0; bool bFound = false; do { lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, nStartPos ); sal_Int32 nOldStartOfSentencePos = nStartPos; uno::Reference< linguistic2::XProofreader > xGC; OUString aDocId; // ---- THREAD SAFE START ---- { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); aDocId = GetOrCreateDocId( xComponent ); nSuggestedEndOfSentencePos = GetSuggestedEndOfSentence( rText, nStartPos, aCurLocale ); DBG_ASSERT( nSuggestedEndOfSentencePos > nStartPos, "nSuggestedEndOfSentencePos calculation failed?" ); xGC = GetGrammarChecker( aCurLocale ); } // ---- THREAD SAFE START ---- sal_Int32 nEndPos = -1; if (xGC.is()) { uno::Sequence const aProps( lcl_makeProperties(xFlatPara, PROOFINFO_GET_PROOFRESULT)); aTmpRes = xGC->doProofreading( aDocId, rText, aCurLocale, nStartPos, nSuggestedEndOfSentencePos, aProps ); //!! work-around to prevent looping if the grammar checker //!! failed to properly identify the sentence end if (aTmpRes.nBehindEndOfSentencePosition <= nStartPos) { SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!" ); aTmpRes.nBehindEndOfSentencePosition = nSuggestedEndOfSentencePos; } aTmpRes.xFlatParagraph = xFlatPara; aTmpRes.nStartOfSentencePosition = nStartPos; nEndPos = aTmpRes.nBehindEndOfSentencePosition; if ((nErrorPosInPara< 0 || nStartPos <= nErrorPosInPara) && nErrorPosInPara < nEndPos) bFound = true; } if (nEndPos == -1) // no result from grammar checker nEndPos = nSuggestedEndOfSentencePos; nStartPos = lcl_SkipWhiteSpaces( rText, nEndPos ); aTmpRes.nBehindEndOfSentencePosition = nEndPos; aTmpRes.nStartOfNextSentencePosition = nStartPos; aTmpRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( rText, aTmpRes.nStartOfNextSentencePosition ); // prevent endless loop by forcefully advancing if needs be... if (nStartPos <= nOldStartOfSentencePos) { SAL_WARN( "linguistic", "end-of-sentence detection failed?" ); nStartPos = nOldStartOfSentencePos + 1; } } while (!bFound && nStartPos < rText.getLength()); if (bFound && !xFlatPara->isModified()) return aTmpRes; return linguistic2::ProofreadingResult(); } sal_Int32 GrammarCheckingIterator::GetSuggestedEndOfSentence( const OUString &rText, sal_Int32 nSentenceStartPos, const lang::Locale &rLocale ) { // internal method; will always be called with locked mutex if (!m_xBreakIterator.is()) { uno::Reference< uno::XComponentContext > xContext = ::comphelper::getProcessComponentContext(); m_xBreakIterator = i18n::BreakIterator::create(xContext); } sal_Int32 nTextLen = rText.getLength(); sal_Int32 nEndPosition(0); sal_Int32 nTmpStartPos = nSentenceStartPos; do { sal_Int32 const nPrevEndPosition(nEndPosition); nEndPosition = nTextLen; if (nTmpStartPos < nTextLen) { nEndPosition = m_xBreakIterator->endOfSentence( rText, nTmpStartPos, rLocale ); if (nEndPosition <= nPrevEndPosition) { // fdo#68750 if there's no progress at all then presumably // there's no end of sentence in this paragraph so just // set the end position to end of paragraph nEndPosition = nTextLen; } } if (nEndPosition < 0) nEndPosition = nTextLen; ++nTmpStartPos; } while (nEndPosition <= nSentenceStartPos && nEndPosition < nTextLen); if (nEndPosition > nTextLen) nEndPosition = nTextLen; return nEndPosition; } void SAL_CALL GrammarCheckingIterator::resetIgnoreRules( ) { for (auto const& elem : m_aGCReferencesByService) { uno::Reference< linguistic2::XProofreader > xGC(elem.second); if (xGC.is()) xGC->resetIgnoreRules(); } } sal_Bool SAL_CALL GrammarCheckingIterator::isProofreading( const uno::Reference< uno::XInterface >& xDoc ) { // ---- THREAD SAFE START ---- ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); bool bRes = false; uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); if (xComponent.is()) { // if the component was already used in one of the two calls to check text // i.e. in startGrammarChecking or checkGrammarAtPos it will be found in the // m_aDocIdMap unless the document already disposed. // If it is not found then it is not yet being checked (or requested to being checked) const DocMap_t::const_iterator aIt( m_aDocIdMap.find( xComponent.get() ) ); if (aIt != m_aDocIdMap.end()) { // check in document is checked automatically in the background... OUString aDocId = aIt->second; if (!m_aCurCheckedDocId.isEmpty() && m_aCurCheckedDocId == aDocId) { // an entry for that document was dequeued and is currently being checked. bRes = true; } else { // we need to check if there is an entry for that document in the queue... // That is the document is going to be checked sooner or later. sal_Int32 nSize = m_aFPEntriesQueue.size(); for (sal_Int32 i = 0; i < nSize && !bRes; ++i) { if (aDocId == m_aFPEntriesQueue[i].m_aDocId) bRes = true; } } } } // ---- THREAD SAFE END ---- return bRes; } void SAL_CALL GrammarCheckingIterator::processLinguServiceEvent( const linguistic2::LinguServiceEvent& rLngSvcEvent ) { if (rLngSvcEvent.nEvent != linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN) return; try { uno::Reference< uno::XInterface > xThis( getXWeak() ); linguistic2::LinguServiceEvent aEvent( xThis, linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN ); m_aNotifyListeners.notifyEach( &linguistic2::XLinguServiceEventListener::processLinguServiceEvent, aEvent); } catch (uno::RuntimeException &) { throw; } catch (const ::uno::Exception &) { // ignore TOOLS_WARN_EXCEPTION("linguistic", "processLinguServiceEvent"); } } sal_Bool SAL_CALL GrammarCheckingIterator::addLinguServiceEventListener( const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener ) { if (xListener.is()) { m_aNotifyListeners.addInterface( xListener ); } return true; } sal_Bool SAL_CALL GrammarCheckingIterator::removeLinguServiceEventListener( const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener ) { if (xListener.is()) { m_aNotifyListeners.removeInterface( xListener ); } return true; } void SAL_CALL GrammarCheckingIterator::dispose() { lang::EventObject aEvt( static_cast(this) ); m_aEventListeners.disposeAndClear( aEvt ); TerminateThread(); // ---- THREAD SAFE START ---- { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); // release all UNO references m_xBreakIterator.clear(); // clear containers with UNO references AND have those references released GCReferences_t aTmpEmpty1; DocMap_t aTmpEmpty2; FPQueue_t aTmpEmpty3; m_aGCReferencesByService.swap( aTmpEmpty1 ); m_aDocIdMap.swap( aTmpEmpty2 ); m_aFPEntriesQueue.swap( aTmpEmpty3 ); } // ---- THREAD SAFE END ---- } void SAL_CALL GrammarCheckingIterator::addEventListener( const uno::Reference< lang::XEventListener >& xListener ) { if (xListener.is()) { m_aEventListeners.addInterface( xListener ); } } void SAL_CALL GrammarCheckingIterator::removeEventListener( const uno::Reference< lang::XEventListener >& xListener ) { if (xListener.is()) { m_aEventListeners.removeInterface( xListener ); } } void SAL_CALL GrammarCheckingIterator::disposing( const lang::EventObject &rSource ) { // if the component (document) is disposing release all references //!! There is no need to remove entries from the queue that are from this document //!! since the respectives xFlatParagraphs should become invalid (isModified() == true) //!! and the call to xFlatParagraphIterator->getNextPara() will result in an empty reference. //!! And if an entry is currently checked by a grammar checker upon return the results //!! should be ignored. //!! Also GetOrCreateDocId will not use that very same Id again... //!! All of the above resulting in that we only have to get rid of the implementation pointer here. uno::Reference< lang::XComponent > xDoc( rSource.Source, uno::UNO_QUERY ); if (xDoc.is()) { // ---- THREAD SAFE START ---- ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); m_aDocIdMap.erase( xDoc.get() ); // ---- THREAD SAFE END ---- } } uno::Reference< util::XChangesBatch > const & GrammarCheckingIterator::GetUpdateAccess() const { if (!m_xUpdateAccess.is()) { try { // get configuration provider uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); uno::Reference< lang::XMultiServiceFactory > xConfigurationProvider = configuration::theDefaultProvider::get( xContext ); // get configuration update access beans::PropertyValue aValue; aValue.Name = "nodepath"; aValue.Value <<= u"org.openoffice.Office.Linguistic/ServiceManager"_ustr; uno::Sequence< uno::Any > aProps{ uno::Any(aValue) }; m_xUpdateAccess.set( xConfigurationProvider->createInstanceWithArguments( u"com.sun.star.configuration.ConfigurationUpdateAccess"_ustr, aProps ), uno::UNO_QUERY_THROW ); } catch (uno::Exception &) { } } return m_xUpdateAccess; } void GrammarCheckingIterator::GetConfiguredGCSvcs_Impl() { GCImplNames_t aTmpGCImplNamesByLang; try { // get node names (locale iso strings) for configured grammar checkers uno::Reference< container::XNameAccess > xNA( GetUpdateAccess(), uno::UNO_QUERY_THROW ); xNA.set( xNA->getByName( u"GrammarCheckerList"_ustr ), uno::UNO_QUERY_THROW ); const uno::Sequence< OUString > aElementNames( xNA->getElementNames() ); for (const OUString& rElementName : aElementNames) { uno::Sequence< OUString > aImplNames; uno::Any aTmp( xNA->getByName( rElementName ) ); if (aTmp >>= aImplNames) { if (aImplNames.hasElements()) { // only the first entry is used, there should be only one grammar checker per language const OUString aImplName( aImplNames[0] ); aTmpGCImplNamesByLang[rElementName] = aImplName; } } else { SAL_WARN( "linguistic", "failed to get aImplNames. Wrong type?" ); } } } catch (uno::Exception const &) { TOOLS_WARN_EXCEPTION( "linguistic", "exception caught. Failed to get configured services" ); } { // ---- THREAD SAFE START ---- ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); m_aGCImplNamesByLang.swap(aTmpGCImplNamesByLang); // ---- THREAD SAFE END ---- } } sal_Bool SAL_CALL GrammarCheckingIterator::supportsService( const OUString & rServiceName ) { return cppu::supportsService(this, rServiceName); } OUString SAL_CALL GrammarCheckingIterator::getImplementationName( ) { return u"com.sun.star.lingu2.ProofreadingIterator"_ustr; } uno::Sequence< OUString > SAL_CALL GrammarCheckingIterator::getSupportedServiceNames( ) { return { u"com.sun.star.linguistic2.ProofreadingIterator"_ustr }; } void GrammarCheckingIterator::SetServiceList( const lang::Locale &rLocale, const uno::Sequence< OUString > &rSvcImplNames ) { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); OUString sBcp47 = LanguageTag::convertToBcp47(rLocale, false); OUString aImplName; if (rSvcImplNames.hasElements()) aImplName = rSvcImplNames[0]; // there is only one grammar checker per language if (!LinguIsUnspecified(sBcp47) && !sBcp47.isEmpty()) { if (!aImplName.isEmpty()) m_aGCImplNamesByLang[sBcp47] = aImplName; else m_aGCImplNamesByLang.erase(sBcp47); } } uno::Sequence< OUString > GrammarCheckingIterator::GetServiceList( const lang::Locale &rLocale ) const { ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); const OUString aImplName = getServiceForLocale(rLocale).first; // there is only one grammar checker per language if (!aImplName.isEmpty()) return { aImplName }; return {}; } extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* linguistic_GrammarCheckingIterator_get_implementation( css::uno::XComponentContext* , css::uno::Sequence const&) { return cppu::acquire(new GrammarCheckingIterator()); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */