/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "SvXMLAutoCorrectImport.hxx" #include "SvXMLAutoCorrectExport.hxx" #include "SvXMLAutoCorrectTokenHandler.hxx" #include #include #include #include #include #include #include #include #include using namespace ::com::sun::star::ucb; using namespace ::com::sun::star::uno; using namespace ::com::sun::star::xml::sax; using namespace ::com::sun::star; using namespace ::xmloff::token; using namespace ::utl; namespace { enum class Flags { NONE = 0x00, FullStop = 0x01, ExclamationMark = 0x02, QuestionMark = 0x04, }; } namespace o3tl { template<> struct typed_flags : is_typed_flags {}; } const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr; constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr; constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr; // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks // Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"? constexpr std::u16string_view /* also at these beginnings - Brackets and all kinds of begin characters */ sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094", /* also at these ends - Brackets and all kinds of begin characters */ sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094"; static OUString EncryptBlockName_Imp(std::u16string_view rName); static bool NonFieldWordDelim( const sal_Unicode c ) { return ' ' == c || '\t' == c || 0x0a == c || cNonBreakingSpace == c || 0x2011 == c; } static bool IsWordDelim( const sal_Unicode c ) { return c == 0x1 || NonFieldWordDelim(c); } static bool IsLowerLetter( sal_Int32 nCharType ) { return CharClass::isLetterType( nCharType ) && ( css::i18n::KCharacterType::LOWER & nCharType); } static bool IsUpperLetter( sal_Int32 nCharType ) { return CharClass::isLetterType( nCharType ) && ( css::i18n::KCharacterType::UPPER & nCharType); } static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, sal_Int32 nStt, sal_Int32 nEnd ) { for( ; nStt < nEnd; ++nStt ) { css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); switch( nScript ) { case css::i18n::UnicodeScript_kCJKRadicalsSupplement: case css::i18n::UnicodeScript_kHangulJamo: case css::i18n::UnicodeScript_kCJKSymbolPunctuation: case css::i18n::UnicodeScript_kHiragana: case css::i18n::UnicodeScript_kKatakana: case css::i18n::UnicodeScript_kHangulCompatibilityJamo: case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: case css::i18n::UnicodeScript_kCJKCompatibility: case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA: case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: case css::i18n::UnicodeScript_kHangulSyllable: case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: return true; default: ; //do nothing } } return false; } static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, sal_Int32 nStt, sal_Int32 nEnd ) { for( ; nStt < nEnd; ++nStt ) { if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) return true; } return false; } static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c) { return std::any_of(arr.begin(), arr.end(), [c](const auto c1) { return c1 == c; }); } SvxAutoCorrDoc::~SvxAutoCorrDoc() { } // Called by the functions: // - FnCapitalStartWord // - FnCapitalStartSentence // after the exchange of characters. Then the words, if necessary, can be inserted // into the exception list. void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, sal_Unicode ) { } LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const { return LANGUAGE_SYSTEM; } static const LanguageTag& GetAppLang() { return Application::GetSettings().GetLanguageTag(); } /// Never use an unresolved LANGUAGE_SYSTEM. static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) { LanguageType eLang = rDoc.GetLanguage( nPos ); if (eLang == LANGUAGE_SYSTEM) eLang = GetAppLang().getLanguageType(); // the current work locale return eLang; } static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) { static std::unique_ptr xLclDtWrp; LanguageTag aLcl( nLang ); if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl) xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl))); return *xLclDtWrp; } static TransliterationWrapper& GetIgnoreTranslWrapper() { static int bIsInit = 0; static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_KANA | TransliterationFlags::IGNORE_WIDTH ); if( !bIsInit ) { aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); bIsInit = 1; } return aWrp; } static CollatorWrapper& GetCollatorWrapper() { static CollatorWrapper aCollWrp = []() { CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); return tmp; }(); return aCollWrp; } bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) { return cChar == '\0' || cChar == '\t' || cChar == 0x0a || cChar == ' ' || cChar == '\'' || cChar == '\"' || cChar == '*' || cChar == '_' || cChar == '%' || cChar == '.' || cChar == ',' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || cChar == '<' || cChar == '>' || cChar == '/' || cChar == '-'; } namespace { bool IsCompoundWordDelimChar(sal_Unicode cChar) { return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); } } bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) { return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || cChar == '/' /*case for the urls exception*/; } ACFlags SvxAutoCorrect::GetDefaultFlags() { ACFlags nRet = ACFlags::Autocorrect | ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::ChgOrdinalNumber | ACFlags::ChgToEnEmDash | ACFlags::AddNonBrkSpace | ACFlags::TransliterateRTL | ACFlags::ChgAngleQuotes | ACFlags::ChgWeightUnderl | ACFlags::SetINetAttr | ACFlags::SetDOIAttr | ACFlags::ChgQuotes | ACFlags::SaveWordCplSttLst | ACFlags::SaveWordWordStartLst | ACFlags::CorrectCapsLock; LanguageType eLang = GetAppLang().getLanguageType(); if( eLang.anyOf( LANGUAGE_ENGLISH, LANGUAGE_ENGLISH_US, LANGUAGE_ENGLISH_UK, LANGUAGE_ENGLISH_AUS, LANGUAGE_ENGLISH_CAN, LANGUAGE_ENGLISH_NZ, LANGUAGE_ENGLISH_EIRE, LANGUAGE_ENGLISH_SAFRICA, LANGUAGE_ENGLISH_JAMAICA, LANGUAGE_ENGLISH_CARIBBEAN)) nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); return nRet; } constexpr sal_Unicode cEmDash = 0x2014; constexpr sal_Unicode cEnDash = 0x2013; constexpr OUString sEmDash(u"\u2014"_ustr); constexpr OUString sEnDash(u"\u2013"_ustr); constexpr sal_Unicode cApostrophe = 0x2019; constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; // stop characters for searching preceding quotes // (the first character is also the opening quote we are looking for) const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,, const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >> // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 }; const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile, OUString aUserAutocorrFile ) : sShareAutoCorrFile(std::move( aShareAutocorrFile )) , sUserAutoCorrFile(std::move( aUserAutocorrFile )) , eCharClassLang( LANGUAGE_DONTKNOW ) , nFlags(SvxAutoCorrect::GetDefaultFlags()) , cStartDQuote( 0 ) , cEndDQuote( 0 ) , cStartSQuote( 0 ) , cEndSQuote( 0 ) { } SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) , aSwFlags( rCpy.aSwFlags ) , eCharClassLang(rCpy.eCharClassLang) , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad)) , cStartDQuote( rCpy.cStartDQuote ) , cEndDQuote( rCpy.cEndDQuote ) , cStartSQuote( rCpy.cStartSQuote ) , cEndSQuote( rCpy.cEndSQuote ) { } SvxAutoCorrect::~SvxAutoCorrect() { } void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) { moCharClass.emplace( LanguageTag( eLang) ); eCharClassLang = eLang; } void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) { ACFlags nOld = nFlags; nFlags = bOn ? nFlags | nFlag : nFlags & ~nFlag; if( !bOn ) { if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) nFlags &= ~ACFlags::CplSttLstLoad; if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) nFlags &= ~ACFlags::WordStartLstLoad; if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) nFlags &= ~ACFlags::ChgWordLstLoad; } } // Correct TWo INitial CApitals void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { CharClass& rCC = GetCharClass( eLang ); // Delete all non alphanumeric. Test the characters at the beginning/end of // the word ( recognizes: "(min.", "/min.", and so on.) for( ; nSttPos < nEndPos; ++nSttPos ) if( rCC.isLetterNumeric( rTxt, nSttPos )) break; for( ; nSttPos < nEndPos; --nEndPos ) if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) break; // Is the word a compounded word separated by delimiters? // If so, keep track of all delimiters so each constituent // word can be checked for two initial capital letters. std::deque aDelimiters; // Always check for two capitals at the beginning // of the entire word, so start at nSttPos. aDelimiters.push_back(nSttPos); // Find all compound word delimiters for (sal_Int32 n = nSttPos; n < nEndPos; ++n) { if (IsCompoundWordDelimChar(rTxt[ n ])) { aDelimiters.push_back( n + 1 ); // Get position of char after delimiter } } // Decide where to put the terminating delimiter. // If the last AutoCorrect char was a newline, then the AutoCorrect // char will not be included in rTxt. // If the last AutoCorrect char was not a newline, then the AutoCorrect // character will be the last character in rTxt. if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) aDelimiters.push_back(nEndPos); // Iterate through the word and all words that compose it. // Two capital letters at the beginning of word? for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) { nSttPos = aDelimiters[nI]; nEndPos = aDelimiters[nI + 1]; if( nSttPos+2 < nEndPos && IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && // Is the third character a lower case IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && // Do not replace special attributes 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) { // test if the word is in an exception list OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); if( !FindInWordStartExceptList(eLang, sWord) ) { // Check that word isn't correctly spelt before correcting: css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = LinguMgr::GetSpellChecker(); if( xSpeller->hasLanguage(static_cast(eLang)) ) { Sequence< css::beans::PropertyValue > aEmptySeq; if (xSpeller->isValid(sWord, static_cast(eLang), aEmptySeq)) { return; } } sal_Unicode cSave = rTxt[ nSttPos ]; OUString sChar = rCC.lowercase( OUString(cSave) ); if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar )) { if( ACFlags::SaveWordWordStartLst & nFlags ) rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); } } } } } // Format ordinal numbers suffixes (1st -> 1^st) bool SvxAutoCorrect::FnChgOrdinalNumber( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang) { // 1st, 2nd, 3rd, 4 - 0th // 201th or 201st // 12th or 12nd bool bChg = false; // In some languages ordinal suffixes should never be // changed to superscript. Let's break for those languages. if (!eLang.anyOf( LANGUAGE_SWEDISH, LANGUAGE_SWEDISH_FINLAND)) { CharClass& rCC = GetCharClass(eLang); for (; nSttPos < nEndPos; ++nSttPos) if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos])) break; for (; nSttPos < nEndPos; --nEndPos) if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1])) break; // Get the last number in the string to check sal_Int32 nNumEnd = nEndPos; bool bFoundEnd = false; bool isValidNumber = true; sal_Int32 i = nEndPos; while (i > nSttPos) { i--; bool isDigit = rCC.isDigit(rTxt, i); if (bFoundEnd) isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); if (isDigit && !bFoundEnd) { bFoundEnd = true; nNumEnd = i; } } if (bFoundEnd && isValidNumber) { sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1)); // Check if the characters after that number correspond to the ordinal suffix uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); for (OUString const & sSuffix : aSuffixes) { std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1); if (sSuffix == sEnd) { // Check if the ordinal suffix has to be set as super script if (rCC.isLetter(sSuffix)) { // Do the change SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); rDoc.SetAttr(nNumEnd + 1, nEndPos, SID_ATTR_CHAR_ESCAPEMENT, aSvxEscapementItem); bChg = true; } } } } } return bChg; } // Replace dashes bool SvxAutoCorrect::FnChgToEnEmDash( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { bool bRet = false; CharClass& rCC = GetCharClass( eLang ); if (eLang == LANGUAGE_SYSTEM) eLang = GetAppLang().getLanguageType(); bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert; // keep a local copy for later use OUString aOrigTxt = rTxt; sal_Int32 nFirstReplacementTextLengthChange = 0; // replace " - " or " --" with "enDash" if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) { sal_Unicode cCh = rTxt[ nSttPos ]; if( '-' == cCh ) { if( 1 < nEndPos - nSttPos && ' ' == rTxt[ nSttPos-1 ] && '-' == rTxt[ nSttPos+1 ]) { sal_Int32 n; for( n = nSttPos+2; n < nEndPos && lcl_IsInArr( sImplSttSkipChars,(cCh = rTxt[ n ])); ++n ) ; // found: " --[][A-z0-9] if( rCC.isLetterNumeric( OUString(cCh) ) ) { for( n = nSttPos-1; n && lcl_IsInArr( sImplEndSkipChars,(cCh = rTxt[ --n ])); ) ; // found: "[A-z0-9][] --[][A-z0-9] if( rCC.isLetterNumeric( OUString(cCh) )) { rDoc.Delete( nSttPos, nSttPos + 2 ); rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash ); nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch bRet = true; } } } } else if( 3 < nSttPos && ' ' == rTxt[ nSttPos-1 ] && '-' == rTxt[ nSttPos-2 ]) { sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) { --nTmpPos; ++nLen; cCh = rTxt[ nTmpPos-1 ]; } if( ' ' == cCh ) { for( n = nSttPos; n < nEndPos && lcl_IsInArr( sImplSttSkipChars,(cCh = rTxt[ n ])); ++n ) ; // found: " - [][A-z0-9] if( rCC.isLetterNumeric( OUString(cCh) ) ) { cCh = ' '; for( n = nTmpPos-1; n && lcl_IsInArr( sImplEndSkipChars,(cCh = rTxt[ --n ])); ) ; // found: "[A-z0-9][] - [][A-z0-9] if( rCC.isLetterNumeric( OUString(cCh) )) { rDoc.Delete( nTmpPos, nTmpPos + nLen ); rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash ); nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch bRet = true; } } } } } // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" // [0-9]--[0-9] double dash always replaced with "enDash" // Finnish and Hungarian use enDash instead of emDash. bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); if( 4 <= nEndPos - nSttPos ) { std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) ); size_t nFndPos = sTmpView.find(u"--"); if (nFndPos > 0 && nFndPos < sTmpView.size() - 2) { // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it // uses the index *both* as code unit index (when checking it as ASCII), *and* // as code point index (when passes to css::i18n::XCharacterClassification). // Oh well... Anyway, single-codepoint strings will workaround it. sal_Int32 nStart = nSttPos + nFndPos; sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1); OUString sStart(&chStart, 1); // No idea why sImplEndSkipChars is checked at start if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart)) { sal_Int32 nEnd = nSttPos + nFndPos + 2; sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1); OUString sEnd(&chEnd, 1); // No idea why sImplSttSkipChars is checked at end if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd)) { nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange; rDoc.Delete(nSttPos, nSttPos + 2); rDoc.Insert(nSttPos, (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0)) ? sEnDash : sEmDash)); bRet = true; } } } } return bRet; } // Add non-breaking space before specific punctuation marks in French text sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace( SvxAutoCorrDoc& rDoc, std::u16string_view rTxt, sal_Int32 nEndPos, LanguageType eLang, bool& io_bNbspRunNext ) { sal_Int32 nRet = -1; CharClass& rCC = GetCharClass( eLang ); if ( rCC.getLanguageTag().getLanguage() == "fr" ) { bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); OUString allChars = ":;?!%"; OUString chars( allChars ); if ( bFrCA ) chars = ":"; sal_Unicode cChar = rTxt[ nEndPos ]; bool bHasSpace = chars.indexOf( cChar ) != -1; bool bIsSpecial = allChars.indexOf( cChar ) != -1; if ( bIsSpecial ) { // Get the last word delimiter position sal_Int32 nSttWdPos = nEndPos; bool bWasWordDelim = false; while( nSttWdPos ) { bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); if (bWasWordDelim) break; } //See if the text is the start of a protocol string, e.g. have text of //"http" see if it is the start of "http:" and if so leave it alone size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0); size_t nProtocolLen = nEndPos - nSttWdPos + 1; if (nIndex + nProtocolLen <= rTxt.size()) { if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid) return -1; } // Check the presence of "://" in the word size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 ); if ( nStrPos == std::u16string_view::npos && nEndPos > 0 ) { // Check the previous char sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) { // Remove any previous normal space sal_Int32 nPos = nEndPos - 1; while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) { if ( nPos == 0 ) break; nPos--; cPrevChar = rTxt[ nPos ]; } nPos++; if ( nEndPos - nPos > 0 ) rDoc.Delete( nPos, nEndPos ); // Add the non-breaking space at the end pos if ( bHasSpace ) rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); io_bNbspRunNext = true; nRet = nPos; } else if ( chars.indexOf( cPrevChar ) != -1 ) io_bNbspRunNext = true; } } else if ( cChar == '/' && nEndPos > 1 && static_cast(rTxt.size()) > (nEndPos - 1) ) { // Remove the hardspace right before to avoid formatting URLs sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) { rDoc.Delete( nEndPos - 2, nEndPos - 1 ); nRet = nEndPos - 1; } } } return nRet; } // URL recognition bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) )); bool bRet = !sURL.isEmpty(); if( bRet ) // so, set attribute: rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); return bRet; } // DOI citation recognition bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) )); bool bRet = !sURL.isEmpty(); if( bRet ) // so, set attribute: rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); return bRet; } // Automatic *bold*, /italic/, -strikeout- and _underline_ bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nEndPos ) { // Condition: // at the beginning: _, *, / or ~ after Space with the following !Space // at the end: _, *, / or ~ before Space (word delimiter?) sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout if( ++nEndPos != rTxt.getLength() && !IsWordDelim( rTxt[ nEndPos ] ) ) return false; --nEndPos; bool bAlphaNum = false; sal_Int32 nPos = nEndPos; sal_Int32 nFndPos = -1; CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); while( nPos ) { switch( sal_Unicode c = rTxt[ --nPos ] ) { case '_': case '-': case '/': case '*': if( c == cInsChar ) { if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || IsWordDelim( rTxt[ nPos-1 ])) && !IsWordDelim( rTxt[ nPos+1 ])) nFndPos = nPos; else // Condition is not satisfied, so cancel nFndPos = -1; nPos = 0; } break; default: if( !bAlphaNum ) bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); } } if( -1 != nFndPos ) { // first delete the Character at the end - this allows insertion // of an empty hint in SetAttr which would be removed by Delete // (fdo#62536, AUTOFMT in Writer) rDoc.Delete( nEndPos, nEndPos + 1 ); // Span the Attribute over the area // the end. if( '*' == cInsChar ) // Bold { SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); rDoc.SetAttr( nFndPos + 1, nEndPos, SID_ATTR_CHAR_WEIGHT, aSvxWeightItem); } else if( '/' == cInsChar ) // Italic { SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); rDoc.SetAttr( nFndPos + 1, nEndPos, SID_ATTR_CHAR_POSTURE, aSvxPostureItem); } else if( '-' == cInsChar ) // Strikeout { SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); rDoc.SetAttr( nFndPos + 1, nEndPos, SID_ATTR_CHAR_STRIKEOUT, aSvxCrossedOutItem); } else // Underline { SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); rDoc.SetAttr( nFndPos + 1, nEndPos, SID_ATTR_CHAR_UNDERLINE, aSvxUnderlineItem); } rDoc.Delete( nFndPos, nFndPos + 1 ); } return -1 != nFndPos; } // Capitalize first letter of every sentence void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, const OUString& rTxt, bool bNormalPos, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { if( rTxt.isEmpty() || nEndPos <= nSttPos ) return; CharClass& rCC = GetCharClass( eLang ); OUString aText( rTxt ); const sal_Unicode *pStart = aText.getStr(), *pStr = pStart + nEndPos, *pWordStt = nullptr, *pDelim = nullptr; bool bAtStart = false; do { --pStr; if (rCC.isLetter(aText, pStr - pStart)) { if( !pWordStt ) pDelim = pStr+1; pWordStt = pStr; } else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) { if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words pWordStt - 1 == pStr && // Installation at beginning of paragraph. Replaced < by <= (#i38971#) (pStart + 1) <= pStr && rCC.isLetter(aText, pStr-1 - pStart)) pWordStt = --pStr; else break; } bAtStart = (pStart == pStr); } while( !bAtStart ); if (!pWordStt) return; // no character to be replaced if (rCC.isDigit(aText, pStr - pStart)) return; // already ok if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) return; // already ok //See if the text is the start of a protocol string, e.g. have text of //"http" see if it is the start of "http:" and if so leave it alone sal_Int32 nIndex = pWordStt - pStart; sal_Int32 nProtocolLen = pDelim - pWordStt + 1; if (nIndex + nProtocolLen <= rTxt.getLength()) { if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid) return; // already ok } if (0x1 == *pWordStt || 0x2 == *pWordStt) return; // already ok // Only capitalize, if string before specified characters is long enough if( *pDelim && 2 >= pDelim - pWordStt && lcl_IsInArr( u".-)>", *pDelim ) ) return; // tdf#59666 don't capitalize single Greek letters (except in Greek texts) if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) return; if( !bAtStart ) // Still no beginning of a paragraph? { if (NonFieldWordDelim(*pStr)) { for (;;) { bAtStart = (pStart == pStr--); if (bAtStart || !NonFieldWordDelim(*pStr)) break; } } // Asian full stop, full width full stop, full width exclamation mark // and full width question marks are treated as word delimiters else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && 0xFF1F != *pStr ) return; // no valid separator -> no replacement } // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) return; if( bAtStart ) // at the beginning of a paragraph? { // Check out the previous paragraph, if it exists. // If so, then check to paragraph separator at the end. OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); if (!pPrevPara) { // valid separator -> replace OUString sChar( *pWordStt ); sChar = rCC.titlecase(sChar); //see fdo#56740 if (sChar != OUStringChar(*pWordStt)) rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); return; } aText = *pPrevPara; bAtStart = false; pStart = aText.getStr(); pStr = pStart + aText.getLength(); do { // overwrite all blanks --pStr; if (!NonFieldWordDelim(*pStr)) break; bAtStart = (pStart == pStr); } while( !bAtStart ); if( bAtStart ) return; // no valid separator -> no replacement } // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. // all three can happen, but not more than once! const sal_Unicode* pExceptStt = nullptr; bool bContinue = true; Flags nFlag = Flags::NONE; do { switch (*pStr) { // Western and Asian full stop case '.': case 0x3002: case 0xFF0E: { if (pStr >= pStart + 2 && *(pStr - 2) == '.') { //e.g. text "f.o.o. word": Now currently considering //capitalizing word but second last character of //previous word is a . So probably last word is an //anagram that ends in . and not truly the end of a //previous sentence, so don't autocapitalize this word return; } if (nFlag & Flags::FullStop) return; // no valid separator -> no replacement nFlag |= Flags::FullStop; pExceptStt = pStr; } break; case '!': case 0xFF01: { if (nFlag & Flags::ExclamationMark) return; // no valid separator -> no replacement nFlag |= Flags::ExclamationMark; } break; case '?': case 0xFF1F: { if (nFlag & Flags::QuestionMark) return; // no valid separator -> no replacement nFlag |= Flags::QuestionMark; } break; default: if (nFlag == Flags::NONE) return; // no valid separator -> no replacement else bContinue = false; break; } if (bContinue && pStr-- == pStart) { return; // no valid separator -> no replacement } } while (bContinue); if (Flags::FullStop != nFlag) pExceptStt = nullptr; // Only capitalize, if string is long enough if( 2 > ( pStr - pStart ) ) return; if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) { bool bValid = false, bAlphaFnd = false; const sal_Unicode* pTmpStr = pStr; while( !bValid ) { if( rCC.isDigit( aText, pTmpStr - pStart ) ) { bValid = true; pStr = pTmpStr - 1; } else if( rCC.isLetter( aText, pTmpStr - pStart ) ) { if( bAlphaFnd ) { bValid = true; pStr = pTmpStr; } else bAlphaFnd = true; } else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) break; if( pTmpStr == pStart ) break; --pTmpStr; } if( !bValid ) return; // no valid separator -> no replacement } bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9'; // Search for the beginning of the word while (!NonFieldWordDelim(*pStr)) { if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) bNumericOnly = false; if( pStart == pStr ) break; --pStr; } if( bNumericOnly ) // consists of only numbers, then not return; if (NonFieldWordDelim(*pStr)) ++pStr; OUString sWord; // check on the basis of the exception list if( pExceptStt ) { sWord = OUString(pStr, pExceptStt - pStr + 1); if( FindInCplSttExceptList(eLang, sWord) ) return; // Delete all non alphanumeric. Test the characters at the // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) OUString sTmp( sWord ); while( !sTmp.isEmpty() && !rCC.isLetterNumeric( sTmp, 0 ) ) sTmp = sTmp.copy(1); // Remove all non alphanumeric characters towards the end up until // the last one. sal_Int32 nLen = sTmp.getLength(); while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) --nLen; if( nLen + 1 < sTmp.getLength() ) sTmp = sTmp.copy( 0, nLen + 1 ); if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && FindInCplSttExceptList(eLang, sTmp)) return; if(FindInCplSttExceptList(eLang, sWord, true)) return; } // Ok, then replace sal_Unicode cSave = *pWordStt; nSttPos = pWordStt - rTxt.getStr(); OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); // Perhaps someone wants to have the word if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); } // Correct accidental use of cAPS LOCK key bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ) { if (nEndPos - nSttPos < 2) // string must be at least 2-character long. return false; CharClass& rCC = GetCharClass( eLang ); // Check the first 2 letters. if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) return false; if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) return false; OUStringBuffer aConverted; aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) return false; for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) { if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) // A lowercase letter disqualifies the whole text. return false; if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) // Another uppercase letter. Convert it. aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); else // This is not an alphabetic letter. Leave it as-is. aConverted.append( rTxt[i] ); } // Replace the word. rDoc.Delete(nSttPos, nEndPos); rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); return true; } sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, LanguageType eLang ) const { sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar ? GetStartDoubleQuote() : GetStartSingleQuote() ) : ( '\"' == cInsChar ? GetEndDoubleQuote() : GetEndSingleQuote() ); if( !cRet ) { // then through the Language find the right character if( LANGUAGE_NONE == eLang ) cRet = cInsChar; else { LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); OUString sRet( bSttQuote ? ( '\"' == cInsChar ? rLcl.getDoubleQuotationMarkStart() : rLcl.getQuotationMarkStart() ) : ( '\"' == cInsChar ? rLcl.getDoubleQuotationMarkEnd() : rLcl.getQuotationMarkEnd() )); cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; } } return cRet; } void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, sal_Unicode cInsChar, bool bSttQuote, bool bIns, LanguageType eLang, ACQuotes eType ) const { sal_Unicode cRet; if ( eType == ACQuotes::DoubleAngleQuote ) { bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS; // pressing " inside a quotation -> use second level angle quotes bool bLeftQuote = '\"' == cInsChar && // start position and Romanian OR // not start position and Hungarian bSttQuote == (eLang != LANGUAGE_HUNGARIAN); cRet = ( '<' == cInsChar || bLeftQuote ) ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote ) : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote ); } else if ( eType == ACQuotes::UseApostrophe ) cRet = cApostrophe; else cRet = GetQuote( cInsChar, bSttQuote, eLang ); OUString sChg( cInsChar ); if( bIns ) rDoc.Insert( nInsPos, sChg ); else rDoc.Replace( nInsPos, sChg ); sChg = OUString(cRet); if( eType == ACQuotes::NonBreakingSpace ) { if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) )) { if( !bSttQuote ) ++nInsPos; } } else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' ) { rDoc.Delete( nInsPos-1, nInsPos); --nInsPos; } rDoc.Replace( nInsPos, sChg ); // i' -> I' in English (last step for the Undo) if( eType == ACQuotes::CapitalizeIAm ) rDoc.Replace( nInsPos-1, "I" ); } OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, sal_Unicode cInsChar, bool bSttQuote ) { const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); OUString sRet(cRet); if( '\"' == cInsChar ) { if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) { if( bSttQuote ) sRet += " "; else sRet = " " + sRet; } } return sRet; } // search preceding opening quote in the paragraph before the insert position static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos, const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars ) { sal_Unicode cTmpChar; do { cTmpChar = rTxt[ --nPos ]; if ( cTmpChar == sPrecedingChar ) return true; if ( cTmpChar == sStopChar ) return false; for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh ) if ( cTmpChar == *pCh ) return false; } while ( nPos > 0 ); return false; } // WARNING: rText may become invalid, see comment below void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, sal_Int32 nInsPos, sal_Unicode cChar, bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) { bool bIsNextRun = io_bNbspRunNext; io_bNbspRunNext = false; // if it was set, then it has to be turned off do{ // only for middle check loop !! if( cChar ) { // Prevent double space if( nInsPos && ' ' == cChar && IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) && ' ' == rTxt[ nInsPos - 1 ]) { break; } bool bSingle = '\'' == cChar; bool bIsReplaceQuote = (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) || (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle ); if( bIsReplaceQuote ) { bool bSttQuote = !nInsPos; ACQuotes eType = ACQuotes::NONE; const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); if (!bSttQuote) { sal_Unicode cPrev = rTxt[ nInsPos-1 ]; bSttQuote = NonFieldWordDelim(cPrev) || lcl_IsInArr( u"([{", cPrev ) || ( cEmDash == cPrev ) || ( cEnDash == cPrev ); // tdf#38394 use opening quotation mark << in French l'<> if ( !bSingle && !bSttQuote && cPrev == cApostrophe && primary(eLang) == primary(LANGUAGE_FRENCH) && ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) && // abbreviated form of ce, de, je, la, le, ne, me, te, se or si OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) || ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) && // abbreviated form of que ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) && ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) ) { bSttQuote = true; } // tdf#108423 for capitalization of English i'm else if ( bSingle && ( cPrev == 'i' ) && primary(eLang) == primary(LANGUAGE_ENGLISH) && ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) ) { eType = ACQuotes::CapitalizeIAm; } // tdf#133524 support >>Hungarian<< and <> secondary level quotations else if ( !bSingle && nInsPos && ( ( eLang == LANGUAGE_HUNGARIAN && lcl_HasPrecedingChar( rTxt, nInsPos, bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0], bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1], bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) || ( eLang.anyOf( LANGUAGE_ROMANIAN, LANGUAGE_ROMANIAN_MOLDOVA ) && lcl_HasPrecedingChar( rTxt, nInsPos, bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0], bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1], bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) ) { LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); // only if the opening double quotation mark is the default one if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) ) eType = ACQuotes::DoubleAngleQuote; } else if ( bSingle && nInsPos && !bSttQuote && // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic, // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018. // tdf#123786 the same for Russian and Ukrainian ( eLang.anyOf ( LANGUAGE_CZECH, LANGUAGE_GERMAN, LANGUAGE_GERMAN_SWISS, LANGUAGE_GERMAN_AUSTRIAN, LANGUAGE_GERMAN_LUXEMBOURG, LANGUAGE_GERMAN_LIECHTENSTEIN, LANGUAGE_ICELANDIC, LANGUAGE_SLOVAK, LANGUAGE_SLOVENIAN ) ) ) { sal_Unicode sStartChar = GetStartSingleQuote(); sal_Unicode sEndChar = GetEndSingleQuote(); if ( !sStartChar || !sEndChar ) { LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0]; if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0]; } if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) ) { CharClass& rCC = GetCharClass( eLang ); if ( rCC.isLetter(rTxt, nInsPos-1) ) { eType = ACQuotes::UseApostrophe; } } } else if ( bSingle && nInsPos && !bSttQuote && ( eLang.anyOf ( LANGUAGE_RUSSIAN, LANGUAGE_UKRAINIAN ) && !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1], aStopSingleQuoteEndRuUa + 2 ) ) ) { LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); CharClass& rCC = GetCharClass( eLang ); if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) && // use apostrophe only after letters, not after digits or punctuation rCC.isLetter(rTxt, nInsPos-1) ) { eType = ACQuotes::UseApostrophe; } } } if ( eType == ACQuotes::NONE && !bSingle && ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) ) eType = ACQuotes::NonBreakingSpace; InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType ); break; } // tdf#133524 change "<<" and ">>" to double angle quotation marks else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) && IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) && ('<' == cChar || '>' == cChar) && nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] ) { const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); if ( eLang.anyOf( LANGUAGE_CATALAN, // primary level LANGUAGE_CATALAN_VALENCIAN, // primary level LANGUAGE_FINNISH, // alternative primary level LANGUAGE_FRENCH_SWISS, // second level LANGUAGE_GALICIAN, // primary level LANGUAGE_HUNGARIAN, // second level LANGUAGE_POLISH, // second level LANGUAGE_PORTUGUESE, // primary level LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level LANGUAGE_ROMANIAN, // second level LANGUAGE_ROMANIAN_MOLDOVA, // second level LANGUAGE_SWEDISH, // alternative primary level LANGUAGE_SWEDISH_FINLAND, // alternative primary level LANGUAGE_UKRAINIAN, // primary level LANGUAGE_USER_ARAGONESE, // primary level LANGUAGE_USER_ASTURIAN ) || // primary level primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level { InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote ); break; } } if( bInsert ) rDoc.Insert( nInsPos, OUString(cChar) ); else rDoc.Replace( nInsPos, OUString(cChar) ); // Hardspaces autocorrection if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) ) { // WARNING ATTENTION: rTxt is an alias of the text node's OUString // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds! sal_Int32 nUpdatedPos = -1; if (NeedsHardspaceAutocorr(cChar)) nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ); if (nUpdatedPos >= 0) { nInsPos = nUpdatedPos; } else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) ) { // Remove the NBSP if it wasn't an autocorrection if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) && cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace ) { // Look for the last HARD_SPACE sal_Int32 nPos = nInsPos - 1; bool bContinue = true; while ( bContinue ) { const sal_Unicode cTmpChar = rTxt[ nPos ]; if ( cTmpChar == cNonBreakingSpace ) { rDoc.Delete( nPos, nPos + 1 ); bContinue = false; } else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 ) bContinue = false; nPos--; } } } } } if( !nInsPos ) break; sal_Int32 nPos = nInsPos - 1; if( IsWordDelim( rTxt[ nPos ])) break; // Set bold or underline automatically? if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength())) { if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) ) { FnChgWeightUnderl( rDoc, rTxt, nPos+1 ); } break; } while( nPos && !IsWordDelim( rTxt[ --nPos ])) ; // Found a Paragraph-start or a Blank, search for the word shortcut in // auto. sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character if( !nPos && !IsWordDelim( rTxt[ 0 ])) --nCapLttrPos; // begin of paragraph and no blank const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); CharClass& rCC = GetCharClass( eLang ); // no symbol characters if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) break; if( IsAutoCorrFlag( ACFlags::Autocorrect ) && // tdf#134940 fix regression of arrow "-->" resulted by premature // replacement of "--" since '>' was added to IsAutoCorrectChar() '>' != cChar ) { // WARNING ATTENTION: rTxt is an alias of the text node's OUString // and becomes INVALID if ChgAutoCorrWord returns true! // => use aPara/pPara to create a valid copy of the string! OUString aPara; OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr; bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos, *this, pPara ); if( !bChgWord ) { sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; while( nCapLttrPos1 < nInsPos && lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] ) ) ++nCapLttrPos1; while( nCapLttrPos1 < nInsPos1 && nInsPos1 && lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] ) ) --nInsPos1; if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) && nCapLttrPos1 < nInsPos1 && rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara )) { bChgWord = true; nCapLttrPos = nCapLttrPos1; } } if( bChgWord ) { if( !aPara.isEmpty() ) { sal_Int32 nEnd = nCapLttrPos; while( nEnd < aPara.getLength() && !IsWordDelim( aPara[ nEnd ])) ++nEnd; // Capital letter at beginning of paragraph? if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) { FnCapitalStartSentence( rDoc, aPara, false, nCapLttrPos, nEnd, eLang ); } if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) { FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang ); } } break; } } if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN ) { // WARNING ATTENTION: rTxt is an alias of the text node's OUString // and becomes INVALID if TransliterateRTLWord returns true! if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) ) break; } if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) && (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) && FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) && ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) ; else { bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK); bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos ); if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) && FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) { // Correct accidental use of cAPS LOCK key (do this only when // the caps or shift lock key is pressed). Turn off the caps // lock afterwards. pFrameWin->SimulateKeyPress( KEY_CAPSLOCK ); } // Capital letter at beginning of paragraph ? if( !bUnsupported && IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) { FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ); } // Two capital letters at beginning of word ?? if( !bUnsupported && IsAutoCorrFlag( ACFlags::CapitalStartWord ) ) { FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); } if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) { FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); } } } while( false ); } SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_( LanguageType eLang ) { LanguageTag aLanguageTag( eLang); if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end()) (void)CreateLanguageFile(aLanguageTag); const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); return iter->second; } void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang ) { auto const iter = m_aLangTable.find(LanguageTag(eLang)); if (iter != m_aLangTable.end()) iter->second.SaveCplSttExceptList(); else { SAL_WARN("editeng", "Save an empty list? "); } } void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang) { auto const iter = m_aLangTable.find(LanguageTag(eLang)); if (iter != m_aLangTable.end()) iter->second.SaveWordStartExceptList(); else { SAL_WARN("editeng", "Save an empty list? "); } } // Adds a single word. The list will immediately be written to the file! bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, LanguageType eLang ) { SvxAutoCorrectLanguageLists* pLists = nullptr; // either the right language is present or it will be this in the general list auto iter = m_aLangTable.find(LanguageTag(eLang)); if (iter != m_aLangTable.end()) pLists = &iter->second; else { LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); iter = m_aLangTable.find(aLangTagUndetermined); if (iter != m_aLangTable.end()) pLists = &iter->second; else if(CreateLanguageFile(aLangTagUndetermined)) { iter = m_aLangTable.find(aLangTagUndetermined); assert(iter != m_aLangTable.end()); pLists = &iter->second; } } OSL_ENSURE(pLists, "No auto correction data"); return pLists && pLists->AddToCplSttExceptList(rNew); } // Adds a single word. The list will immediately be written to the file! bool SvxAutoCorrect::AddWordStartException( const OUString& rNew, LanguageType eLang ) { SvxAutoCorrectLanguageLists* pLists = nullptr; //either the right language is present or it is set in the general list auto iter = m_aLangTable.find(LanguageTag(eLang)); if (iter != m_aLangTable.end()) pLists = &iter->second; else { LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); iter = m_aLangTable.find(aLangTagUndetermined); if (iter != m_aLangTable.end()) pLists = &iter->second; else if(CreateLanguageFile(aLangTagUndetermined)) { iter = m_aLangTable.find(aLangTagUndetermined); assert(iter != m_aLangTable.end()); pLists = &iter->second; } } OSL_ENSURE(pLists, "No auto correction file!"); return pLists && pLists->AddToWordStartExceptList(rNew); } OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, sal_Int32 nPos) { OUString sRet; if( !nPos ) return sRet; sal_Int32 nEnd = nPos; // it must be followed by a blank or tab! if( ( nPos < rTxt.getLength() && !IsWordDelim( rTxt[ nPos ])) || IsWordDelim( rTxt[ --nPos ])) return sRet; while( nPos && !IsWordDelim( rTxt[ --nPos ])) ; // Found a Paragraph-start or a Blank, search for the word shortcut in // auto. sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character if( !nPos && !IsWordDelim( rTxt[ 0 ])) --nCapLttrPos; // Beginning of paragraph and no Blank! while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) if( ++nCapLttrPos >= nEnd ) return sRet; if( 3 > nEnd - nCapLttrPos ) return sRet; const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); CharClass& rCC = GetCharClass(eLang); if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) return sRet; sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); return sRet; } // static std::vector SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt, const sal_Int32 nPos) { constexpr sal_Int32 nMinLen = 3; constexpr sal_Int32 nMaxLen = 9; std::vector aRes; if (nPos >= nMinLen) { sal_Int32 nBegin = std::max(nPos - nMaxLen, 0); // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation) if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) { while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) ++nBegin; } if (nBegin + nMinLen <= nPos) { OUString sRes( rTxt.substr(nBegin, nPos - nBegin) ); aRes.push_back(sRes); bool bLastStartedWithDelim = IsWordDelim(sRes[0]); for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) { bool bAdd = bLastStartedWithDelim; bLastStartedWithDelim = IsWordDelim(sRes[i]); bAdd = bAdd || bLastStartedWithDelim; if (bAdd) aRes.push_back(sRes.copy(i)); } } } return aRes; } bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) { OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); OUString sShareDirFile( sUserDirFile ); SvxAutoCorrectLanguageLists* pLists = nullptr; tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); auto nFndPos = aLastFileTable.find(rLanguageTag); if(nFndPos != aLastFileTable.end() && (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && nAktTime - nLastCheckTime < nMinTime) { // no need to test the file, because the last check is not older then // 2 minutes. if( bNewFile ) { sShareDirFile = sUserDirFile; auto itBool = m_aLangTable.emplace(std::piecewise_construct, std::forward_as_tuple(rLanguageTag), std::forward_as_tuple(*this, sShareDirFile, sUserDirFile)); pLists = &itBool.first->second; aLastFileTable.erase(nFndPos); } } else if( ( FStatHelper::IsDocument( sUserDirFile ) || FStatHelper::IsDocument( sShareDirFile = GetAutoCorrFileName( rLanguageTag ) ) || FStatHelper::IsDocument( sShareDirFile = GetAutoCorrFileName( rLanguageTag, false, false, true) ) ) || ( sShareDirFile = sUserDirFile, bNewFile ) ) { auto itBool = m_aLangTable.emplace(std::piecewise_construct, std::forward_as_tuple(rLanguageTag), std::forward_as_tuple(*this, sShareDirFile, sUserDirFile)); pLists = &itBool.first->second; if (nFndPos != aLastFileTable.end()) aLastFileTable.erase(nFndPos); } else if( !bNewFile ) { aLastFileTable[rLanguageTag] = nAktTime.GetTime(); } return pLists != nullptr; } bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, LanguageType eLang ) { LanguageTag aLanguageTag( eLang); if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end()) return iter->second.PutText(rShort, rLong); if (CreateLanguageFile(aLanguageTag)) { auto const iter = m_aLangTable.find(aLanguageTag); assert (iter != m_aLangTable.end()); return iter->second.PutText(rShort, rLong); } return false; } void SvxAutoCorrect::MakeCombinedChanges( std::vector& aNewEntries, std::vector& aDeleteEntries, LanguageType eLang ) { LanguageTag aLanguageTag( eLang); auto iter = m_aLangTable.find(aLanguageTag); if (iter != m_aLangTable.end()) { iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries ); } else if(CreateLanguageFile( aLanguageTag )) { iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries ); } } // - return the replacement text (only for SWG-Format, all other // can be taken from the word list!) bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) { return false; } void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) { } // Text with attribution (only the SWG - SWG format!) bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, const OUString&, const OUString&, SfxObjectShell&, OUString& ) { return false; } OUString EncryptBlockName_Imp(std::u16string_view rName) { OUStringBuffer aName; aName.append('#').append(rName); for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos) { if (lcl_IsInArr( u"!/:.\\", aName[nPos])) aName[nPos] &= 0x0f; } return aName.makeStringAndClear(); } /* This code is copied from SwXMLTextBlocks::GeneratePackageName */ static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName ) { OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) { switch (aBuf[nPos]) { case '!': case '/': case ':': case '.': case '\\': // tdf#156769 - escape the question mark in the storage name case '?': aBuf[nPos] = '_'; break; default: break; } } rPackageName = aBuf.makeStringAndClear(); } static const SvxAutocorrWord* lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList, std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos) { const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); } // the search for the words in the substitution table const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos, SvxAutoCorrDoc&, LanguageTag& rLang ) { const SvxAutocorrWord* pRet = nullptr; LanguageTag aLanguageTag( rLang); if( aLanguageTag.isSystemLocale() ) aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage()); /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback * list instead? */ // First search for eLang, then US-English -> English // and last in LANGUAGE_UNDETERMINED if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); SvxAutoCorrectLanguageLists & rList = iter->second; pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; return pRet; } else return nullptr; } // If it still could not be found here, then keep on searching LanguageType eLang = aLanguageTag.getLanguageType(); // the primary language for example EN aLanguageTag.reset(aLanguageTag.getLanguage()); LanguageType nTmpKey = aLanguageTag.getLanguageType(false); if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))) { //the language is available - so bring it on SvxAutoCorrectLanguageLists& rList = m_aLangTable.find(aLanguageTag)->second; pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; return pRet; } } if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); SvxAutoCorrectLanguageLists& rList = iter->second; pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; return pRet; } } return nullptr; } bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang, const OUString& sWord ) { LanguageTag aLanguageTag( eLang); /* TODO-BCP47: again horrible ugliness */ // First search for eLang, then primary language of eLang // and last in LANGUAGE_UNDETERMINED if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail"); auto& rList = iter->second; if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() ) return true; } // If it still could not be found here, then keep on searching // the primary language for example EN aLanguageTag.reset(aLanguageTag.getLanguage()); LanguageType nTmpKey = aLanguageTag.getLanguageType(false); if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail"); auto& rList = iter->second; if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() ) return true; } if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); auto& rList = iter->second; if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() ) return true; } return false; } static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord) { SvStringsISortDtor::const_iterator it = pList->find( "~" ); SvStringsISortDtor::size_type nPos = it - pList->begin(); if( nPos < pList->size() ) { OUString sLowerWord(sWord.toAsciiLowerCase()); OUString sAbr; for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n ) { sAbr = (*pList)[ n ]; if (sAbr[0] != '~') break; // ~ and ~. are not allowed! if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() ) { OUString sLowerAbk(sAbr.toAsciiLowerCase()); for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;) { if( !--i ) // agrees return true; if( sLowerAbk[i] != sLowerWord[--ii]) break; } } } } OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ), "Wrongly sorted exception list?" ); return false; } bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang, const OUString& sWord, bool bAbbreviation) { LanguageTag aLanguageTag( eLang); /* TODO-BCP47: did I mention terrible horrible ugliness? */ // First search for eLang, then primary language of eLang // and last in LANGUAGE_UNDETERMINED if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail"); const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList(); if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) return true; } // If it still could not be found here, then keep on searching // the primary language for example EN aLanguageTag.reset(aLanguageTag.getLanguage()); LanguageType nTmpKey = aLanguageTag.getLanguageType(false); if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail"); const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList(); if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) return true; } if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) { //the language is available - so bring it on const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail"); const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList(); if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) return true; } return false; } OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, bool bNewFile, bool bTst, bool bUnlocalized ) const { OUString sRet, sExt( rLanguageTag.getBcp47() ); if (bUnlocalized) { // we don't want variant, so we'll take "fr" instead of "fr-CA" for example std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); if (!vecFallBackStrings.empty()) sExt = vecFallBackStrings[0]; } sExt = "_" + sExt + ".dat"; if( bNewFile ) sRet = sUserAutoCorrFile + sExt; else if( !bTst ) sRet = sShareAutoCorrFile + sExt; else { // test first in the user directory - if not exist, then sRet = sUserAutoCorrFile + sExt; if( !FStatHelper::IsDocument( sRet )) sRet = sShareAutoCorrFile + sExt; } return sRet; } SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( SvxAutoCorrect& rParent, OUString aShareAutoCorrectFile, OUString aUserAutoCorrectFile) : sShareAutoCorrFile(std::move( aShareAutoCorrectFile )), sUserAutoCorrFile(std::move( aUserAutoCorrectFile )), aModifiedDate( Date::EMPTY ), aModifiedTime( tools::Time::EMPTY ), aLastCheckTime( tools::Time::EMPTY ), rAutoCorrect(rParent), nFlags(ACFlags::NONE) { } SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() { } bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() { // Access the file system only every 2 minutes to check the date stamp bool bRet = false; tools::Time nMinTime( 0, 2 ); tools::Time nAktTime( tools::Time::SYSTEM ); if( aLastCheckTime <= nAktTime) // overflow? return false; nAktTime -= aLastCheckTime; if( nAktTime > nMinTime ) // min time past { Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, &aTstDate, &aTstTime ) && ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) { bRet = true; // then remove all the lists fast! if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) { pCplStt_ExcptLst.reset(); } if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst ) { pWordStart_ExcptLst.reset(); } if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) { pAutocorr_List.reset(); } nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad ); } aLastCheckTime = tools::Time( tools::Time::SYSTEM ); } return bRet; } void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( std::unique_ptr& rpLst, const OUString& sStrmName, tools::SvRef& rStg) { if( rpLst ) rpLst->clear(); else rpLst.reset( new SvStringsISortDtor ); { if( rStg.is() && rStg->IsStream( sStrmName ) ) { tools::SvRef xStrm = rStg->OpenSotStream( sStrmName, ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); if( ERRCODE_NONE != xStrm->GetError()) { xStrm.clear(); rStg.clear(); RemoveStream_Imp( sStrmName ); } else { uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); xml::sax::InputSource aParserInput; aParserInput.sSystemId = sStrmName; xStrm->Seek( 0 ); xStrm->SetBufferSize( 8 * 1024 ); aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm ); // get filter uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst ); // connect parser and filter uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext ); uno::Reference xTokenHandler = new SvXMLAutoCorrectTokenHandler; xParser->setFastDocumentHandler( xFilter ); xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); xParser->setTokenHandler( xTokenHandler ); // parse try { xParser->parseStream( aParserInput ); } catch( const xml::sax::SAXParseException& ) { // re throw ? } catch( const xml::sax::SAXException& ) { // re throw ? } catch( const io::IOException& ) { // re throw ? } } } // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); } } void SvxAutoCorrectLanguageLists::SaveExceptList_Imp( const SvStringsISortDtor& rLst, const OUString& sStrmName, tools::SvRef const &rStg, bool bConvert ) { if( !rStg.is() ) return; if( rLst.empty() ) { rStg->Remove( sStrmName ); rStg->Commit(); } else { tools::SvRef xStrm = rStg->OpenSotStream( sStrmName, ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); if( xStrm.is() ) { xStrm->SetSize( 0 ); xStrm->SetBufferSize( 8192 ); xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm ); xWriter->setOutputStream(xOut); uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW); rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) ); xExp->exportDoc( XML_BLOCK_LIST ); xStrm->Commit(); if( xStrm->GetError() == ERRCODE_NONE ) { xStrm.clear(); if (!bConvert) { rStg->Commit(); if( ERRCODE_NONE != rStg->GetError() ) { rStg->Remove( sStrmName ); rStg->Commit(); } } } } } } SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList() { if( pAutocorr_List ) pAutocorr_List->DeleteAndDestroyAll(); else pAutocorr_List.reset( new SvxAutocorrWordList() ); try { uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ ); uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ ); uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); xml::sax::InputSource aParserInput; aParserInput.sSystemId = pXMLImplAutocorr_ListStr; aParserInput.aInputStream = xStrm->getInputStream(); // get parser uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext); SAL_INFO("editeng", "AutoCorrect Import" ); uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg ); uno::Reference xTokenHandler = new SvXMLAutoCorrectTokenHandler; // connect parser and filter xParser->setFastDocumentHandler( xFilter ); xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); xParser->setTokenHandler(xTokenHandler); // parse xParser->parseStream( aParserInput ); } catch ( const uno::Exception& ) { TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile); } // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); return pAutocorr_List.get(); } const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList() { if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() ) { LoadAutocorrWordList(); if( !pAutocorr_List ) { OSL_ENSURE( false, "No valid list" ); pAutocorr_List.reset( new SvxAutocorrWordList() ); } nFlags |= ACFlags::ChgWordLstLoad; } return pAutocorr_List.get(); } SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList() { if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() ) { LoadCplSttExceptList(); if( !pCplStt_ExcptLst ) { OSL_ENSURE( false, "No valid list" ); pCplStt_ExcptLst.reset( new SvStringsISortDtor ); } nFlags |= ACFlags::CplSttLstLoad; } return pCplStt_ExcptLst.get(); } bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew) { bool bRet = false; if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second ) { MakeUserStorage_Impl(); tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); xStg = nullptr; // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); bRet = true; } return bRet; } bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew) { bool bRet = false; if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second ) { MakeUserStorage_Impl(); tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); xStg = nullptr; // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); bRet = true; } return bRet; } SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() { try { tools::SvRef xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); } catch (const css::ucb::ContentCreationException&) { } return pCplStt_ExcptLst.get(); } void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() { MakeUserStorage_Impl(); tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); xStg = nullptr; // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); } SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList() { try { tools::SvRef xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) ) LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); } catch (const css::ucb::ContentCreationException &) { TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList"); } return pWordStart_ExcptLst.get(); } void SvxAutoCorrectLanguageLists::SaveWordStartExceptList() { MakeUserStorage_Impl(); tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg ); xStg = nullptr; // Set time stamp FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, &aModifiedDate, &aModifiedTime ); aLastCheckTime = tools::Time( tools::Time::SYSTEM ); } SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList() { if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() ) { LoadWordStartExceptList(); if( !pWordStart_ExcptLst ) { OSL_ENSURE( false, "No valid list" ); pWordStart_ExcptLst.reset( new SvStringsISortDtor ); } nFlags |= ACFlags::WordStartLstLoad; } return pWordStart_ExcptLst.get(); } void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) { if( sShareAutoCorrFile != sUserAutoCorrFile ) { tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); if( xStg.is() && ERRCODE_NONE == xStg->GetError() && xStg->IsStream( rName ) ) { xStg->Remove( rName ); xStg->Commit(); xStg = nullptr; } } } void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() { // The conversion needs to happen if the file is already in the user // directory and is in the old format. Additionally it needs to // happen when the file is being copied from share to user. bool bError = false, bConvert = false, bCopy = false; INetURLObject aDest; INetURLObject aSource; if (sUserAutoCorrFile != sShareAutoCorrFile ) { aSource = INetURLObject ( sShareAutoCorrFile ); aDest = INetURLObject ( sUserAutoCorrFile ); if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) { aDest.SetExtension ( u"bak" ); bConvert = true; } bCopy = true; } else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) { aSource = INetURLObject ( sUserAutoCorrFile ); aDest = INetURLObject ( sUserAutoCorrFile ); aDest.SetExtension ( u"bak" ); bCopy = bConvert = true; } if (bCopy) { try { OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); sal_Int32 nSlashPos = sMain.lastIndexOf('/'); sMain = sMain.copy(0, nSlashPos); ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); TransferInfo aInfo; aInfo.NameClash = NameClash::OVERWRITE; aInfo.NewTitle = aDest.GetLastName(); aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); aInfo.MoveData = false; aNewContent.executeCommand( "transfer", Any(aInfo)); } catch (...) { bError = true; } } if (bConvert && !bError) { tools::SvRef xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); tools::SvRef xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); if( xSrcStg.is() && xDstStg.is() ) { std::unique_ptr pTmpWordList; if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) ) LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg ); if (pTmpWordList) { SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true ); pTmpWordList.reset(); } if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); if (pTmpWordList) { SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); pTmpWordList->clear(); } GetAutocorrWordList(); MakeBlocklist_Imp( *xDstStg ); sShareAutoCorrFile = sUserAutoCorrFile; xDstStg = nullptr; try { ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); aContent.executeCommand ( "delete", Any ( true ) ); } catch (...) { } } } else if( bCopy && !bError ) sShareAutoCorrFile = sUserAutoCorrFile; } bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) { bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); if( !bRemove ) { tools::SvRef refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); if( refList.is() ) { refList->SetSize( 0 ); refList->SetBufferSize( 8192 ); refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); xWriter->setOutputStream(xOut); rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); xExp->exportDoc( XML_BLOCK_LIST ); refList->Commit(); bRet = ERRCODE_NONE == refList->GetError(); if( bRet ) { refList.clear(); rStg.Commit(); if( ERRCODE_NONE != rStg.GetError() ) { bRemove = true; bRet = false; } } } else bRet = false; } if( bRemove ) { rStg.Remove( pXMLImplAutocorr_ListStr ); rStg.Commit(); } return bRet; } bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector& aNewEntries, std::vector& aDeleteEntries ) { // First get the current list! GetAutocorrWordList(); MakeUserStorage_Impl(); tools::SvRef xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); if( bRet ) { for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) { std::optional xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); if( xFoundEntry ) { if( !xFoundEntry->IsTextOnly() ) { OUString aName( aWordToDelete.GetShort() ); if (xStorage->IsOLEStorage()) aName = EncryptBlockName_Imp(aName); else GeneratePackageName ( aWordToDelete.GetShort(), aName ); if( xStorage->IsContained( aName ) ) { xStorage->Remove( aName ); bRet = xStorage->Commit(); } } } } for (const SvxAutocorrWord & aNewEntrie : aNewEntries) { SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); std::optional xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); if( xRemoved ) { if( !xRemoved->IsTextOnly() ) { // Still have to remove the Storage OUString sStorageName( aWordToAdd.GetShort() ); if (xStorage->IsOLEStorage()) sStorageName = EncryptBlockName_Imp(sStorageName); else GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); if( xStorage->IsContained( sStorageName ) ) xStorage->Remove( sStorageName ); } } bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); if ( !bRet ) { break; } } if ( bRet ) { bRet = MakeBlocklist_Imp( *xStorage ); } } return bRet; } bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) { // First get the current list! GetAutocorrWordList(); MakeUserStorage_Impl(); tools::SvRef xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); // Update the word list if( bRet ) { SvxAutocorrWord aNew(rShort, rLong, true ); std::optional xRemove = pAutocorr_List->FindAndRemove( &aNew ); if( xRemove ) { if( !xRemove->IsTextOnly() ) { // Still have to remove the Storage OUString sStgNm( rShort ); if (xStg->IsOLEStorage()) sStgNm = EncryptBlockName_Imp(sStgNm); else GeneratePackageName ( rShort, sStgNm); if( xStg->IsContained( sStgNm ) ) xStg->Remove( sStgNm ); } } if( pAutocorr_List->Insert( std::move(aNew) ) ) { bRet = MakeBlocklist_Imp( *xStg ); xStg = nullptr; } else { bRet = false; } } return bRet; } void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, SfxObjectShell& rShell ) { // First get the current list! GetAutocorrWordList(); MakeUserStorage_Impl(); try { uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); OUString sLong; bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); xStg = nullptr; // Update the word list if( bRet ) { if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) { tools::SvRef xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); MakeBlocklist_Imp( *xStor ); } } } catch ( const uno::Exception& ) { } } // Keep the list sorted ... struct SvxAutocorrWordList::CompareSvxAutocorrWordList { bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const { CollatorWrapper& rCmp = ::GetCollatorWrapper(); return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0; } }; namespace { typedef std::unordered_map AutocorrWordHashType; } struct SvxAutocorrWordList::Impl { // only one of these contains the data // maSortedVector is manually sorted so we can optimise data movement mutable AutocorrWordSetType maSortedVector; mutable AutocorrWordHashType maHash; // key is 'Short' void DeleteAndDestroyAll() { maHash.clear(); maSortedVector.clear(); } }; SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {} SvxAutocorrWordList::~SvxAutocorrWordList() { } void SvxAutocorrWordList::DeleteAndDestroyAll() { mpImpl->DeleteAndDestroyAll(); } // returns true if inserted const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const { if ( mpImpl->maSortedVector.empty() ) // use the hash { OUString aShort = aWord.GetShort(); auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) ); if (inserted) return &(it->second); return nullptr; } else { auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList()); CollatorWrapper& rCmp = ::GetCollatorWrapper(); if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0) { it = mpImpl->maSortedVector.insert(it, std::move(aWord)); return &*it; } return nullptr; } } void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt) { (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); } bool SvxAutocorrWordList::empty() const { return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); } std::optional SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord) { if ( mpImpl->maSortedVector.empty() ) // use the hash { AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); if( it != mpImpl->maHash.end() ) { SvxAutocorrWord pMatch = std::move(it->second); mpImpl->maHash.erase (it); return pMatch; } } else { auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) { SvxAutocorrWord pMatch = std::move(*it); mpImpl->maSortedVector.erase (it); return pMatch; } } return std::optional(); } // return the sorted contents - defer sorting until we have to. const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const { // convert from hash to set permanently if ( mpImpl->maSortedVector.empty() ) { std::vector tmp; tmp.reserve(mpImpl->maHash.size()); for (auto & rPair : mpImpl->maHash) tmp.emplace_back(std::move(rPair.second)); mpImpl->maHash.clear(); // sort twice - this gets the list into mostly-sorted order, which // reduces the number of times we need to invoke the expensive ICU collate fn. std::sort(tmp.begin(), tmp.end(), [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) { return lhs.GetShort() < rhs.GetShort(); }); // This beast has some O(N log(N)) in a terribly slow ICU collate fn. // stable_sort is twice as fast as sort in this situation because it does // fewer comparison operations. std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); mpImpl->maSortedVector = std::move(tmp); } return mpImpl->maSortedVector; } const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, std::u16string_view rTxt, sal_Int32 &rStt, sal_Int32 nEndPos) const { const OUString& rChk = pFnd->GetShort(); sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern? sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern? assert(nEndPos >= 0); size_t nSttWdPos = nEndPos; // direct replacement of keywords surrounded by colons (for example, ":name:") bool bColonNameColon = static_cast(rTxt.size()) > nEndPos && rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard ) return nullptr; bool bWasWordDelim = false; sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; if (bColonNameColon) nCalcStt++; if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon || ( nCalcStt < rStt && IsWordDelim( rTxt[ nCalcStt - 1 ] ))) ) { TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) ); if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) )) { rStt = nCalcStt; if (!left_wildcard) { // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14 if (static_cast(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) return nullptr; return pFnd; } // get the first word delimiter position before the matching ".*word" pattern while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) ; if (bWasWordDelim) rStt++; OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) ); // avoid double spaces before simple "word" replacement left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong(); if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(OUString(rTxt.substr(rStt, nEndPos - rStt)), left_pattern) ) ) return pNew; } } else // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support if ( right_wildcard ) { OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); // Get the last word delimiter position bool not_suffix; while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) ; // search the first occurrence (with a left word delimitation, if needed) size_t nFndPos = std::u16string_view::npos; do { nFndPos = rTxt.find( sTmp, nFndPos + 1); if (nFndPos == std::u16string_view::npos) break; not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); if ( nFndPos != std::u16string_view::npos ) { sal_Int32 extra_repl = static_cast(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:" if ( left_wildcard ) { // get the first word delimiter position before the matching ".*word.*" pattern while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) ; if (bWasWordDelim) nFndPos++; } if (nEndPos + extra_repl <= static_cast(nFndPos)) { return nullptr; } // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) ); OUString aLong; rStt = nFndPos; if ( !left_wildcard ) { sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u""); } else { OUStringBuffer buf; do { nSttWdPos = rTxt.find( sTmp, nFndPos); if (nSttWdPos != std::u16string_view::npos) { sal_Int32 nTmp(nFndPos); while (nTmp < static_cast(nSttWdPos) && !IsWordDelim(rTxt[nTmp])) nTmp++; if (nTmp < static_cast(nSttWdPos)) break; // word delimiter found buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); nFndPos = nSttWdPos + sTmp.getLength(); } } while (nSttWdPos != std::u16string_view::npos); if (static_cast(nEndPos - nFndPos) > extra_repl) buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos)); aLong = buf.makeStringAndClear(); } if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) { if ( (static_cast(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast(rTxt.size()) == nEndPos ) return pNew; } } } return nullptr; } const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos) const { for (auto const& elem : mpImpl->maHash) { if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) return pTmp; } for (auto const& elem : mpImpl->maSortedVector) { if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) return pTmp; } return nullptr; } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */