diff options
author | Noel Grandin <noel.grandin@collabora.co.uk> | 2019-09-19 12:29:42 +0200 |
---|---|---|
committer | Noel Grandin <noel.grandin@collabora.co.uk> | 2019-09-20 10:33:24 +0200 |
commit | 77dec7588c9141b03f8ec0139eb96c298b26f261 (patch) | |
tree | d56625bf373e1966d57854a61168385c8a08e734 /editeng/source/misc | |
parent | 3aff00ea2ff704547a4d9fa6e2bb2141eb57bf1d (diff) |
tdf#109158 improve sorting when loading large autocorrect file
reduces time from 2.0s to 1.7s
reduce work by
(*) reserving some arrays
(*) pre-sorting with a cheaper comparator
(*) don't copy when returning result, just return a const&
(*) flattening the data-structures to reduce pointer-chasing
Change-Id: I972bd7ffdbf2121c2d38c24aca9618ca708e920c
Reviewed-on: https://gerrit.libreoffice.org/79119
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Diffstat (limited to 'editeng/source/misc')
-rw-r--r-- | editeng/source/misc/SvXMLAutoCorrectExport.cxx | 8 | ||||
-rw-r--r-- | editeng/source/misc/svxacorr.cxx | 143 |
2 files changed, 81 insertions, 70 deletions
diff --git a/editeng/source/misc/SvXMLAutoCorrectExport.cxx b/editeng/source/misc/SvXMLAutoCorrectExport.cxx index 8ebefccbe17f..4dadeca36fe3 100644 --- a/editeng/source/misc/SvXMLAutoCorrectExport.cxx +++ b/editeng/source/misc/SvXMLAutoCorrectExport.cxx @@ -51,15 +51,15 @@ ErrCode SvXMLAutoCorrectExport::exportDoc(enum XMLTokenEnum /*eClass*/) GetNamespaceMap_().GetNameByKey ( XML_NAMESPACE_BLOCKLIST ) ); { SvXMLElementExport aRoot (*this, XML_NAMESPACE_BLOCKLIST, XML_BLOCK_LIST, true, true); - SvxAutocorrWordList::Content aContent = pAutocorr_List->getSortedContent(); - for (auto const& content : aContent) + const SvxAutocorrWordList::AutocorrWordSetType& rContent = pAutocorr_List->getSortedContent(); + for (auto const& content : rContent) { AddAttribute( XML_NAMESPACE_BLOCKLIST, XML_ABBREVIATED_NAME, - content->GetShort()); + content.GetShort()); AddAttribute( XML_NAMESPACE_BLOCKLIST, XML_NAME, - content->IsTextOnly() ? content->GetLong() : content->GetShort()); + content.IsTextOnly() ? content.GetLong() : content.GetShort()); SvXMLElementExport aBlock( *this, XML_NAMESPACE_BLOCKLIST, XML_BLOCK, true, true); } diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx index 6b9e6df1111c..96d7c1561f27 100644 --- a/editeng/source/misc/svxacorr.cxx +++ b/editeng/source/misc/svxacorr.cxx @@ -2488,10 +2488,10 @@ bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWo { for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) { - std::unique_ptr<SvxAutocorrWord> pFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); - if( pFoundEntry ) + boost::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); + if( xFoundEntry ) { - if( !pFoundEntry->IsTextOnly() ) + if( !xFoundEntry->IsTextOnly() ) { OUString aName( aWordToDelete.GetShort() ); if (xStorage->IsOLEStorage()) @@ -2510,24 +2510,24 @@ bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWo for (SvxAutocorrWord & aNewEntrie : aNewEntries) { - std::unique_ptr<SvxAutocorrWord> pWordToAdd(new SvxAutocorrWord( aNewEntrie.GetShort(), aNewEntrie.GetLong(), true )); - std::unique_ptr<SvxAutocorrWord> pRemoved = pAutocorr_List->FindAndRemove( pWordToAdd.get() ); - if( pRemoved ) + SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); + boost::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); + if( xRemoved ) { - if( !pRemoved->IsTextOnly() ) + if( !xRemoved->IsTextOnly() ) { // Still have to remove the Storage - OUString sStorageName( pWordToAdd->GetShort() ); + OUString sStorageName( aWordToAdd.GetShort() ); if (xStorage->IsOLEStorage()) sStorageName = EncryptBlockName_Imp(sStorageName); else - GeneratePackageName ( pWordToAdd->GetShort(), sStorageName); + GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); if( xStorage->IsContained( sStorageName ) ) xStorage->Remove( sStorageName ); } } - bRet = pAutocorr_List->Insert( std::move(pWordToAdd) ); + bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); if ( !bRet ) { @@ -2556,11 +2556,11 @@ bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUStrin // Update the word list if( bRet ) { - std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( rShort, rLong, true )); - std::unique_ptr<SvxAutocorrWord> pRemove = pAutocorr_List->FindAndRemove( pNew.get() ); - if( pRemove ) + SvxAutocorrWord aNew(rShort, rLong, true ); + boost::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); + if( xRemove ) { - if( !pRemove->IsTextOnly() ) + if( !xRemove->IsTextOnly() ) { // Still have to remove the Storage OUString sStgNm( rShort ); @@ -2574,7 +2574,7 @@ bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUStrin } } - if( pAutocorr_List->Insert( std::move(pNew) ) ) + if( pAutocorr_List->Insert( std::move(aNew) ) ) { bRet = MakeBlocklist_Imp( *xStg ); xStg = nullptr; @@ -2605,8 +2605,7 @@ void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, // Update the word list if( bRet ) { - std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( rShort, sLong, false )); - if( pAutocorr_List->Insert( std::move(pNew) ) ) + if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) { tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); MakeBlocklist_Imp( *xStor ); @@ -2619,20 +2618,18 @@ void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, } // Keep the list sorted ... -struct CompareSvxAutocorrWordList +struct SvxAutocorrWordList::CompareSvxAutocorrWordList { - bool operator()( SvxAutocorrWord* const& lhs, SvxAutocorrWord* const& rhs ) const + bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const { CollatorWrapper& rCmp = ::GetCollatorWrapper(); - return rCmp.compareString( lhs->GetShort(), rhs->GetShort() ) < 0; + return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0; } }; namespace { -// can't use std::unique_ptr until we have C++14 -typedef std::set<SvxAutocorrWord*, CompareSvxAutocorrWordList> AutocorrWordSetType; -typedef std::unordered_map<OUString, std::unique_ptr<SvxAutocorrWord>> AutocorrWordHashType; +typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType; } @@ -2640,16 +2637,14 @@ struct SvxAutocorrWordList::Impl { // only one of these contains the data - mutable AutocorrWordSetType maSet; + // maSortedVector is manually sorted so we can optimise data movement + mutable AutocorrWordSetType maSortedVector; mutable AutocorrWordHashType maHash; // key is 'Short' void DeleteAndDestroyAll() { maHash.clear(); - - for (auto const& elem : maSet) - delete elem; - maSet.clear(); + maSortedVector.clear(); } }; @@ -2657,7 +2652,6 @@ SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {} SvxAutocorrWordList::~SvxAutocorrWordList() { - mpImpl->DeleteAndDestroyAll(); } void SvxAutocorrWordList::DeleteAndDestroyAll() @@ -2666,70 +2660,89 @@ void SvxAutocorrWordList::DeleteAndDestroyAll() } // returns true if inserted -bool SvxAutocorrWordList::Insert(std::unique_ptr<SvxAutocorrWord> pWord) const +const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const { - if ( mpImpl->maSet.empty() ) // use the hash + if ( mpImpl->maSortedVector.empty() ) // use the hash { - OUString aShort( pWord->GetShort() ); - return mpImpl->maHash.insert( std::pair<OUString, std::unique_ptr<SvxAutocorrWord> >( aShort, std::move(pWord) ) ).second; + OUString aShort = aWord.GetShort(); + auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) ); + if (inserted) + return &(it->second); + return nullptr; } else - return mpImpl->maSet.insert( pWord.release() ).second; + { + auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList()); + if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(aWord, *it)) + { + it = mpImpl->maSortedVector.insert(it, std::move(aWord)); + return &*it; + } + return nullptr; + } } void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt) { - std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); - (void)Insert(std::move(pNew)); + (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); } bool SvxAutocorrWordList::empty() const { - return mpImpl->maHash.empty() && mpImpl->maSet.empty(); + return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); } -std::unique_ptr<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord) +boost::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord) { - std::unique_ptr<SvxAutocorrWord> pMatch; - if ( mpImpl->maSet.empty() ) // use the hash + if ( mpImpl->maSortedVector.empty() ) // use the hash { AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); if( it != mpImpl->maHash.end() ) { - pMatch = std::move(it->second); + SvxAutocorrWord pMatch = std::move(it->second); mpImpl->maHash.erase (it); + return pMatch; } } else { - AutocorrWordSetType::iterator it = mpImpl->maSet.find( pWord ); - if( it != mpImpl->maSet.end() ) + auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); + if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) { - pMatch = std::unique_ptr<SvxAutocorrWord>(*it); - mpImpl->maSet.erase (it); + SvxAutocorrWord pMatch = std::move(*it); + mpImpl->maSortedVector.erase (it); + return pMatch; } } - return pMatch; + return boost::optional<SvxAutocorrWord>(); } // return the sorted contents - defer sorting until we have to. -SvxAutocorrWordList::Content SvxAutocorrWordList::getSortedContent() const +const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const { - Content aContent; - // convert from hash to set permanently - if ( mpImpl->maSet.empty() ) + if ( mpImpl->maSortedVector.empty() ) { - // This beast has some O(N log(N)) in a terribly slow ICU collate fn. - for (auto & elem : mpImpl->maHash) - mpImpl->maSet.insert( elem.second.release() ); + std::vector<SvxAutocorrWord> tmp; + tmp.reserve(mpImpl->maHash.size()); + for (auto & rPair : mpImpl->maHash) + tmp.emplace_back(std::move(rPair.second)); mpImpl->maHash.clear(); + // sort twice - this gets the list into mostly-sorted order, which + // reduces the number of times we need to invoke the expensive ICU collate fn. + std::sort(tmp.begin(), tmp.end(), + [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) + { + return lhs.GetShort() < rhs.GetShort(); + }); + // This beast has some O(N log(N)) in a terribly slow ICU collate fn. + // stable_sort is twice as fast as sort in this situation because it does + // fewer comparison operations. + std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); + mpImpl->maSortedVector = std::move(tmp); } - for (auto const& elem : mpImpl->maSet) - aContent.push_back(elem); - - return aContent; + return mpImpl->maSortedVector; } const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, @@ -2776,8 +2789,7 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); // avoid double spaces before simple "word" replacement left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong(); - SvxAutocorrWord* pNew = new SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern); - if( Insert( std::unique_ptr<SvxAutocorrWord>(pNew) ) ) + if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) return pNew; } } else @@ -2843,8 +2855,7 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos)); aLong = buf.makeStringAndClear(); } - SvxAutocorrWord* pNew = new SvxAutocorrWord(aShort, aLong); - if ( Insert( std::unique_ptr<SvxAutocorrWord>(pNew) ) ) + if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) { if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) return pNew; @@ -2860,14 +2871,14 @@ const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rT { for (auto const& elem : mpImpl->maHash) { - if( const SvxAutocorrWord *aTmp = WordMatches( elem.second.get(), rTxt, rStt, nEndPos ) ) - return aTmp; + if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) + return pTmp; } - for (auto const& elem : mpImpl->maSet) + for (auto const& elem : mpImpl->maSortedVector) { - if( const SvxAutocorrWord *aTmp = WordMatches( elem, rTxt, rStt, nEndPos ) ) - return aTmp; + if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) + return pTmp; } return nullptr; } |