From f566a73adcf170d103b0561c7ea2871596af7142 Mon Sep 17 00:00:00 2001 From: "Armin Le Grand (allotropia)" Date: Fri, 17 Nov 2023 17:17:23 +0100 Subject: Fix performance regression with ScPatternAttr/SC Due to the paradigm item change the test make CppunitTest_sc_tablesheetobj with CPPUNIT_TEST_NAME sc_apitest::ScTableSheetObj::testSheetCellRangeProperties got much slower. Unfortunately it did not break, so got unnoted. I took a look now. First I intended to add some hashing in an std::unordered_set using that hash values at ScPatternAttr, but that is not even possible due to other data in that item that needs to be compared. I had the impression that it was 'somehow' hashed before, but after debugging the version before that change I noted that also the list of existing items was linearly compared to the new entry, using the operator==. Thus the problem was not due to not hashing, but due to the ScPatternAttr::operator==. That uses the hash (not changed), but no longer finds equal entries. This is because the hash code is made up from the SfxPoolItemPtrs in the SfxItemSet, so when all are equal we can be sure the SfxItemSet content is equal. To use this the other way around is *not* correct: Even with not all ptrs equal the SfxItemSets can still be equal, simply by one SfxItemSet using another, but identical incarnation of an item. Thuis means that ScPatternAttr::operator== does not detect all cases of equality. This worked in most cases before since most items were 'pooled' and thus much effort was used to ensure their uniqueness, but even before the paradigm item change an item type could be flagged as non-poolable. In that case, this already could fail but with no too bad consequences (just one more copy of an ScPatternAttr would stay). So I fixed that mainly in adapting and optimizing ScPatternAttr::operator==. The results are (same machine, same compiler, dbg version, metioned test): Version before item paradigm change: user 0m50,778s Version after item paradigm change: user 20m13,944s Version with memcmp: user 0m48,845s Version with hash: user 0m48,710s Since that hash does nothing else than to buffer the comparison of those item pointers I just tried to use memcmp instead, as is already used in other places (same file, ScPatternAttr::FastEqualPatternSets, also SfxItemSet::operator==). As can be seen above it makes practically no difference (memcomp even slightly faster). Since that hash is only used in ScPatternAttr::operator== and is same speed (memcomp linearly compares 56 SfxPoolItem ptrs) I decided to remove it. It needs quite some spaces to be reset and re-calculated which are not needed anymore. The calculation is based on dividing the number of items by 4, so we are good with 56, but if someone has/ will adapt the items used by ScPatternAttr it is easy to forget to adapt this, and not easy to change the alghorithm when it's not a multiple of 4. I also optimized/overhauled SfxItemSet::operator== (or better: the SfxItemSet::Equals used by it). It is now better readable, too. I also adapted ScAttrArray::AddCondFormat to not always incarnate/ delete ScPatternAttr instances, only when needed. This also helps a bit and could be done in more places. All in all it is really necessary to cleanup SC's usage of ScPatternAttr - there are quite some possibilities to make that cleaner and faster. In principle it's a huge 'compromize' to use item functionailty to have a possibly 'cheap' maximum shared SfxItemSet at a Cell. Decided to make SfxItemSet::operator== even faster for the case of unequal ranges by iterating over ranges of local SfxItemSet and incremented offset. Still accesses to 2nd SfxItemSet will be the expensive ones using the iterated WhichID. Added two more things to SfxItemSet::operator==: We can return early when both have no items set. For the unequal-ranges compare I added an early-exit when Count() items were compared. Looked at the errors that indeed do trigger the assert in ScPatternAttr::operator== and hint to incarnations of ScPatternAttr that do not use the needed range ATTR_PATTERN_START, ATTR_PATTERN_END. Hunted down to come from ScViewFunc::ApplyAttributes and there from some Dialogs, so seems some SC dialogs do not work with the correct range schema for that item. I tried code in ScViewFunc::ApplyAttributes to fix it, that works. I also tried to hunt that down to the Dialogs that use the wrong schema (TotalCount @SfxItemSet is 61 BTW). While it would be possible to do so, it's no guarentee to have this fixed. Thus I looked at ScPatternAttr::ScPatternAttr and added correciton code when one with the wrong range schema gets created, this is luckily not often needed and transfers the existing items with low costs. Maybe we should add a warning there if used, so at least new implementations of stuff or old ones (the Dialogs) can be corrected? Change-Id: I31da73ae30786bd6c5a08a5e3b7df8fe279af261 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159592 Tested-by: Jenkins Reviewed-by: Noel Grandin Reviewed-by: Armin Le Grand --- sc/inc/patattr.hxx | 6 +- sc/source/core/data/attarray.cxx | 13 +++- sc/source/core/data/global.cxx | 12 +++- sc/source/core/data/patattr.cxx | 128 +++++++++++++++++++-------------------- 4 files changed, 85 insertions(+), 74 deletions(-) (limited to 'sc') diff --git a/sc/inc/patattr.hxx b/sc/inc/patattr.hxx index b5deb19826d9..d6536551f965 100644 --- a/sc/inc/patattr.hxx +++ b/sc/inc/patattr.hxx @@ -53,7 +53,6 @@ enum class ScAutoFontColorMode class SC_DLLPUBLIC ScPatternAttr final : public SfxSetItem { std::optional pName; - mutable std::optional mxHashCode; mutable std::optional mxVisible; ScStyleSheet* pStyle; sal_uInt64 mnPAKey; @@ -181,16 +180,13 @@ public: void SetPAKey(sal_uInt64 nKey); sal_uInt64 GetPAKey() const; - static std::optional FastEqualPatternSets( const SfxItemSet& rSet1, const SfxItemSet& rSet2 ); - // TODO: tdf#135215: This is a band-aid to detect changes and invalidate the hash, // a proper way would be probably to override SfxItemSet::Changed(), but 6cb400f41df0dd10 // hardcoded SfxSetItem to contain SfxItemSet. - SfxItemSet& GetItemSet() { mxHashCode.reset(); mxVisible.reset(); return SfxSetItem::GetItemSet(); } + SfxItemSet& GetItemSet() { mxVisible.reset(); return SfxSetItem::GetItemSet(); } using SfxSetItem::GetItemSet; private: - void CalcHashCode() const; bool CalcVisible() const; }; diff --git a/sc/source/core/data/attarray.cxx b/sc/source/core/data/attarray.cxx index 3e56226f3704..c2f3b0b75195 100644 --- a/sc/source/core/data/attarray.cxx +++ b/sc/source/core/data/attarray.cxx @@ -294,10 +294,13 @@ void ScAttrArray::AddCondFormat( SCROW nStartRow, SCROW nEndRow, sal_uInt32 nInd { const ScPatternAttr* pPattern = GetPattern(nTempStartRow); + // changed to create pNewPattern only if needed, else use already + // existing pPattern. This shows by example how to avoid that special + // handling of ATTR_PATTERN/ScPatternAttr in SC and massive + // incarnations/desctructions of that Item (which contains an ItemSet) std::unique_ptr pNewPattern; if(pPattern) { - pNewPattern.reset( new ScPatternAttr(*pPattern) ); SCROW nPatternStartRow; SCROW nPatternEndRow; GetPatternRange( nPatternStartRow, nPatternEndRow, nTempStartRow ); @@ -313,12 +316,14 @@ void ScAttrArray::AddCondFormat( SCROW nStartRow, SCROW nEndRow, sal_uInt32 nInd aNewCondFormatData = rCondFormatData; aNewCondFormatData.insert(nIndex); ScCondFormatItem aItem( std::move(aNewCondFormatData) ); + pNewPattern.reset( new ScPatternAttr(*pPattern) ); pNewPattern->GetItemSet().Put( aItem ); } } else { ScCondFormatItem aItem(nIndex); + pNewPattern.reset( new ScPatternAttr(*pPattern) ); pNewPattern->GetItemSet().Put( aItem ); } } @@ -330,7 +335,11 @@ void ScAttrArray::AddCondFormat( SCROW nStartRow, SCROW nEndRow, sal_uInt32 nInd nTempEndRow = nEndRow; } - SetPatternArea( nTempStartRow, nTempEndRow, std::move(pNewPattern), true ); + if (pNewPattern) + SetPatternArea( nTempStartRow, nTempEndRow, std::move(pNewPattern), true ); + else + SetPatternArea( nTempStartRow, nTempEndRow, pPattern, true ); + nTempStartRow = nTempEndRow + 1; } while(nTempEndRow < nEndRow); diff --git a/sc/source/core/data/global.cxx b/sc/source/core/data/global.cxx index c4737d1e0bce..29a616c6b6e8 100644 --- a/sc/source/core/data/global.cxx +++ b/sc/source/core/data/global.cxx @@ -187,8 +187,16 @@ bool ScGlobal::CheckWidthInvalidate( bool& bNumFormatChanged, const SfxItemSet& rNewAttrs, const SfxItemSet& rOldAttrs ) { - std::optional equal = ScPatternAttr::FastEqualPatternSets( rNewAttrs, rOldAttrs ); - if( equal.has_value() && *equal ) + // Here ScPatternAttr::FastEqualPatternSets was used before. This implies that + // the two given SfxItemSet are internal ones from ScPatternAttr, but there is + // no guarantee here for that. Also that former method contained the comment + // "Actually test_tdf133629 from UITest_calc_tests9 somehow manages to have + // a different range (and I don't understand enough why), so better be safe and compare fully." + // which may be based on this usage. I check for that already in + // ScPatternAttr::operator==, seems not to be triggered there. + // All in all: Better use SfxItemSet::operator== here, and not one specialized + // on the SfxItemSets of ScPatternAttr + if (rNewAttrs == rOldAttrs) { bNumFormatChanged = false; return false; diff --git a/sc/source/core/data/patattr.cxx b/sc/source/core/data/patattr.cxx index d7d460abdbb5..7fe65bc82ba9 100644 --- a/sc/source/core/data/patattr.cxx +++ b/sc/source/core/data/patattr.cxx @@ -68,6 +68,8 @@ #include #include +const WhichRangesContainer aScPatternAttrSchema(svl::Items); + ScPatternAttr::ScPatternAttr( SfxItemSet&& pItemSet, const OUString& rStyleName ) : SfxSetItem ( ATTR_PATTERN, std::move(pItemSet) ), pName ( rStyleName ), @@ -75,6 +77,12 @@ ScPatternAttr::ScPatternAttr( SfxItemSet&& pItemSet, const OUString& rStyleName mnPAKey(0) { setNewItemCallback(); + + // We need to ensure that ScPatternAttr is using the correct WhichRange, + // see comments in commit message. This does transfers the items with + // minimized overhead, too + if (GetItemSet().GetRanges() != aScPatternAttrSchema) + GetItemSet().SetRanges(aScPatternAttrSchema); } ScPatternAttr::ScPatternAttr( SfxItemSet&& pItemSet ) @@ -83,6 +91,12 @@ ScPatternAttr::ScPatternAttr( SfxItemSet&& pItemSet ) mnPAKey(0) { setNewItemCallback(); + + // We need to ensure that ScPatternAttr is using the correct WhichRange, + // see comments in commit message. This does transfers the items with + // minimized overhead, too + if (GetItemSet().GetRanges() != aScPatternAttrSchema) + GetItemSet().SetRanges(aScPatternAttrSchema); } ScPatternAttr::ScPatternAttr( SfxItemPool* pItemPool ) @@ -125,49 +139,63 @@ static bool StrCmp( const OUString* pStr1, const OUString* pStr2 ) constexpr size_t compareSize = ATTR_PATTERN_END - ATTR_PATTERN_START + 1; -std::optional ScPatternAttr::FastEqualPatternSets( const SfxItemSet& rSet1, const SfxItemSet& rSet2 ) +bool ScPatternAttr::operator==( const SfxPoolItem& rCmp ) const { - // #i62090# The SfxItemSet in the SfxSetItem base class always has the same ranges - // (single range from ATTR_PATTERN_START to ATTR_PATTERN_END), and the items are pooled, - // so it's enough to compare just the pointers (Count just because it's even faster). - - if ( rSet1.Count() != rSet2.Count() ) - return { false }; - - // Actually test_tdf133629 from UITest_calc_tests9 somehow manages to have - // a different range (and I don't understand enough why), so better be safe and compare fully. - if( rSet1.TotalCount() != compareSize || rSet2.TotalCount() != compareSize ) - return std::nullopt; + // check if same incarnation + if (this == &rCmp) + return true; - SfxPoolItem const ** pItems1 = rSet1.GetItems_Impl(); // inline method of SfxItemSet - SfxPoolItem const ** pItems2 = rSet2.GetItems_Impl(); + // check SfxPoolItem base class + if (!SfxPoolItem::operator==(rCmp) ) + return false; - return { memcmp( pItems1, pItems2, compareSize * sizeof(pItems1[0]) ) == 0 }; -} + // check everything except the SfxItemSet from base class SfxSetItem + const ScPatternAttr& rOther(static_cast(rCmp)); + if (!StrCmp(GetStyleName(), rOther.GetStyleName())) + return false; -static bool EqualPatternSets( const SfxItemSet& rSet1, const SfxItemSet& rSet2 ) -{ - std::optional equal = ScPatternAttr::FastEqualPatternSets( rSet1, rSet2 ); - if(equal.has_value()) - return *equal; - return rSet1 == rSet2; -} + // here we need to compare the SfxItemSet. We *know* that these are + // all simple (one range, same range) + const SfxItemSet& rSet1(GetItemSet()); + const SfxItemSet& rSet2(rOther.GetItemSet()); -bool ScPatternAttr::operator==( const SfxPoolItem& rCmp ) const -{ - // #i62090# Use quick comparison between ScPatternAttr's ItemSets + // the former method 'FastEqualPatternSets' mentioned: + // "Actually test_tdf133629 from UITest_calc_tests9 somehow manages to have + // a different range (and I don't understand enough why), so better be safe and compare fully." + // in that case the hash code above would already fail, too + if (rSet1.TotalCount() != compareSize || rSet2.TotalCount() != compareSize) + { + // assert this for now, should not happen. If it does, look for it and evtl. + // enable SfxItemSet::operator== below + assert(false); + return rSet1 == rSet2; + } - if (!SfxPoolItem::operator==(rCmp) ) + // check pools, do not accept different pools + if (rSet1.GetPool() != rSet2.GetPool()) return false; - if (!mxHashCode) - CalcHashCode(); - auto const & rOther = static_cast(rCmp); - if (!rOther.mxHashCode) - rOther.CalcHashCode(); - if (*mxHashCode != *rOther.mxHashCode) + + // check count of set items, has to be equal + if (rSet1.Count() != rSet2.Count()) return false; - return EqualPatternSets( GetItemSet(), rOther.GetItemSet() ) && - StrCmp( GetStyleName(), rOther.GetStyleName() ); + + // compare each item separately + const SfxPoolItem **ppItem1(rSet1.GetItems_Impl()); + const SfxPoolItem **ppItem2(rSet2.GetItems_Impl()); + + // are all pointers the same? + if (0 == memcmp(ppItem1, ppItem2, compareSize * sizeof(ppItem1[0]))) + return true; + + for (sal_uInt16 nPos(0); nPos < compareSize; nPos++) + { + if (!SfxPoolItem::areSame(*ppItem1, *ppItem2)) + return false; + ++ppItem1; + ++ppItem2; + } + + return true; } SvxCellOrientation ScPatternAttr::GetCellOrientation( const SfxItemSet& rItemSet, const SfxItemSet* pCondSet ) @@ -971,7 +999,6 @@ void ScPatternAttr::GetFromEditItemSet( const SfxItemSet* pEditSet ) if( !pEditSet ) return; GetFromEditItemSet( GetItemSet(), *pEditSet ); - mxHashCode.reset(); mxVisible.reset(); } @@ -1015,7 +1042,6 @@ void ScPatternAttr::DeleteUnchanged( const ScPatternAttr* pOldAttrs ) if (SfxPoolItem::areSame( pThisItem, pOldItem )) { rThisSet.ClearItem( nSubWhich ); - mxHashCode.reset(); mxVisible.reset(); } } @@ -1025,7 +1051,6 @@ void ScPatternAttr::DeleteUnchanged( const ScPatternAttr* pOldAttrs ) if ( *pThisItem == rThisSet.GetPool()->GetDefaultItem( nSubWhich ) ) { rThisSet.ClearItem( nSubWhich ); - mxHashCode.reset(); mxVisible.reset(); } } @@ -1047,7 +1072,6 @@ void ScPatternAttr::ClearItems( const sal_uInt16* pWhich ) SfxItemSet& rSet = GetItemSet(); for (sal_uInt16 i=0; pWhich[i]; i++) rSet.ClearItem(pWhich[i]); - mxHashCode.reset(); mxVisible.reset(); } @@ -1313,7 +1337,6 @@ void ScPatternAttr::SetStyleSheet( ScStyleSheet* pNewStyle, bool bClearDirectFor GetItemSet().SetParent(nullptr); pStyle = nullptr; } - mxHashCode.reset(); mxVisible.reset(); } @@ -1340,7 +1363,6 @@ void ScPatternAttr::UpdateStyleSheet(const ScDocument& rDoc) } else pStyle = nullptr; - mxHashCode.reset(); mxVisible.reset(); } @@ -1353,7 +1375,6 @@ void ScPatternAttr::StyleToName() pName = pStyle->GetName(); pStyle = nullptr; GetItemSet().SetParent( nullptr ); - mxHashCode.reset(); mxVisible.reset(); } } @@ -1489,27 +1510,4 @@ sal_uInt64 ScPatternAttr::GetPAKey() const return mnPAKey; } -void ScPatternAttr::CalcHashCode() const -{ - auto const & rSet = GetItemSet(); - // This is an unrolled hash function so the compiler/CPU can execute it in parallel, - // because we hit this hard when loading documents with lots of styles. - sal_uInt32 h1 = 0; - sal_uInt32 h2 = 0; - sal_uInt32 h3 = 0; - sal_uInt32 h4 = 0; - for (auto it = rSet.GetItems_Impl(), end = rSet.GetItems_Impl() + (compareSize / 4 * 4); it != end; ) - { - h1 = 31 * h1 + reinterpret_cast(*it); - ++it; - h2 = 31 * h2 + reinterpret_cast(*it); - ++it; - h3 = 31 * h3 + reinterpret_cast(*it); - ++it; - h4 = 31 * h4 + reinterpret_cast(*it); - ++it; - } - mxHashCode = h1 + h2 + h3 + h4; -} - /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit