diff options
author | Noel Grandin <noel.grandin@collabora.co.uk> | 2024-10-23 16:22:14 +0200 |
---|---|---|
committer | Noel Grandin <noel.grandin@collabora.co.uk> | 2024-10-25 08:20:51 +0200 |
commit | a958e09409b94b97c0d0aa67dd605d321dc42615 (patch) | |
tree | d747761d94198774c8204854c61ac3b4f625f968 | |
parent | 00c2d0c6e5dd567c0b235ad95f6b3c4282725821 (diff) |
speedup saving large XLS file with lots of query formula
make more use of svl::SharedString instead of
unnecessarily creating OUString that then need to be
interned. We were spending all our time inside the
the SharedStringPool::intern function.
20s to 12s
Change-Id: I344f0acfd462fdb6d78e392881f066a56844fa94
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/175538
Tested-by: Jenkins
Reviewed-by: Caolán McNamara <caolan.mcnamara@collabora.com>
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
-rw-r--r-- | sc/inc/cellform.hxx | 13 | ||||
-rw-r--r-- | sc/inc/cellvalue.hxx | 2 | ||||
-rw-r--r-- | sc/inc/queryevaluator.hxx | 4 | ||||
-rw-r--r-- | sc/source/core/data/cellvalue.cxx | 21 | ||||
-rw-r--r-- | sc/source/core/data/column3.cxx | 2 | ||||
-rw-r--r-- | sc/source/core/data/queryevaluator.cxx | 126 | ||||
-rw-r--r-- | sc/source/core/tool/cellform.cxx | 63 | ||||
-rw-r--r-- | sc/source/core/tool/interpr4.cxx | 2 | ||||
-rw-r--r-- | sc/source/core/tool/rangecache.cxx | 5 |
9 files changed, 150 insertions, 88 deletions
diff --git a/sc/inc/cellform.hxx b/sc/inc/cellform.hxx index a4a985e2d9cc..f1efb330746e 100644 --- a/sc/inc/cellform.hxx +++ b/sc/inc/cellform.hxx @@ -28,6 +28,7 @@ class ScAddress; class ScDocument; struct ScInterpreterContext; struct ScRefCellValue; +namespace svl { class SharedStringPool; } class SC_DLLPUBLIC ScCellFormat { @@ -43,18 +44,14 @@ public: const Color** ppColor, ScInterpreterContext* pContext, bool bNullVals = true, bool bFormula = false ); - // Note that if pShared is set and a value is returned that way, the returned OUString is empty. - static OUString GetInputString( + static svl::SharedString GetInputSharedString( const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* pContext, - const ScDocument& rDoc, const svl::SharedString** pShared = nullptr, bool bFiltering = false, - bool bForceSystemLocale = false ); + const ScDocument& rDoc, svl::SharedStringPool& rStrPool, + bool bFiltering = false, bool bForceSystemLocale = false ); static OUString GetInputString( const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* pContext, - const ScDocument& rDoc, bool bFiltering, bool bForceSystemLocale = false ) - { - return GetInputString( rCell, nFormat, pContext, rDoc, nullptr, bFiltering, bForceSystemLocale ); - } + const ScDocument& rDoc, bool bFiltering = false, bool bForceSystemLocale = false ); static OUString GetOutputString( ScDocument& rDoc, const ScAddress& rPos, const ScRefCellValue& rCell ); diff --git a/sc/inc/cellvalue.hxx b/sc/inc/cellvalue.hxx index d5a29c607c90..531f477acb2e 100644 --- a/sc/inc/cellvalue.hxx +++ b/sc/inc/cellvalue.hxx @@ -22,6 +22,7 @@ struct ScRefCellValue; namespace sc { struct ColumnBlockPosition; } +namespace svl { class SharedStringPool; } /** * Store arbitrary cell value of any kind. It only stores cell value and @@ -174,6 +175,7 @@ public: * ScEditUtil::GetString(). */ SC_DLLPUBLIC OUString getString( const ScDocument* pDoc ) const; + SC_DLLPUBLIC svl::SharedString getSharedString( const ScDocument* pDoc, svl::SharedStringPool& rStrPool ) const; /** * Retrieve a string value without modifying the states of any objects in diff --git a/sc/inc/queryevaluator.hxx b/sc/inc/queryevaluator.hxx index 6d3012141db8..a2cbc0848a3b 100644 --- a/sc/inc/queryevaluator.hxx +++ b/sc/inc/queryevaluator.hxx @@ -118,8 +118,8 @@ public: const ScRefCellValue& rCell); static bool isQueryByString(ScQueryOp eOp, ScQueryEntry::QueryType eType, const ScRefCellValue& rCell); - OUString getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol, - const svl::SharedString** sharedString); + OUString getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol); + svl::SharedString getCellSharedString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol); static bool isMatchWholeCell(const ScDocument& rDoc, ScQueryOp eOp); }; diff --git a/sc/source/core/data/cellvalue.cxx b/sc/source/core/data/cellvalue.cxx index 4330ea972992..2d51b57a827d 100644 --- a/sc/source/core/data/cellvalue.cxx +++ b/sc/source/core/data/cellvalue.cxx @@ -19,6 +19,7 @@ #include <formula/token.hxx> #include <formula/errorcodes.hxx> #include <svl/sharedstring.hxx> +#include <svl/sharedstringpool.hxx> namespace { @@ -659,6 +660,26 @@ OUString ScRefCellValue::getString( const ScDocument* pDoc ) const return getStringImpl(*this, pDoc); } +svl::SharedString ScRefCellValue::getSharedString( const ScDocument* pDoc, svl::SharedStringPool& rStrPool ) const +{ + switch (getType()) + { + case CELLTYPE_VALUE: + return rStrPool.intern(OUString::number(getDouble())); + case CELLTYPE_STRING: + return *getSharedString(); + case CELLTYPE_EDIT: + if (auto pEditText = getEditText()) + return rStrPool.intern(ScEditUtil::GetString(*pEditText, pDoc)); + break; + case CELLTYPE_FORMULA: + return getFormula()->GetString(); + default: + ; + } + return svl::SharedString::getEmptyString(); +} + OUString ScRefCellValue::getRawString( const ScDocument& rDoc ) const { return getRawStringImpl(*this, rDoc); diff --git a/sc/source/core/data/column3.cxx b/sc/source/core/data/column3.cxx index 25f943f68955..9326fbab4d1f 100644 --- a/sc/source/core/data/column3.cxx +++ b/sc/source/core/data/column3.cxx @@ -3163,7 +3163,7 @@ double* ScColumn::GetValueCell( SCROW nRow ) OUString ScColumn::GetInputString( const ScRefCellValue& aCell, SCROW nRow, bool bForceSystemLocale ) const { sal_uInt32 nFormat = GetNumberFormat(GetDoc().GetNonThreadedContext(), nRow); - return ScCellFormat::GetInputString(aCell, nFormat, nullptr, GetDoc(), nullptr, false, bForceSystemLocale); + return ScCellFormat::GetInputString(aCell, nFormat, nullptr, GetDoc(), false, bForceSystemLocale); } double ScColumn::GetValue( SCROW nRow ) const diff --git a/sc/source/core/data/queryevaluator.cxx b/sc/source/core/data/queryevaluator.cxx index 060716e08d92..4d3a02575066 100644 --- a/sc/source/core/data/queryevaluator.cxx +++ b/sc/source/core/data/queryevaluator.cxx @@ -295,8 +295,7 @@ std::pair<bool, bool> ScQueryEvaluator::compareByValue(const ScRefCellValue& rCe return std::pair<bool, bool>(bOk, bTestEqual); } -OUString ScQueryEvaluator::getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol, - const svl::SharedString** sharedString) +OUString ScQueryEvaluator::getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol) { if (rCell.getType() == CELLTYPE_FORMULA && rCell.getFormula()->GetErrCode() != FormulaError::NONE) @@ -311,20 +310,50 @@ OUString ScQueryEvaluator::getCellString(const ScRefCellValue& rCell, SCROW nRow assert(pos.second); // inserted it = pos.first; } - *sharedString = &it->second; - return OUString(); + return it->second.getString(); } else if (rCell.getType() == CELLTYPE_STRING) { - *sharedString = rCell.getSharedString(); - return OUString(); + return rCell.getSharedString()->getString(); } else { sal_uInt32 nFormat = mpContext ? mrTab.GetNumberFormat(*mpContext, ScAddress(nCol, nRow, mrTab.GetTab())) : mrTab.GetNumberFormat(nCol, nRow); - return ScCellFormat::GetInputString(rCell, nFormat, mpContext, mrDoc, sharedString, true); + return ScCellFormat::GetInputString(rCell, nFormat, mpContext, mrDoc, true); + } +} + +svl::SharedString ScQueryEvaluator::getCellSharedString(const ScRefCellValue& rCell, SCROW nRow, + SCCOL nCol) +{ + if (rCell.getType() == CELLTYPE_FORMULA + && rCell.getFormula()->GetErrCode() != FormulaError::NONE) + { + // Error cell is evaluated as string (for now). + const FormulaError error = rCell.getFormula()->GetErrCode(); + auto it = mCachedSharedErrorStrings.find(error); + if (it == mCachedSharedErrorStrings.end()) + { + svl::SharedString str = mrStrPool.intern(ScGlobal::GetErrorString(error)); + auto pos = mCachedSharedErrorStrings.insert({ error, str }); + assert(pos.second); // inserted + it = pos.first; + } + return it->second; + } + else if (rCell.getType() == CELLTYPE_STRING) + { + return *rCell.getSharedString(); + } + else + { + sal_uInt32 nFormat + = mpContext ? mrTab.GetNumberFormat(*mpContext, ScAddress(nCol, nRow, mrTab.GetTab())) + : mrTab.GetNumberFormat(nCol, nRow); + return ScCellFormat::GetInputSharedString(rCell, nFormat, mpContext, mrDoc, mrStrPool, + true); } } @@ -705,16 +734,14 @@ std::pair<bool, bool> ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR } } } - const svl::SharedString* cellSharedString = nullptr; - std::optional<OUString> oCellString; + svl::SharedString cellSharedString; const bool bFastCompareByString = isFastCompareByString(rEntry); if (rEntry.eOp == SC_EQUAL && rItems.size() >= 10 && bFastCompareByString) { // The same as above but for strings. Try to optimize the case when // it's a svl::SharedString comparison. That happens when SC_EQUAL is used // and simple matching is used, see compareByString() - if (!oCellString) - oCellString = getCellString(aCell, nRow, rEntry.nField, &cellSharedString); + cellSharedString = getCellSharedString(aCell, nRow, rEntry.nField); // Allow also checking ScQueryEntry::ByValue if the cell is not numeric, // as in that case isQueryByNumeric() would be false and isQueryByString() would // be true because of SC_EQUAL making isTextMatchOp() true. @@ -722,51 +749,45 @@ std::pair<bool, bool> ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR // For ScQueryEntry::ByString check that the cell is represented by a shared string, // which means it's either a string cell or a formula error. This is not as // generous as isQueryByString() but it should be enough and better be safe. - if (cellSharedString != nullptr) + if (rItems.size() >= 100) { - if (rItems.size() >= 100) + // Sort, cache and binary search for the string in items. + // Since each SharedString is identified by pointer value, + // sorting by pointer value is enough. + if (mCachedSortedItemStrings.size() <= nEntryIndex) { - // Sort, cache and binary search for the string in items. - // Since each SharedString is identified by pointer value, - // sorting by pointer value is enough. - if (mCachedSortedItemStrings.size() <= nEntryIndex) + mCachedSortedItemStrings.resize(nEntryIndex + 1); + auto& values = mCachedSortedItemStrings[nEntryIndex]; + values.reserve(rItems.size()); + for (const auto& rItem : rItems) { - mCachedSortedItemStrings.resize(nEntryIndex + 1); - auto& values = mCachedSortedItemStrings[nEntryIndex]; - values.reserve(rItems.size()); - for (const auto& rItem : rItems) + if (rItem.meType == ScQueryEntry::ByString + || (compareByValue && rItem.meType == ScQueryEntry::ByValue)) { - if (rItem.meType == ScQueryEntry::ByString - || (compareByValue && rItem.meType == ScQueryEntry::ByValue)) - { - values.push_back(mrParam.bCaseSens - ? rItem.maString.getData() - : rItem.maString.getDataIgnoreCase()); - } + values.push_back(mrParam.bCaseSens ? rItem.maString.getData() + : rItem.maString.getDataIgnoreCase()); } - std::sort(values.begin(), values.end()); } - auto& values = mCachedSortedItemStrings[nEntryIndex]; - const rtl_uString* string = mrParam.bCaseSens - ? cellSharedString->getData() - : cellSharedString->getDataIgnoreCase(); - auto it = std::lower_bound(values.begin(), values.end(), string); - if (it != values.end() && *it == string) - return std::make_pair(true, true); + std::sort(values.begin(), values.end()); } - else + auto& values = mCachedSortedItemStrings[nEntryIndex]; + const rtl_uString* string = mrParam.bCaseSens ? cellSharedString.getData() + : cellSharedString.getDataIgnoreCase(); + auto it = std::lower_bound(values.begin(), values.end(), string); + if (it != values.end() && *it == string) + return std::make_pair(true, true); + } + else + { + for (const auto& rItem : rItems) { - for (const auto& rItem : rItems) + if ((rItem.meType == ScQueryEntry::ByString + || (compareByValue && rItem.meType == ScQueryEntry::ByValue)) + && (mrParam.bCaseSens ? cellSharedString.getData() == rItem.maString.getData() + : cellSharedString.getDataIgnoreCase() + == rItem.maString.getDataIgnoreCase())) { - if ((rItem.meType == ScQueryEntry::ByString - || (compareByValue && rItem.meType == ScQueryEntry::ByValue)) - && (mrParam.bCaseSens - ? cellSharedString->getData() == rItem.maString.getData() - : cellSharedString->getDataIgnoreCase() - == rItem.maString.getDataIgnoreCase())) - { - return std::make_pair(true, true); - } + return std::make_pair(true, true); } } } @@ -794,15 +815,12 @@ std::pair<bool, bool> ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR } else if (isQueryByString(rEntry.eOp, rItem.meType, aCell)) { - if (!oCellString) - oCellString = getCellString(aCell, nRow, rEntry.nField, &cellSharedString); + cellSharedString = getCellSharedString(aCell, nRow, rEntry.nField); std::pair<bool, bool> aThisRes; - if (cellSharedString && bFastCompareByString) // fast - aThisRes = compareByString<true>(rEntry, rItem, cellSharedString, nullptr); - else if (cellSharedString) - aThisRes = compareByString(rEntry, rItem, cellSharedString, nullptr); + if (bFastCompareByString) // fast + aThisRes = compareByString<true>(rEntry, rItem, &cellSharedString, nullptr); else - aThisRes = compareByString(rEntry, rItem, nullptr, &*oCellString); + aThisRes = compareByString(rEntry, rItem, &cellSharedString, nullptr); aRes.first |= aThisRes.first; aRes.second |= aThisRes.second; } diff --git a/sc/source/core/tool/cellform.cxx b/sc/source/core/tool/cellform.cxx index c20175cd9a62..692b019586c2 100644 --- a/sc/source/core/tool/cellform.cxx +++ b/sc/source/core/tool/cellform.cxx @@ -21,6 +21,7 @@ #include <svl/numformat.hxx> #include <svl/sharedstring.hxx> +#include <svl/sharedstringpool.hxx> #include <formulacell.hxx> #include <document.hxx> @@ -130,12 +131,10 @@ OUString ScCellFormat::GetString( OUString ScCellFormat::GetInputString( const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* pContext, const ScDocument& rDoc, - const svl::SharedString** pShared, bool bFiltering, bool bForceSystemLocale ) + bool bFiltering, bool bForceSystemLocale ) { ScInterpreterContext& rContext = pContext ? *pContext : rDoc.GetNonThreadedContext(); - if(pShared != nullptr) - *pShared = nullptr; switch (rCell.getType()) { case CELLTYPE_STRING: @@ -160,35 +159,63 @@ OUString ScCellFormat::GetInputString( rContext.NFGetInputLineString(pFC->GetValue(), nFormat, *str, bFiltering, bForceSystemLocale); } else - { - const svl::SharedString& shared = pFC->GetString(); - // Allow callers to optimize by avoiding converting later back to OUString. - // To avoid refcounting that won't be needed, do not even return the OUString. - if( pShared != nullptr ) - *pShared = &shared; - else - str = shared.getString(); - } + str = pFC->GetString().getString(); const FormulaError nErrCode = pFC->GetErrCode(); if (nErrCode != FormulaError::NONE) - { str.reset(); - if( pShared != nullptr ) - *pShared = nullptr; - } return str ? std::move(*str) : svl::SharedString::EMPTY_STRING; } case CELLTYPE_NONE: - if( pShared != nullptr ) - *pShared = &svl::SharedString::getEmptyString(); return svl::SharedString::EMPTY_STRING; default: return svl::SharedString::EMPTY_STRING; } } +svl::SharedString ScCellFormat::GetInputSharedString( + const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* pContext, const ScDocument& rDoc, + svl::SharedStringPool& rStrPool, + bool bFiltering, bool bForceSystemLocale ) +{ + ScInterpreterContext& rContext = pContext ? *pContext : rDoc.GetNonThreadedContext(); + + switch (rCell.getType()) + { + case CELLTYPE_STRING: + case CELLTYPE_EDIT: + return rCell.getSharedString(&rDoc, rStrPool); + case CELLTYPE_VALUE: + { + OUString str; + rContext.NFGetInputLineString(rCell.getDouble(), nFormat, str, bFiltering, bForceSystemLocale); + return rStrPool.intern(str); + } + break; + case CELLTYPE_FORMULA: + { + ScFormulaCell* pFC = rCell.getFormula(); + const FormulaError nErrCode = pFC->GetErrCode(); + if (nErrCode != FormulaError::NONE) + return svl::SharedString::getEmptyString(); + else if (pFC->IsEmptyDisplayedAsString()) + return svl::SharedString::getEmptyString(); + else if (pFC->IsValue()) + { + OUString str; + rContext.NFGetInputLineString(pFC->GetValue(), nFormat, str, bFiltering, bForceSystemLocale); + return rStrPool.intern(str); + } + else + return pFC->GetString(); + } + case CELLTYPE_NONE: + default: + return svl::SharedString::getEmptyString(); + } +} + OUString ScCellFormat::GetOutputString( ScDocument& rDoc, const ScAddress& rPos, const ScRefCellValue& rCell ) { if (rCell.isEmpty()) diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx index 4f88ad3b4d27..d463ffdd81a6 100644 --- a/sc/source/core/tool/interpr4.cxx +++ b/sc/source/core/tool/interpr4.cxx @@ -253,7 +253,7 @@ void ScInterpreter::GetCellString( svl::SharedString& rStr, ScRefCellValue& rCel { case CELLTYPE_STRING: case CELLTYPE_EDIT: - rStr = mrStrPool.intern(rCell.getString(&mrDoc)); + rStr = rCell.getSharedString(&mrDoc, mrStrPool); break; case CELLTYPE_FORMULA: { diff --git a/sc/source/core/tool/rangecache.cxx b/sc/source/core/tool/rangecache.cxx index 24a8a39313ff..f5e98343455b 100644 --- a/sc/source/core/tool/rangecache.cxx +++ b/sc/source/core/tool/rangecache.cxx @@ -186,10 +186,7 @@ ScSortedRangeCache::ScSortedRangeCache(ScDocument* pDoc, const ScRange& rRange, assert(!ScQueryEvaluator::isQueryByValue(mQueryOp, mQueryType, cell)); if (ScQueryEvaluator::isQueryByString(mQueryOp, mQueryType, cell)) { - const svl::SharedString* sharedString = nullptr; - OUString string = evaluator.getCellString(cell, nRow, nCol, &sharedString); - if (sharedString) - string = sharedString->getString(); + OUString string = evaluator.getCellString(cell, nRow, nCol); colrowData.push_back(ColRowData{ mRowSearch ? nRow : nCol, string }); } } |