From 1d0cdc461c43f0ce0eda4961311a972edf9e78e2 Mon Sep 17 00:00:00 2001 From: Luboš Luňák Date: Tue, 23 Nov 2021 20:48:02 +0100 Subject: try to search efficiently with a query with many items (tdf#133867) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Autofilter with large documents can create queries that have thousands of items. Searching all of those for every cell using the generic algorithm can be quite slow. First try an optimized search for this case that skips most of the complications and just tries to find in the query items an exact match for the cell. This significantly speeds up tdf#133867 or attachment from comment #2 in tdf#136838. Change-Id: I2bba18da6a101c76398d8c42c4306c53682899c1 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/125746 Tested-by: Jenkins Reviewed-by: Luboš Luňák --- sc/source/core/data/table3.cxx | 121 +++++++++++++++++++++++++++++++++-------- 1 file changed, 99 insertions(+), 22 deletions(-) (limited to 'sc/source') diff --git a/sc/source/core/data/table3.cxx b/sc/source/core/data/table3.cxx index 49c075121b61..6012deb77540 100644 --- a/sc/source/core/data/table3.cxx +++ b/sc/source/core/data/table3.cxx @@ -2367,25 +2367,6 @@ class QueryEvaluator return false; } - bool isRealWildOrRegExp(const ScQueryEntry& rEntry) const - { - if (mrParam.eSearchType == utl::SearchParam::SearchType::Normal) - return false; - - return isTextMatchOp(rEntry); - } - - bool isTestWildOrRegExp(const ScQueryEntry& rEntry) const - { - if (!mpTestEqualCondition) - return false; - - if (mrParam.eSearchType == utl::SearchParam::SearchType::Normal) - return false; - - return (rEntry.eOp == SC_LESS_EQUAL || rEntry.eOp == SC_GREATER_EQUAL); - } - void setupTransliteratorIfNeeded() { if (!mpTransliteration) @@ -2414,6 +2395,25 @@ public: { } + bool isRealWildOrRegExp(const ScQueryEntry& rEntry) const + { + if (mrParam.eSearchType == utl::SearchParam::SearchType::Normal) + return false; + + return isTextMatchOp(rEntry); + } + + bool isTestWildOrRegExp(const ScQueryEntry& rEntry) const + { + if (!mpTestEqualCondition) + return false; + + if (mrParam.eSearchType == utl::SearchParam::SearchType::Normal) + return false; + + return (rEntry.eOp == SC_LESS_EQUAL || rEntry.eOp == SC_GREATER_EQUAL); + } + static bool isQueryByValue( const ScQueryEntry::Item& rItem, ScRefCellValue& rCell) { @@ -2937,7 +2937,7 @@ public: std::pair validQueryProcessEntry(SCROW nRow, SCCOL nCol, SCTAB nTab, const ScQueryParam& rParam, ScRefCellValue& aCell, bool* pbTestEqualCondition, const ScInterpreterContext* pContext, QueryEvaluator& aEval, - const ScQueryEntry& rEntry ) + const ScDocument& rDoc, const ScQueryEntry& rEntry ) { std::pair aRes(false, false); const ScQueryEntry::QueryItemsType& rItems = rEntry.GetQueryItems(); @@ -2952,10 +2952,87 @@ std::pair validQueryProcessEntry(SCROW nRow, SCCOL nCol, SCTAB nTab, } return aRes; } - // Generic handling. + if( rEntry.eOp == SC_EQUAL && rItems.size() >= 10 ) + { + // If there are many items to query for (autofilter does this), then try to search + // efficiently in those items. So first search all the items of the relevant type, + // If that does not find anything, fall back to the generic code. + double value = 0; + bool valid = true; + // For ScQueryEntry::ByValue check that the cell either is a value or is a formula + // that has a value and is not an error (those are compared as strings). This + // is basically simplified isQueryByValue(). + if( aCell.meType == CELLTYPE_VALUE ) + value = aCell.mfValue; + else if (aCell.meType == CELLTYPE_FORMULA && aCell.mpFormula->GetErrCode() != FormulaError::NONE + && aCell.mpFormula->IsValue()) + { + value = aCell.mpFormula->GetValue(); + } + else + valid = false; + if(valid) + { + for (const auto& rItem : rItems) + { + // For speed don't bother comparing approximately here, usually there either + // will be an exact match or it wouldn't match anyway. + if (rItem.meType == ScQueryEntry::ByValue + && value == rItem.mfVal) + { + return std::make_pair(true, true); + } + } + } + } const svl::SharedString* cellSharedString = nullptr; OUString cellString; bool cellStringSet = false; + if( rEntry.eOp == SC_EQUAL && rItems.size() >= 10 ) + { + // The same as above but for strings. Try to optimize the case when + // it's a svl::SharedString comparison (case sensitive or not). + // That happens when SC_EQUAL is used, whole cell matching is enabled, + // and a regexp is not wanted, see compareByString() above. + if(rDoc.GetDocOptions().IsMatchWholeCell() + && !aEval.isRealWildOrRegExp(rEntry) && !aEval.isTestWildOrRegExp(rEntry)) + { + if(!cellStringSet) + { + cellString = aEval.getCellString(aCell, nRow, rEntry, pContext, &cellSharedString); + cellStringSet = true; + } + // For ScQueryEntry::ByString check that the cell is represented by a shared string, + // which means it's either a string cell or a formula error. This is not as + // generous as isQueryByString() but it should be enough and better be safe. + if(cellSharedString != nullptr) + { + if (rParam.bCaseSens) + { + for (const auto& rItem : rItems) + { + if (rItem.meType == ScQueryEntry::ByString + && cellSharedString->getData() == rItem.maString.getData()) + { + return std::make_pair(true, true); + } + } + } + else + { + for (const auto& rItem : rItems) + { + if (rItem.meType == ScQueryEntry::ByString + && cellSharedString->getDataIgnoreCase() == rItem.maString.getDataIgnoreCase()) + { + return std::make_pair(true, true); + } + } + } + } + } + } + // Generic handling. for (const auto& rItem : rItems) { if (rItem.meType == ScQueryEntry::ByTextColor) @@ -3059,7 +3136,7 @@ bool ScTable::ValidQuery( aCell = GetCellValue(nCol, nRow); std::pair aRes = validQueryProcessEntry(nRow, nCol, nTab, rParam, aCell, - pbTestEqualCondition, pContext, aEval, rEntry); + pbTestEqualCondition, pContext, aEval, rDocument, rEntry); if (nPos == -1) { -- cgit