diff options
author | Luboš Luňák <l.lunak@collabora.com> | 2022-05-27 19:51:40 +0200 |
---|---|---|
committer | Luboš Luňák <l.lunak@collabora.com> | 2022-05-28 05:44:27 +0200 |
commit | 7674399aac661eb503d7badc53b9a4d68bd9839d (patch) | |
tree | 1d5a5ed31a1e10d175da3232cccfe92a0a931fc1 | |
parent | e3d3adbcde43965b517473387acda81e53e248ed (diff) |
try to range-reduce even COUNTIFS if not matching empty cells
It's possible to do reduce range of COUNTIFS to non-empty data too,
since empty cells cannot contribute to the result, as long as
the criteria do not require matching empty cells. Without this
queries like =COUNTIFS($A:$A,...) can spend most of their time
clearing and searching the vConditions vector that's big and
not useful for the trailing empty cells in that column.
Change-Id: I8d1e7977f172ac9b2cf84af3f982e945be3cb46c
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135049
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
-rw-r--r-- | sc/qa/unit/ucalc_formula.cxx | 52 | ||||
-rw-r--r-- | sc/source/core/tool/interpr1.cxx | 130 |
2 files changed, 132 insertions, 50 deletions
diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx index 14d779d2e4f3..6bad4a40127c 100644 --- a/sc/qa/unit/ucalc_formula.cxx +++ b/sc/qa/unit/ucalc_formula.cxx @@ -308,6 +308,7 @@ public: void testFuncRowsHidden(); void testFuncSUMIFS(); void testFuncCOUNTIFEmpty(); + void testFuncCOUNTIFSRangeReduce(); void testFuncRefListArraySUBTOTAL(); void testFuncJumpMatrixArrayIF(); void testFuncJumpMatrixArrayOFFSET(); @@ -427,6 +428,7 @@ public: CPPUNIT_TEST(testFuncRowsHidden); CPPUNIT_TEST(testFuncSUMIFS); CPPUNIT_TEST(testFuncCOUNTIFEmpty); + CPPUNIT_TEST(testFuncCOUNTIFSRangeReduce); CPPUNIT_TEST(testFuncRefListArraySUBTOTAL); CPPUNIT_TEST(testFuncJumpMatrixArrayIF); CPPUNIT_TEST(testFuncJumpMatrixArrayOFFSET); @@ -9356,6 +9358,56 @@ void TestFormula::testFuncCOUNTIFEmpty() m_pDoc->DeleteTab(0); } +// Test that COUNTIFS counts properly empty cells if asked to. +void TestFormula::testFuncCOUNTIFSRangeReduce() +{ + sc::AutoCalcSwitch aACSwitch(*m_pDoc, true); // turn auto calc on. + m_pDoc->InsertTab(0, "Test"); + + // Data in A1:C9. + std::vector<std::vector<const char*>> aData = { + { "" }, + { "a", "1", "1" }, + { "b", "2", "2" }, + { "c", "4", "3" }, + { "d", "8", "4" }, + { "a", "16", "5" }, + { "" }, + { "b", "", "6" }, + { "c", "64", "7" } + }; + + insertRangeData(m_pDoc, ScAddress(0,0,0), aData); + + constexpr SCROW maxRow = 20; // so that the unittest is not slow in dbgutil builds + ScRange aSubRange( ScAddress( 0, 0, 0 ), ScAddress( 2, maxRow, 0 )); + m_pDoc->GetDataAreaSubrange(aSubRange); + // This is the range the data should be reduced to in ScInterpreter::IterateParametersIfs(). + CPPUNIT_ASSERT_EQUAL( SCROW(1), aSubRange.aStart.Row()); + CPPUNIT_ASSERT_EQUAL( SCROW(8), aSubRange.aEnd.Row()); + + m_pDoc->SetFormula( ScAddress(10, 0, 0), "=COUNTIFS($A1:$A" + OUString::number(maxRow+1) + + "; \"\"; $B1:$B" + OUString::number(maxRow+1) + + "; \"\"; $C1:$C" + OUString::number(maxRow+1) +"; \"\")", + formula::FormulaGrammar::GRAM_NATIVE_UI); + // But it should find out that it can't range reduce and must count all the empty rows. + CPPUNIT_ASSERT_EQUAL( double(maxRow + 1 - 7), m_pDoc->GetValue(ScAddress(10, 0, 0))); + + // Check also with criteria set as cell references, the middle one resulting in matching + // empty cells (which should cause ScInterpreter::IterateParametersIfs() to undo + // the range reduction). This should only match the A8-C8 row, but it also shouldn't crash. + // Matching empty cells using a cell reference needs a formula to set the cell to + // an empty string, plain empty cell wouldn't do, so use K2 for that. + m_pDoc->SetFormula( ScAddress(10, 1, 0), "=\"\"", formula::FormulaGrammar::GRAM_NATIVE_UI ); + m_pDoc->SetFormula( ScAddress(10, 0, 0), "=COUNTIFS($A1:$A" + OUString::number(maxRow+1) + + "; A8; $B1:$B" + OUString::number(maxRow+1) + + "; K2; $C1:$C" + OUString::number(maxRow+1) + "; C8)", + formula::FormulaGrammar::GRAM_NATIVE_UI); + CPPUNIT_ASSERT_EQUAL( double(1), m_pDoc->GetValue(ScAddress(10, 0, 0))); + + m_pDoc->DeleteTab(0); +} + // Test SUBTOTAL with reference lists in array context. void TestFormula::testFuncRefListArraySUBTOTAL() { diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index c6bcb29b28bb..0b3592976a71 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -5844,6 +5844,7 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf // matrix formula. vConditions.clear(); + // Range-reduce optimization SCCOL nStartColDiff = 0; SCCOL nEndColDiff = 0; SCROW nStartRowDiff = 0; @@ -5851,43 +5852,39 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf bool bRangeReduce = false; ScRange aMainRange; - // Range-reduce optimization - if (nParamCount % 2) // Not COUNTIFS + bool bHasDoubleRefCriteriaRanges = true; + // Do not attempt main-range reduce if any of the criteria-ranges are not double-refs. + // For COUNTIFS queries it's possible to range-reduce too, if the query is not supposed + // to match empty cells (will be checked and undone later if needed), so simply treat + // the first criteria range as the main range for purposes of detecting if this can be done. + for (sal_uInt16 nParamIdx = 2; nParamIdx < nParamCount; nParamIdx += 2 ) { - bool bHasDoubleRefCriteriaRanges = true; - // Do not attempt main-range reduce if any of the criteria-ranges are not double-refs. - for (sal_uInt16 nParamIdx = 2; nParamIdx < nParamCount; nParamIdx += 2 ) + const formula::FormulaToken* pCriteriaRangeToken = pStack[ sp-nParamIdx ]; + if (pCriteriaRangeToken->GetType() != svDoubleRef ) { - const formula::FormulaToken* pCriteriaRangeToken = pStack[ sp-nParamIdx ]; - if (pCriteriaRangeToken->GetType() != svDoubleRef ) - { - bHasDoubleRefCriteriaRanges = false; - break; - } + bHasDoubleRefCriteriaRanges = false; + break; } + } - // Probe the main range token, and try if we can shrink the range without altering results. - const formula::FormulaToken* pMainRangeToken = pStack[ sp-nParamCount ]; - if (pMainRangeToken->GetType() == svDoubleRef && bHasDoubleRefCriteriaRanges) + // Probe the main range token, and try if we can shrink the range without altering results. + const formula::FormulaToken* pMainRangeToken = pStack[ sp-nParamCount ]; + if (pMainRangeToken->GetType() == svDoubleRef && bHasDoubleRefCriteriaRanges) + { + const ScComplexRefData* pRefData = pMainRangeToken->GetDoubleRef(); + if (!pRefData->IsDeleted()) { - const ScComplexRefData* pRefData = pMainRangeToken->GetDoubleRef(); - if (!pRefData->IsDeleted()) + DoubleRefToRange( *pRefData, aMainRange); + if (aMainRange.aStart.Tab() == aMainRange.aEnd.Tab()) { - DoubleRefToRange( *pRefData, aMainRange); - - if (aMainRange.aStart.Tab() == aMainRange.aEnd.Tab()) - { - // Shrink the range to actual data content. - ScRange aSubRange = aMainRange; - mrDoc.GetDataAreaSubrange(aSubRange); - - nStartColDiff = aSubRange.aStart.Col() - aMainRange.aStart.Col(); - nStartRowDiff = aSubRange.aStart.Row() - aMainRange.aStart.Row(); - - nEndColDiff = aSubRange.aEnd.Col() - aMainRange.aEnd.Col(); - nEndRowDiff = aSubRange.aEnd.Row() - aMainRange.aEnd.Row(); - bRangeReduce = nStartColDiff || nStartRowDiff || nEndColDiff || nEndRowDiff; - } + // Shrink the range to actual data content. + ScRange aSubRange = aMainRange; + mrDoc.GetDataAreaSubrange(aSubRange); + nStartColDiff = aSubRange.aStart.Col() - aMainRange.aStart.Col(); + nStartRowDiff = aSubRange.aStart.Row() - aMainRange.aStart.Row(); + nEndColDiff = aSubRange.aEnd.Col() - aMainRange.aEnd.Col(); + nEndRowDiff = aSubRange.aEnd.Row() - aMainRange.aEnd.Row(); + bRangeReduce = nStartColDiff || nStartRowDiff || nEndColDiff || nEndRowDiff; } } } @@ -6077,6 +6074,56 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf return; } + ScQueryParam rParam; + ScQueryEntry& rEntry = rParam.GetEntry(0); + ScQueryEntry::Item& rItem = rEntry.GetQueryItem(); + rEntry.bDoQuery = true; + if (!bIsString) + { + rItem.meType = ScQueryEntry::ByValue; + rItem.mfVal = fVal; + rEntry.eOp = SC_EQUAL; + } + else + { + rParam.FillInExcelSyntax(mrDoc.GetSharedStringPool(), aString.getString(), 0, pFormatter); + if (rItem.meType == ScQueryEntry::ByString) + rParam.eSearchType = DetectSearchType(rItem.maString.getString(), mrDoc); + } + + // Undo bRangeReduce if asked to match empty cells (which should be rare). + assert(rEntry.GetQueryItems().size() == 1); + if((rEntry.IsQueryByEmpty() || rItem.mbMatchEmpty) && bRangeReduce) + { + bRangeReduce = false; + // All criteria ranges are svDoubleRef's, so only vConditions needs adjusting. + assert(vRefArrayConditions.empty()); + if(!vConditions.empty()) + { + std::vector<sal_uInt8> newConditions; + SCCOL newDimensionCols = nCol2 - nCol1 + 1; + SCROW newDimensionRows = nRow2 - nRow1 + 1; + newConditions.reserve( newDimensionCols * newDimensionRows ); + SCCOL col = nCol1; + for(; col < nCol1 + nStartColDiff; ++col) + newConditions.insert( newConditions.end(), newDimensionRows, 0 ); + for(; col <= nCol2 - nStartColDiff; ++col) + { + newConditions.insert( newConditions.end(), nStartRowDiff, 0 ); + SCCOL oldCol = col - ( nCol1 + nStartColDiff ); + auto it = vConditions.begin() + oldCol * nDimensionRows; + newConditions.insert( newConditions.end(), it, it + nDimensionRows ); + newConditions.insert( newConditions.end(), -nEndRowDiff, 0 ); + } + for(; col <= nCol2; ++col) + newConditions.insert( newConditions.end(), newDimensionRows, 0 ); + assert( newConditions.size() == o3tl::make_unsigned( newDimensionCols * newDimensionRows )); + vConditions = std::move( newConditions ); + nDimensionCols = newDimensionCols; + nDimensionRows = newDimensionRows; + } + } + if (bRangeReduce) { // All reference ranges must be of the same size as the main range. @@ -6115,25 +6162,8 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf if (vConditions.empty()) vConditions.resize( nDimensionCols * nDimensionRows, 0); - ScQueryParam rParam; - rParam.nRow1 = nRow1; - rParam.nRow2 = nRow2; - - ScQueryEntry& rEntry = rParam.GetEntry(0); - ScQueryEntry::Item& rItem = rEntry.GetQueryItem(); - rEntry.bDoQuery = true; - if (!bIsString) - { - rItem.meType = ScQueryEntry::ByValue; - rItem.mfVal = fVal; - rEntry.eOp = SC_EQUAL; - } - else - { - rParam.FillInExcelSyntax(mrDoc.GetSharedStringPool(), aString.getString(), 0, pFormatter); - if (rItem.meType == ScQueryEntry::ByString) - rParam.eSearchType = DetectSearchType(rItem.maString.getString(), mrDoc); - } + rParam.nRow1 = nRow1; + rParam.nRow2 = nRow2; rParam.nCol1 = nCol1; rParam.nCol2 = nCol2; rEntry.nField = nCol1; |