summaryrefslogtreecommitdiff
path: root/sc/source/filter
diff options
context:
space:
mode:
authorNoel Grandin <noel@peralex.com>2021-04-30 18:24:12 +0200
committerNoel Grandin <noel.grandin@collabora.co.uk>2021-04-30 21:39:51 +0200
commitdeac5c84732c3491a0ef5bf7f8c1552e6def4fc0 (patch)
tree14d93b04f790c80695df11fb22a49fc3648700a7 /sc/source/filter
parentc926974ec1a4dbb72f273927fed0bc2f5e19c374 (diff)
tdf#79049 speed up OOXML workbook load (2)
The allocation of memory and pointer chasing were slowing things down in the styles conversion. So switch to a more cache-dense data structure, and re-arrange the loops to be more cache-friendly. This takes the load time from 1m9s to 40s for me. Change-Id: I876580adc7823313b0cdb067f2f6b3e61c39ecf8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114941 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Diffstat (limited to 'sc/source/filter')
-rw-r--r--sc/source/filter/inc/sheetdatabuffer.hxx6
-rw-r--r--sc/source/filter/oox/sheetdatabuffer.cxx153
2 files changed, 90 insertions, 69 deletions
diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx
index c49e33280a10..17add16e6234 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -22,6 +22,7 @@
#include <vector>
#include <map>
#include <set>
+#include <o3tl/sorted_vector.hxx>
#include "richstring.hxx"
#include "worksheethelper.hxx"
@@ -165,7 +166,8 @@ private:
/** Writes all cell formatting attributes to the passed cell range list. (depreciates writeXfIdRangeProperties) */
void applyCellMerging( const ScRange& rRange );
- void addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange = false );
+ void addColXfStyles();
+ void addColXfStyleProcessRowRanges();
private:
/** Stores cell range address and formula token array of an array formula. */
typedef std::pair< ScRange, ApiTokenSequence > ArrayFormula;
@@ -200,7 +202,7 @@ private:
return lhs.mnEndRow<rhs.mnStartRow;
}
};
- typedef ::std::set< RowRangeStyle, StyleRowRangeComp > RowStyles;
+ typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > RowStyles;
typedef ::std::map< sal_Int32, RowStyles > ColStyles;
/** Stores information about a merged cell range. */
struct MergedRange
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx
index de1d2c76f3c9..c9c688c2ca17 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -346,57 +346,99 @@ static void addIfNotInMyMap( const StylesBuffer& rStyles, std::map< FormatKeyPai
rMap[ FormatKeyPair( nXfId, nFormatId ) ] = rRangeList;
}
-void SheetDataBuffer::addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange )
-{
- RowRangeStyle aStyleRows;
- aStyleRows.mnNumFmt.first = nXfId;
- aStyleRows.mnNumFmt.second = nFormatId;
- aStyleRows.mnStartRow = rAddress.aStart.Row();
- aStyleRows.mnEndRow = rAddress.aEnd.Row();
- for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol )
+void SheetDataBuffer::addColXfStyles()
+{
+ std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
+ for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
+ {
+ addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList );
+ }
+ // gather all ranges that have the same style and apply them in bulk
+ for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
{
- if ( !bProcessRowRange )
- maStylesPerColumn[ nCol ].insert( aStyleRows );
- else
+ for (const ScRange & rAddress : rRanges)
{
- RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
- // Reset row range for each column
+ RowRangeStyle aStyleRows;
+ aStyleRows.mnNumFmt.first = rFormatKeyPair.first;
+ aStyleRows.mnNumFmt.second = rFormatKeyPair.second;
aStyleRows.mnStartRow = rAddress.aStart.Row();
aStyleRows.mnEndRow = rAddress.aEnd.Row();
+ for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol )
+ maStylesPerColumn[ nCol ].insert( aStyleRows );
+ }
+ }
+}
- // If aStyleRows includes rows already allocated to a style
- // in rRowStyles, then we need to split it into parts.
- // ( to occupy only rows that have no style definition)
-
- // Start iterating at the first element that is not completely before aStyleRows
- RowStyles::iterator rows_it = rRowStyles.lower_bound(aStyleRows);
- RowStyles::iterator rows_end = rRowStyles.end();
- bool bAddRange = true;
- for ( ; rows_it != rows_end; ++rows_it )
+void SheetDataBuffer::addColXfStyleProcessRowRanges()
+{
+ // count the number of row-range-styles we have
+ AddressConverter& rAddrConv = getAddressConverter();
+ int cnt = 0;
+ for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+ {
+ if ( nXfId == -1 ) // it's a dud skip it
+ continue;
+ cnt += rRowRangeList.size();
+ }
+ // pre-allocate space in the sorted_vector
+ for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); ++nCol )
+ {
+ RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
+ rRowStyles.reserve(rRowStyles.size() + cnt);
+ }
+ const auto nMaxCol = rAddrConv.getMaxApiAddress().Col();
+ for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol )
+ {
+ RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
+ for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+ {
+ if ( nXfId == -1 ) // it's a dud skip it
+ continue;
+ // get all row ranges for id
+ for ( const auto& rRange : rRowRangeList )
{
- const RowRangeStyle& r = *rows_it;
-
- // Add the part of aStyleRows that does not overlap with r
- if ( aStyleRows.mnStartRow < r.mnStartRow )
+ RowRangeStyle aStyleRows;
+ aStyleRows.mnNumFmt.first = nXfId;
+ aStyleRows.mnNumFmt.second = -1;
+ aStyleRows.mnStartRow = rRange.mnFirst;
+ aStyleRows.mnEndRow = rRange.mnLast;
+
+ // Reset row range for each column
+ aStyleRows.mnStartRow = rRange.mnFirst;
+ aStyleRows.mnEndRow = rRange.mnLast;
+
+ // If aStyleRows includes rows already allocated to a style
+ // in rRowStyles, then we need to split it into parts.
+ // ( to occupy only rows that have no style definition)
+
+ // Start iterating at the first element that is not completely before aStyleRows
+ RowStyles::const_iterator rows_it = rRowStyles.lower_bound(aStyleRows);
+ bool bAddRange = true;
+ for ( ; rows_it != rRowStyles.end(); ++rows_it )
{
- RowRangeStyle aSplit = aStyleRows;
- aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1);
- // Insert with hint that aSplit comes directly before the current position
- rRowStyles.insert( rows_it, aSplit );
+ const RowRangeStyle& r = *rows_it;
+
+ // Add the part of aStyleRows that does not overlap with r
+ if ( aStyleRows.mnStartRow < r.mnStartRow )
+ {
+ RowRangeStyle aSplit = aStyleRows;
+ aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1);
+ rows_it = rRowStyles.insert( aSplit ).first;
+ }
+
+ // Done if no part of aStyleRows extends beyond r
+ if ( aStyleRows.mnEndRow <= r.mnEndRow )
+ {
+ bAddRange = false;
+ break;
+ }
+
+ // Cut off the part aStyleRows that was handled above
+ aStyleRows.mnStartRow = r.mnEndRow + 1;
}
-
- // Done if no part of aStyleRows extends beyond r
- if ( aStyleRows.mnEndRow <= r.mnEndRow )
- {
- bAddRange = false;
- break;
- }
-
- // Cut off the part aStyleRows that was handled above
- aStyleRows.mnStartRow = r.mnEndRow + 1;
+ if ( bAddRange )
+ rRowStyles.insert( aStyleRows );
}
- if ( bAddRange )
- rRowStyles.insert( aStyleRows );
}
}
}
@@ -414,32 +456,9 @@ void SheetDataBuffer::finalizeImport()
// write default formatting of remaining row range
maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange );
- std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
- for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
- {
- addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList );
- }
- // gather all ranges that have the same style and apply them in bulk
- for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
- {
- for (size_t i = 0, nSize = rRanges.size(); i < nSize; ++i)
- addColXfStyle( rFormatKeyPair.first, rFormatKeyPair.second, rRanges[i]);
- }
-
- for ( const auto& [rXfId, rRowRangeList] : maXfIdRowRangeList )
- {
- if ( rXfId == -1 ) // it's a dud skip it
- continue;
- AddressConverter& rAddrConv = getAddressConverter();
- // get all row ranges for id
- for ( const auto& rRange : rRowRangeList )
- {
- ScRange aRange( 0, rRange.mnFirst, getSheetIndex(),
- rAddrConv.getMaxApiAddress().Col(), rRange.mnLast, getSheetIndex() );
+ addColXfStyles();
- addColXfStyle( rXfId, -1, aRange, true );
- }
- }
+ addColXfStyleProcessRowRanges();
ScDocumentImport& rDocImport = getDocImport();
ScDocument& rDoc = rDocImport.getDoc();