diff options
author | Luboš Luňák <l.lunak@collabora.com> | 2022-03-06 19:24:37 +0100 |
---|---|---|
committer | Luboš Luňák <l.lunak@collabora.com> | 2022-03-23 09:09:08 +0100 |
commit | 74cf42c522e59397bb725d35c682364de032cb99 (patch) | |
tree | a4770e553ee04d6881807cc568ef27820e68f1d7 | |
parent | 4cb50c1081aa3f4686da8a35ca1159de7bcb750b (diff) |
faster bulk insert into o3tl::sorted_vector (tdf#117366)
Repeated insert() into o3tl::sorted_vector can turn out to be
pathological, repeatedly moving most items aside for each
element inserted. Make it possible to insert a normal vector
that's been pre-sorted and made unique. 31s->9s for loading
tdf#117366.
Change-Id: If3a0366dd240ad46c23f5f3dc04e781b8c4b2aa2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/131085
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
-rw-r--r-- | include/o3tl/sorted_vector.hxx | 57 | ||||
-rw-r--r-- | sc/source/filter/inc/sheetdatabuffer.hxx | 9 | ||||
-rw-r--r-- | sc/source/filter/oox/sheetdatabuffer.cxx | 12 |
3 files changed, 64 insertions, 14 deletions
diff --git a/include/o3tl/sorted_vector.hxx b/include/o3tl/sorted_vector.hxx index 2bd8b7273a9a..13c0b25f8f0e 100644 --- a/include/o3tl/sorted_vector.hxx +++ b/include/o3tl/sorted_vector.hxx @@ -12,7 +12,9 @@ #include <vector> #include <algorithm> +#include <cassert> #include <functional> +#include <iterator> #include <memory> #include <type_traits> @@ -229,19 +231,32 @@ public: void insert(sorted_vector<Value,Compare,Find> const& rOther) { - // optimization for the rather common case that we are overwriting this with the contents - // of another sorted vector - if ( empty() ) - { - m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end()); - } - else - { - for (const_iterator it = rOther.m_vector.begin(); it != rOther.m_vector.end(); ++it) - { - insert(*it); - } - } + // optimization for the rather common case that we are overwriting this with the contents + // of another sorted vector + if ( empty() ) + m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end()); + else + insert_internal( rOther.m_vector ); + } + + void insert_sorted_unique_vector(const std::vector<Value>& rOther) + { + assert( std::is_sorted(rOther.begin(), rOther.end(), Compare())); + assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == rOther.end()); + if ( empty() ) + m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end()); + else + insert_internal( rOther ); + } + + void insert_sorted_unique_vector(std::vector<Value>&& rOther) + { + assert( std::is_sorted(rOther.begin(), rOther.end(), Compare())); + assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == rOther.end()); + if ( empty() ) + m_vector.swap( rOther ); + else + insert_internal( rOther ); } /* Clear() elements in the vector, and free them one by one. */ @@ -266,6 +281,22 @@ public: } private: + static bool compare_equal( const Value& v1, const Value& v2 ) + { // Synthetize == check from < check for std::unique asserts above. + return !Compare()( v1, v2 ) && !Compare()( v2, v1 ); + } + + void insert_internal( const std::vector<Value>& rOther ) + { + // Do a union in one pass rather than repeated insert() that could repeatedly + // move large amounts of data. + vector_t tmp; + tmp.reserve( m_vector.size() + rOther.size()); + std::set_union( m_vector.begin(), m_vector.end(), + rOther.begin(), rOther.end(), + std::back_inserter( tmp ), Compare()); + m_vector.swap( tmp ); + } vector_t m_vector; }; diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx index 17add16e6234..8002e6ee93f9 100644 --- a/sc/source/filter/inc/sheetdatabuffer.hxx +++ b/sc/source/filter/inc/sheetdatabuffer.hxx @@ -202,8 +202,17 @@ private: return lhs.mnEndRow<rhs.mnStartRow; } }; + struct StyleRowRangeCompEqual + { + bool operator() (const RowRangeStyle& lhs, const RowRangeStyle& rhs) const + { + return lhs.mnEndRow==rhs.mnStartRow; + } + }; typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > RowStyles; typedef ::std::map< sal_Int32, RowStyles > ColStyles; + typedef ::std::vector< RowRangeStyle > TmpRowStyles; + typedef ::std::map< sal_Int32, TmpRowStyles > TmpColStyles; /** Stores information about a merged cell range. */ struct MergedRange { diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx index d4d0ad87affd..b4d6158f66fe 100644 --- a/sc/source/filter/oox/sheetdatabuffer.cxx +++ b/sc/source/filter/oox/sheetdatabuffer.cxx @@ -353,6 +353,9 @@ void SheetDataBuffer::addColXfStyles() addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList ); } // gather all ranges that have the same style and apply them in bulk + // Collect data in unsorted vectors and sort them just once at the end + // instead of possibly slow repeated inserts. + TmpColStyles tmpStylesPerColumn; for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap ) { for (const ScRange & rAddress : rRanges) @@ -363,9 +366,16 @@ void SheetDataBuffer::addColXfStyles() aStyleRows.mnStartRow = rAddress.aStart.Row(); aStyleRows.mnEndRow = rAddress.aEnd.Row(); for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol ) - maStylesPerColumn[ nCol ].insert( aStyleRows ); + tmpStylesPerColumn[ nCol ].push_back( aStyleRows ); } } + for( auto& rowStyles : tmpStylesPerColumn ) + { + TmpRowStyles& s = rowStyles.second; + std::sort( s.begin(), s.end(), StyleRowRangeComp()); + s.erase( std::unique( s.begin(), s.end(), StyleRowRangeCompEqual()), s.end()); + maStylesPerColumn[ rowStyles.first ].insert_sorted_unique_vector( std::move( s )); + } } void SheetDataBuffer::addColXfStyleProcessRowRanges() |