diff options
author | Caolán McNamara <caolanm@redhat.com> | 2012-05-10 15:43:47 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-05-11 16:33:30 +0100 |
commit | e7c861e3ec9398436eda16bb748af22a2d9afc27 (patch) | |
tree | 1df05a032fe4387af7e1fa959439bced4f32b3e9 /sot | |
parent | a2859b0bdf4bc84ea02c3a59ae4d626739732643 (diff) |
Related: fdo#47644 for huge files build a page chain cache for seeks
For huge ole2 structured storage files build a page chain cache on
demand to speed up long distance seeks
i.e. reduces .doc parse time for fdo#47644 from 1 minute 7 seconds to 18
seconds for me
Diffstat (limited to 'sot')
-rw-r--r-- | sot/source/sdstor/stgstrms.cxx | 133 | ||||
-rw-r--r-- | sot/source/sdstor/stgstrms.hxx | 4 |
2 files changed, 121 insertions, 16 deletions
diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx index 5e6c1eadf084..e982837352c8 100644 --- a/sot/source/sdstor/stgstrms.cxx +++ b/sot/source/sdstor/stgstrms.cxx @@ -26,9 +26,10 @@ * ************************************************************************/ +#include <algorithm> #include <string.h> // memcpy() - +#include <sal/log.hxx> #include <osl/file.hxx> #include <tools/tempfile.hxx> #include <tools/debug.hxx> @@ -334,6 +335,56 @@ void StgStrm::SetEntry( StgDirEntry& r ) r.SetDirty(); } +namespace lcl +{ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L + using std::is_sorted; +#else + template <typename iter> bool is_sorted(iter aStart, iter aEnd) + { + if (aStart == aEnd) + return true; + + for (iter aNext = aStart + 1; aNext != aEnd; aStart = aNext, ++aNext) + { + if (*aNext < *aStart) + return false; + } + + return true; + } +#endif +} + +bool StgStrm::buildPageChainCache() +{ + if (nSize > 0) + m_aPagesCache.reserve(nSize/nPageSize); + + sal_Int32 nBgn = nStart; + while (nBgn >= 0) + { + m_aPagesCache.push_back(nBgn); + sal_Int32 nOldBgn = nBgn; + nBgn = pFat->GetNextPage(nBgn); + if (nBgn == nOldBgn) + return false; + } + + m_bSortedPageChain = lcl::is_sorted(m_aPagesCache.begin(), m_aPagesCache.end()); + + SAL_WARN_IF(!m_bSortedPageChain, "sot", "unsorted page chain, that's suspicious"); + + return true; +} + +//See fdo#47644 for a .doc with a vast amount of pages where seeking around the +//document takes a colossal amount of time +// +//There's a cost to building a page cache, so only build one if the number of +//pages to seek through hits some sufficiently high value where it's worth it. +#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 256 + // Compute page number and offset for the given byte position. // If the position is behind the size, set the stream right // behind the EOF. @@ -355,7 +406,7 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos ) return sal_True; if( nNew > nOld ) { - // the new position is behind the current, so an incremental + // the new position is after the current, so an incremental // positioning is OK. Set the page relative position nRel = nNew - nOld; nBgn = nPage; @@ -369,13 +420,59 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos ) } // now, traverse the FAT chain. nRel /= nPageSize; + sal_Int32 nLast = STG_EOF; - while( nRel && nBgn >= 0 ) + if (m_aPagesCache.empty() && nRel < ARBITRARY_LARGE_AMOUNT_OF_PAGES) + { + while (nRel && nBgn >= 0) + { + nLast = nBgn; + nBgn = pFat->GetNextPage( nBgn ); + nRel--; + } + } + else if (nBgn >= 0) { - nLast = nBgn; - nBgn = pFat->GetNextPage( nBgn ); - nRel--; + //Seeking large distances is slow, so if we're starting seeking (some + //fairly arbitrary) large distances, build a cache and re-use it for + //subsequent seeks + if (m_aPagesCache.empty()) + { + SAL_INFO("sot", "kicking off large seek helper\n"); + buildPageChainCache(); + } + + std::vector<sal_Int32>::iterator aI; + + if (m_bSortedPageChain) + aI = std::lower_bound(m_aPagesCache.begin(), m_aPagesCache.end(), nBgn); + else + aI = std::find(m_aPagesCache.begin(), m_aPagesCache.end(), nBgn); + + if (aI == m_aPagesCache.end()) + { + SAL_WARN("sot", "Unknown page position"); + nBgn = STG_EOF; + } + else + { + size_t nBgnDistance = std::distance(m_aPagesCache.begin(), aI); + + size_t nIndex = nBgnDistance + nRel; + + if (nIndex > m_aPagesCache.size()) + { + nRel = m_aPagesCache.size() - nBgnDistance; + nIndex = m_aPagesCache.size() - 1; + } + else + nRel = 0; + + nLast = nIndex ? m_aPagesCache[nIndex - 1] : STG_EOF; + nBgn = m_aPagesCache[nIndex]; + } } + // special case: seek to 1st byte of new, unallocated page // (in case the file size is a multiple of the page size) if( nBytePos == nSize && nBgn == STG_EOF && !nRel && !nOffset ) @@ -404,6 +501,8 @@ StgPage* StgStrm::GetPhysPage( sal_Int32 nBytePos, sal_Bool bForce ) sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes ) { + m_aPagesCache.clear(); + sal_Int32 nTo = nStart; sal_Int32 nPgs = ( nBytes + nPageSize - 1 ) / nPageSize; while( nPgs-- ) @@ -430,6 +529,8 @@ sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes ) sal_Bool StgStrm::SetSize( sal_Int32 nBytes ) { + m_aPagesCache.clear(); + // round up to page size sal_Int32 nOld = ( ( nSize + nPageSize - 1 ) / nPageSize ) * nPageSize; sal_Int32 nNew = ( ( nBytes + nPageSize - 1 ) / nPageSize ) * nPageSize; @@ -528,6 +629,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA { if( bMake ) { + m_aPagesCache.clear(); + // create a new master page nFAT = nMaxPage++; pMaster = rIo.Copy( nFAT, STG_FREE ); @@ -582,6 +685,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage ) { + m_aPagesCache.clear(); + sal_Bool bRes = sal_True; if( nOff < rIo.aHdr.GetFAT1Size() ) rIo.aHdr.SetFATPage( nOff, nNewPage ); @@ -631,6 +736,8 @@ sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage ) sal_Bool StgFATStrm::SetSize( sal_Int32 nBytes ) { + m_aPagesCache.clear(); + // Set the number of entries to a multiple of the page size short nOld = (short) ( ( nSize + ( nPageSize - 1 ) ) / nPageSize ); short nNew = (short) ( @@ -747,16 +854,10 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen ) { // determine the actual size of the stream by scanning // the FAT chain and counting the # of pages allocated - nSize = 0; - sal_Int32 nOldBgn = -1; - while( nBgn >= 0 && nBgn != nOldBgn ) - { - nOldBgn = nBgn; - nBgn = pFat->GetNextPage( nBgn ); - if( nBgn == nOldBgn ) - rIo.SetError( ERRCODE_IO_WRONGFORMAT ); - nSize += nPageSize; - } + bool bOk = buildPageChainCache(); + if (!bOk) + rIo.SetError( ERRCODE_IO_WRONGFORMAT ); + nSize = m_aPagesCache.size() * nPageSize; } } diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx index a6a0ad108da9..cfae9e2a2041 100644 --- a/sot/source/sdstor/stgstrms.hxx +++ b/sot/source/sdstor/stgstrms.hxx @@ -30,6 +30,7 @@ #define _STGSTRMS_HXX #include <tools/stream.hxx> +#include <vector> class StgIo; class StgStrm; @@ -77,6 +78,9 @@ protected: sal_Int32 nPage; // current logical page short nOffset; // offset into current page short nPageSize; // logical page size + std::vector<sal_Int32> m_aPagesCache; + bool m_bSortedPageChain; + bool buildPageChainCache(); sal_Bool Copy( sal_Int32 nFrom, sal_Int32 nBytes ); StgStrm( StgIo& ); public: |