From a5328cf5605cdc243522eddcaffe7336196a4900 Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Wed, 30 May 2012 11:43:41 +0200 Subject: fdo#49968 speed up RTF import of repeated character/paragraph properties Most RTF documents (produced by Word/Writer) reset character and paragraph properties at the start of each paragraph. Because of this, appending properties of the same type didn't cause any noticeable performance problems. However, it's valid to not reset these properties, and in this case a longer document takes forever to import. Filter these duplicates at the tokenizer level for trivial properties to get acceptable import speed. Also fixes rhbz#825548 in an easier-to-backport way. Change-Id: Id0b7289323d45ff0d747c74bb78d8eb7def0cfc2 --- writerfilter/source/rtftok/rtfdocumentimpl.cxx | 20 ++++++++++---------- writerfilter/source/rtftok/rtfsprm.cxx | 14 ++++++++++++++ writerfilter/source/rtftok/rtfsprm.hxx | 2 ++ 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'writerfilter/source/rtftok') diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 2182c5cf6541..431a0dc47434 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -1614,7 +1614,7 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) if (nParam >= 0) { RTFValue::Pointer_t pValue(new RTFValue(nParam)); - m_aStates.top().aParagraphSprms->push_back(make_pair(NS_sprm::LN_PJc, pValue)); + m_aStates.top().aParagraphSprms.set(NS_sprm::LN_PJc, pValue); return 0; } @@ -2223,7 +2223,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) } if (nSprm > 0) { - m_aStates.top().aCharacterSprms->push_back(make_pair(nSprm, pIntValue)); + m_aStates.top().aCharacterSprms.set(nSprm, pIntValue); // Language is a character property, but we should store it at a paragraph level as well for fields. 
if (nKeyword == RTF_LANG && m_bNeedPap) m_aStates.top().aParagraphSprms->push_back(make_pair(nSprm, pIntValue)); @@ -2241,7 +2241,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) } if (nSprm > 0) { - m_aStates.top().aParagraphSprms->push_back(make_pair(nSprm, pIntValue)); + m_aStates.top().aParagraphSprms.set(nSprm, pIntValue); if (nKeyword == RTF_ITAP && nParam > 0) // Invalid tables may omit INTBL after ITAP dispatchFlag(RTF_INTBL); @@ -2306,7 +2306,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) { int nFontIndex = getFontIndex(nParam); RTFValue::Pointer_t pValue(new RTFValue(nFontIndex)); - m_aStates.top().aCharacterSprms->push_back(make_pair(NS_sprm::LN_CRgFtc0, pValue)); + m_aStates.top().aCharacterSprms.set(NS_sprm::LN_CRgFtc0, pValue); m_aStates.top().nCurrentEncoding = getEncoding(nFontIndex); } break; @@ -2354,7 +2354,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) m_aStates.top().aTableAttributes->push_back(make_pair(NS_rtf::LN_SGC, pValue)); // paragraph style } else - m_aStates.top().aParagraphAttributes->push_back(make_pair(NS_rtf::LN_ISTD, pIntValue)); + m_aStates.top().aParagraphAttributes.set(NS_rtf::LN_ISTD, pIntValue); break; case RTF_CS: if (m_aStates.top().nDestinationState == DESTINATION_STYLESHEET) @@ -2814,11 +2814,11 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) break; case RTF_SB: lcl_putNestedAttribute(m_aStates.top().aParagraphSprms, - NS_ooxml::LN_CT_PPrBase_spacing, NS_ooxml::LN_CT_Spacing_before, pIntValue); + NS_ooxml::LN_CT_PPrBase_spacing, NS_ooxml::LN_CT_Spacing_before, pIntValue, true); break; case RTF_SA: lcl_putNestedAttribute(m_aStates.top().aParagraphSprms, - NS_ooxml::LN_CT_PPrBase_spacing, NS_ooxml::LN_CT_Spacing_after, pIntValue); + NS_ooxml::LN_CT_PPrBase_spacing, NS_ooxml::LN_CT_Spacing_after, pIntValue, true); break; case RTF_DPX: m_aStates.top().aDrawingObject.nLeft = TWIP_TO_MM100(nParam); @@ -2868,11 +2868,11 @@ int 
RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) m_aStates.top().aDrawingObject.nFillColorB = nParam; m_aStates.top().aDrawingObject.bHasFillColor = true; break; case RTF_LI: - m_aStates.top().aParagraphSprms->push_back(make_pair(NS_sprm::LN_PDxaLeft, pIntValue)); + m_aStates.top().aParagraphSprms.set(NS_sprm::LN_PDxaLeft, pIntValue); // It turns out \li should reset the \fi inherited from the stylesheet. // So set the direct formatting to zero, if we don't have such direct formatting yet. if (!m_aStates.top().aParagraphSprms.find(NS_sprm::LN_PDxaLeft1).get()) - m_aStates.top().aParagraphSprms->push_back(make_pair(NS_sprm::LN_PDxaLeft1, RTFValue::Pointer_t(new RTFValue(0)))); + m_aStates.top().aParagraphSprms.set(NS_sprm::LN_PDxaLeft1, RTFValue::Pointer_t(new RTFValue(0))); break; default: SAL_INFO("writerfilter", OSL_THIS_FUNC << ": TODO handle value '" << lcl_RtfToString(nKeyword) << "'"); @@ -2953,7 +2953,7 @@ int RTFDocumentImpl::dispatchToggle(RTFKeyword nKeyword, bool bParam, int nParam } if (nSprm >= 0) { - m_aStates.top().aCharacterSprms->push_back(make_pair(nSprm, pBoolValue)); + m_aStates.top().aCharacterSprms.set(nSprm, pBoolValue); return 0; } diff --git a/writerfilter/source/rtftok/rtfsprm.cxx b/writerfilter/source/rtftok/rtfsprm.cxx index d84d37474f80..3a17927f3708 100644 --- a/writerfilter/source/rtftok/rtfsprm.cxx +++ b/writerfilter/source/rtftok/rtfsprm.cxx @@ -105,6 +105,20 @@ RTFValue::Pointer_t RTFSprms::find(Id nKeyword) return pValue; } +void RTFSprms::set(Id nKeyword, RTFValue::Pointer_t pValue, bool bOverwrite) +{ + if (bOverwrite) + { + for (RTFSprms::Iterator_t i = m_aSprms.begin(); i != m_aSprms.end(); ++i) + if (i->first == nKeyword) + { + i->second = pValue; + return; + } + } + m_aSprms.push_back(std::make_pair(nKeyword, pValue)); +} + bool RTFSprms::erase(Id nKeyword) { for (RTFSprms::Iterator_t i = m_aSprms.begin(); i != m_aSprms.end(); ++i) diff --git a/writerfilter/source/rtftok/rtfsprm.hxx 
b/writerfilter/source/rtftok/rtfsprm.hxx index b8275e14a637..f4bd462a9a60 100644 --- a/writerfilter/source/rtftok/rtfsprm.hxx +++ b/writerfilter/source/rtftok/rtfsprm.hxx @@ -44,6 +44,8 @@ namespace writerfilter { RTFSprms& operator=(const RTFSprms& rOther); std::vector< std::pair<Id, RTFValue::Pointer_t> >* operator->(); RTFValue::Pointer_t find(Id nKeyword); + /// Does the same as ->push_back(), except that it can overwrite existing entries. + void set(Id nKeyword, RTFValue::Pointer_t pValue, bool bOverwrite = true); bool erase(Id nKeyword); void swap(RTFSprms& rOther); private: -- cgit