From 9205bc69fc5458c5090e8c1e97aed8bde9e776a1 Mon Sep 17 00:00:00 2001 From: Daniel Rentz Date: Mon, 3 May 2010 13:32:59 +0200 Subject: dr76: #i104719# preprocess VML streams to remove all quirks making expat fail --- oox/source/vml/vmlinputstream.cxx | 219 +++++++++++++++++++++++++++++++++++--- 1 file changed, 204 insertions(+), 15 deletions(-) (limited to 'oox/source/vml/vmlinputstream.cxx') diff --git a/oox/source/vml/vmlinputstream.cxx b/oox/source/vml/vmlinputstream.cxx index d80058c8f3e0..a6d50e193bc4 100644 --- a/oox/source/vml/vmlinputstream.cxx +++ b/oox/source/vml/vmlinputstream.cxx @@ -26,9 +26,13 @@ ************************************************************************/ #include "oox/vml/vmlinputstream.hxx" -#include -#include +#include +#include +#include +#include "oox/helper/helper.hxx" +using ::rtl::OString; +using ::rtl::OStringBuffer; using ::com::sun::star::uno::Exception; using ::com::sun::star::uno::Reference; using ::com::sun::star::io::XInputStream; @@ -38,6 +42,116 @@ namespace vml { // ============================================================================ +namespace { + +inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar ) +{ + sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar ); + return (nIndex < 0) ? pcEnd : (pcBeg + nIndex); +} + +inline bool lclIsWhiteSpace( sal_Char cChar ) +{ + return (cChar == ' ') || (cChar == '\t') || (cChar == '\n') || (cChar == '\r'); +} + +const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd ) +{ + for( ; pcBeg < pcEnd; ++pcBeg ) + if( lclIsWhiteSpace( *pcBeg ) ) + return pcBeg; + return pcEnd; +} + +const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd ) +{ + for( ; pcBeg < pcEnd; ++pcBeg ) + if( !lclIsWhiteSpace( *pcBeg ) ) + return pcBeg; + return pcEnd; +} + +const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd ) +{ + while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) ) + --pcEnd; + return pcEnd; +} + +inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd ) +{ + rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) ); +} + +// ---------------------------------------------------------------------------- + +void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd ) +{ + /* Map attribute names to char-pointer of all attributes. This map is used + to find multiple occurences of attributes with the same name. The + mapped pointers are used as map key in the next map below. */ + typedef ::std::map< OString, const sal_Char* > AttributeNameMap; + AttributeNameMap aAttributeNames; + + /* Map the char-pointers of all attributes to the full attribute definition + string. This preserves the original order of the used attributes. */ + typedef ::std::map< const sal_Char*, OString > AttributeDataMap; + AttributeDataMap aAttributes; + + bool bOk = true; + const sal_Char* pcNameBeg = pcBeg; + while( bOk && (pcNameBeg < pcEnd) ) + { + // pcNameBeg points to begin of attribute name, find equality sign + const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' ); + if( (bOk = pcEqualSign < pcEnd) == true ) + { + // find end of attribute name (ignore whitespace between name and equality sign) + const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign ); + if( (bOk = pcNameBeg < pcNameEnd) == true ) + { + // find begin of attribute value (must be single or double quote) + const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd ); + if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true ) + { + // find end of attribute value (matching quote character) + const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg ); + if( (bOk = pcValueEnd < pcEnd) == true ) + { + ++pcValueEnd; + OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) ); + OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) ); + // search for an existing attribute with the same name + AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName ); + // remove its definition from the data map + if( aIt != aAttributeNames.end() ) + aAttributes.erase( aIt->second ); + // insert the attribute into both maps + aAttributeNames[ aAttribName ] = pcNameBeg; + aAttributes[ pcNameBeg ] = aAttribData; + // continue with next attribute (skip whitespace after this attribute) + pcNameBeg = pcValueEnd; + if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) ) + pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd ); + } + } + } + } + } + + // if no error has occured, build the resulting attribute list + if( bOk ) + for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt ) + rBuffer.append( ' ' ).append( aIt->second ); + // on error, just append the complete passed string + else + lclAppendToBuffer( rBuffer, pcBeg, pcEnd ); +} + +} // namespace + +// ============================================================================ + StreamDataContainer::StreamDataContainer( const Reference< XInputStream >& rxInStrm ) { if( rxInStrm.is() ) try @@ -49,26 +163,101 @@ StreamDataContainer::StreamDataContainer( const Reference< XInputStream >& rxInS { } - // parse the data and eat all parser instructions that make expat sad if( maDataSeq.hasElements() ) { - sal_Char* pcBeg = reinterpret_cast< sal_Char* >( maDataSeq.getArray() ); - sal_Char* pcEnd = pcBeg + maDataSeq.getLength(); - sal_Char* pcCurr = pcBeg; + const OString aCDataOpen = CREATE_OSTRING( "" ); + + OStringBuffer aBuffer; + aBuffer.ensureCapacity( maDataSeq.getLength() + 256 ); + const sal_Char* pcCurr = reinterpret_cast< const sal_Char* >( maDataSeq.getConstArray() ); + const sal_Char* pcEnd = pcCurr + maDataSeq.getLength(); while( pcCurr < pcEnd ) { - pcCurr = ::std::find( pcCurr, pcEnd, '<' ); - sal_Char* pcClose = ::std::find( pcCurr, pcEnd, '>' ); - if( (pcCurr < pcEnd) && (pcClose < pcEnd) && (pcClose - pcCurr >= 5) && (pcCurr[ 1 ] == '!') && (pcCurr[ 2 ] == '[') && (pcClose[ -1 ] == ']') ) + // look for the next opening angle bracket + const sal_Char* pcOpen = lclFindCharacter( pcCurr, pcEnd, '<' ); + // copy all characters from current position to opening bracket + lclAppendToBuffer( aBuffer, pcCurr, pcOpen ); + + // nothing to do if no opening bracket has been found + if( pcOpen < pcEnd ) { - ++pcClose; - memmove( pcCurr, pcClose, pcEnd - pcClose ); - pcEnd -= (pcClose - pcCurr); + // string length from opening bracket to end + sal_Int32 nLengthToEnd = static_cast< sal_Int32 >( pcEnd - pcOpen ); + + // check for CDATA part, starting with '' + sal_Int32 nClosePos = rtl_str_indexOfStr_WithLength( pcOpen, nLengthToEnd, aCDataClose.getStr(), aCDataClose.getLength() ); + pcCurr = (nClosePos < 0) ? pcEnd : (pcOpen + nClosePos + aCDataClose.getLength()); + // copy the entire CDATA part + lclAppendToBuffer( aBuffer, pcOpen, pcCurr ); + } + + // no CDATA part - process the element starting at pcOpen + else + { + // look for the next closing angle bracket + const sal_Char* pcClose = lclFindCharacter( pcOpen + 1, pcEnd, '>' ); + // complete element found? + if( pcClose < pcEnd ) + { + // continue after closing bracket + pcCurr = pcClose + 1; + // length of entire element with angle brackets + sal_Int32 nElementLen = static_cast< sal_Int32 >( pcCurr - pcOpen ); + + // skip parser instructions: '' + if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') ) + { + // do nothing + } + + // replace '
' elements with '
' elements + else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) ) + { + aBuffer.append( RTL_CONSTASCII_STRINGPARAM( "
" ) ); + } + + // check start elements and empty elements for repeated attributes + else if( pcOpen[ 1 ] != '/' ) + { + // find positions of text content inside brackets, exclude '/' in '' + const sal_Char* pcContentBeg = pcOpen + 1; + bool bIsEmptyElement = pcClose[ -1 ] == '/'; + const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose; + // append element name to buffer + const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd ); + lclAppendToBuffer( aBuffer, pcOpen, pcWhiteSpace ); + // find begin of attributes, and process all attributes + const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd ); + if( pcAttribBeg < pcContentEnd ) + lclProcessAttribs( aBuffer, pcAttribBeg, pcContentEnd ); + // close the element + if( bIsEmptyElement ) + aBuffer.append( '/' ); + aBuffer.append( '>' ); + } + + // append end elements without further processing + else + { + lclAppendToBuffer( aBuffer, pcOpen, pcCurr ); + } + } + else + { + // no complete element found, copy all from opening bracket to end + lclAppendToBuffer( aBuffer, pcOpen, pcEnd ); + pcCurr = pcEnd; + } + } } - else - pcCurr = pcClose; } - maDataSeq.realloc( static_cast< sal_Int32 >( pcEnd - pcBeg ) ); + + // set the final data sequence + maDataSeq = ::comphelper::ByteSequence( reinterpret_cast< const sal_Int8* >( aBuffer.getStr() ), aBuffer.getLength() ); } } -- cgit