/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include "fastserializer.hxx"

// NOTE(review): the header names of the following includes were chosen to
// match the symbols this file uses (FastTokenHandler::create,
// rtl_math_doubleToString, getProcessComponentContext, SequenceAsVector,
// strlen/memcpy, std::cerr, std::set) - confirm against the build.
#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
#include <rtl/math.hxx>
#include <comphelper/processfactory.hxx>
#include <comphelper/sequenceasvector.hxx>

#include <string.h>

#if OSL_DEBUG_LEVEL > 0
#include <iostream>
#include <set>
#endif

using ::comphelper::SequenceAsVector;
using ::com::sun::star::uno::Reference;
using ::com::sun::star::uno::Sequence;
using ::com::sun::star::xml::Attribute;
using ::com::sun::star::io::XOutputStream;

// A fast token carries the namespace id in its upper 16 bits and the local
// token id in its lower 16 bits.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)

static const char sClosingBracket[] = ">";
static const char sSlashAndClosingBracket[] = "/>";
static const char sColon[] = ":";
static const char sOpeningBracket[] = "<";
static const char sOpeningBracketAndSlash[] = "</";
static const char sQuote[] = "\"";
static const char sEqualSignAndQuote[] = "=\"";
static const char sSpace[] = " ";
// NOTE(review): exact declaration text to be confirmed; only the trailing
// "\n" is certain.
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

namespace sax_fastparser {

/** Creates a serializer that writes to @p xOutputStream.
 *
 *  Allocates the reusable buffer used by write(double), creates the fast
 *  token handler from the process component context and hands the stream to
 *  the cached output stream.
 *
 *  @param xOutputStream  target stream; asserted to be non-null.
 */
FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
    : maCachedOutputStream()
    , maMarkStack()
    , mbMarkStackEmpty(true)
    , mpDoubleStr(NULL)
    , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
{
    rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
    mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
            ::comphelper::getProcessComponentContext());
    assert(xOutputStream.is()); // cannot do anything without that
    maCachedOutputStream.setOutputStream( xOutputStream );
}

/** Releases the buffer allocated for write(double). */
FastSaxSerializer::~FastSaxSerializer()
{
    rtl_string_release(mpDoubleStr);
}

/** Writes the XML declaration (sXmlHeader). */
void FastSaxSerializer::startDocument()
{
    writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
}

/** Writes @p value formatted by rtl_math_doubleToString.
 *
 *  The member buffer mpDoubleStr is reused across calls: after writing, its
 *  length is reset to 0 and the capacity variable back to
 *  RTL_STR_MAX_VALUEOFDOUBLE.
 */
void FastSaxSerializer::write( double value )
{
    rtl_math_doubleToString(
        &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
        RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
        0, sal_True);

    write(mpDoubleStr->buffer, mpDoubleStr->length);
    // and "clear" the string
    mpDoubleStr->length = 0;
    mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
}

/** Writes a UTF-16 string as UTF-8.
 *
 *  @param sOutput  text to write.
 *  @param bEscape  when true, the ASCII characters handled by
 *                  write(const char*, sal_Int32, bool) are written escaped.
 */
void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
{
    const sal_Int32 nLength = sOutput.getLength();
    for (sal_Int32 i = 0; i < nLength; ++i)
    {
        const sal_Unicode cUnicode = sOutput[ i ];
        if (cUnicode & 0xff80)
        {
            // Non-ASCII: convert to UTF-8.  A high surrogate followed by a
            // low surrogate encodes a single code point, so both UTF-16
            // units must be converted together.
            sal_Int32 nUnits = 1;
            if (cUnicode >= 0xD800 && cUnicode <= 0xDBFF && i + 1 < nLength)
            {
                const sal_Unicode cNext = sOutput[ i + 1 ];
                if (cNext >= 0xDC00 && cNext <= 0xDFFF)
                    nUnits = 2;
            }
            write( OString(sOutput.getStr() + i, nUnits, RTL_TEXTENCODING_UTF8) );
            i += nUnits - 1; // skip the low surrogate that was consumed
        }
        else
        {
            // 7-bit ASCII: share the escaping table of the byte overload.
            const char cChar = static_cast<char>(cUnicode);
            write( &cChar, 1, bEscape );
        }
    }
}

/** Writes the bytes of @p sOutput, escaped when @p bEscape is true. */
void FastSaxSerializer::write( const OString& sOutput, bool bEscape )
{
    write( sOutput.getStr(), sOutput.getLength(), bEscape );
}

/** Writes @p nLen bytes starting at @p pStr.
 *
 *  @param pStr     bytes to write; may be null only when nLen is -1 or 0.
 *  @param nLen     number of bytes, or -1 to use strlen(pStr) (0 for null).
 *  @param bEscape  when true, '<', '>', '&', '\'', '"', '\n' and '\r' are
 *                  replaced by XML entity / character references.
 */
void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
{
    if (nLen == -1)
        nLen = pStr ? strlen(pStr) : 0;

    if (!bEscape)
    {
        writeBytes( pStr, nLen );
        return;
    }

    for (sal_Int32 i = 0; i < nLen; ++i)
    {
        char c = pStr[ i ];
        switch( c )
        {
            // The length passed is the length of the replacement text.
            case '<':   writeBytes( "&lt;", 4 );     break;
            case '>':   writeBytes( "&gt;", 4 );     break;
            case '&':   writeBytes( "&amp;", 5 );    break;
            case '\'':  writeBytes( "&apos;", 6 );   break;
            case '"':   writeBytes( "&quot;", 6 );   break;
            case '\n':  writeBytes( "&#10;", 5 );    break;
            case '\r':  writeBytes( "&#13;", 5 );    break;
            default:    writeBytes( &c, 1 );         break;
        }
    }
}

/** Flushes the cached output; asserts that no marks are left open. */
void FastSaxSerializer::endDocument()
{
    assert(mbMarkStackEmpty && maMarkStack.empty());
    maCachedOutputStream.flush();
}

/** Writes the name for token @p nElement, as "prefix:name" when the token
 *  has a namespace part and as the plain identifier otherwise.
 */
void FastSaxSerializer::writeId( ::sal_Int32 nElement )
{
    if( HAS_NAMESPACE( nElement ) )
    {
        writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
        writeBytes(sColon, N_CHARS(sColon));
        writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
    }
    else
        writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
}

#ifdef DBG_UTIL
/** Returns the same text writeId() would write, as an OString.
 *  Only compiled with DBG_UTIL; used for the duplicate-attribute asserts.
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const ns(
            mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
        return OString(reinterpret_cast<const char*>(ns.getConstArray()), ns.getLength())
             + OString(sColon, N_CHARS(sColon))
             + OString(reinterpret_cast<const char*>(name.getConstArray()), name.getLength());
    }
    else
    {
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<const char*>(name.getConstArray()), name.getLength());
    }
}
#endif

/** Writes the start tag of @p Element.
 *
 *  @param Element    fast token of the element.
 *  @param pAttrList  attributes to write; when null, the pending
 *                    token/value pairs in maTokenValues are written instead.
 */
void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
    if ( !mbMarkStackEmpty )
    {
        // Flush what was written so far before telling the top mark that
        // subsequent output belongs to another element.
        maCachedOutputStream.flush();
        maMarkStack.top()->setCurrentElement( Element );
    }

#ifdef DBG_UTIL
    m_DebugStartedElements.push(Element);
#endif

    writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));

    writeId(Element);
    if (pAttrList)
        writeFastAttributeList(*pAttrList);
    else
        writeTokenValueList();

    writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
}

void FastSaxSerializer::endFastElement( ::sal_Int32 Element ) { #ifdef DBG_UTIL assert(!m_DebugStartedElements.empty()); // Well-formedness constraint: Element Type Match assert(Element == m_DebugStartedElements.top()); m_DebugStartedElements.pop(); #endif writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash)); writeId(Element); writeBytes(sClosingBracket, N_CHARS(sClosingBracket)); } void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList ) { if ( !mbMarkStackEmpty ) { maCachedOutputStream.flush(); maMarkStack.top()->setCurrentElement( Element ); } writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); writeId(Element); if (pAttrList) writeFastAttributeList(*pAttrList); else writeTokenValueList(); writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket)); } ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream() { return maCachedOutputStream.getOutputStream(); } void FastSaxSerializer::writeTokenValueList() { #ifdef DBG_UTIL ::std::set DebugAttributes; #endif for (size_t j = 0; j < maTokenValues.size(); j++) { writeBytes(sSpace, N_CHARS(sSpace)); sal_Int32 nToken = maTokenValues[j].nToken; writeId(nToken); #ifdef DBG_UTIL // Well-formedness constraint: Unique Att Spec OString const nameId(getId(nToken)); assert(DebugAttributes.find(nameId) == DebugAttributes.end()); DebugAttributes.insert(nameId); #endif writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); write(maTokenValues[j].pValue, -1, true); writeBytes(sQuote, N_CHARS(sQuote)); } maTokenValues.clear(); } void FastSaxSerializer::writeFastAttributeList(FastAttributeList& rAttrList) { #ifdef DBG_UTIL ::std::set DebugAttributes; #endif const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens(); for (size_t j = 0; j < Tokens.size(); j++) { writeBytes(sSpace, N_CHARS(sSpace)); sal_Int32 nToken = Tokens[j]; writeId(nToken); #ifdef DBG_UTIL // Well-formedness constraint: 
Unique Att Spec OString const nameId(getId(nToken)); assert(DebugAttributes.find(nameId) == DebugAttributes.end()); DebugAttributes.insert(nameId); #endif writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); write(rAttrList.getFastAttributeValue(j), rAttrList.AttributeValueLength(j), true); writeBytes(sQuote, N_CHARS(sQuote)); } } void FastSaxSerializer::mark( const Int32Sequence& aOrder ) { if ( aOrder.hasElements() ) { boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) ); maMarkStack.push( pSort ); maCachedOutputStream.setOutput( pSort ); } else { boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) ); maMarkStack.push( pMerge ); maCachedOutputStream.setOutput( pMerge ); } mbMarkStackEmpty = false; } void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType ) { SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); if ( mbMarkStackEmpty ) return; // flush, so that we get everything in getData() maCachedOutputStream.flush(); if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE) { Sequence aSeq( maMarkStack.top()->getData() ); maMarkStack.pop(); mbMarkStackEmpty = true; maCachedOutputStream.resetOutputToStream(); maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() ); return; } const Int8Sequence aMerge( maMarkStack.top()->getData() ); maMarkStack.pop(); if (maMarkStack.empty()) { mbMarkStackEmpty = true; maCachedOutputStream.resetOutputToStream(); } else { maCachedOutputStream.setOutput( maMarkStack.top() ); } switch ( eMergeType ) { case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break; case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break; case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break; case MERGE_MARKS_IGNORE : break; } } void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) { maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() ); } void FastSaxSerializer::writeBytes( const 
char* pStr, size_t nLen ) { maCachedOutputStream.writeBytes( reinterpret_cast(pStr), nLen ); } FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() { merge( maData, maPostponed, true ); maPostponed.realloc( 0 ); return maData; } #if OSL_DEBUG_LEVEL > 0 void FastSaxSerializer::ForMerge::print( ) { std::cerr << "Data: "; for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ ) { std::cerr << maData[i]; } std::cerr << "\nPostponed: "; for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ ) { std::cerr << maPostponed[i]; } std::cerr << "\n"; } #endif void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat ) { merge( maData, rWhat, false ); } void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat ) { merge( maData, rWhat, true ); } void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat ) { merge( maPostponed, rWhat, true ); } void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ) { sal_Int32 nMergeLen = rMerge.getLength(); if ( nMergeLen > 0 ) { sal_Int32 nTopLen = rTop.getLength(); rTop.realloc( nTopLen + nMergeLen ); if ( bAppend ) { // append the rMerge to the rTop memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen ); } else { // prepend the rMerge to the rTop memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen ); memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen ); } } } void FastSaxSerializer::ForMerge::resetData( ) { maData = Int8Sequence(); } void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement ) { SequenceAsVector< sal_Int32 > aOrder( maOrder ); if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() ) { mnCurrentElement = nElement; if ( maData.find( nElement ) == maData.end() ) maData[ nElement ] = Int8Sequence(); } } void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat ) { append( rWhat ); } void FastSaxSerializer::ForSort::append( const Int8Sequence 
&rWhat ) { merge( maData[mnCurrentElement], rWhat, true ); } void FastSaxSerializer::ForSort::sort() { // Clear the ForMerge data to avoid duplicate items resetData(); // Sort it all std::map< sal_Int32, Int8Sequence >::iterator iter; for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ ) { iter = maData.find( maOrder[i] ); if ( iter != maData.end() ) ForMerge::append( iter->second ); } } FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData() { sort( ); return ForMerge::getData(); } #if OSL_DEBUG_LEVEL > 0 void FastSaxSerializer::ForSort::print( ) { std::map< sal_Int32, Int8Sequence >::iterator iter = maData.begin(); while ( iter != maData.end( ) ) { std::cerr << "pair: " << iter->first; for ( sal_Int32 i=0, len=iter->second.getLength(); i < len; ++i ) std::cerr << iter->second[i]; std::cerr << "\n"; ++iter; } sort( ); ForMerge::print(); } #endif } // namespace sax_fastparser /* vim:set shiftwidth=4 softtabstop=4 expandtab: */