/************************************************************************* * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * Copyright 2008 by Sun Microsystems, Inc. * * OpenOffice.org - a multi-platform office productivity suite * * $RCSfile: svparser.cxx,v $ * $Revision: 1.17 $ * * This file is part of OpenOffice.org. * * OpenOffice.org is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License version 3 * only, as published by the Free Software Foundation. * * OpenOffice.org is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License version 3 for more details * (a copy is included in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU Lesser General Public License * version 3 along with OpenOffice.org. If not, see * * for a copy of the LGPLv3 License. * ************************************************************************/ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_svtools.hxx" /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */ #include #include #include #include #define _SVSTDARR_USHORTS #include #include #include #define SVPAR_CSM_ #define SVPAR_CSM_ANSI 0x0001U #define SVPAR_CSM_UTF8 0x0002U #define SVPAR_CSM_UCS2B 0x0004U #define SVPAR_CSM_UCS2L 0x0008U #define SVPAR_CSM_SWITCH 0x8000U // Struktur, um sich die akt. Daten zumerken struct SvParser_Impl { String aToken; // gescanntes Token ULONG nFilePos; // akt. Position im Stream ULONG nlLineNr; // akt. Zeilen Nummer ULONG nlLinePos; // akt. Spalten Nummer long nTokenValue; // zusaetzlicher Wert (RTF) BOOL bTokenHasValue; // indicates whether nTokenValue is valid int nToken; // akt. Token sal_Unicode nNextCh; // akt. Zeichen int nSaveToken; // das Token vom Continue rtl_TextToUnicodeConverter hConv; rtl_TextToUnicodeContext hContext; #ifndef PRODUCT SvFileStream aOut; #endif SvParser_Impl() : nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext)1 ) { } }; // Konstruktor SvParser::SvParser( SvStream& rIn, BYTE nStackSize ) : rInput( rIn ) , nlLineNr( 1 ) , nlLinePos( 1 ) , pImplData( 0 ) , nTokenValue( 0 ) , bTokenHasValue( false ) , eState( SVPAR_NOTSTARTED ) , eSrcEnc( RTL_TEXTENCODING_DONTKNOW ) , bDownloadingFile( FALSE ) , nTokenStackSize( nStackSize ) , nTokenStackPos( 0 ) { bUCS2BSrcEnc = bSwitchToUCS2 = FALSE; eState = SVPAR_NOTSTARTED; if( nTokenStackSize < 3 ) nTokenStackSize = 3; pTokenStack = new TokenStackType[ nTokenStackSize ]; pTokenStackPos = pTokenStack; #ifndef PRODUCT // wenn die Datei schon existiert, dann Anhaengen: if( !pImplData ) pImplData = new SvParser_Impl; pImplData->aOut.Open( String::CreateFromAscii( "\\parser.dmp" ), STREAM_STD_WRITE | STREAM_NOCREATE ); if( pImplData->aOut.GetError() || !pImplData->aOut.IsOpen() ) pImplData->aOut.Close(); else { pImplData->aOut.Seek( STREAM_SEEK_TO_END ); pImplData->aOut << "\x0c\n\n >>>>>>>>>>>>>>> Dump Start <<<<<<<<<<<<<<<\n"; } #endif } SvParser::~SvParser() { #ifndef PRODUCT if( pImplData->aOut.IsOpen() ) pImplData->aOut << "\n\n >>>>>>>>>>>>>>> Dump Ende <<<<<<<<<<<<<<<\n"; pImplData->aOut.Close(); #endif if( pImplData && pImplData->hConv ) { rtl_destroyTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); rtl_destroyTextToUnicodeConverter( pImplData->hConv ); } delete pImplData; delete [] pTokenStack; } void SvParser::ClearTxtConvContext() { if( pImplData && pImplData->hConv ) rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); } void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) { if( eEnc != eSrcEnc ) { if( pImplData && pImplData->hConv ) { rtl_destroyTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); rtl_destroyTextToUnicodeConverter( pImplData->hConv ); pImplData->hConv = 0; pImplData->hContext = (rtl_TextToUnicodeContext )1; } if( rtl_isOctetTextEncoding(eEnc) || RTL_TEXTENCODING_UCS2 == eEnc ) { eSrcEnc = eEnc; if( !pImplData ) pImplData = new SvParser_Impl; pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc ); DBG_ASSERT( pImplData->hConv, "SvParser::SetSrcEncoding: no converter for source encoding" ); if( !pImplData->hConv ) eSrcEnc = RTL_TEXTENCODING_DONTKNOW; else pImplData->hContext = rtl_createTextToUnicodeContext( pImplData->hConv ); } else { DBG_ASSERT( !this, "SvParser::SetSrcEncoding: invalid source encoding" ); eSrcEnc = RTL_TEXTENCODING_DONTKNOW; } } } void SvParser::RereadLookahead() { rInput.Seek(nNextChPos); nNextCh = GetNextChar(); } sal_Unicode SvParser::GetNextChar() { sal_Unicode c = 0U; // When reading muliple bytes, we don't have to care about the file // position when we run inti the pending state. The file position is // maintained by SaveState/RestoreState. BOOL bErr; if( bSwitchToUCS2 && 0 == rInput.Tell() ) { sal_uChar c1, c2; BOOL bSeekBack = TRUE; rInput >> c1; bErr = rInput.IsEof() || rInput.GetError(); if( !bErr ) { if( 0xff == c1 || 0xfe == c1 ) { rInput >> c2; bErr = rInput.IsEof() || rInput.GetError(); if( !bErr ) { if( 0xfe == c1 && 0xff == c2 ) { eSrcEnc = RTL_TEXTENCODING_UCS2; bUCS2BSrcEnc = TRUE; bSeekBack = FALSE; } else if( 0xff == c1 && 0xfe == c2 ) { eSrcEnc = RTL_TEXTENCODING_UCS2; bUCS2BSrcEnc = FALSE; bSeekBack = FALSE; } } } } if( bSeekBack ) rInput.Seek( 0 ); bSwitchToUCS2 = FALSE; } nNextChPos = rInput.Tell(); if( RTL_TEXTENCODING_UCS2 == eSrcEnc ) { sal_Unicode cUC = USHRT_MAX; sal_uChar c1, c2; rInput >> c1 >> c2; if( 2 == rInput.Tell() && !(rInput.IsEof() || rInput.GetError()) && ( (bUCS2BSrcEnc && 0xfe == c1 && 0xff == c2) || (!bUCS2BSrcEnc && 0xff == c1 && 0xfe == c2) ) ) rInput >> c1 >> c2; bErr = rInput.IsEof() || rInput.GetError(); if( !bErr ) { if( bUCS2BSrcEnc ) cUC = (sal_Unicode(c1) << 8) | c2; else cUC = (sal_Unicode(c2) << 8) | c1; } if( !bErr ) { c = cUC; } } else { sal_Size nChars = 0; do { sal_Char c1; // signed, that's the text converter expects rInput >> c1; bErr = rInput.IsEof() || rInput.GetError(); if( !bErr ) { if ( RTL_TEXTENCODING_DONTKNOW == eSrcEnc || RTL_TEXTENCODING_SYMBOL == eSrcEnc ) { // no convserion shall take place c = (sal_Unicode)c1; nChars = 1; } else { DBG_ASSERT( pImplData && pImplData->hConv, "no text converter!" ); sal_Unicode cUC; sal_uInt32 nInfo = 0; sal_Size nCvtBytes; nChars = rtl_convertTextToUnicode( pImplData->hConv, pImplData->hContext, &c1, 1, &cUC, 1, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, &nInfo, &nCvtBytes); if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) { // The conversion wasn't successfull because we haven't // read enough characters. if( pImplData->hContext != (rtl_TextToUnicodeContext)1 ) { while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) { rInput >> c1; bErr = rInput.IsEof() || rInput.GetError(); if( bErr ) break; nChars = rtl_convertTextToUnicode( pImplData->hConv, pImplData->hContext, &c1, 1, &cUC, 1, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, &nInfo, &nCvtBytes); } if( !bErr ) { if( 1 == nChars && 0 == nInfo ) { c = cUC; } else if( 0 != nChars || 0 != nInfo ) { DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, "source buffer is to small" ); DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0, "there is a conversion error" ); DBG_ASSERT( 0 == nChars, "there is a converted character, but an error" ); // There are still errors, but nothing we can // do c = (sal_Unicode)'?'; nChars = 1; } } } else { sal_Char sBuffer[10]; sBuffer[0] = c1; sal_uInt16 nLen = 1; while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 && nLen < 10 ) { rInput >> c1; bErr = rInput.IsEof() || rInput.GetError(); if( bErr ) break; sBuffer[nLen++] = c1; nChars = rtl_convertTextToUnicode( pImplData->hConv, 0, sBuffer, nLen, &cUC, 1, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, &nInfo, &nCvtBytes); } if( !bErr ) { if( 1 == nChars && 0 == nInfo ) { DBG_ASSERT( nCvtBytes == nLen, "no all bytes have been converted!" ); c = cUC; } else { DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, "source buffer is to small" ); DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0, "there is a conversion error" ); DBG_ASSERT( 0 == nChars, "there is a converted character, but an error" ); // There are still errors, so we use the first // character and restart after that. c = (sal_Unicode)sBuffer[0]; rInput.SeekRel( -(nLen-1) ); nChars = 1; } } } } else if( 1 == nChars && 0 == nInfo ) { // The conversion was successfull DBG_ASSERT( nCvtBytes == 1, "no all bytes have been converted!" ); c = cUC; } else if( 0 != nChars || 0 != nInfo ) { DBG_ASSERT( 0 == nChars, "there is a converted character, but an error" ); DBG_ASSERT( 0 != nInfo, "there is no converted character and no error" ); // #73398#: If the character could not be converted, // because a conversion is not available, do no conversion at all. c = (sal_Unicode)c1; nChars = 1; } } } } while( 0 == nChars && !bErr ); } if( bErr ) { if( ERRCODE_IO_PENDING == rInput.GetError() ) { eState = SVPAR_PENDING; return c; } else return sal_Unicode(EOF); } #ifndef PRODUCT if( pImplData->aOut.IsOpen() ) pImplData->aOut << ByteString::ConvertFromUnicode( c, RTL_TEXTENCODING_MS_1251 ); #endif if( c == '\n' ) { IncLineNr(); SetLinePos( 1L ); } else IncLinePos(); return c; } int SvParser::GetNextToken() { int nRet = 0; if( !nTokenStackPos ) { aToken.Erase(); // Token-Buffer loeschen nTokenValue = -1; // Kennzeichen fuer kein Value gelesen bTokenHasValue = false; nRet = _GetNextToken(); if( SVPAR_PENDING == eState ) return nRet; } ++pTokenStackPos; if( pTokenStackPos == pTokenStack + nTokenStackSize ) pTokenStackPos = pTokenStack; // vom Stack holen ?? if( nTokenStackPos ) { --nTokenStackPos; nTokenValue = pTokenStackPos->nTokenValue; bTokenHasValue = pTokenStackPos->bTokenHasValue; aToken = pTokenStackPos->sToken; nRet = pTokenStackPos->nTokenId; } // nein, dann das aktuelle auf den Stack else if( SVPAR_WORKING == eState ) { pTokenStackPos->sToken = aToken; pTokenStackPos->nTokenValue = nTokenValue; pTokenStackPos->bTokenHasValue = bTokenHasValue; pTokenStackPos->nTokenId = nRet; } else if( SVPAR_ACCEPTED != eState && SVPAR_PENDING != eState ) eState = SVPAR_ERROR; // irgend ein Fehler return nRet; } int SvParser::SkipToken( short nCnt ) // n Tokens zurueck "skippen" { pTokenStackPos = GetStackPtr( nCnt ); short nTmp = nTokenStackPos - nCnt; if( nTmp < 0 ) nTmp = 0; else if( nTmp > nTokenStackSize ) nTmp = nTokenStackSize; nTokenStackPos = BYTE(nTmp); // und die Werte zurueck aToken = pTokenStackPos->sToken; nTokenValue = pTokenStackPos->nTokenValue; bTokenHasValue = pTokenStackPos->bTokenHasValue; return pTokenStackPos->nTokenId; } SvParser::TokenStackType* SvParser::GetStackPtr( short nCnt ) { BYTE nAktPos = BYTE(pTokenStackPos - pTokenStack ); if( nCnt > 0 ) { if( nCnt >= nTokenStackSize ) nCnt = (nTokenStackSize-1); if( nAktPos + nCnt < nTokenStackSize ) nAktPos = sal::static_int_cast< BYTE >(nAktPos + nCnt); else nAktPos = sal::static_int_cast< BYTE >( nAktPos + (nCnt - nTokenStackSize)); } else if( nCnt < 0 ) { if( -nCnt >= nTokenStackSize ) nCnt = -nTokenStackSize+1; if( -nCnt <= nAktPos ) nAktPos = sal::static_int_cast< BYTE >(nAktPos + nCnt); else nAktPos = sal::static_int_cast< BYTE >( nAktPos + (nCnt + nTokenStackSize)); } return pTokenStack + nAktPos; } // wird fuer jedes Token gerufen, das in CallParser erkannt wird void SvParser::NextToken( int ) { } // fuers asynchrone lesen aus dem SvStream int SvParser::GetSaveToken() const { return pImplData ? pImplData->nSaveToken : 0; } void SvParser::SaveState( int nToken ) { // aktuellen Status merken if( !pImplData ) { pImplData = new SvParser_Impl; pImplData->nSaveToken = 0; } pImplData->nFilePos = rInput.Tell(); pImplData->nToken = nToken; pImplData->aToken = aToken; pImplData->nlLineNr = nlLineNr; pImplData->nlLinePos = nlLinePos; pImplData->nTokenValue= nTokenValue; pImplData->bTokenHasValue = bTokenHasValue; pImplData->nNextCh = nNextCh; } void SvParser::RestoreState() { // alten Status wieder zurueck setzen if( pImplData ) { if( ERRCODE_IO_PENDING == rInput.GetError() ) rInput.ResetError(); aToken = pImplData->aToken; nlLineNr = pImplData->nlLineNr; nlLinePos = pImplData->nlLinePos; nTokenValue= pImplData->nTokenValue; bTokenHasValue=pImplData->bTokenHasValue; nNextCh = pImplData->nNextCh; pImplData->nSaveToken = pImplData->nToken; rInput.Seek( pImplData->nFilePos ); } } void SvParser::Continue( int ) { } void SvParser::BuildWhichTbl( SvUShorts &rWhichMap, USHORT *pWhichIds, USHORT nWhichIds ) { USHORT aNewRange[2]; for( USHORT nCnt = 0; nCnt < nWhichIds; ++nCnt, ++pWhichIds ) if( *pWhichIds ) { aNewRange[0] = aNewRange[1] = *pWhichIds; BOOL bIns = TRUE; // Position suchen for ( USHORT nOfs = 0; rWhichMap[nOfs]; nOfs += 2 ) { if( *pWhichIds < rWhichMap[nOfs] - 1 ) { // neuen Range davor rWhichMap.Insert( aNewRange, 2, nOfs ); bIns = FALSE; break; } else if( *pWhichIds == rWhichMap[nOfs] - 1 ) { // diesen Range nach unten erweitern rWhichMap[nOfs] = *pWhichIds; bIns = FALSE; break; } else if( *pWhichIds == rWhichMap[nOfs+1] + 1 ) { if( rWhichMap[nOfs+2] != 0 && rWhichMap[nOfs+2] == *pWhichIds + 1 ) { // mit dem naechsten Bereich mergen rWhichMap[nOfs+1] = rWhichMap[nOfs+3]; rWhichMap.Remove( nOfs+2, 2 ); } else // diesen Range nach oben erweitern rWhichMap[nOfs+1] = *pWhichIds; bIns = FALSE; break; } } // einen Range hinten anhaengen if( bIns ) rWhichMap.Insert( aNewRange, 2, rWhichMap.Count()-1 ); } } IMPL_STATIC_LINK( SvParser, NewDataRead, void*, EMPTYARG ) { switch( pThis->eState ) { case SVPAR_PENDING: // Wenn gerade ein File geladen wird duerfen wir nicht weiterlaufen, // sondern muessen den Aufruf ignorieren. if( pThis->IsDownloadingFile() ) break; pThis->eState = SVPAR_WORKING; pThis->RestoreState(); pThis->Continue( pThis->pImplData->nToken ); if( ERRCODE_IO_PENDING == pThis->rInput.GetError() ) pThis->rInput.ResetError(); if( SVPAR_PENDING != pThis->eState ) pThis->ReleaseRef(); // ansonsten sind wir fertig! break; case SVPAR_WAITFORDATA: pThis->eState = SVPAR_WORKING; break; case SVPAR_NOTSTARTED: case SVPAR_WORKING: break; default: pThis->ReleaseRef(); // ansonsten sind wir fertig! break; } return 0; } /*======================================================================== * * SvKeyValueIterator. * *======================================================================*/ SV_DECL_PTRARR_DEL(SvKeyValueList_Impl, SvKeyValue*, 0, 4) SV_IMPL_PTRARR(SvKeyValueList_Impl, SvKeyValue*); /* * SvKeyValueIterator. */ SvKeyValueIterator::SvKeyValueIterator (void) : m_pList (new SvKeyValueList_Impl), m_nPos (0) { } /* * ~SvKeyValueIterator. */ SvKeyValueIterator::~SvKeyValueIterator (void) { delete m_pList; } /* * GetFirst. */ BOOL SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal) { m_nPos = m_pList->Count(); return GetNext (rKeyVal); } /* * GetNext. */ BOOL SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal) { if (m_nPos > 0) { rKeyVal = *m_pList->GetObject(--m_nPos); return TRUE; } else { // Nothing to do. return FALSE; } } /* * Append. */ void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal) { SvKeyValue *pKeyVal = new SvKeyValue (rKeyVal); m_pList->C40_INSERT(SvKeyValue, pKeyVal, m_pList->Count()); } /* vi:set tabstop=4 shiftwidth=4 expandtab: */