diff options
author | Michael Brauer <mib@openoffice.org> | 2001-11-22 09:48:35 +0000 |
---|---|---|
committer | Michael Brauer <mib@openoffice.org> | 2001-11-22 09:48:35 +0000 |
commit | dfcfa273529fa04875319b7df5e8e8ce69c47e72 (patch) | |
tree | f79715b706a780d2edcfde43515eb896ce5481d2 /svtools | |
parent | a2905c387d8337bb343031471c9c03d8325aff1c (diff) |
#87140#: Support for text encodings with contexts
Diffstat (limited to 'svtools')
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 13 | ||||
-rw-r--r-- | svtools/source/svrtf/svparser.cxx | 132 |
2 files changed, 103 insertions, 42 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index 00f0f486063b..f1c9a8cbf736 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -2,9 +2,9 @@ * * $RCSfile: parhtml.cxx,v $ * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * - * last change: $Author: mib $ $Date: 2001-10-31 08:30:08 $ + * last change: $Author: mib $ $Date: 2001-11-22 10:48:35 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -568,6 +568,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ((nPos-i)*GetCharSize()) ); nlLinePos -= sal_uInt32(nPos-i); nPos = i; + ClearTxtConvContext(); break; } } @@ -590,6 +591,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) "Falsche Zeilen-Position" ); rInput.Seek( nStreamPos ); nlLinePos = nLinePos; + ClearTxtConvContext(); break; } @@ -634,6 +636,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) "Falsche Zeilen-Position" ); rInput.Seek( nStreamPos ); nlLinePos = nLinePos; + ClearTxtConvContext(); return HTML_TEXTTOKEN; } @@ -697,6 +700,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) nNextCh = 0U; rInput.Seek( nStreamPos-(sal_uInt32)GetCharSize() ); nlLinePos = nLinePos-1UL; + ClearTxtConvContext(); bReadNextChar = TRUE; } bNextCh = FALSE; @@ -1009,6 +1013,7 @@ int HTMLParser::_GetNextRawToken() rInput.Seek( nStreamPos ); SetLineNr( nLineNr ); SetLinePos( nLinePos ); + ClearTxtConvContext(); nNextCh = '<'; // den String wollen wir nicht an das Token haengen @@ -1240,6 +1245,7 @@ int __EXPORT HTMLParser::_GetNextToken() rInput.Seek( nCStreamPos ); SetLineNr( nCLineNr ); SetLinePos( nCLinePos ); + ClearTxtConvContext(); aToken.Erase( nCStrLen ); nNextCh = '>'; } @@ -1261,6 +1267,7 @@ int __EXPORT HTMLParser::_GetNextToken() rInput.Seek( nStreamPos ); SetLineNr( nLineNr ); SetLinePos( nLinePos ); + ClearTxtConvContext(); aToken = '<'; nRet = HTML_TEXTTOKEN; @@ -1285,6 +1292,7 @@ int __EXPORT HTMLParser::_GetNextToken() rInput.Seek( nStreamPos ); SetLineNr( nLineNr ); SetLinePos( nLinePos ); + ClearTxtConvContext(); aToken = '<'; nRet = HTML_TEXTTOKEN; @@ -1321,6 +1329,7 @@ int __EXPORT HTMLParser::_GetNextToken() rInput.Seek( nCStreamPos ); SetLineNr( nCLineNr ); SetLinePos( nCLinePos ); + ClearTxtConvContext(); aToken.AssignAscii( "<%", 2UL ); nRet = HTML_TEXTTOKEN; break; diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx index 6a7e1476207c..2be328ccf360 100644 --- a/svtools/source/svrtf/svparser.cxx +++ b/svtools/source/svrtf/svparser.cxx @@ -2,9 +2,9 @@ * * $RCSfile: svparser.cxx,v $ * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * - * last change: $Author: mib $ $Date: 2001-10-15 08:49:57 $ + * last change: $Author: mib $ $Date: 2001-11-22 10:47:38 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -119,6 +119,7 @@ struct SvParser_Impl int nSaveToken; // das Token vom Continue rtl_TextToUnicodeConverter hConv; + rtl_TextToUnicodeContext hContext; #ifdef ASYNCHRON_TEST // HACK @@ -130,7 +131,7 @@ _SvLockBytes_Impl* pLB; #endif SvParser_Impl() : - nSaveToken(0), hConv( 0 ) + nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext)1 ) { } @@ -196,6 +197,8 @@ delete pImplData->pLB; if( pImplData && pImplData->hConv ) { + rtl_destroyTextToUnicodeContext( pImplData->hConv, + pImplData->hContext ); rtl_destroyTextToUnicodeConverter( pImplData->hConv ); } @@ -211,6 +214,12 @@ delete pImplData->pLB; #endif } +void SvParser::ClearTxtConvContext() +{ + if( pImplData && pImplData->hConv ) + rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); +} + void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) { @@ -218,8 +227,11 @@ void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) { if( pImplData && pImplData->hConv ) { + rtl_destroyTextToUnicodeContext( pImplData->hConv, + pImplData->hContext ); rtl_destroyTextToUnicodeConverter( pImplData->hConv ); pImplData->hConv = 0; + pImplData->hContext = (rtl_TextToUnicodeContext )1; } if( eEnc < RTL_TEXTENCODING_STD_COUNT || @@ -233,7 +245,9 @@ void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) "SvParser::SetSrcEncoding: no converter for source encoding" ); if( !pImplData->hConv ) eSrcEnc = RTL_TEXTENCODING_DONTKNOW; - + else + pImplData->hContext = + rtl_createTextToUnicodeContext( pImplData->hConv ); } else { @@ -366,55 +380,93 @@ sal_Unicode SvParser::GetNextChar() sal_uInt32 nInfo = 0; sal_Size nCvtBytes; sal_Size nChars = rtl_convertTextToUnicode( - pImplData->hConv, 0, &c1, 1, &cUC, 1, + pImplData->hConv, pImplData->hContext, + &c1, 1, &cUC, 1, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR| - RTL_TEXTTOUNICODE_FLAGS_FLUSH, + RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, &nInfo, &nCvtBytes); if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) { // The conversion wasn't successfull because we haven't // read enough characters. - sal_Char sBuffer[10]; - sBuffer[0] = c1; - sal_uInt16 nLen = 1; - while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 && - nLen < 10 ) + if( pImplData->hContext != (rtl_TextToUnicodeContext)1 ) { - rInput >> c1; - if( (bErr = (rInput.IsEof() || rInput.GetError())) ) - break; - - sBuffer[nLen++] = c1; - nChars = rtl_convertTextToUnicode( - pImplData->hConv, 0, sBuffer, nLen, &cUC, 1, - RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR| - RTL_TEXTTOUNICODE_FLAGS_FLUSH, - &nInfo, &nCvtBytes); + while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) + { + rInput >> c1; + if( (bErr = (rInput.IsEof() || rInput.GetError())) ) + break; + + nChars = rtl_convertTextToUnicode( + pImplData->hConv, pImplData->hContext, + &c1, 1, &cUC, 1, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| + RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| + RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, + &nInfo, &nCvtBytes); + } + if( !bErr ) + { + if( 1 == nChars && 0 == nInfo ) + { + c = cUC; + } + else + { + DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, + "source buffer is to small" ); + DBG_ASSERT( 0 == nChars, + "there is a converted character, but an error" ); + DBG_ASSERT( 0 != nInfo, + "there is no converted character and no error" ); + // There are still errors, but nothing we can + // do + c = (sal_Unicode)'?'; + } + } } - if( !bErr ) + else { - if( 1 == nChars && 0 == nInfo ) + sal_Char sBuffer[10]; + sBuffer[0] = c1; + sal_uInt16 nLen = 1; + while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 && + nLen < 10 ) { - DBG_ASSERT( nCvtBytes == nLen, - "no all bytes have been converted!" ); - c = cUC; + rInput >> c1; + if( (bErr = (rInput.IsEof() || rInput.GetError())) ) + break; + + sBuffer[nLen++] = c1; + nChars = rtl_convertTextToUnicode( + pImplData->hConv, 0, sBuffer, nLen, &cUC, 1, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| + RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| + RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, + &nInfo, &nCvtBytes); } - else + if( !bErr ) { - DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, - "source buffer is to small" ); - DBG_ASSERT( 0 == nChars, - "there is a converted character, but an error" ); - DBG_ASSERT( 0 != nInfo, - "there is no converted character and no error" ); - // There are still errors, so we use the first - // character and restart after that. - c = (sal_Unicode)sBuffer[0]; - rInput.SeekRel( -(nLen-1) ); + if( 1 == nChars && 0 == nInfo ) + { + DBG_ASSERT( nCvtBytes == nLen, + "no all bytes have been converted!" ); + c = cUC; + } + else + { + DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, + "source buffer is to small" ); + DBG_ASSERT( 0 == nChars, + "there is a converted character, but an error" ); + DBG_ASSERT( 0 != nInfo, + "there is no converted character and no error" ); + // There are still errors, so we use the first + // character and restart after that. + c = (sal_Unicode)sBuffer[0]; + rInput.SeekRel( -(nLen-1) ); + } } } } |