#87140#: Support for text encodings with contexts

author: Michael Brauer <mib@openoffice.org> 2001-11-22 09:48:35 +0000
committer: Michael Brauer <mib@openoffice.org> 2001-11-22 09:48:35 +0000
commit: dfcfa273529fa04875319b7df5e8e8ce69c47e72 (patch)
tree: f79715b706a780d2edcfde43515eb896ce5481d2 /svtools
parent: a2905c387d8337bb343031471c9c03d8325aff1c (diff)
2 files changed, 103 insertions, 42 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 00f0f486063b..f1c9a8cbf736 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -2,9 +2,9 @@
  *
  *  $RCSfile: parhtml.cxx,v $
  *
- *  $Revision: 1.4 $
+ *  $Revision: 1.5 $
  *
- *  last change: $Author: mib $ $Date: 2001-10-31 08:30:08 $
+ *  last change: $Author: mib $ $Date: 2001-11-22 10:48:35 $
  *
  *  The Contents of this file are made available subject to the terms of
  *  either of the following licenses
@@ -568,6 +568,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
                                             ((nPos-i)*GetCharSize()) );
                                     nlLinePos -= sal_uInt32(nPos-i);
                                     nPos = i;
+                                    ClearTxtConvContext();
                                     break;
                                 }
                             }
@@ -590,6 +591,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
                                         "Falsche Zeilen-Position" );
                             rInput.Seek( nStreamPos );
                             nlLinePos = nLinePos;
+                            ClearTxtConvContext();
                             break;
                         }
 
@@ -634,6 +636,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
                                                     "Falsche Zeilen-Position" );
                                         rInput.Seek( nStreamPos );
                                         nlLinePos = nLinePos;
+                                        ClearTxtConvContext();
                                         return HTML_TEXTTOKEN;
                                     }
 
@@ -697,6 +700,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
                         nNextCh = 0U;
                         rInput.Seek( nStreamPos-(sal_uInt32)GetCharSize() );
                         nlLinePos = nLinePos-1UL;
+                        ClearTxtConvContext();
                         bReadNextChar = TRUE;
                     }
                     bNextCh = FALSE;
@@ -1009,6 +1013,7 @@ int HTMLParser::_GetNextRawToken()
                     rInput.Seek( nStreamPos );
                     SetLineNr( nLineNr );
                     SetLinePos( nLinePos );
+                    ClearTxtConvContext();
                     nNextCh = '<';
 
                     // den String wollen wir nicht an das Token haengen
@@ -1240,6 +1245,7 @@ int __EXPORT HTMLParser::_GetNextToken()
                             rInput.Seek( nCStreamPos );
                             SetLineNr( nCLineNr );
                             SetLinePos( nCLinePos );
+                            ClearTxtConvContext();
                             aToken.Erase( nCStrLen );
                             nNextCh = '>';
                         }
@@ -1261,6 +1267,7 @@ int __EXPORT HTMLParser::_GetNextToken()
                             rInput.Seek( nStreamPos );
                             SetLineNr( nLineNr );
                             SetLinePos( nLinePos );
+                            ClearTxtConvContext();
 
                             aToken = '<';
                             nRet = HTML_TEXTTOKEN;
@@ -1285,6 +1292,7 @@ int __EXPORT HTMLParser::_GetNextToken()
                             rInput.Seek( nStreamPos );
                             SetLineNr( nLineNr );
                             SetLinePos( nLinePos );
+                            ClearTxtConvContext();
 
                             aToken = '<';
                             nRet = HTML_TEXTTOKEN;
@@ -1321,6 +1329,7 @@ int __EXPORT HTMLParser::_GetNextToken()
                             rInput.Seek( nCStreamPos );
                             SetLineNr( nCLineNr );
                             SetLinePos( nCLinePos );
+                            ClearTxtConvContext();
                             aToken.AssignAscii( "<%", 2UL );
                             nRet = HTML_TEXTTOKEN;
                             break;
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index 6a7e1476207c..2be328ccf360 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -2,9 +2,9 @@
  *
  *  $RCSfile: svparser.cxx,v $
  *
- *  $Revision: 1.3 $
+ *  $Revision: 1.4 $
  *
- *  last change: $Author: mib $ $Date: 2001-10-15 08:49:57 $
+ *  last change: $Author: mib $ $Date: 2001-11-22 10:47:38 $
  *
  *  The Contents of this file are made available subject to the terms of
  *  either of the following licenses
@@ -119,6 +119,7 @@ struct SvParser_Impl
     int             nSaveToken;         // das Token vom Continue
 
     rtl_TextToUnicodeConverter hConv;
+    rtl_TextToUnicodeContext   hContext;
 
 #ifdef ASYNCHRON_TEST
 // HACK
@@ -130,7 +131,7 @@ _SvLockBytes_Impl* pLB;
 #endif
 
     SvParser_Impl() :
-        nSaveToken(0), hConv( 0 )
+        nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext)1 )
     {
     }
 
@@ -196,6 +197,8 @@ delete pImplData->pLB;
 
     if( pImplData && pImplData->hConv )
     {
+        rtl_destroyTextToUnicodeContext( pImplData->hConv,
+                                         pImplData->hContext );
         rtl_destroyTextToUnicodeConverter( pImplData->hConv );
     }
 
@@ -211,6 +214,12 @@ delete pImplData->pLB;
 #endif
 }
 
+void SvParser::ClearTxtConvContext()
+{
+    if( pImplData && pImplData->hConv )
+        rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
+}
+
 void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
 {
 
@@ -218,8 +227,11 @@ void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
     {
         if( pImplData && pImplData->hConv )
         {
+            rtl_destroyTextToUnicodeContext( pImplData->hConv,
+                                             pImplData->hContext );
             rtl_destroyTextToUnicodeConverter( pImplData->hConv );
             pImplData->hConv = 0;
+            pImplData->hContext = (rtl_TextToUnicodeContext )1;
         }
 
         if( eEnc < RTL_TEXTENCODING_STD_COUNT ||
@@ -233,7 +245,9 @@ void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
                         "SvParser::SetSrcEncoding: no converter for source encoding" );
             if( !pImplData->hConv )
                 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
-
+            else
+                pImplData->hContext =
+                    rtl_createTextToUnicodeContext( pImplData->hConv );
         }
         else
         {
@@ -366,55 +380,93 @@ sal_Unicode SvParser::GetNextChar()
                 sal_uInt32 nInfo = 0;
                 sal_Size nCvtBytes;
                 sal_Size nChars = rtl_convertTextToUnicode(
-                            pImplData->hConv, 0, &c1, 1, &cUC, 1,
+                            pImplData->hConv, pImplData->hContext,
+                            &c1, 1, &cUC, 1,
                             RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
                             RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
-                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR|
-                            RTL_TEXTTOUNICODE_FLAGS_FLUSH,
+                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
                             &nInfo, &nCvtBytes);
                 if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
                 {
                     // The conversion wasn't successfull because we haven't
                     // read enough characters.
-                    sal_Char sBuffer[10];
-                    sBuffer[0] = c1;
-                    sal_uInt16 nLen = 1;
-                    while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 &&
-                            nLen < 10 )
+                    if( pImplData->hContext != (rtl_TextToUnicodeContext)1 )
                     {
-                        rInput >> c1;
-                        if( (bErr = (rInput.IsEof() || rInput.GetError())) )
-                            break;
-
-                        sBuffer[nLen++] = c1;
-                        nChars = rtl_convertTextToUnicode(
-                                    pImplData->hConv, 0, sBuffer, nLen, &cUC, 1,
-                                    RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
-                                    RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
-                                    RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR|
-                                    RTL_TEXTTOUNICODE_FLAGS_FLUSH,
-                                    &nInfo, &nCvtBytes);
+                        while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
+                        {
+                            rInput >> c1;
+                            if( (bErr = (rInput.IsEof() || rInput.GetError())) )
+                                break;
+
+                            nChars = rtl_convertTextToUnicode(
+                                        pImplData->hConv, pImplData->hContext,
+                                        &c1, 1, &cUC, 1,
+                                        RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
+                                        RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
+                                        RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
+                                        &nInfo, &nCvtBytes);
+                        }
+                        if( !bErr )
+                        {
+                            if( 1 == nChars && 0 == nInfo )
+                            {
+                                c = cUC;
+                            }
+                            else
+                            {
+                                DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
+                                    "source buffer is to small" );
+                                DBG_ASSERT( 0 == nChars,
+                                   "there is a converted character, but an error" );
+                                DBG_ASSERT( 0 != nInfo,
+                                   "there is no converted character and no error" );
+                                // The conversion still failed and there is
+                                // nothing more we can do: use '?' instead.
+                                c = (sal_Unicode)'?';
+                            }
+                        }
                     }
-                    if( !bErr )
+                    else
                     {
-                        if( 1 == nChars && 0 == nInfo )
+                        sal_Char sBuffer[10];
+                        sBuffer[0] = c1;
+                        sal_uInt16 nLen = 1;
+                        while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 &&
+                                nLen < 10 )
                         {
-                            DBG_ASSERT( nCvtBytes == nLen,
-                                        "no all bytes have been converted!" );
-                            c = cUC;
+                            rInput >> c1;
+                            if( (bErr = (rInput.IsEof() || rInput.GetError())) )
+                                break;
+
+                            sBuffer[nLen++] = c1;
+                            nChars = rtl_convertTextToUnicode(
+                                        pImplData->hConv, 0, sBuffer, nLen, &cUC, 1,
+                                        RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
+                                        RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
+                                        RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
+                                        &nInfo, &nCvtBytes);
                         }
-                        else
+                        if( !bErr )
                         {
-                            DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
-                                "source buffer is to small" );
-                            DBG_ASSERT( 0 == nChars,
-                               "there is a converted character, but an error" );
-                            DBG_ASSERT( 0 != nInfo,
-                               "there is no converted character and no error" );
-                            // There are still errors, so we use the first
-                            // character and restart after that.
-                            c = (sal_Unicode)sBuffer[0];
-                            rInput.SeekRel( -(nLen-1) );
+                            if( 1 == nChars && 0 == nInfo )
+                            {
+                                DBG_ASSERT( nCvtBytes == nLen,
+                                            "no all bytes have been converted!" );
+                                c = cUC;
+                            }
+                            else
+                            {
+                                DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
+                                    "source buffer is to small" );
+                                DBG_ASSERT( 0 == nChars,
+                                   "there is a converted character, but an error" );
+                                DBG_ASSERT( 0 != nInfo,
+                                   "there is no converted character and no error" );
+                                // There are still errors, so we use the first
+                                // character and restart after that.
+                                c = (sal_Unicode)sBuffer[0];
+                                rInput.SeekRel( -(nLen-1) );
+                            }
                         }
                     }
                 }
author	Michael Brauer <mib@openoffice.org>	2001-11-22 09:48:35 +0000
committer	Michael Brauer <mib@openoffice.org>	2001-11-22 09:48:35 +0000
commit	dfcfa273529fa04875319b7df5e8e8ce69c47e72 (patch)
tree	f79715b706a780d2edcfde43515eb896ce5481d2 /svtools
parent	a2905c387d8337bb343031471c9c03d8325aff1c (diff)