summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorAndreas Bregas <ab@openoffice.org>2011-02-11 15:06:12 +0100
committerAndreas Bregas <ab@openoffice.org>2011-02-11 15:06:12 +0100
commitef8e9d51cee9b439380c5fe3ecd4123ffc738b69 (patch)
tree4739da221ddb1b6393ae600e9abc3fe02b2f4d7c /tools
parent7640d6a69f69172127221455b5bc5ae644f595ca (diff)
ab80: Adding changes of cws mib21, ab77, ab77run2, dr77i to dev300
ab77: #163789# Handle class module factories document specific ab77: #163732# Defer removal of documents until XCloseListener::notifyClosing ab77: #163808# make VBA symbols Application.ScreenUpdating and Application.Interactive work globally on all documents ab77: wae ab77: #163840# read UTF-8 BOM ab77: #163732# VBA UserForm_Terminate triggered too late while closing document ab77: minor correction ab77: assertion: do not call ::rtl::OUString::copy() with negative index ab77run2: #163869# do not call Class_Terminate VBA macros when document disposes dr77i: #163941# do not update drawing layer when pasting from clipboard after cut mib21: #163944# ignore trailing whitespace in Basic source lines mib21: #163948# allow to manually pack MSVC DLLs into extensions mib21: #163948# multiprocess build fix Authors: Andreas Bregas <ab@openoffice.org> Daniel Rentz [dr] <daniel.rentz@oracle.com> Eike Rathke [er] <eike.rathke@oracle.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/inc/tools/stream.hxx17
-rw-r--r--tools/source/stream/stream.cxx46
2 files changed, 56 insertions, 7 deletions
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx
index 61af361aa5b7..440eb2283f20 100644
--- a/tools/inc/tools/stream.hxx
+++ b/tools/inc/tools/stream.hxx
@@ -460,9 +460,20 @@ public:
/// Switch to no endian swapping and write 0xfeff
sal_Bool StartWritingUnicodeText();
- /// Read 16bit, if 0xfeff do nothing, if 0xfffe switch
- /// endian swapping, if none of them put back
- sal_Bool StartReadingUnicodeText();
+
+ /** If eReadBomCharSet==RTL_TEXTENCODING_DONTKNOW: read 16bit,
+ if 0xfeff do nothing (UTF-16), if 0xfffe switch endian
+ swapping (UTF-16), if 0xefbb or 0xbbef read another byte
+ and check for UTF-8. If no UTF-* BOM was detected put all
+ read bytes back. This means that if 2 bytes were read it
+ was an UTF-16 BOM, if 3 bytes were read it was an UTF-8
+ BOM. There is no UTF-7, UTF-32 or UTF-EBCDIC BOM detection!
+
+ If eReadBomCharSet!=RTL_TEXTENCODING_DONTKNOW: only read a
+ BOM of that encoding and switch endian swapping if UTF-16
+ and 0xfffe.
+ */
+ sal_Bool StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet );
/// Read a line of Unicode
sal_Bool ReadUniStringLine( String& rStr );
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 9fdef8436f1a..92ce4214dfc4 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1046,21 +1046,59 @@ sal_Bool SvStream::StartWritingUnicodeText()
|*
*************************************************************************/
-sal_Bool SvStream::StartReadingUnicodeText()
+sal_Bool SvStream::StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet )
{
+ if (!( eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+ eReadBomCharSet == RTL_TEXTENCODING_UNICODE ||
+ eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+ return sal_True; // nothing to read
+
+ bool bTryUtf8 = false;
sal_uInt16 nFlag;
+ sal_sSize nBack = sizeof(nFlag);
*this >> nFlag;
switch ( nFlag )
{
case 0xfeff :
- // native
+ // native UTF-16
+ if ( eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+ eReadBomCharSet == RTL_TEXTENCODING_UNICODE)
+ nBack = 0;
break;
case 0xfffe :
- SetEndianSwap( !bSwap );
+ // swapped UTF-16
+ if ( eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+ eReadBomCharSet == RTL_TEXTENCODING_UNICODE)
+ {
+ SetEndianSwap( !bSwap );
+ nBack = 0;
+ }
+ break;
+ case 0xefbb :
+ if (nNumberFormatInt == NUMBERFORMAT_INT_BIGENDIAN &&
+ (eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+ eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+ bTryUtf8 = true;
+ break;
+ case 0xbbef :
+ if (nNumberFormatInt == NUMBERFORMAT_INT_LITTLEENDIAN &&
+ (eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+ eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+ bTryUtf8 = true;
break;
default:
- SeekRel( -((sal_sSize)sizeof(nFlag)) ); // no BOM, pure data
+ ; // nothing
+ }
+ if (bTryUtf8)
+ {
+ sal_uChar nChar;
+ nBack += sizeof(nChar);
+ *this >> nChar;
+ if (nChar == 0xbf)
+ nBack = 0; // it is UTF-8
}
+ if (nBack)
+ SeekRel( -nBack ); // no BOM, pure data
return nError == SVSTREAM_OK;
}