diff options
author | Andreas Bregas <ab@openoffice.org> | 2011-02-11 15:06:12 +0100 |
---|---|---|
committer | Andreas Bregas <ab@openoffice.org> | 2011-02-11 15:06:12 +0100 |
commit | ef8e9d51cee9b439380c5fe3ecd4123ffc738b69 (patch) | |
tree | 4739da221ddb1b6393ae600e9abc3fe02b2f4d7c /tools | |
parent | 7640d6a69f69172127221455b5bc5ae644f595ca (diff) |
ab80: Adding changes of cws mib21, ab77, ab77run2, dr77i to dev300
ab77: #163789# Handle class module factories document specific
ab77: #163732# Defer removal of documents until XCloseListener::notifyClosing
ab77: #163808# make VBA symbols Application.ScreenUpdating and Application.Interactive work globally on all documents
ab77: wae
ab77: #163840# read UTF-8 BOM
ab77: #163732# VBA UserForm_Terminate triggered too late while closing document
ab77: minor correction
ab77: assertion: do not call ::rtl::OUString::copy() with negative index
ab77run2: #163869# do not call Class_Terminate VBA macros when document disposes
dr77i: #163941# do not update drawing layer when pasting from clipboard after cut
mib21: #163944# ignore trailing whitespace in Basic source lines
mib21: #163948# allow to manually pack MSVC DLLs into extensions
mib21: #163948# multiprocess build fix
Authors:
Andreas Bregas <ab@openoffice.org>
Daniel Rentz [dr] <daniel.rentz@oracle.com>
Eike Rathke [er] <eike.rathke@oracle.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/inc/tools/stream.hxx | 17 | ||||
-rw-r--r-- | tools/source/stream/stream.cxx | 46 |
2 files changed, 56 insertions, 7 deletions
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx
index 61af361aa5b7..440eb2283f20 100644
--- a/tools/inc/tools/stream.hxx
+++ b/tools/inc/tools/stream.hxx
@@ -460,9 +460,20 @@ public:
 
                 /// Switch to no endian swapping and write 0xfeff
     sal_Bool    StartWritingUnicodeText();
-                /// Read 16bit, if 0xfeff do nothing, if 0xfffe switch
-                /// endian swapping, if none of them put back
-    sal_Bool    StartReadingUnicodeText();
+
+                /** If eReadBomCharSet==RTL_TEXTENCODING_DONTKNOW: read 16bit,
+                    if 0xfeff do nothing (UTF-16), if 0xfffe switch endian
+                    swapping (UTF-16), if 0xefbb or 0xbbef read another byte
+                    and check for UTF-8. If no UTF-* BOM was detected put all
+                    read bytes back. This means that if 2 bytes were read it
+                    was an UTF-16 BOM, if 3 bytes were read it was an UTF-8
+                    BOM. There is no UTF-7, UTF-32 or UTF-EBCDIC BOM detection!
+
+                    If eReadBomCharSet!=RTL_TEXTENCODING_DONTKNOW: only read a
+                    BOM of that encoding and switch endian swapping if UTF-16
+                    and 0xfffe.
+                 */
+    sal_Bool    StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet );
 
                 /// Read a line of Unicode
     sal_Bool    ReadUniStringLine( String& rStr );
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 9fdef8436f1a..92ce4214dfc4 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1046,21 +1046,59 @@ sal_Bool SvStream::StartWritingUnicodeText()
 |*
 *************************************************************************/
 
-sal_Bool SvStream::StartReadingUnicodeText()
+sal_Bool SvStream::StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet )
 {
+    if (!(  eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+            eReadBomCharSet == RTL_TEXTENCODING_UNICODE ||
+            eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+        return sal_True;    // nothing to read
+
+    bool bTryUtf8 = false;
     sal_uInt16 nFlag;
+    sal_sSize nBack = sizeof(nFlag);
     *this >> nFlag;
     switch ( nFlag )
     {
         case 0xfeff :
-            // native
+            // native UTF-16
+            if (    eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+                    eReadBomCharSet == RTL_TEXTENCODING_UNICODE)
+                nBack = 0;
         break;
         case 0xfffe :
-            SetEndianSwap( !bSwap );
+            // swapped UTF-16
+            if (    eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+                    eReadBomCharSet == RTL_TEXTENCODING_UNICODE)
+            {
+                SetEndianSwap( !bSwap );
+                nBack = 0;
+            }
+        break;
+        case 0xefbb :
+            if (nNumberFormatInt == NUMBERFORMAT_INT_BIGENDIAN &&
+                    (eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+                     eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+                bTryUtf8 = true;
+        break;
+        case 0xbbef :
+            if (nNumberFormatInt == NUMBERFORMAT_INT_LITTLEENDIAN &&
+                    (eReadBomCharSet == RTL_TEXTENCODING_DONTKNOW ||
+                     eReadBomCharSet == RTL_TEXTENCODING_UTF8))
+                bTryUtf8 = true;
         break;
         default:
-            SeekRel( -((sal_sSize)sizeof(nFlag)) ); // no BOM, pure data
+            ;   // nothing
+    }
+    if (bTryUtf8)
+    {
+        sal_uChar nChar;
+        nBack += sizeof(nChar);
+        *this >> nChar;
+        if (nChar == 0xbf)
+            nBack = 0;      // it is UTF-8
     }
+    if (nBack)
+        SeekRel( -nBack );  // no BOM, pure data
     return nError == SVSTREAM_OK;
 }
 