diff options
Diffstat (limited to 'sax')
-rw-r--r-- | sax/source/expatwrap/xml2utf.cxx | 24 |
1 files changed, 17 insertions, 7 deletions
diff --git a/sax/source/expatwrap/xml2utf.cxx b/sax/source/expatwrap/xml2utf.cxx index 8460c2f2ee60..4a1e6a14e2ab 100644 --- a/sax/source/expatwrap/xml2utf.cxx +++ b/sax/source/expatwrap/xml2utf.cxx @@ -2,9 +2,9 @@ * * $RCSfile: xml2utf.cxx,v $ * - * $Revision: 1.6 $ + * $Revision: 1.7 $ * - * last change: $Author: hr $ $Date: 2004-02-04 13:40:37 $ + * last change: $Author: vg $ $Date: 2005-02-22 10:06:12 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -257,7 +257,7 @@ sal_Bool XMLFile2UTFConverter::isEncodingRecognizable( const Sequence< sal_Int8 sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq ) { - const sal_Int8 *pSource = seq.getConstArray(); + const sal_uInt8 *pSource = reinterpret_cast<const sal_uInt8*>( seq.getConstArray() ); sal_Bool bReturn = sal_True; if( seq.getLength() < 4 ) { @@ -299,14 +299,14 @@ sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq ) } } } - else if( 0xFE == static_cast<unsigned char> (pSource[0]) && - 0xFF == static_cast<unsigned char> (pSource[1]) ) { + else if( 0xFE == pSource[0] && + 0xFF == pSource[1] ) { // UTF-16 big endian // conversion is done so that encoding information can be easily extracted m_sEncoding = "utf-16"; } - else if( 0xFF == static_cast<unsigned char> (pSource[0]) && - 0xFE == static_cast<unsigned char> (pSource[1]) ) { + else if( 0xFF == pSource[0] && + 0xFE == pSource[1] ) { // UTF-16 little endian // conversion is done so that encoding information can be easily extracted m_sEncoding = "utf-16"; @@ -334,6 +334,16 @@ sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq ) m_sEncoding = "utf-16"; } + else if( 0xEF == pSource[0] && + 0xBB == pSource[1] && + 0xBF == pSource[2] ) + { + // UTF-8 BOM (byte order mark); signifies utf-8, and not byte order + // The BOM is removed. + memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 ); + seq.realloc( seq.getLength() - 3 ); + m_sEncoding = "utf-8"; + } else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) { // UCS-4 big endian m_sEncoding = "ucs-4"; |