diff options
author | Ivo Hinkelmann <ihi@openoffice.org> | 2006-08-03 12:53:35 +0000 |
---|---|---|
committer | Ivo Hinkelmann <ihi@openoffice.org> | 2006-08-03 12:53:35 +0000 |
commit | a0cdd5bd580d1ea4f1188ae9224be1072470b816 (patch) | |
tree | 49bbf82f8aebd0abe7badee588220122176ab48d | |
parent | cb91be70d5d950e48a0252992a0dd4b4bbd7c684 (diff) |
INTEGRATION: CWS swqbf76 (1.10.68); FILE MERGED
2006/07/26 12:24:56 od 1.10.68.2: #138464# method <HTMLParser::ScanText(..)>
- revise the fix for performance reasons.
2006/07/26 09:37:40 od 1.10.68.1: #138464# method <HTMLParser::ScanText(..)>
- handle hexadecimal digits in special character encoding (numeric character references) - &#x<hex digits>; in addition to the decimal form &#<digits>;
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 37 |
1 files changed, 31 insertions, 6 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index ff6cf51c71ce..403410563025 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -4,9 +4,9 @@ * * $RCSfile: parhtml.cxx,v $ * - * $Revision: 1.10 $ + * $Revision: 1.11 $ * - * last change: $Author: hr $ $Date: 2006-06-19 21:26:23 $ + * last change: $Author: ihi $ $Date: 2006-08-03 13:53:35 $ * * The Contents of this file are made available subject to * the terms of GNU Lesser General Public License Version 2.1. @@ -449,6 +449,9 @@ int HTMLParser::FilterToken( int nToken ) #define HTML_ISALNUM( c ) ( HTML_ISALPHA(c) || HTML_ISDIGIT(c) ) #define HTML_ISSPACE( c ) ( ' ' == c || (c >= 0x09 && c <= 0x0d) ) #define HTML_ISPRINTABLE( c ) ( c >= 32 && c != 127) +// --> OD 2006-07-26 #138464# +#define HTML_ISHEXDIGIT( c ) ( HTML_ISDIGIT(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') ) +// <-- int HTMLParser::ScanText( const sal_Unicode cBreak ) { @@ -475,14 +478,35 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) if( '#' == (nNextCh = GetNextChar()) ) { nNextCh = GetNextChar(); - if( HTML_ISDIGIT(nNextCh) ) + // --> OD 2006-07-26 #138464# + // consider hexadecimal digits + const sal_Bool bIsHex( 'x' == nNextCh ); + const sal_Bool bIsDecOrHex( bIsHex || HTML_ISDIGIT(nNextCh) ); + if ( bIsDecOrHex ) { - do + if ( bIsHex ) { - cChar = cChar * 10U + sal_Unicode( nNextCh - '0'); nNextCh = GetNextChar(); + while ( HTML_ISHEXDIGIT(nNextCh) ) + { + cChar = cChar * 16U + + ( nNextCh <= '9' + ? sal_Unicode( nNextCh - '0' ) + : ( nNextCh <= 'F' + ? 
sal_Unicode( nNextCh - 'A' + 10 ) + : sal_Unicode( nNextCh - 'a' + 10 ) ) ); + nNextCh = GetNextChar(); + } + } + else + { + do + { + cChar = cChar * 10U + sal_Unicode( nNextCh - '0'); + nNextCh = GetNextChar(); + } + while( HTML_ISDIGIT(nNextCh) ); } - while( HTML_ISDIGIT(nNextCh) ); if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc && RTL_TEXTENCODING_UCS2 != eSrcEnc && @@ -501,6 +525,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) } } } + // <-- else nNextCh = 0U; } |