summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ivo Hinkelmann <ihi@openoffice.org> 2006-08-03 12:53:35 +0000
committer Ivo Hinkelmann <ihi@openoffice.org> 2006-08-03 12:53:35 +0000
commit a0cdd5bd580d1ea4f1188ae9224be1072470b816 (patch)
tree 49bbf82f8aebd0abe7badee588220122176ab48d
parent cb91be70d5d950e48a0252992a0dd4b4bbd7c684 (diff)
INTEGRATION: CWS swqbf76 (1.10.68); FILE MERGED
2006/07/26 12:24:56 od 1.10.68.2: #138464# method <HTMLParser::ScanText(..)> - revise fix due to performance.
2006/07/26 09:37:40 od 1.10.68.1: #138464# method <HTMLParser::ScanText(..)> - handle hexadecimal digits in special character encoding - &<digit>;
-rw-r--r-- svtools/source/svhtml/parhtml.cxx 37
1 files changed, 31 insertions, 6 deletions
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index ff6cf51c71ce..403410563025 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -4,9 +4,9 @@
*
* $RCSfile: parhtml.cxx,v $
*
- * $Revision: 1.10 $
+ * $Revision: 1.11 $
*
- * last change: $Author: hr $ $Date: 2006-06-19 21:26:23 $
+ * last change: $Author: ihi $ $Date: 2006-08-03 13:53:35 $
*
* The Contents of this file are made available subject to
* the terms of GNU Lesser General Public License Version 2.1.
@@ -449,6 +449,9 @@ int HTMLParser::FilterToken( int nToken )
#define HTML_ISALNUM( c ) ( HTML_ISALPHA(c) || HTML_ISDIGIT(c) )
#define HTML_ISSPACE( c ) ( ' ' == c || (c >= 0x09 && c <= 0x0d) )
#define HTML_ISPRINTABLE( c ) ( c >= 32 && c != 127)
+// --> OD 2006-07-26 #138464#
+#define HTML_ISHEXDIGIT( c ) ( HTML_ISDIGIT(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') )
+// <--
int HTMLParser::ScanText( const sal_Unicode cBreak )
{
@@ -475,14 +478,35 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
if( '#' == (nNextCh = GetNextChar()) )
{
nNextCh = GetNextChar();
- if( HTML_ISDIGIT(nNextCh) )
+ // --> OD 2006-07-26 #138464#
+ // consider hexadecimal digits
+ const sal_Bool bIsHex( 'x' == nNextCh );
+ const sal_Bool bIsDecOrHex( bIsHex || HTML_ISDIGIT(nNextCh) );
+ if ( bIsDecOrHex )
{
- do
+ if ( bIsHex )
{
- cChar = cChar * 10U + sal_Unicode( nNextCh - '0');
nNextCh = GetNextChar();
+ while ( HTML_ISHEXDIGIT(nNextCh) )
+ {
+ cChar = cChar * 16U +
+ ( nNextCh <= '9'
+ ? sal_Unicode( nNextCh - '0' )
+ : ( nNextCh <= 'F'
+ ? sal_Unicode( nNextCh - 'A' + 10 )
+ : sal_Unicode( nNextCh - 'a' + 10 ) ) );
+ nNextCh = GetNextChar();
+ }
+ }
+ else
+ {
+ do
+ {
+ cChar = cChar * 10U + sal_Unicode( nNextCh - '0');
+ nNextCh = GetNextChar();
+ }
+ while( HTML_ISDIGIT(nNextCh) );
}
- while( HTML_ISDIGIT(nNextCh) );
if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc &&
RTL_TEXTENCODING_UCS2 != eSrcEnc &&
@@ -501,6 +525,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
}
}
}
+ // <--
else
nNextCh = 0U;
}