summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kohei Yoshida <kyoshida@novell.com> 2010-09-28 11:26:03 -0400
committer Kohei Yoshida <kyoshida@novell.com> 2010-09-28 11:26:03 -0400
commit d19be7acbe78e7b25fc6349e1db7fd9c5974cc72 (patch)
tree f96e51207823a7ff726aa83be408bde239fb7bfa
parent afc5c0c4402ac15a0c54f8c83bea9aae6f53b685 (diff)
Ported sal-strintern-speed-char-upper.diff from ooo-build.
-rw-r--r-- sal/rtl/source/ustring.c 46
-rw-r--r-- sal/textenc/tcvtbyte.c 45
-rw-r--r-- sal/textenc/tcvtlat1.tab 16
-rw-r--r-- sal/textenc/tenchelp.h 5
4 files changed, 104 insertions, 8 deletions
diff --git a/sal/rtl/source/ustring.c b/sal/rtl/source/ustring.c
index b0bdd2d98e73..0781d14ee1ac 100644
--- a/sal/rtl/source/ustring.c
+++ b/sal/rtl/source/ustring.c
@@ -800,6 +800,29 @@ void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
}
}
+/* Tell whether the Unicode length of a converted byte string can be
+ * predicted from its byte length alone.
+ *
+ * len            number of source bytes
+ * eTextEncoding  encoding of the source bytes
+ *
+ * Returns len for the encodings tested below, which are treated as 1 to 1
+ * (with no zero elements), and 0 for every other encoding.  A result of 0
+ * therefore doubles as "cannot guess"; note that it is also what a len of 0
+ * produces.
+ *
+ * FIXME: Maybe we should use a bit flag in the higher bits of the
+ * eTextEncoding value itself to determine the encoding type. But if we
+ * do, be sure to mask the value in certain places that expect the values
+ * to be numbered serially from 0 and up. One such place is
+ * Impl_getTextEncodingData().
+ */
+static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
+{
+    const int bOneToOne =
+        eTextEncoding == RTL_TEXTENCODING_IBM_437 ||
+        eTextEncoding == RTL_TEXTENCODING_IBM_850 ||
+        eTextEncoding == RTL_TEXTENCODING_IBM_860 ||
+        eTextEncoding == RTL_TEXTENCODING_IBM_861 ||
+        eTextEncoding == RTL_TEXTENCODING_IBM_863 ||
+        eTextEncoding == RTL_TEXTENCODING_IBM_865;
+
+    return bOneToOne ? len : 0;
+}
+
void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
const sal_Char * str,
sal_Int32 len,
@@ -817,6 +840,7 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
if ( len < 256 )
{ // try various optimisations
+ sal_Int32 ulen;
if ( len < 0 )
len = strlen( str );
if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
@@ -836,6 +860,28 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
return;
}
+ else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
+ {
+ rtl_uString *pScratch;
+ rtl_TextToUnicodeConverter hConverter;
+ sal_Size nDestChars, nSrcBytes;
+ sal_uInt32 nInfo;
+
+ pScratch = alloca( sizeof(rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE) );
+
+ hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
+ nDestChars = rtl_convertTextToUnicode(
+ hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
+ rtl_destroyTextToUnicodeConverter( hConverter );
+
+ if (pInfo)
+ *pInfo = nInfo;
+
+ pScratch->length = ulen;
+ rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
+ return;
+ }
+
/* FIXME: we want a nice UTF-8 / alloca shortcut here */
}
diff --git a/sal/textenc/tcvtbyte.c b/sal/textenc/tcvtbyte.c
index 238c51fba223..d8f5be85848d 100644
--- a/sal/textenc/tcvtbyte.c
+++ b/sal/textenc/tcvtbyte.c
@@ -640,6 +640,51 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData,
/* ----------------------------------------------------------------------- */
+/* Convert a buffer of single-byte encoded text to Unicode.
+ *
+ * Bytes below 0x80 are copied through with the same value; bytes in
+ * 0x80..0xFF are looked up in the mpToUniTab1 table of pData at index
+ * (byte - 0x80).
+ * NOTE(review): this assumes mpToUniTab1 holds an entry for every byte in
+ * 0x80..0xFF -- confirm for each encoding that is switched to this
+ * converter.
+ *
+ * pData         conversion tables; used as an ImplByteConvertData
+ * pContext      unused
+ * pSrcBuf       source bytes
+ * nSrcBytes     number of bytes available at pSrcBuf
+ * pDestBuf      destination buffer
+ * nDestChars    capacity of pDestBuf, in sal_Unicode units
+ * nFlags        unused
+ * pInfo         set to 0, or to RTL_TEXTTOUNICODE_INFO_ERROR |
+ *               RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL when the
+ *               destination cannot take all converted characters
+ * pSrcCvtBytes  set to the number of source bytes consumed
+ *
+ * Returns the number of sal_Unicode units written to pDestBuf.
+ */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData,
+                                 void* pContext,
+                                 const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+                                 sal_Unicode* pDestBuf, sal_Size nDestChars,
+                                 sal_uInt32 nFlags, sal_uInt32* pInfo,
+                                 sal_Size* pSrcCvtBytes )
+{
+    sal_uChar                   c;
+    sal_Unicode                 cConv;
+    const ImplByteConvertData*  pConvertData = (const ImplByteConvertData*)pData;
+    sal_Unicode*                pEndDestBuf;
+    const sal_Char*             pEndSrcBuf;
+
+    (void) pContext; /* unused */
+    (void) nFlags; /* unused */
+
+    *pInfo = 0;
+    pEndDestBuf = pDestBuf+nDestChars;
+    pEndSrcBuf  = pSrcBuf+nSrcBytes;
+
+    /* An empty destination is always reported as too small, even when
+       there is nothing to convert. */
+    if ( pDestBuf == pEndDestBuf )
+    {
+        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+        *pSrcCvtBytes = 0;
+        return 0;
+    }
+
+    while ( pSrcBuf < pEndSrcBuf )
+    {
+        /* Never write past the end of the destination: when more source
+           bytes remain than there is room for, flag the condition and
+           stop, so that *pSrcCvtBytes reports only what was consumed. */
+        if ( pDestBuf == pEndDestBuf )
+        {
+            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+            break;
+        }
+
+        c = (sal_uChar)*pSrcBuf;
+        if (c < 0x80)
+        {
+            cConv = c;
+        }
+        else
+        {
+            /* c <= 0xFF is implied. */
+            cConv = pConvertData->mpToUniTab1[c - 0x80];
+        }
+
+        *pDestBuf = cConv;
+        pDestBuf++;
+        pSrcBuf++;
+    }
+
+    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
+    return (nDestChars - (pEndDestBuf-pDestBuf));
+}
+
+/* ----------------------------------------------------------------------- */
+
// Writes 0--2 characters to dest:
static int ImplConvertUnicodeCharToChar(
const ImplByteConvertData* pConvertData, sal_Unicode c, sal_Char * dest )
diff --git a/sal/textenc/tcvtlat1.tab b/sal/textenc/tcvtlat1.tab
index 6e8e55cf1d65..60c3218b64b3 100644
--- a/sal/textenc/tcvtlat1.tab
+++ b/sal/textenc/tcvtlat1.tab
@@ -188,7 +188,7 @@ static ImplByteConvertData const aImplIBM437ByteCvtData =
static ImplTextEncodingData const aImplIBM437TextEncodingData
= { { &aImplIBM437ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -322,7 +322,7 @@ static ImplByteConvertData const aImplIBM850ByteCvtData =
static ImplTextEncodingData const aImplIBM850TextEncodingData
= { { &aImplIBM850ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -495,7 +495,7 @@ static ImplByteConvertData const aImplIBM860ByteCvtData =
static ImplTextEncodingData const aImplIBM860TextEncodingData
= { { &aImplIBM860ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -670,7 +670,7 @@ static ImplByteConvertData const aImplIBM861ByteCvtData =
static ImplTextEncodingData const aImplIBM861TextEncodingData
= { { &aImplIBM861ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -845,7 +845,7 @@ static ImplByteConvertData const aImplIBM863ByteCvtData =
static ImplTextEncodingData const aImplIBM863TextEncodingData
= { { &aImplIBM863ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1020,7 +1020,7 @@ static ImplByteConvertData const aImplIBM865ByteCvtData =
static ImplTextEncodingData const aImplIBM865TextEncodingData
= { { &aImplIBM865ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1560,7 +1560,7 @@ static ImplByteConvertData const aImplAPPLEICELANDByteCvtData =
static ImplTextEncodingData const aImplAPPLEICELANDTextEncodingData
= { { &aImplAPPLEICELANDByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1707,7 +1707,7 @@ static ImplByteConvertData const aImplAPPLEROMANByteCvtData =
static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData
= { { &aImplAPPLEROMANByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
diff --git a/sal/textenc/tenchelp.h b/sal/textenc/tenchelp.h
index 98be923472ba..876543c47221 100644
--- a/sal/textenc/tenchelp.h
+++ b/sal/textenc/tenchelp.h
@@ -230,6 +230,11 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext,
const sal_Char* pSrcBuf, sal_Size nSrcBytes,
sal_Unicode* pDestBuf, sal_Size nDestChars,
sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
+/** Converter with the same signature as ImplCharToUnicode, for single-byte
+    encodings: source bytes below 0x80 convert to the same Unicode value,
+    and bytes 0x80 to 0xFF are converted through the encoding's mpToUniTab1
+    table. */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, void* pContext,
+ const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+ sal_Unicode* pDestBuf, sal_Size nDestChars,
+ sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext,
const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
sal_Char* pDestBuf, sal_Size nDestBytes,