diff options
author | Stephan Bergmann <sb@openoffice.org> | 2001-10-12 09:44:53 +0000 |
---|---|---|
committer | Stephan Bergmann <sb@openoffice.org> | 2001-10-12 09:44:53 +0000 |
commit | ee2c73f6a8711bb1e9f6ed5ea253b152b6742144 (patch) | |
tree | 71f35641cb1998a6ee204f91469988b588278813 /sal/textenc | |
parent | bf4d03e8e0ec2a50462ba0d959915b17e515f8c0 (diff) |
#87140# Cleaned up, added EUC-TW and GB-18030.
Diffstat (limited to 'sal/textenc')
30 files changed, 4272 insertions, 3722 deletions
diff --git a/sal/textenc/context.c b/sal/textenc/context.c new file mode 100644 index 000000000000..8e76a8578212 --- /dev/null +++ b/sal/textenc/context.c @@ -0,0 +1,86 @@ +/************************************************************************* + * + * $RCSfile: context.c,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. 
+ * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#include "context.h" +#endif + +#ifndef _RTL_ALLOC_H_ +#include "rtl/alloc.h" +#endif + +void * ImplCreateUnicodeToTextContext(void) +{ + void * pContext = rtl_allocateMemory(sizeof (ImplUnicodeToTextContext)); + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate = 0; + return pContext; +} + +void ImplResetUnicodeToTextContext(void * pContext) +{ + if (pContext) + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate = 0; +} + +void ImplDestroyContext(void * pContext) +{ + rtl_freeMemory(pContext); +} diff --git a/sal/textenc/context.h b/sal/textenc/context.h new file mode 100644 index 000000000000..1f3b3c7c0157 --- /dev/null +++ b/sal/textenc/context.h @@ -0,0 +1,88 @@ +/************************************************************************* + * + * $RCSfile: context.h,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * 
============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#define INCLUDED_RTL_TEXTENC_CONTEXT_H + +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +#if defined __cplusplus +extern "C" { +#endif /* __cpluscplus */ + +typedef struct +{ + sal_Unicode m_nHighSurrogate; +} ImplUnicodeToTextContext; + +void * ImplCreateUnicodeToTextContext(void) SAL_THROW_EXTERN_C(); + +void ImplResetUnicodeToTextContext(void * pContext) SAL_THROW_EXTERN_C(); + +void ImplDestroyContext(void * pContext) SAL_THROW_EXTERN_C(); + +#if defined __cplusplus +} +#endif /* __cpluscplus */ + +#endif /* INCLUDED_RTL_TEXTENC_CONTEXT_H */ diff --git a/sal/textenc/converter.c b/sal/textenc/converter.c new file mode 100644 index 000000000000..a5917c20600b --- /dev/null +++ b/sal/textenc/converter.c @@ -0,0 +1,163 @@ +/************************************************************************* + * + * $RCSfile: converter.c,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTER_H +#include "converter.h" +#endif + +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _RTL_TEXTCVT_H +#include "rtl/textcvt.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +ImplBadInputConversionAction +ImplHandleBadInputMbTextToUnicodeConversion(sal_Bool bUndefined, + sal_uInt32 nFlags, + sal_Unicode ** pDestBufPtr, + sal_Unicode * pDestBufEnd, + sal_uInt32 * pInfo) +{ + *pInfo |= bUndefined ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED : + RTL_TEXTTOUNICODE_INFO_INVALID; + switch (nFlags & (bUndefined ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK : + RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK)) + { + case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR: + case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR: + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + return IMPL_BAD_INPUT_STOP; + + case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE: + case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE: + return IMPL_BAD_INPUT_CONTINUE; + + default: /* RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT, + RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */ + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = RTL_UNICODE_CHAR_DEFAULT; + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + } +} + +ImplBadInputConversionAction +ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined, + sal_uInt32 nFlags, + sal_Char ** pDestBufPtr, + sal_Char * pDestBufEnd, + sal_uInt32 * pInfo) +{ + *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED : + RTL_UNICODETOTEXT_INFO_INVALID; + switch (nFlags & (bUndefined ? 
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK : + RTL_UNICODETOTEXT_FLAGS_INVALID_MASK)) + { + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR: + case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR: + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; + return IMPL_BAD_INPUT_STOP; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE: + case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE: + return IMPL_BAD_INPUT_CONTINUE; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: + case RTL_UNICODETOTEXT_FLAGS_INVALID_0: + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = 0; + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: + case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: + default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT, + RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = '?'; + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: + case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = '_'; + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + } +} diff --git a/sal/textenc/converter.h b/sal/textenc/converter.h new file mode 100644 index 000000000000..efa7a0f91e03 --- /dev/null +++ b/sal/textenc/converter.h @@ -0,0 +1,100 @@ +/************************************************************************* + * + * $RCSfile: converter.h,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 
2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTER_H +#define INCLUDED_RTL_TEXTENC_CONVERTER_H + +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +#if defined __cplusplus +extern "C" { +#endif /* __cpluscplus */ + +typedef enum +{ + IMPL_BAD_INPUT_STOP, + IMPL_BAD_INPUT_CONTINUE, + IMPL_BAD_INPUT_NO_OUTPUT +} ImplBadInputConversionAction; + +ImplBadInputConversionAction +ImplHandleBadInputMbTextToUnicodeConversion(sal_Bool bUndefined, + sal_uInt32 nFlags, + sal_Unicode ** pDestBufPtr, + sal_Unicode * pDestBufEnd, + sal_uInt32 * pInfo) + SAL_THROW_EXTERN_C(); + +ImplBadInputConversionAction +ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined, + sal_uInt32 nFlags, + sal_Char ** pDestBufPtr, + sal_Char * pDestBufEnd, + sal_uInt32 * pInfo) + SAL_THROW_EXTERN_C(); + +#if defined __cplusplus +} +#endif /* __cpluscplus */ + +#endif /* INCLUDED_RTL_TEXTENC_CONVERTER_H */ diff --git a/sal/textenc/converteuctw.c b/sal/textenc/converteuctw.c new file mode 100644 index 000000000000..8e6f908f16dc --- /dev/null +++ b/sal/textenc/converteuctw.c @@ -0,0 +1,518 @@ +/************************************************************************* + * + * $RCSfile: converteuctw.c,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. 
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H +#include "converteuctw.h" +#endif + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#include "context.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_CONVERTER_H +#include "converter.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _RTL_ALLOC_H_ +#include "rtl/alloc.h" +#endif +#ifndef _RTL_TEXTCVT_H +#include "rtl/textcvt.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +typedef enum +{ + IMPL_EUC_TW_TO_UNICODE_STATE_0, + IMPL_EUC_TW_TO_UNICODE_STATE_1, + IMPL_EUC_TW_TO_UNICODE_STATE_2_1, + IMPL_EUC_TW_TO_UNICODE_STATE_2_2, + IMPL_EUC_TW_TO_UNICODE_STATE_2_3 +} ImplEucTwToUnicodeState; + +typedef struct +{ + ImplEucTwToUnicodeState m_eState; + sal_Int32 m_nPlane; /* 0--15 */ + sal_Int32 m_nRow; /* 0--93 */ +} ImplEucTwToUnicodeContext; + +void * ImplCreateEucTwToUnicodeContext(void) +{ + void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext)); + ((ImplEucTwToUnicodeContext *) pContext)->m_eState + = IMPL_EUC_TW_TO_UNICODE_STATE_0; + return pContext; +} + +void ImplResetEucTwToUnicodeContext(void * pContext) +{ + if (pContext) + ((ImplEucTwToUnicodeContext *) pContext)->m_eState + = IMPL_EUC_TW_TO_UNICODE_STATE_0; +} + +sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, + void * pContext, + sal_Char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + sal_Unicode const * pCns116431992Data + = ((ImplEucTwConverterData const *) pData)-> + m_pCns116431992ToUnicodeData; + sal_Int32 const * pCns116431992RowOffsets + = ((ImplEucTwConverterData const *) pData)-> + m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = ((ImplEucTwConverterData const *) 
pData)-> + m_pCns116431992ToUnicodePlaneOffsets; + ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + sal_Int32 nPlane; + sal_Int32 nRow; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = ((ImplEucTwToUnicodeContext *) pContext)->m_eState; + nPlane = ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane; + nRow = ((ImplEucTwToUnicodeContext *) pContext)->m_nRow; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + sal_Bool bUndefined = sal_True; + sal_uInt32 nChar = *((sal_uChar const *) pSrcBuf)++; + switch (eState) + { + case IMPL_EUC_TW_TO_UNICODE_STATE_0: + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else if (nChar >= 0xA1 && nChar <= 0xFE) + { + nRow = nChar - 0xA1; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_1; + } + else if (nChar == 0x8E) + eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1; + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_1: + if (nChar >= 0xA1 && nChar <= 0xFE) + { + sal_Int32 nOffset = pCns116431992RowOffsets[nRow]; + if (nOffset == -1) + goto bad_input; + else + { + sal_Unicode nUnicode; + nOffset += nChar - 0xA1; + nUnicode = pCns116431992Data[nOffset]; + if (nUnicode == 0xFFFF) + goto bad_input; + else if (nUnicode < RTL_UNICODE_START_HIGH_SURROGATES + || nUnicode > RTL_UNICODE_END_HIGH_SURROGATES) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = nUnicode; + else + goto no_output; + else + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = nUnicode; + *pDestBufPtr++ = pCns116431992Data[nOffset + 94]; + } + else + goto no_output; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + } + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_1: + if (nChar >= 0xA1 && nChar <= 0xB0) + { + nPlane = nChar - 0xA1; + 
++eState; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_2: + if (nChar >= 0xA1 && nChar <= 0xFE) + { + nRow = nChar - 0xA1; + ++eState; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_3: + if (nChar >= 0xA1 && nChar <= 0xFE) + { + sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; + if (nPlaneOffset == -1) + goto bad_input; + else + { + sal_Int32 nOffset + = pCns116431992RowOffsets[nPlaneOffset + nRow]; + if (nOffset == -1) + goto bad_input; + else + { + sal_Unicode nUnicode; + nOffset += nChar - 0xA1; + nUnicode = pCns116431992Data[nOffset]; + if (nUnicode == 0xFFFF) + goto bad_input; + else if (nUnicode < RTL_UNICODE_START_HIGH_SURROGATES + || nUnicode + > RTL_UNICODE_END_HIGH_SURROGATES) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = nUnicode; + else + goto no_output; + else + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = nUnicode; + *pDestBufPtr++ + = pCns116431992Data[nOffset + 94]; + } + else + goto no_output; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + } + } + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + } + continue; + + bad_input: + switch (ImplHandleBadInputMbTextToUnicodeConversion(bUndefined, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch 
(ImplHandleBadInputMbTextToUnicodeConversion(sal_False, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (pContext) + { + ((ImplEucTwToUnicodeContext *) pContext)->m_eState = eState; + ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane = nPlane; + ((ImplEucTwToUnicodeContext *) pContext)->m_nRow = nRow; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + sal_Char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + /* TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR + RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 + RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE + RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE + RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE + RTL_UNICODETOTEXT_FLAGS_NOCOMPOSITE */ + + sal_uInt32 const * pCns116431992Data + = ((ImplEucTwConverterData const *) pData)-> + m_pUnicodeToCns116431992Data; + sal_Int32 const * pCns116431992PageOffsets + = ((ImplEucTwConverterData const *) pData)-> + m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = ((ImplEucTwConverterData const *) pData)-> + m_pUnicodeToCns116431992PlaneOffsets; + sal_Unicode nHighSurrogate = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Char * pDestBufPtr = pDestBuf; + sal_Char * pDestBufEnd = pDestBuf + nDestBytes; + + if (pContext) + nHighSurrogate + = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; + + for (; nConverted < nSrcChars; ++nConverted) + { + sal_Bool bUndefined = sal_True; + sal_uInt32 nChar = 
*pSrcBuf++; + if (nHighSurrogate == 0) + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Char) nChar; + else + goto no_output; + else if (nChar >= RTL_UNICODE_START_HIGH_SURROGATES + && nChar <= RTL_UNICODE_END_HIGH_SURROGATES) + nHighSurrogate = (sal_Unicode) nChar; + else if ((nChar < RTL_UNICODE_START_LOW_SURROGATES + || nChar > RTL_UNICODE_END_LOW_SURROGATES) + && (nChar < 0xFDD0 || nChar > 0xFDEF) + && nChar < 0xFFFE) + goto translate; + else + { + bUndefined = sal_False; + goto bad_input; + } + else + if (nChar >= RTL_UNICODE_START_LOW_SURROGATES + && nChar <= RTL_UNICODE_END_LOW_SURROGATES) + { + nChar = ((nHighSurrogate & 0x3FF) << 10 | nChar & 0x3FF) + + 0x10000; + if ((nChar & 0xFFFF) < 0xFFFE) + goto translate; + else + { + bUndefined = sal_False; + goto bad_input; + } + } + else + { + bUndefined = sal_False; + goto bad_input; + } + continue; + + translate: + { + sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; + sal_uInt32 nData; + sal_uInt32 nPlane; + if (nOffset == -1) + goto bad_input; + nOffset + = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; + if (nOffset == -1) + goto bad_input; + nData = pCns116431992Data[nOffset + (nChar & 0xFF)]; + if (nData == 0) + goto bad_input; + nPlane = nData >> 16; + if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 
2 : 4)) + goto no_output; + if (nPlane != 1) + { + *pDestBufPtr++ = (sal_Char) 0x8E; + *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane); + } + *pDestBufPtr++ = (sal_Char) (nData >> 8 & 0xFF); + *pDestBufPtr++ = (sal_Char) (nData & 0xFF); + nHighSurrogate = 0; + } + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nHighSurrogate != 0 + && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (pContext) + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate + = nHighSurrogate; + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} diff --git a/sal/textenc/converteuctw.h b/sal/textenc/converteuctw.h new file mode 100644 index 000000000000..9f176b8dcd42 --- /dev/null +++ b/sal/textenc/converteuctw.h @@ -0,0 +1,117 @@ +/************************************************************************* + * + * $RCSfile: converteuctw.h,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public 
License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H +#define INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H + +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +#if defined __cplusplus +extern "C" { +#endif /* __cpluscplus */ + +typedef struct +{ + sal_Unicode const * m_pCns116431992ToUnicodeData; + sal_Int32 const * m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * m_pCns116431992ToUnicodePlaneOffsets; + sal_uInt32 const * m_pUnicodeToCns116431992Data; + sal_Int32 const * m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * m_pUnicodeToCns116431992PlaneOffsets; +} ImplEucTwConverterData; + +void * ImplCreateEucTwToUnicodeContext(void) SAL_THROW_EXTERN_C(); + +void ImplResetEucTwToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); + +sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, + void * pContext, + sal_Char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) + SAL_THROW_EXTERN_C(); + +sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + sal_Char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) + SAL_THROW_EXTERN_C(); + +#if defined __cplusplus +} +#endif /* __cpluscplus */ + +#endif /* INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H */ diff --git a/sal/textenc/converteuctw.tab b/sal/textenc/converteuctw.tab new file mode 100644 index 000000000000..79b8595b0a56 --- /dev/null +++ b/sal/textenc/converteuctw.tab @@ -0,0 +1,109 @@ +/************************************************************************* + * + * $RCSfile: converteuctw.tab,v $ + * + * $Revision: 1.1 $ + * + * last 
change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+ * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#include "context.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H +#include "converteuctw.h" +#endif + +#ifndef _RTL_TENCINFO_H +#include "rtl/tencinfo.h" +#endif +#ifndef _RTL_TEXTENC_H +#include "rtl/textenc.h" +#endif + +#ifndef INCLUDED_STDDEF_H +#include <stddef.h> +#define INCLUDED_STDDEF_H +#endif + +#include "generate/cns116431992.tab" + +static ImplEucTwConverterData const aImplEucTwConvertData + = { aImplCns116431992ToUnicodeData, + aImplCns116431992ToUnicodeRowOffsets, + aImplCns116431992ToUnicodePlaneOffsets, + aImplUnicodeToCns116431992Data, + aImplUnicodeToCns116431992PageOffsets, + aImplUnicodeToCns116431992PlaneOffsets }; + +static ImplTextEncodingData const aImplEucTwTextEncodingData + = { { &aImplEucTwConvertData, + &ImplConvertEucTwToUnicode, + &ImplConvertUnicodeToEucTw, + &ImplCreateEucTwToUnicodeContext, + &ImplDestroyContext, + &ImplResetEucTwToUnicodeContext, + &ImplCreateUnicodeToTextContext, + &ImplDestroyContext, + &ImplResetUnicodeToTextContext }, + RTL_TEXTENCODING_EUC_TW, + 1, + 4, + 2, + 0, /* TODO! */ + NULL, /* TODO! */ + NULL, /* TODO! 
*/ + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_CHINESE_TRADITIONAL */ diff --git a/sal/textenc/convertgb18030.c b/sal/textenc/convertgb18030.c new file mode 100644 index 000000000000..318ed01740e6 --- /dev/null +++ b/sal/textenc/convertgb18030.c @@ -0,0 +1,533 @@ +/************************************************************************* + * + * $RCSfile: convertgb18030.c,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. 
You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTGB18030_H +#include "convertgb18030.h" +#endif + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#include "context.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_CONVERTER_H +#include "converter.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _RTL_ALLOC_H_ +#include "rtl/alloc.h" +#endif +#ifndef _RTL_TEXTCVT_H +#include "rtl/textcvt.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +typedef enum +{ + IMPL_GB_18030_TO_UNICODE_STATE_0, + IMPL_GB_18030_TO_UNICODE_STATE_1, + IMPL_GB_18030_TO_UNICODE_STATE_2, + IMPL_GB_18030_TO_UNICODE_STATE_3 +} ImplGb18030ToUnicodeState; + +typedef struct +{ + ImplGb18030ToUnicodeState m_eState; + sal_uInt32 m_nCode; +} ImplGb18030ToUnicodeContext; + +void * ImplCreateGb18030ToUnicodeContext(void) +{ + void * pContext + = rtl_allocateMemory(sizeof (ImplGb18030ToUnicodeContext)); + ((ImplGb18030ToUnicodeContext *) pContext)->m_eState + = IMPL_GB_18030_TO_UNICODE_STATE_0; + return pContext; +} + +void ImplResetGb18030ToUnicodeContext(void * pContext) +{ + if (pContext) + ((ImplGb18030ToUnicodeContext *) pContext)->m_eState + = IMPL_GB_18030_TO_UNICODE_STATE_0; +} + 
+sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData, + void * pContext, + sal_Char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + sal_Unicode const * pGb18030Data + = ((ImplGb18030ConverterData const *) pData)->m_pGb18030ToUnicodeData; + ImplGb180302000ToUnicodeRange const * pGb18030Ranges + = ((ImplGb18030ConverterData const *) pData)-> + m_pGb18030ToUnicodeRanges; + ImplGb18030ToUnicodeState eState = IMPL_GB_18030_TO_UNICODE_STATE_0; + sal_uInt32 nCode; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = ((ImplGb18030ToUnicodeContext *) pContext)->m_eState; + nCode = ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + sal_Bool bUndefined = sal_True; + sal_uInt32 nChar = *((sal_uChar const *) pSrcBuf)++; + switch (eState) + { + case IMPL_GB_18030_TO_UNICODE_STATE_0: + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else if (nChar == 0x80) + goto bad_input; + else if (nChar <= 0xFE) + { + nCode = nChar - 0x81; + eState = IMPL_GB_18030_TO_UNICODE_STATE_1; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_GB_18030_TO_UNICODE_STATE_1: + if (nChar >= 0x30 && nChar <= 0x39) + { + nCode = nCode * 10 + (nChar - 0x30); + eState = IMPL_GB_18030_TO_UNICODE_STATE_2; + } + else if (nChar >= 0x40 && nChar <= 0x7E + || nChar >= 0x80 && nChar <= 0xFE) + { + nCode = nCode * 190 + (nChar <= 0x7E ? 
nChar - 0x40 : + nChar - 0x80 + 63); + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = pGb18030Data[nCode]; + else + goto no_output; + eState = IMPL_GB_18030_TO_UNICODE_STATE_0; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_GB_18030_TO_UNICODE_STATE_2: + if (nChar >= 0x81 && nChar <= 0xFE) + { + nCode = nCode * 126 + (nChar - 0x81); + eState = IMPL_GB_18030_TO_UNICODE_STATE_3; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + + case IMPL_GB_18030_TO_UNICODE_STATE_3: + if (nChar >= 0x30 && nChar <= 0x39) + { + nCode = nCode * 10 + (nChar - 0x30); + + /* 90 30 81 30 to E3 32 9A 35 maps to U+10000 to U+10FFFF: */ + if (nCode >= 189000 && nCode <= 1237575) + if (pDestBufEnd - pDestBufPtr >= 2) + { + nCode -= 189000; + *pDestBufPtr++ + = (sal_Unicode) + (RTL_UNICODE_START_HIGH_SURROGATES + | (nCode >> 10)); + *pDestBufPtr++ + = (sal_Unicode) + (RTL_UNICODE_START_LOW_SURROGATES + | (nCode & 0x3FF)); + } + else + goto no_output; + else + { + ImplGb180302000ToUnicodeRange const * pRange + = pGb18030Ranges; + sal_uInt32 nFirstNonRange = 0; + for (;;) + { + if (pRange->m_nNonRangeDataIndex == -1) + goto bad_input; + else if (nCode < pRange->m_nFirstLinear) + { + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ + = pGb18030Data[ + pRange->m_nNonRangeDataIndex + + (nCode - nFirstNonRange)]; + else + goto no_output; + break; + } + else if (nCode < pRange->m_nPastLinear) + { + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ + = (sal_Unicode) + (pRange->m_nFirstUnicode + + (nCode + - pRange-> + m_nFirstLinear)); + else + goto no_output; + break; + } + nFirstNonRange = (pRange++)->m_nPastLinear; + } + } + eState = IMPL_GB_18030_TO_UNICODE_STATE_0; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + break; + } + continue; + + bad_input: + switch (ImplHandleBadInputMbTextToUnicodeConversion(bUndefined, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = 
IMPL_GB_18030_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_GB_18030_TO_UNICODE_STATE_0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState != IMPL_GB_18030_TO_UNICODE_STATE_0 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputMbTextToUnicodeConversion(sal_False, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_GB_18030_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (pContext) + { + ((ImplGb18030ToUnicodeContext *) pContext)->m_eState = eState; + ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode = nCode; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + sal_Char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + /* TODO! 
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR + RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 + RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE + RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE + RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE + RTL_UNICODETOTEXT_FLAGS_NOCOMPOSITE */ + + sal_uInt32 const * pGb18030Data + = ((ImplGb18030ConverterData const *) pData)-> + m_pUnicodeToGb18030Data; + ImplUnicodeToGb180302000Range const * pGb18030Ranges + = ((ImplGb18030ConverterData const *) pData)-> + m_pUnicodeToGb18030Ranges; + sal_Unicode nHighSurrogate = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Char * pDestBufPtr = pDestBuf; + sal_Char * pDestBufEnd = pDestBuf + nDestBytes; + + if (pContext) + nHighSurrogate + = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; + + for (; nConverted < nSrcChars; ++nConverted) + { + sal_Bool bUndefined = sal_True; + sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Char) nChar; + else + goto no_output; + else if (nChar >= RTL_UNICODE_START_HIGH_SURROGATES + && nChar <= RTL_UNICODE_END_HIGH_SURROGATES) + nHighSurrogate = (sal_Unicode) nChar; + else if ((nChar < RTL_UNICODE_START_LOW_SURROGATES + || nChar > RTL_UNICODE_END_LOW_SURROGATES) + && (nChar < 0xFDD0 || nChar > 0xFDEF) + && nChar < 0xFFFE) + { + ImplUnicodeToGb180302000Range const * pRange + = pGb18030Ranges; + sal_Unicode nFirstNonRange = 0x80; + for (;;) + { + if (nChar < pRange->m_nFirstUnicode) + { + sal_uInt32 nCode + = pGb18030Data[pRange->m_nNonRangeDataIndex + + (nChar - nFirstNonRange)]; + if (pDestBufEnd - pDestBufPtr + >= (nCode <= 0xFFFF ? 
2 : 4)) + { + if (nCode > 0xFFFF) + { + *pDestBufPtr++ = (sal_Char) (nCode >> 24); + *pDestBufPtr++ = (sal_Char) (nCode >> 16 + & 0xFF); + } + *pDestBufPtr++ = (sal_Char) (nCode >> 8 & 0xFF); + *pDestBufPtr++ = (sal_Char) (nCode & 0xFF); + } + else + goto no_output; + break; + } + else if (nChar <= pRange->m_nLastUnicode) + { + if (pDestBufEnd - pDestBufPtr >= 4) + { + sal_uInt32 nCode + = pRange->m_nFirstLinear + + (nChar - pRange->m_nFirstUnicode); + *pDestBufPtr++ = (sal_Char) (nCode / 12600 + + 0x81); + *pDestBufPtr++ = (sal_Char) (nCode / 1260 % 10 + + 0x30); + *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + + 0x81); + *pDestBufPtr++ = (sal_Char) (nCode % 10 + 0x30); + } + else + goto no_output; + break; + } + nFirstNonRange = (sal_Unicode) ((pRange++)->m_nLastUnicode + + 1); + } + nHighSurrogate = 0; + } + else + { + bUndefined = sal_False; + goto bad_input; + } + else + if (nChar >= RTL_UNICODE_START_LOW_SURROGATES + && nChar <= RTL_UNICODE_END_LOW_SURROGATES) + { + sal_uInt32 nCode = (nHighSurrogate & 0x3FF) << 10 + | nChar & 0x3FF; + if ((nCode & 0xFFFF) < 0xFFFE) + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = (sal_Char) (nCode / 12600 + 0x90); + *pDestBufPtr++ = (sal_Char) (nCode / 1260 % 10 + + 0x30); + *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + 0x81); + *pDestBufPtr++ = (sal_Char) (nCode % 10 + 0x30); + nHighSurrogate = 0; + } + else + goto no_output; + else + { + bUndefined = sal_False; + goto bad_input; + } + } + else + { + bUndefined = sal_False; + goto bad_input; + } + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nHighSurrogate != 0 + && 
(nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (pContext) + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate + = nHighSurrogate; + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} diff --git a/sal/textenc/convertgb18030.h b/sal/textenc/convertgb18030.h new file mode 100644 index 000000000000..d265e46f4254 --- /dev/null +++ b/sal/textenc/convertgb18030.h @@ -0,0 +1,131 @@ +/************************************************************************* + * + * $RCSfile: convertgb18030.h,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONVERTGB18030_H +#define INCLUDED_RTL_TEXTENC_CONVERTGB18030_H + +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +#if defined __cplusplus +extern "C" { +#endif /* __cpluscplus */ + +typedef struct +{ + sal_Int32 m_nNonRangeDataIndex; + sal_uInt32 m_nFirstLinear; + sal_uInt32 m_nPastLinear; + sal_Unicode m_nFirstUnicode; +} ImplGb180302000ToUnicodeRange; + +typedef struct +{ + sal_Int32 m_nNonRangeDataIndex; + sal_Unicode m_nFirstUnicode; + sal_Unicode m_nLastUnicode; + sal_uInt32 m_nFirstLinear; +} ImplUnicodeToGb180302000Range; + +typedef struct +{ + sal_Unicode const * m_pGb18030ToUnicodeData; + ImplGb180302000ToUnicodeRange const * m_pGb18030ToUnicodeRanges; + sal_uInt32 const * m_pUnicodeToGb18030Data; + ImplUnicodeToGb180302000Range const * m_pUnicodeToGb18030Ranges; +} ImplGb18030ConverterData; + +void * ImplCreateGb18030ToUnicodeContext(void) SAL_THROW_EXTERN_C(); + +void ImplResetGb18030ToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); + +sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData, + void * pContext, + sal_Char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) + SAL_THROW_EXTERN_C(); + +sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + sal_Char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) + SAL_THROW_EXTERN_C(); + +#if defined __cplusplus +} +#endif /* __cpluscplus */ + +#endif /* INCLUDED_RTL_TEXTENC_CONVERTGB18030_H */ diff --git a/sal/textenc/convertgb18030.tab b/sal/textenc/convertgb18030.tab new 
file mode 100644 index 000000000000..b3337ad4245b --- /dev/null +++ b/sal/textenc/convertgb18030.tab @@ -0,0 +1,107 @@ +/************************************************************************* + * + * $RCSfile: convertgb18030.tab,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. 
+ * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc.. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H +#include "context.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_CONVERTGB18030_H +#include "convertgb18030.h" +#endif + +#ifndef _RTL_TENCINFO_H +#include "rtl/tencinfo.h" +#endif +#ifndef _RTL_TEXTENC_H +#include "rtl/textenc.h" +#endif + +#ifndef INCLUDED_STDDEF_H +#include <stddef.h> +#define INCLUDED_STDDEF_H +#endif + +#include "generate/gb180302000.tab" + +static ImplGb18030ConverterData const aImplGb18030ConvertData + = { aImplGb180302000ToUnicodeData, + aImplGb180302000ToUnicodeRanges, + aImplUnicodeToGb180302000Data, + aImplUnicodeToGb180302000Ranges }; + +static ImplTextEncodingData const aImplGb18030TextEncodingData + = { { &aImplGb18030ConvertData, + &ImplConvertGb18030ToUnicode, + &ImplConvertUnicodeToGb18030, + &ImplCreateGb18030ToUnicodeContext, + &ImplDestroyContext, + &ImplResetGb18030ToUnicodeContext, + &ImplCreateUnicodeToTextContext, + &ImplDestroyContext, + &ImplResetUnicodeToTextContext }, + RTL_TEXTENCODING_GB_18030, + 1, + 4, + 2, + 0, /* TODO! */ + NULL, /* TODO! 
*/ + "UTF-8", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_CHINESE_SIMPLIFIED */ diff --git a/sal/textenc/gettextencodingdata.h b/sal/textenc/gettextencodingdata.h new file mode 100644 index 000000000000..57d5d817195d --- /dev/null +++ b/sal/textenc/gettextencodingdata.h @@ -0,0 +1,87 @@ +/************************************************************************* + * + * $RCSfile: gettextencodingdata.h,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. 
You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#ifndef INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H +#define INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H + +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif + +#ifndef _RTL_TEXTENC_H +#include "rtl/textenc.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" +#endif + +#if defined __cplusplus +extern "C" { +#endif /* __cplusplus */ + +ImplTextEncodingData const * +Impl_getTextEncodingData(rtl_TextEncoding nEncoding) SAL_THROW_EXTERN_C(); + +#if defined __cplusplus +} +#endif /* __cplusplus */ + +#endif /* INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H */ diff --git a/sal/textenc/makefile.mk b/sal/textenc/makefile.mk index 1a996483b3e1..c55034166c39 100644 --- a/sal/textenc/makefile.mk +++ b/sal/textenc/makefile.mk @@ -2,9 +2,9 @@ # # $RCSfile: makefile.mk,v $ # -# $Revision: 1.2 $ +# $Revision: 1.3 $ # -# last change: $Author: hr $ $Date: 2001-05-30 12:20:10 $ +# last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ # # The Contents of this file are made available subject to the terms of # either of the following licenses @@ -57,49 +57,54 @@ # Contributor(s): _______________________________________ # # -# 
#************************************************************************* -PRJ=.. +PRJ = .. +PRJNAME = sal +TARGET = textenc + +.INCLUDE: settings.mk -PRJNAME=sal -TARGET=textenc +SLOFILES = \ + $(SLO)$/context.obj \ + $(SLO)$/converter.obj \ + $(SLO)$/converteuctw.obj \ + $(SLO)$/convertgb18030.obj \ + $(SLO)$/tcvtbyte.obj \ + $(SLO)$/tcvtmb.obj \ + $(SLO)$/tcvtutf7.obj \ + $(SLO)$/tcvtutf8.obj \ + $(SLO)$/tenchelp.obj \ + $(SLO)$/tencinfo.obj \ + $(SLO)$/textcvt.obj \ + $(SLO)$/textenc.obj -# --- Settings ----------------------------------------------------- +.IF "$(UPDATER)" != "" -.INCLUDE : svpre.mk -.INCLUDE : settings.mk -.INCLUDE : sv.mk +OBJFILES = \ + $(OBJ)$/context.obj \ + $(OBJ)$/converter.obj \ + $(OBJ)$/converteuctw.obj \ + $(OBJ)$/convertgb18030.obj \ + $(OBJ)$/tcvtbyte.obj \ + $(OBJ)$/tcvtmb.obj \ + $(OBJ)$/tcvtutf7.obj \ + $(OBJ)$/tcvtutf8.obj \ + $(OBJ)$/tenchelp.obj \ + $(OBJ)$/tencinfo.obj \ + $(OBJ)$/textcvt.obj \ + $(OBJ)$/textenc.obj -# --- Files -------------------------------------------------------- +.ENDIF # UPDATER -SLOFILES= $(SLO)$/textenc.obj \ - $(SLO)$/tencinfo.obj \ - $(SLO)$/tenchelp.obj \ - $(SLO)$/textcvt.obj \ - $(SLO)$/tcvtbyte.obj \ - $(SLO)$/tcvtmb.obj \ - $(SLO)$/tcvtutf7.obj \ - $(SLO)$/tcvtutf8.obj +# Optimization off on Solaris Intel due to internal compiler error; to be +# reevaluated after compiler upgrade: +.IF "$(OS)$(CPU)" == "SOLARISI" -.IF "$(UPDATER)"!="" -OBJFILES= $(OBJ)$/textenc.obj \ - $(OBJ)$/tencinfo.obj \ - $(OBJ)$/tenchelp.obj \ - $(OBJ)$/textcvt.obj \ - $(OBJ)$/tcvtbyte.obj \ - $(OBJ)$/tcvtmb.obj \ - $(OBJ)$/tcvtutf7.obj \ - $(OBJ)$/tcvtutf8.obj -.ENDIF +NOOPTFILES = \ + $(OBJ)$/textenc.obj \ + $(SLO)$/textenc.obj -# Optimization of on Solaris Intel due to internal compiler error -# To be reevaluated after compiler upgrade -.IF "$(OS)$(CPU)"=="SOLARISI" -NOOPTFILES= \ - $(SLO)$/textenc.obj \ - $(OBJ)$/textenc.obj -.ENDIF -# --- Targets ------------------------------------------------------ +.ENDIF # 
OS, CPU, SOLARISI -.INCLUDE : target.mk +.INCLUDE: target.mk diff --git a/sal/textenc/tcvtarb1.tab b/sal/textenc/tcvtarb1.tab index 2c3a0948f722..a0e55e1fb715 100644 --- a/sal/textenc/tcvtarb1.tab +++ b/sal/textenc/tcvtarb1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtarb1.tab,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2001-07-16 11:04:34 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -238,46 +238,27 @@ static ImplByteConvertData const aImplIBM862ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM862TextConverterData = -{ - &aImplIBM862ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM862TextConverter = -{ - &aImplIBM862TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM862TextEncodingData = -{ - &aImplIBM862TextConverter, - RTL_TEXTENCODING_IBM_862, - 1, 1, 1, - 177, - 862, - 5, - (const sal_Char*)"iso8859-8", - (const sal_Char*)"DOS-862", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_HEBREW -}; +static ImplTextEncodingData const aImplIBM862TextEncodingData + = { { &aImplIBM862ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_862, + 1, + 1, + 1, + 177, + "iso8859-8", + "DOS-862", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MIME + | RTL_TEXTENCODING_INFO_R2L }; + /* DOS/OS2, SCRIPT_HEBREW, pc code page 862, mac encoding 5 */ /* 
======================================================================= */ @@ -416,45 +397,25 @@ static ImplByteConvertData const aImplIBM864ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM864TextConverterData = -{ - &aImplIBM864ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM864TextConverter = -{ - &aImplIBM864TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM864TextEncodingData = -{ - &aImplIBM864TextConverter, - RTL_TEXTENCODING_IBM_864, - 1, 1, 1, - 178, - 864, - 4, - (const sal_Char*)"iso8859-6", - (const sal_Char*)"iso-8859-6", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_ARABIC -}; +static ImplTextEncodingData const aImplIBM864TextEncodingData + = { { &aImplIBM864ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_864, + 1, + 1, + 1, + 178, + "iso8859-6", + "iso-8859-6", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_R2L }; + /* DOS/OS2, SCRIPT_ARABIC, pc code page 864, mac encoding 4 */ /* ======================================================================= */ @@ -588,46 +549,27 @@ static ImplByteConvertData const aImplMS1255ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1255TextConverterData = -{ - &aImplMS1255ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1255TextConverter = -{ - 
&aImplMS1255TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1255TextEncodingData = -{ - &aImplMS1255TextConverter, - RTL_TEXTENCODING_MS_1255, - 1, 1, 1, - 177, - 862, - 5, - (const sal_Char*)"iso8859-8", - (const sal_Char*)"windows-1255", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_HEBREW -}; +static ImplTextEncodingData const aImplMS1255TextEncodingData + = { { &aImplMS1255ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1255, + 1, + 1, + 1, + 177, + "iso8859-8", + "windows-1255", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MIME + | RTL_TEXTENCODING_INFO_R2L }; + /* WIN, SCRIPT_HEBREW, pc code page 862, mac encoding 5 */ /* ======================================================================= */ @@ -779,46 +721,27 @@ static ImplByteConvertData const aImplMS1256ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1256TextConverterData = -{ - &aImplMS1256ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1256TextConverter = -{ - &aImplMS1256TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1256TextEncodingData = -{ - &aImplMS1256TextConverter, - RTL_TEXTENCODING_MS_1256, - 1, 1, 1, - 178, - 864, - 4, - (const sal_Char*)"iso8859-6", - (const sal_Char*)"windows-1256", - RTL_TEXTENCODING_INFO_ASCII | - 
RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_ARABIC -}; +static ImplTextEncodingData const aImplMS1256TextEncodingData + = { { &aImplMS1256ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1256, + 1, + 1, + 1, + 178, + "iso8859-6", + "windows-1256", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MIME + | RTL_TEXTENCODING_INFO_R2L }; + /* WIN, SCRIPT_ARABIC, pc code page 864, mac encoding 4 */ /* ======================================================================= */ @@ -899,45 +822,27 @@ static ImplByteConvertData const aImplISO88596ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88596TextConverterData = -{ - &aImplISO88596ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88596TextConverter = -{ - &aImplISO88596TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88596TextEncodingData = -{ - &aImplISO88596TextConverter, - RTL_TEXTENCODING_ISO_8859_8, - 1, 1, 1, - 178, - 864, - 4, - (const sal_Char*)"iso8859-6", - (const sal_Char*)"iso-8859-6", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_ARABIC -}; +static ImplTextEncodingData const aImplISO88596TextEncodingData + = { { &aImplISO88596ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_6, /* was ISO_8859_8: copy-paste slip from the Hebrew table */ + 1, + 1, + 1, + 178, + "iso8859-6", + "iso-8859-6", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_R2L + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_ARABIC, 
pc code page 864, mac encoding 4 */ /* ======================================================================= */ @@ -1040,42 +945,24 @@ static ImplByteConvertData const aImplISO88598ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88598TextConverterData = -{ - &aImplISO88598ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88598TextConverter = -{ - &aImplISO88598TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88598TextEncodingData = -{ - &aImplISO88598TextConverter, - RTL_TEXTENCODING_ISO_8859_8, - 1, 1, 1, - 177, - 862, - 5, - (const sal_Char*)"iso8859-8", - (const sal_Char*)"iso-8859-8", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_R2L | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_HEBREW -}; +static ImplTextEncodingData const aImplISO88598TextEncodingData + = { { &aImplISO88598ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_8, + 1, + 1, + 1, + 177, + "iso8859-8", + "iso-8859-8", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_R2L + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_HEBREW, pc code page 862, mac encoding 5 */ diff --git a/sal/textenc/tcvtbyte.c b/sal/textenc/tcvtbyte.c index 0e1e2d5e821b..9d607194532b 100644 --- a/sal/textenc/tcvtbyte.c +++ b/sal/textenc/tcvtbyte.c @@ -2,9 +2,9 @@ * * $RCSfile: tcvtbyte.c,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:28 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of 
the following licenses @@ -59,17 +59,451 @@ * ************************************************************************/ -#define _RTL_TCVTBYTE_C - -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif + #ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> +#include "rtl/textcvt.h" #endif /* ======================================================================= */ +#define IMPL_MAX_REPLACECHAR 5 + +sal_uInt16 ImplGetReplaceChar(sal_Unicode c); + +sal_uInt16 const * ImplGetReplaceString(sal_Unicode c); + +/* ----------------------------------------------------------------------- */ + +typedef struct +{ + sal_uInt16 mnUniChar; + sal_uInt16 mnReplaceChar; +} ImplReplaceCharData; + +static ImplReplaceCharData const aImplRepCharTab[] = +{ + { 0x00A0, 0x0020 }, /* NO-BREAK-SPACE */ + { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */ + { 0x00B7, 0x0045 }, /* MIDDLE DOT */ + { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */ + { 0x00D7, 0x002A }, /* MULTIPLIKATION SIGN */ + { 0x00F7, 0x002F }, /* DIVISION SIGN */ + { 0x2000, 0x0020 }, /* EN QUAD */ + { 0x2001, 0x0020 }, /* EM QUAD */ + { 0x2002, 0x0020 }, /* EN SPACE */ + { 0x2003, 0x0020 }, /* EM SPACE */ + { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */ + { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */ + { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */ + { 0x2007, 0x0020 }, /* FIGURE SPACE */ + { 0x2008, 0x0020 }, /* PUNCTATION SPACE */ + { 0x2009, 0x0020 }, /* THIN SPACE */ + { 0x200A, 0x0020 }, /* HAIR SPACE */ + { 0x2010, 0x002D }, /* HYPHEN */ + { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */ + { 0x2012, 0x002D }, /* FIGURE DASH */ + { 0x2013, 0x002D }, /* EN DASH */ + { 0x2014, 0x002D }, /* EM DASH */ + { 0x2015, 0x002D }, /* HORIZONTAL BAR */ + { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */ + { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */ + { 0x201A, 0x002C }, /* SINGLE LOW-9 QUOTATION MARK */ + { 0x201B, 0x0027 }, /* SINGLE HIGH-RESERVED-9 QUOTATION MARK 
*/ + { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */ + { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */ + { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */ + { 0x201F, 0x0022 }, /* DOUBLE HIGH-RESERVED-9 QUOTATION MARK */ + { 0x2022, 0x002D }, /* BULLET */ + { 0x2023, 0x002D }, /* TRIANGULAR BULLET */ + { 0x2024, 0x002D }, /* ONE DOT LEADER */ + { 0x2027, 0x002D }, /* HYPHENATION POINT */ + { 0x2028, 0x000A }, /* LINE SEPARATOR */ + { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */ + { 0x2032, 0x0027 }, /* PRIME */ + { 0x2033, 0x0022 }, /* DOUBLE PRIME */ + { 0x2035, 0x0027 }, /* RESERVED PRIME */ + { 0x2036, 0x0022 }, /* RESERVED DOUBLE PRIME */ + { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ + { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ + { 0x2043, 0x002D }, /* HYPHEN BULLET */ + { 0x2044, 0x002F }, /* FRACTION SLASH */ + { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */ + { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */ + { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */ + { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */ + { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */ + { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */ + { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */ + { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */ + { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */ + { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */ + { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */ + { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */ + { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */ + { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */ + { 0x2215, 0x002F }, /* DIVISION SLASH */ + { 0x2217, 0x002A }, /* ASTERIX OPERATOR */ + { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF03, 0x0023 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF05, 0x0025 }, /* 
FULLWIDTH ASCII FORMS */ + { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF09, 0x0029 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2B, 0x004B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII 
FORMS */ + { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF31, 0x0051 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF33, 0x0053 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */ + { 
0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5B, 0x007B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7B, 0x30B5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7D, 0x30B9 }, /* 
HALFWIDTH KATAKANA FORMS */ + { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF82, 0x30C4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA1, 0x3131 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA3, 
0x3133 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA8, 0x3138 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCD, 0x3158 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCF, 0x315A }, 
/* HALFWIDTH HANGUL FORMS */ + { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD5, 0x315E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */ + { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */ + { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */ + { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */ + { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */ + { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */ + { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */ + { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */ + { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */ + { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */ + { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */ + { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */ + { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */ + { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */ + { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */ +}; + +sal_uInt16 ImplGetReplaceChar( sal_Unicode c ) /* Binary search of aImplRepCharTab by mnUniChar; returns the entry's mnReplaceChar, or 0 when c has no entry. */ +{ + sal_uInt16 nLow; + sal_uInt16 nHigh; + sal_uInt16 nMid; + sal_uInt16 nCompareChar; + const ImplReplaceCharData* pCharData; + + nLow = 0; + nHigh = (sizeof( aImplRepCharTab )/sizeof( ImplReplaceCharData ))-1; /* index of the last table row */ + do + { + nMid = (nLow+nHigh)/2; + pCharData = aImplRepCharTab+nMid; + nCompareChar = pCharData->mnUniChar; + if ( c < nCompareChar ) + { + if ( !nMid ) /* nHigh is unsigned: stop here instead of letting nMid-1 wrap around */ + break; + nHigh = nMid-1; + } + else + { + if ( c > nCompareChar ) + nLow = nMid+1; + else + return pCharData->mnReplaceChar; /* exact match */ + } + } + while ( nLow <= nHigh ); + + return 0; /* not found */ +} + +/* ----------------------------------------------------------------------- */ + +typedef struct +{ + sal_uInt16 mnUniChar; + sal_uInt16 
maReplaceChars[IMPL_MAX_REPLACECHAR]; +} ImplReplaceCharStrData; + +static ImplReplaceCharStrData const aImplRepCharStrTab[] = /* rows must stay in ascending mnUniChar order: ImplGetReplaceString binary-searches this table */ +{ + { 0x00A9, { 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */ + { 0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING-DOUBLE ANGLE QUOTATION MARK */ + { 0x00AE, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN (key was mistyped as 0x0AE0, which also broke the sort order) */ + { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING-DOUBLE ANGLE QUOTATION MARK */ + { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */ + { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */ + { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */ + { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */ + { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */ + { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */ + { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */ + { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */ + { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSIS */ + { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPLE PRIME */ + { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* REVERSED TRIPLE PRIME */ + { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */ + { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */ + { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */ + { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRDS */ + { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE FIFTH */ + { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTHS */ + { 0x2157, { 0x0033, 
0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTHS */ + { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION FOUR FIFTHS */ + { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE SIXTH */ + { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTHS */ + { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */ + { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTHS */ + { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTHS */ + { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTHS */ + { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */ + { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */ + { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */ + { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */ + { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */ + { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */ + { 0x2167, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT (U+2167; key was off by one) */ + { 0x2168, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE (U+2168; U+2169 is TEN, handled in aImplRepCharTab) */ + { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */ + { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */ + { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */ + { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */ + { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */ + { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SIX */ + { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */ + { 
0x2178, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT */ + { 0x2179, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL NINE */ + { 0x217A, { 0x0078, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */ + { 0x217B, { 0x0058, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE */ +}; + +const sal_uInt16* ImplGetReplaceString( sal_Unicode c ) +{ + sal_uInt16 nLow; + sal_uInt16 nHigh; + sal_uInt16 nMid; + sal_uInt16 nCompareChar; + const ImplReplaceCharStrData* pCharData; + + nLow = 0; + nHigh = (sizeof( aImplRepCharStrTab )/sizeof( ImplReplaceCharStrData ))-1; + do + { + nMid = (nLow+nHigh)/2; + pCharData = aImplRepCharStrTab+nMid; + nCompareChar = pCharData->mnUniChar; + if ( c < nCompareChar ) + { + if ( !nMid ) + break; + nHigh = nMid-1; + } + else + { + if ( c > nCompareChar ) + nLow = nMid+1; + else + return pCharData->maReplaceChars; + } + } + while ( nLow <= nHigh ); + + return 0; +} + +/* ======================================================================= */ + sal_Size ImplSymbolToUnicode( const ImplTextConverterData* pData, void* pContext, const sal_Char* pSrcBuf, sal_Size nSrcBytes, @@ -115,7 +549,6 @@ sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ) { - int nAction; sal_Unicode c; sal_Char* pEndDestBuf; const sal_Unicode* pEndSrcBuf; @@ -157,11 +590,13 @@ sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, /* Handle undefined and surrogates characters */ /* (all surrogates characters are undefined) */ - nAction = ImplHandleUndefinedUnicodeToTextChar( pData, - &pSrcBuf, pEndSrcBuf, - &pDestBuf, pEndDestBuf, - nFlags, pInfo ); - if ( nAction == IMPL_TEXTCVT_BREAK ) + if (!ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) break; } } @@ -181,7 +616,7 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, { 
sal_uChar c; sal_Unicode cConv; - const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)(pData->mpConvertTables); + const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; sal_Unicode* pEndDestBuf; const sal_Char* pEndSrcBuf; @@ -215,7 +650,7 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, continue; } else - cConv = ImplGetUndefinedUnicodeChar( c, nFlags, pData ); + cConv = ImplGetUndefinedUnicodeChar(c, nFlags); } } @@ -304,10 +739,9 @@ sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ) { - int nAction; sal_Unicode c; sal_Char cConv; - const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)(pData->mpConvertTables); + const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; sal_Char* pEndDestBuf; const sal_Unicode* pEndSrcBuf; int i; @@ -378,14 +812,16 @@ sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, /* Handle undefined and surrogates characters */ /* (all surrogates characters are undefined) */ - nAction = ImplHandleUndefinedUnicodeToTextChar( pData, - &pSrcBuf, pEndSrcBuf, - &pDestBuf, pEndDestBuf, - nFlags, pInfo ); - if ( nAction == IMPL_TEXTCVT_BREAK ) - break; - else + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) continue; + else + break; } } } @@ -404,4 +840,3 @@ sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); return (nDestBytes - (pEndDestBuf-pDestBuf)); } - diff --git a/sal/textenc/tcvteas1.tab b/sal/textenc/tcvteas1.tab index b0a29e00ae3e..b939c0fa0cb3 100644 --- a/sal/textenc/tcvteas1.tab +++ b/sal/textenc/tcvteas1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvteas1.tab,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2001-07-16 11:19:26 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * 
The Contents of this file are made available subject to the terms of * either of the following licenses @@ -155,46 +155,25 @@ static ImplByteConvertData const aImplMS874ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS874TextConverterData = -{ - &aImplMS874ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS874TextConverter = -{ - &aImplMS874TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS874TextEncodingData = -{ - &aImplMS874TextConverter, - RTL_TEXTENCODING_MS_874, - 1, 1, 1, - 222, - 874, - 21, - (const sal_Char*)"iso8859-1", /* ??? */ - (const sal_Char*)"windows-874", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_THAI -}; +static ImplTextEncodingData const aImplMS874TextEncodingData + = { { &aImplMS874ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_874, + 1, + 1, + 1, + 222, + "iso8859-1", /* TODO! correct? 
*/ + "windows-874", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN/DOS/OS2, SCRIPT_THAI, pc code page 874, mac encoding 21 */ /* ======================================================================= */ @@ -314,42 +293,22 @@ static ImplByteConvertData const aImplMS1258ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1258TextConverterData = -{ - &aImplMS1258ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1258TextConverter = -{ - &aImplMS1258TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1258TextEncodingData = -{ - &aImplMS1258TextConverter, - RTL_TEXTENCODING_MS_1258, - 1, 1, 1, - 163, - 0, - 30, - (const sal_Char*)"iso8859-1", /* ??? */ - (const sal_Char*)"windows-1258", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_VIETNAMESE -}; +static ImplTextEncodingData const aImplMS1258TextEncodingData + = { { &aImplMS1258ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1258, + 1, + 1, + 1, + 163, + "iso8859-1", /* TODO! correct? 
*/ + "windows-1258", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_VIETNAMESE, mac encoding 30 */ diff --git a/sal/textenc/tcvtest1.tab b/sal/textenc/tcvtest1.tab index 51c281238b49..17b618a313cc 100644 --- a/sal/textenc/tcvtest1.tab +++ b/sal/textenc/tcvtest1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtest1.tab,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2001-07-16 11:20:11 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -204,44 +204,25 @@ static ImplByteConvertData const aImplIBM737ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM737TextConverterData = -{ - &aImplIBM737ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM737TextConverter = -{ - &aImplIBM737TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM737TextEncodingData = -{ - &aImplIBM737TextConverter, - RTL_TEXTENCODING_IBM_737, - 1, 1, 1, - 161, - 737, - 6, - (const sal_Char*)"iso8859-7", - (const sal_Char*)"iso-8859-7", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_GREEK -}; +static ImplTextEncodingData const aImplIBM737TextEncodingData + = { { &aImplIBM737ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_737, + 1, + 1, + 1, + 161, + "iso8859-7", + "iso-8859-7", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_GREEK, pc code page 737, mac encoding 6 */ /* 
======================================================================= */ @@ -430,44 +411,25 @@ static ImplByteConvertData const aImplIBM775ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM775TextConverterData = -{ - &aImplIBM775ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM775TextConverter = -{ - &aImplIBM775TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM775TextEncodingData = -{ - &aImplIBM775TextConverter, - RTL_TEXTENCODING_IBM_775, - 1, 1, 1, - 186, - 775, - 29, /* ??? */ - (const sal_Char*)"iso8859-4", - (const sal_Char*)"iso-8859-4", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_BALTIC -}; +static ImplTextEncodingData const aImplIBM775TextEncodingData + = { { &aImplIBM775ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_775, + 1, + 1, + 1, + 186, + "iso8859-4", + "iso-8859-4", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_BALTIC, pc code page 775, mac encoding 29 (?) 
*/ /* ======================================================================= */ @@ -656,45 +618,25 @@ static ImplByteConvertData const aImplIBM852ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM852TextConverterData = -{ - &aImplIBM852ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM852TextConverter = -{ - &aImplIBM852TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM852TextEncodingData = -{ - &aImplIBM852TextConverter, - RTL_TEXTENCODING_IBM_852, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"ibm852", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplIBM852TextEncodingData + = { { &aImplIBM852ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_852, + 1, + 1, + 1, + 238, + "iso8859-2", + "ibm852", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* DOS/OS2, SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -813,44 +755,25 @@ static ImplByteConvertData const aImplIBM855ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM855TextConverterData = -{ - &aImplIBM855ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM855TextConverter = -{ - 
&aImplIBM855TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM855TextEncodingData = -{ - &aImplIBM855TextConverter, - RTL_TEXTENCODING_IBM_855, - 1, 1, 1, - 204, - 855, - 7, - (const sal_Char*)"iso8859-5", - (const sal_Char*)"iso-8859-5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_CYRILLIC -}; +static ImplTextEncodingData const aImplIBM855TextEncodingData + = { { &aImplIBM855ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_855, + 1, + 1, + 1, + 204, + "iso8859-5", + "iso-8859-5", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_CYRILLIC, pc code page 855, mac encoding 7 */ /* ======================================================================= */ @@ -968,44 +891,25 @@ static ImplByteConvertData const aImplIBM857ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM857TextConverterData = -{ - &aImplIBM857ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM857TextConverter = -{ - &aImplIBM857TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM857TextEncodingData = -{ - &aImplIBM857TextConverter, - RTL_TEXTENCODING_IBM_857, - 1, 1, 1, - 162, - 857, - 35, - (const sal_Char*)"iso8859-9", - (const sal_Char*)"iso-8859-9", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_TURKISH -}; +static ImplTextEncodingData const aImplIBM857TextEncodingData + = { { 
&aImplIBM857ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_857, + 1, + 1, + 1, + 162, + "iso8859-9", + "iso-8859-9", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_TURKISH, pc code page 857, mac encoding 35 */ /* ======================================================================= */ @@ -1144,45 +1048,25 @@ static ImplByteConvertData const aImplIBM866ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM866TextConverterData = -{ - &aImplIBM866ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM866TextConverter = -{ - &aImplIBM866TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM866TextEncodingData = -{ - &aImplIBM866TextConverter, - RTL_TEXTENCODING_IBM_866, - 1, 1, 1, - 204, - 866, - 7, - (const sal_Char*)"iso8859-5", - (const sal_Char*)"cp866", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM866TextEncodingData + = { { &aImplIBM866ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_866, + 1, + 1, + 1, + 204, + "iso8859-5", + "cp866", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 866, mac encoding 7 */ /* ======================================================================= */ @@ -1311,44 +1195,25 @@ static ImplByteConvertData const aImplIBM869ByteCvtData = /* ----------------------------------------------------------------------- */ -static 
ImplTextConverterData const aImplIBM869TextConverterData = -{ - &aImplIBM869ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM869TextConverter = -{ - &aImplIBM869TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM869TextEncodingData = -{ - &aImplIBM869TextConverter, - RTL_TEXTENCODING_IBM_869, - 1, 1, 1, - 161, - 869, - 6, - (const sal_Char*)"iso8859-7", - (const sal_Char*)"iso-8859-7", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM869TextEncodingData + = { { &aImplIBM869ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_869, + 1, + 1, + 1, + 161, + "iso8859-7", + "iso-8859-7", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 869, mac encoding 6 */ /* ======================================================================= */ @@ -1532,45 +1397,25 @@ static ImplByteConvertData const aImplMS1250ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1250TextConverterData = -{ - &aImplMS1250ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1250TextConverter = -{ - &aImplMS1250TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1250TextEncodingData = -{ - &aImplMS1250TextConverter, - 
RTL_TEXTENCODING_MS_1250, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"windows-1250", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplMS1250TextEncodingData + = { { &aImplMS1250ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1250, + 1, + 1, + 1, + 238, + "iso8859-2", + "windows-1250", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -1688,45 +1533,25 @@ static ImplByteConvertData const aImplMS1251ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1251TextConverterData = -{ - &aImplMS1251ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1251TextConverter = -{ - &aImplMS1251TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1251TextEncodingData = -{ - &aImplMS1251TextConverter, - RTL_TEXTENCODING_MS_1251, - 1, 1, 1, - 204, - 866, /* ??? 
855 */ - 7, - (const sal_Char*)"iso8859-5", - (const sal_Char*)"windows-1251", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_CYRILLIC -}; +static ImplTextEncodingData const aImplMS1251TextEncodingData + = { { &aImplMS1251ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1251, + 1, + 1, + 1, + 204, + "iso8859-5", + "windows-1251", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_CYRILLIC, pc code page 866 (855?), mac encoding 7 */ /* ======================================================================= */ @@ -1847,45 +1672,25 @@ static ImplByteConvertData const aImplMS1253ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1253TextConverterData = -{ - &aImplMS1253ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1253TextConverter = -{ - &aImplMS1253TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1253TextEncodingData = -{ - &aImplMS1253TextConverter, - RTL_TEXTENCODING_MS_1253, - 1, 1, 1, - 161, - 869, - 6, - (const sal_Char*)"iso8859-7", - (const sal_Char*)"windows-1253", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_GREEK -}; +static ImplTextEncodingData const aImplMS1253TextEncodingData + = { { &aImplMS1253ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1253, + 1, + 1, + 1, + 161, + "iso8859-7", + "windows-1253", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, 
SCRIPT_GREEK, pc code page 869, mac encoding 6 */ /* ======================================================================= */ @@ -1999,45 +1804,25 @@ static ImplByteConvertData const aImplMS1254ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1254TextConverterData = -{ - &aImplMS1254ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1254TextConverter = -{ - &aImplMS1254TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1254TextEncodingData = -{ - &aImplMS1254TextConverter, - RTL_TEXTENCODING_MS_1254, - 1, 1, 1, - 162, - 857, - 35, - (const sal_Char*)"iso8859-9", - (const sal_Char*)"windows-1254", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_TURKISH -}; +static ImplTextEncodingData const aImplMS1254TextEncodingData + = { { &aImplMS1254ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1254, + 1, + 1, + 1, + 162, + "iso8859-9", + "windows-1254", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_TURKISH, pc code page 857, mac encoding 35 */ /* ======================================================================= */ @@ -2156,45 +1941,25 @@ static ImplByteConvertData const aImplMS1257ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1257TextConverterData = -{ - &aImplMS1257ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const 
aImplMS1257TextConverter = -{ - &aImplMS1257TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1257TextEncodingData = -{ - &aImplMS1257TextConverter, - RTL_TEXTENCODING_MS_1257, - 1, 1, 1, - 186, - 775, - 29, - (const sal_Char*)"iso8859-4", - (const sal_Char*)"windows-1257", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_BALTIC -}; +static ImplTextEncodingData const aImplMS1257TextEncodingData + = { { &aImplMS1257ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1257, + 1, + 1, + 1, + 186, + "iso8859-4", + "windows-1257", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_BALTIC, pc code page 775, mac encoding 29 */ /* ======================================================================= */ @@ -2345,44 +2110,25 @@ static ImplByteConvertData const aImplISO88592ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88592TextConverterData = -{ - &aImplISO88592ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88592TextConverter = -{ - &aImplISO88592TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88592TextEncodingData = -{ - &aImplISO88592TextConverter, - RTL_TEXTENCODING_ISO_8859_2, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"iso-8859-2", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - 
SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplISO88592TextEncodingData + = { { &aImplISO88592ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_2, + 1, + 1, + 1, + 238, + "iso8859-2", + "iso-8859-2", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -2526,44 +2272,25 @@ static ImplByteConvertData const aImplISO88593ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88593TextConverterData = -{ - &aImplISO88593ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88593TextConverter = -{ - &aImplISO88593TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88593TextEncodingData = -{ - &aImplISO88593TextConverter, - RTL_TEXTENCODING_ISO_8859_3, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-3", - (const sal_Char*)"iso-8859-3", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplISO88593TextEncodingData + = { { &aImplISO88593ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_3, + 1, + 1, + 1, + 0, + "iso8859-3", + "iso-8859-3", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -2714,44 +2441,25 @@ static ImplByteConvertData const aImplISO88594ByteCvtData 
= /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88594TextConverterData = -{ - &aImplISO88594ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88594TextConverter = -{ - &aImplISO88594TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88594TextEncodingData = -{ - &aImplISO88594TextConverter, - RTL_TEXTENCODING_ISO_8859_4, - 1, 1, 1, - 186, - 775, - 29, /* ??? */ - (const sal_Char*)"iso8859-4", - (const sal_Char*)"iso-8859-4", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_BALTIC -}; +static ImplTextEncodingData const aImplISO88594TextEncodingData + = { { &aImplISO88594ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_4, + 1, + 1, + 1, + 186, + "iso8859-4", + "iso-8859-4", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_BALTIC, pc code page 775, mac encoding 29 (?) 
*/ /* ======================================================================= */ @@ -2832,44 +2540,25 @@ static ImplByteConvertData const aImplISO88595ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88595TextConverterData = -{ - &aImplISO88595ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88595TextConverter = -{ - &aImplISO88595TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88595TextEncodingData = -{ - &aImplISO88595TextConverter, - RTL_TEXTENCODING_ISO_8859_5, - 1, 1, 1, - 204, - 866, /* ??? 855 */ - 7, - (const sal_Char*)"iso8859-5", - (const sal_Char*)"iso-8859-5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_CYRILLIC -}; +static ImplTextEncodingData const aImplISO88595TextEncodingData + = { { &aImplISO88595ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_5, + 1, + 1, + 1, + 204, + "iso8859-5", + "iso-8859-5", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_CYRILLIC, pc code page 866 (855?), mac encoding 7 */ /* ======================================================================= */ @@ -2963,44 +2652,25 @@ static ImplByteConvertData const aImplISO88597ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88597TextConverterData = -{ - &aImplISO88597ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88597TextConverter = -{ - 
&aImplISO88597TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88597TextEncodingData = -{ - &aImplISO88597TextConverter, - RTL_TEXTENCODING_ISO_8859_7, - 1, 1, 1, - 161, - 869, - 6, - (const sal_Char*)"iso8859-7", - (const sal_Char*)"iso-8859-7", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_GREEK -}; +static ImplTextEncodingData const aImplISO88597TextEncodingData + = { { &aImplISO88597ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_7, + 1, + 1, + 1, + 161, + "iso8859-7", + "iso-8859-7", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_GREEK, pc code page 869, mac encoding 6 */ /* ======================================================================= */ @@ -3083,44 +2753,25 @@ static ImplByteConvertData const aImplISO88599ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88599TextConverterData = -{ - &aImplISO88599ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88599TextConverter = -{ - &aImplISO88599TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88599TextEncodingData = -{ - &aImplISO88599TextConverter, - RTL_TEXTENCODING_ISO_8859_9, - 1, 1, 1, - 162, - 857, - 35, - (const sal_Char*)"iso8859-9", - (const sal_Char*)"iso-8859-9", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_TURKISH -}; +static ImplTextEncodingData const 
aImplISO88599TextEncodingData + = { { &aImplISO88599ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_9, + 1, + 1, + 1, + 162, + "iso8859-9", + "iso-8859-9", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_TURKISH, pc code page 857, mac encoding 35 */ /* ======================================================================= */ @@ -3244,44 +2895,25 @@ static ImplByteConvertData const aImplISO885910ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO885910TextConverterData = -{ - &aImplISO885910ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO885910TextConverter = -{ - &aImplISO885910TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO885910TextEncodingData = -{ - &aImplISO885910TextConverter, - RTL_TEXTENCODING_ISO_8859_10, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-10", - (const sal_Char*)"iso-8859-10", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplISO885910TextEncodingData + = { { &aImplISO885910ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_10, + 1, + 1, + 1, + 238, + "iso8859-10", + "iso-8859-10", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -3432,44 +3064,25 @@ static ImplByteConvertData const aImplISO885913ByteCvtData = /* 
----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO885913TextConverterData = -{ - &aImplISO885913ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO885913TextConverter = -{ - &aImplISO885913TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO885913TextEncodingData = -{ - &aImplISO885913TextConverter, - RTL_TEXTENCODING_ISO_8859_13, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-13", - (const sal_Char*)"iso-8859-13", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplISO885913TextEncodingData + = { { &aImplISO885913ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_13, + 1, + 1, + 1, + 238, + "iso8859-13", + "iso-8859-13", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -3657,44 +3270,25 @@ static ImplByteConvertData const aImplAPPLECENTEUROByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLECENTEUROTextConverterData = -{ - &aImplAPPLECENTEUROByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLECENTEUROTextConverter = -{ - &aImplAPPLECENTEUROTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* 
----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLECENTEUROTextEncodingData = -{ - &aImplAPPLECENTEUROTextConverter, - RTL_TEXTENCODING_APPLE_CENTEURO, - 1, 1, 1, - 238, - 852, - 29, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"iso-8859-2", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplAPPLECENTEUROTextEncodingData + = { { &aImplAPPLECENTEUROByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_CENTEURO, + 1, + 1, + 1, + 238, + "iso8859-2", + "iso-8859-2", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_EASTEUROPE, pc code page 852, mac encoding 29 */ /* ======================================================================= */ @@ -3825,44 +3419,25 @@ static ImplByteConvertData const aImplAPPLECROATIANByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLECROATIANTextConverterData = -{ - &aImplAPPLECROATIANByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLECROATIANTextConverter = -{ - &aImplAPPLECROATIANTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLECROATIANTextEncodingData = -{ - &aImplAPPLECROATIANTextConverter, - RTL_TEXTENCODING_APPLE_CROATIAN, - 1, 1, 1, - 238, - 852, - 36, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"iso-8859-2", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplAPPLECROATIANTextEncodingData + = { { &aImplAPPLECROATIANByteCvtData, + 
ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_CROATIAN, + 1, + 1, + 1, + 238, + "iso8859-2", + "iso-8859-2", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_EASTEUROPE, pc code page 852, mac encoding 36 */ /* ======================================================================= */ @@ -3980,44 +3555,25 @@ static ImplByteConvertData const aImplAPPLECYRILLICByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLECYRILLICTextConverterData = -{ - &aImplAPPLECYRILLICByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLECYRILLICTextConverter = -{ - &aImplAPPLECYRILLICTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLECYRILLICTextEncodingData = -{ - &aImplAPPLECYRILLICTextConverter, - RTL_TEXTENCODING_APPLE_CYRILLIC, - 1, 1, 1, - 204, - 866, /* ??? 
855 */ - 7, - (const sal_Char*)"iso8859-5", - (const sal_Char*)"iso-8859-5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_CYRILLIC -}; +static ImplTextEncodingData const aImplAPPLECYRILLICTextEncodingData + = { { &aImplAPPLECYRILLICByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_CYRILLIC, + 1, + 1, + 1, + 204, + "iso8859-5", + "iso-8859-5", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_CYRILLIC, pc code page 866 (855?), mac encoding 7 */ /* ======================================================================= */ @@ -4154,44 +3710,25 @@ static ImplByteConvertData const aImplAPPLEGREEKByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLEGREEKTextConverterData = -{ - &aImplAPPLEGREEKByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEGREEKTextConverter = -{ - &aImplAPPLEGREEKTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEGREEKTextEncodingData = -{ - &aImplAPPLEGREEKTextConverter, - RTL_TEXTENCODING_APPLE_GREEK, - 1, 1, 1, - 161, - 869, - 6, - (const sal_Char*)"iso8859-7", - (const sal_Char*)"iso-8859-7", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_GREEK -}; +static ImplTextEncodingData const aImplAPPLEGREEKTextEncodingData + = { { &aImplAPPLEGREEKByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_GREEK, + 1, + 1, + 1, + 161, + "iso8859-7", + "iso-8859-7", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_GREEK, pc code page 869, mac encoding 6 */ /* 
======================================================================= */ @@ -4328,44 +3865,25 @@ static ImplByteConvertData const aImplAPPLEROMANIANByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLEROMANIANTextConverterData = -{ - &aImplAPPLEROMANIANByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEROMANIANTextConverter = -{ - &aImplAPPLEROMANIANTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEROMANIANTextEncodingData = -{ - &aImplAPPLEROMANIANTextConverter, - RTL_TEXTENCODING_APPLE_ROMANIAN, - 1, 1, 1, - 238, - 852, - 38, - (const sal_Char*)"iso8859-2", - (const sal_Char*)"iso-8859-2", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_EASTEUROPE -}; +static ImplTextEncodingData const aImplAPPLEROMANIANTextEncodingData + = { { &aImplAPPLEROMANIANByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_ROMANIAN, + 1, + 1, + 1, + 238, + "iso8859-2", + "iso-8859-2", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_EASTEUROPE, pc code page 852, mac encoding 38 */ /* ======================================================================= */ @@ -4495,44 +4013,25 @@ static ImplByteConvertData const aImplAPPLETURKISHByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLETURKISHTextConverterData = -{ - &aImplAPPLETURKISHByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const 
aImplAPPLETURKISHTextConverter = -{ - &aImplAPPLETURKISHTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLETURKISHTextEncodingData = -{ - &aImplAPPLETURKISHTextConverter, - RTL_TEXTENCODING_APPLE_TURKISH, - 1, 1, 1, - 162, - 857, - 35, - (const sal_Char*)"iso8859-9", - (const sal_Char*)"iso-8859-9", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_TURKISH -}; +static ImplTextEncodingData const aImplAPPLETURKISHTextEncodingData + = { { &aImplAPPLETURKISHByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_TURKISH, + 1, + 1, + 1, + 162, + "iso8859-9", + "iso-8859-9", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_TURKISH, pc code page 857, mac encoding 35 */ /* ======================================================================= */ @@ -4650,44 +4149,26 @@ static ImplByteConvertData const aImplAPPLEUKRAINIANByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLEUKRAINIANTextConverterData = -{ - &aImplAPPLEUKRAINIANByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEUKRAINIANTextConverter = -{ - &aImplAPPLEUKRAINIANTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEUKRAINIANTextEncodingData = -{ - &aImplAPPLEUKRAINIANTextConverter, - RTL_TEXTENCODING_APPLE_UKRAINIAN, - 1, 1, 1, - 204, - 866, /* ??? 
855 */ - 0x98, /* Only available from MAC 8.5 */ - (const sal_Char*)"iso8859-5", - (const sal_Char*)"iso-8859-5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_CYRILLIC -}; +static ImplTextEncodingData const aImplAPPLEUKRAINIANTextEncodingData + = { { &aImplAPPLEUKRAINIANByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_UKRAINIAN, + 1, + 1, + 1, + 204, + "iso8859-5", + "iso-8859-5", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_CYRILLIC, pc code page 866 (855?), mac encoding 0x98 + (MAC 8.5 and above) */ /* ======================================================================= */ @@ -4828,42 +4309,22 @@ static ImplByteConvertData const aImplKOI8RByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplKOI8RTextConverterData = -{ - &aImplKOI8RByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplKOI8RTextConverter = -{ - &aImplKOI8RTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplKOI8RTextEncodingData = -{ - &aImplKOI8RTextConverter, - RTL_TEXTENCODING_KOI8_R, - 1, 1, 1, - 204, - 866, /* ??? 
855 */ - 7, - (const sal_Char*)"koi8-r", - (const sal_Char*)"koi8-r", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_CYRILLIC -}; - +static ImplTextEncodingData const aImplKOI8RTextEncodingData + = { { &aImplKOI8RByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_KOI8_R, + 1, + 1, + 1, + 204, + "koi8-r", + "koi8-r", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_CYRILLIC, pc code page 866 (855?), mac encoding 7 */ diff --git a/sal/textenc/tcvtjp6.tab b/sal/textenc/tcvtjp6.tab index 4fa806787f5d..f03e8516ca63 100644 --- a/sal/textenc/tcvtjp6.tab +++ b/sal/textenc/tcvtjp6.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtjp6.tab,v $ * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * - * last change: $Author: th $ $Date: 2000-12-13 22:22:57 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -616,45 +616,27 @@ static ImplDBCSConvertData const aImplSJISDBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplSJISTextConverterData = -{ - &aImplSJISDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplSJISTextConverter = -{ - &aImplSJISTextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplSJISTextEncodingData = -{ - &aImplSJISTextConverter, - RTL_TEXTENCODING_SHIFT_JIS, - 1, 2, 2, - 128, - 932, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"shift_jis", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_JAPANESE -}; 
+static ImplTextEncodingData const aImplSJISTextEncodingData + = { { &aImplSJISDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_SHIFT_JIS, + 1, + 2, + 2, + 128, + "euc-jp", + "shift_jis", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -1193,46 +1175,25 @@ static ImplDBCSConvertData const aImplMS932DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS932TextConverterData = -{ - &aImplMS932DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS932TextConverter = -{ - &aImplMS932TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS932TextEncodingData = -{ - &aImplMS932TextConverter, - RTL_TEXTENCODING_MS_932, - 1, 2, 2, - 128, - 932, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"shift_jis", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_WIN | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_JAPANESE -}; +static ImplTextEncodingData const aImplMS932TextEncodingData + = { { &aImplMS932DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_932, + 1, + 2, + 2, + 128, + "euc-jp", + "shift_jis", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* WIN/DOS/OS2, SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -1771,45 
+1732,25 @@ static ImplDBCSConvertData const aImplAPPLEJAPANESEDBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLEJAPANESETextConverterData = -{ - &aImplAPPLEJAPANESEDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEJAPANESETextConverter = -{ - &aImplAPPLEJAPANESETextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEJAPANESETextEncodingData = -{ - &aImplAPPLEJAPANESETextConverter, - RTL_TEXTENCODING_APPLE_JAPANESE, - 1, 2, 2, - 128, - 932, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"shift_jis", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_JAPANESE -}; +static ImplTextEncodingData const aImplAPPLEJAPANESETextEncodingData + = { { &aImplAPPLEJAPANESEDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_JAPANESE, + 1, + 2, + 2, + 128, + "euc-jp", + "shift_jis", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* MAC, SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -2615,45 +2556,27 @@ static ImplEUCJPConvertData const aImplEUCJPCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplEUCJPTextConverterData = -{ - &aImplEUCJPCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplEUCJPTextConverter = -{ - &aImplEUCJPTextConverterData, - 
ImplEUCJPToUnicode, - ImplUnicodeToEUCJP, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplEUCJPTextEncodingData = -{ - &aImplEUCJPTextConverter, - RTL_TEXTENCODING_EUC_JP, - 1, 3, 2, - 128, - 932, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"euc-jp", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_JAPANESE -}; +static ImplTextEncodingData const aImplEUCJPTextEncodingData + = { { &aImplEUCJPCvtData, + ImplEUCJPToUnicode, + ImplUnicodeToEUCJP, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_EUC_JP, + 1, + 3, + 2, + 128, + "euc-jp", + "euc-jp", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -2662,7 +2585,7 @@ static ImplTextEncodingData const aImplEUCJPTextEncodingData = #define HALFWIDTHKANA_START 0xFF61 #define HALFWIDTHKANA_END 0xFF9F -static sal_uChar aImplJISX0201Tab[HALFWIDTHKANA_END - HALFWIDTHKANA_START + 1] = +static sal_uChar const aImplJISX0201Tab[HALFWIDTHKANA_END - HALFWIDTHKANA_START + 1] = { 0xA1, 0xA2, 0xA3, 0xA4, 0x5A, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, @@ -2672,7 +2595,7 @@ static sal_uChar aImplJISX0201Tab[HALFWIDTHKANA_END - HALFWIDTHKANA_START + 1] = /* ----------------------------------------------------------------------- */ -static ImplByteConvertData aImplJISX0201ByteCvtData = +static ImplByteConvertData const aImplJISX0201ByteCvtData = { NULL, NULL, @@ -2688,43 +2611,25 @@ static ImplByteConvertData aImplJISX0201ByteCvtData = /* ----------------------------------------------------------------------- */ -static 
ImplTextConverterData aImplJISX0201TextConverterData = -{ - &aImplJISX0201ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter aImplJISX0201TextConverter = -{ - &aImplJISX0201TextConverterData, - NULL, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData aImplJISX0201TextEncodingData = -{ - &aImplJISX0201TextConverter, - RTL_TEXTENCODING_JIS_X_0201, - 1, 1, 1, - 128, - 923, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"euc-jp", - RTL_TEXTENCODING_INFO_MULTIBYTE, - SCRIPT_JAPANESE, -}; +static ImplTextEncodingData const aImplJISX0201TextEncodingData + = { { &aImplJISX0201ByteCvtData, + NULL, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_JIS_X_0201, + 1, + 1, + 1, + 128, + "euc-jp", + "euc-jp", + RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -2739,43 +2644,25 @@ static ImplDBCSConvertData const aImplJISX0208DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData aImplJISX0208TextConverterData = -{ - &aImplJISX0208DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT, -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter aImplJISX0208TextConverter = -{ - &aImplJISX0208TextConverterData, - NULL, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData aImplJISX0208TextEncodingData = -{ - &aImplJISX0208TextConverter, - RTL_TEXTENCODING_JIS_X_0208, - 2, 2, 2, - 128, - 932, - 1, - (const sal_Char*)"euc-jp", - (const 
sal_Char*)"euc-jp", - RTL_TEXTENCODING_INFO_MULTIBYTE, - SCRIPT_JAPANESE, -}; +static ImplTextEncodingData const aImplJISX0208TextEncodingData + = { { &aImplJISX0208DBCSCvtData, + NULL, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_JIS_X_0208, + 2, + 2, + 2, + 128, + "euc-jp", + "euc-jp", + RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ /* ======================================================================= */ @@ -2790,40 +2677,22 @@ static ImplDBCSConvertData const aImplJISX0212DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData aImplJISX0212TextConverterData = -{ - &aImplJISX0212DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT, -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter aImplJISX0212TextConverter = -{ - &aImplJISX0212TextConverterData, - NULL, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData aImplJISX0212TextEncodingData = -{ - &aImplJISX0212TextConverter, - RTL_TEXTENCODING_JIS_X_0212, - 2, 2, 2, - 128, - 923, - 1, - (const sal_Char*)"euc-jp", - (const sal_Char*)"euc-jp", - RTL_TEXTENCODING_INFO_MULTIBYTE, - SCRIPT_JAPANESE, -}; +static ImplTextEncodingData const aImplJISX0212TextEncodingData + = { { &aImplJISX0212DBCSCvtData, + NULL, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_JIS_X_0212, + 2, + 2, + 2, + 128, + "euc-jp", + "euc-jp", + RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_JAPANESE, pc code page 932, mac encoding 1 */ diff --git a/sal/textenc/tcvtkr6.tab b/sal/textenc/tcvtkr6.tab index 15361447a5c8..d1f06692c5a6 100644 --- a/sal/textenc/tcvtkr6.tab +++ b/sal/textenc/tcvtkr6.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtkr6.tab,v $ * - * $Revision: 1.2 $ + 
* $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2000-12-13 22:22:20 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -617,86 +617,75 @@ static ImplDBCSConvertData const aImplKSC5601DBCSCvtData = DBCS_EUDC_MS949_COUNT }; -/* ----------------------------------------------------------------------- */ - -static ImplTextConverterData const aImplKSC5601TextConverterData = -{ - &aImplKSC5601DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplKSC5601TextConverter = -{ - &aImplKSC5601TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - /* ======================================================================= */ -static ImplTextEncodingData const aImplEUCKRTextEncodingData = -{ - &aImplKSC5601TextConverter, - RTL_TEXTENCODING_EUC_KR, - 1, 2, 2, - 129, - 934, - 3, - (const sal_Char*)"euc-kr", - (const sal_Char*)"euc-kr", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_KOREAN -}; +static ImplTextEncodingData const aImplEUCKRTextEncodingData + = { { &aImplKSC5601DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_EUC_KR, + 1, + 2, + 2, + 129, + "euc-kr", + "euc-kr", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_KOREAN, pc code page 934, mac encoding 3 */ /* ======================================================================= */ -static ImplTextEncodingData const aImplMS949TextEncodingData = -{ - &aImplKSC5601TextConverter, - RTL_TEXTENCODING_MS_949, - 1, 2, 2, - 129, - 934, - 3, - (const sal_Char*)"euc-kr", - (const sal_Char*)"euc-kr", - RTL_TEXTENCODING_INFO_ASCII | - 
RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_WIN | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_KOREAN -}; +static ImplTextEncodingData const aImplMS949TextEncodingData + = { { &aImplKSC5601DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_949, + 1, + 2, + 2, + 129, + "euc-kr", + "euc-kr", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* WIN/DOS/OS2, SCRIPT_KOREAN, pc code page 934, mac encoding 3 */ /* ======================================================================= */ /* Apple has some extension, which we don't support */ -static ImplTextEncodingData const aImplAPPLEKOREANTextEncodingData = -{ - &aImplKSC5601TextConverter, - RTL_TEXTENCODING_APPLE_KOREAN, - 1, 2, 2, - 129, - 934, - 3, - (const sal_Char*)"euc-kr", - (const sal_Char*)"euc-kr", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MAC | - SCRIPT_KOREAN -}; +static ImplTextEncodingData const aImplAPPLEKOREANTextEncodingData + = { { &aImplKSC5601DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_KOREAN, + 1, + 2, + 2, + 129, + "euc-kr", + "euc-kr", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* MAC, SCRIPT_KOREAN, pc code page 934, mac encoding 3 */ /* ======================================================================= */ @@ -1250,44 +1239,26 @@ static ImplDBCSConvertData const aImplJOHABDBCSCvtData = DBCS_EUDC_MS1361_COUNT }; -/* ----------------------------------------------------------------------- */ - -static ImplTextConverterData const aImplJOHABTextConverterData = -{ - &aImplJOHABDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplJOHABTextConverter = -{ - &aImplJOHABTextConverterData, - ImplDBCSToUnicode, - 
ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - /* ======================================================================= */ -static ImplTextEncodingData const aImplMS1361TextEncodingData = -{ - &aImplJOHABTextConverter, - RTL_TEXTENCODING_MS_1361, - 1, 2, 2, - 130, - 1361, - 3, - (const sal_Char*)"euc-kr", - (const sal_Char*)"euc-kr", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_KOREAN -}; +static ImplTextEncodingData const aImplMS1361TextEncodingData + = { { &aImplJOHABDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1361, + 1, + 2, + 2, + 130, + "euc-kr", + "euc-kr", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_KOREAN, pc code page 1361, mac encoding 3 */ diff --git a/sal/textenc/tcvtlat1.tab b/sal/textenc/tcvtlat1.tab index 3907b060c6b4..51f169507089 100644 --- a/sal/textenc/tcvtlat1.tab +++ b/sal/textenc/tcvtlat1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtlat1.tab,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2001-07-16 11:33:08 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -66,67 +66,6 @@ /* ======================================================================= */ -/* Hilfstabellen, die fuer mehrere Zeichensaetze gelten */ - -/* ----------------------------------------------------------------------- */ - -#define NOTABCHAR_START 0xFFFF -#define NOTABCHAR_END 0x0000 -#define NOTABUNI_START 0xFF -#define NOTABUNI_END 0x00 - -/* ----------------------------------------------------------------------- */ - -#define SAME8090UNI_START 0x80 -#define SAME8090UNI_END 0x9F -static sal_uInt16 const aImpl8090SameToUniTab[SAME8090UNI_END - SAME8090UNI_START + 1] = -{ -/* 0 1 2 3 4 5 
6 7 */ -/* 8 9 A B C D E F */ - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */ - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, /* 0x80 */ - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */ - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, /* 0x90 */ -}; - -/* ----------------------------------------------------------------------- */ - -#define SAME8090CHAR_START 0x0080 -#define SAME8090CHAR_END 0x009F -static sal_uChar const aImpl8090SameToCharTab[SAME8090CHAR_END - SAME8090CHAR_START + 1] = -{ -/* 0 1 2 3 4 5 6 7 */ -/* 8 9 A B C D E F */ - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x0080 */ - 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, /* 0x0080 */ - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x0090 */ - 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F /* 0x0090 */ -}; - -/* ----------------------------------------------------------------------- */ - -#define SAMEA0FFCHAR_START 0x00A0 -#define SAMEA0FFCHAR_END 0x00FF -static sal_uChar const aImplA0FFSameToCharTab[SAMEA0FFCHAR_END - SAMEA0FFCHAR_START + 1] = -{ -/* 0 1 2 3 4 5 6 7 */ -/* 8 9 A B C D E F */ - 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, /* 0x00A0 */ - 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, /* 0x00A0 */ - 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, /* 0x00B0 */ - 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, /* 0x00B0 */ - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, /* 0x00C0 */ - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, /* 0x00C0 */ - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, /* 0x00D0 */ - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, /* 0x00D0 */ - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, /* 0x00E0 */ - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, /* 0x00E0 */ - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, /* 0x00F0 */ - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF /* 0x00F0 */ -}; - -/* 
======================================================================= */ - /* IBM-437 */ /* Dos Latin US - Standard CharSet for Western Script */ /* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme */ @@ -281,45 +220,25 @@ static ImplByteConvertData const aImplIBM437ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM437TextConverterData = -{ - &aImplIBM437ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM437TextConverter = -{ - &aImplIBM437TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM437TextEncodingData = -{ - &aImplIBM437TextConverter, - RTL_TEXTENCODING_IBM_437, - 1, 1, 1, - 0, - 437, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"ibm437", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM437TextEncodingData + = { { &aImplIBM437ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_437, + 1, + 1, + 1, + 0, + "iso8859-1", + "ibm437", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 437 */ /* ======================================================================= */ @@ -436,45 +355,25 @@ static ImplByteConvertData const aImplIBM850ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM850TextConverterData = -{ - &aImplIBM850ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - 
-static ImplTextConverter const aImplIBM850TextConverter = -{ - &aImplIBM850TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM850TextEncodingData = -{ - &aImplIBM850TextConverter, - RTL_TEXTENCODING_IBM_850, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"ibm850", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM850TextEncodingData + = { { &aImplIBM850ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_850, + 1, + 1, + 1, + 0, + "iso8859-1", + "ibm850", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -630,44 +529,25 @@ static ImplByteConvertData const aImplIBM860ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM860TextConverterData = -{ - &aImplIBM860ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM860TextConverter = -{ - &aImplIBM860TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM860TextEncodingData = -{ - &aImplIBM860TextConverter, - RTL_TEXTENCODING_IBM_860, - 1, 1, 1, - 0, - 860, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN 
-}; +static ImplTextEncodingData const aImplIBM860TextEncodingData + = { { &aImplIBM860ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_860, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 860 */ /* ======================================================================= */ @@ -825,44 +705,25 @@ static ImplByteConvertData const aImplIBM861ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM861TextConverterData = -{ - &aImplIBM861ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM861TextConverter = -{ - &aImplIBM861TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM861TextEncodingData = -{ - &aImplIBM861TextConverter, - RTL_TEXTENCODING_IBM_861, - 1, 1, 1, - 0, - 861, - 37, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM861TextEncodingData + = { { &aImplIBM861ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_861, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 861, mac encoding 37 */ /* ======================================================================= */ @@ -1020,44 +881,25 @@ static ImplByteConvertData const aImplIBM863ByteCvtData = /* ----------------------------------------------------------------------- */ -static 
ImplTextConverterData const aImplIBM863TextConverterData = -{ - &aImplIBM863ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM863TextConverter = -{ - &aImplIBM863TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM863TextEncodingData = -{ - &aImplIBM863TextConverter, - RTL_TEXTENCODING_IBM_863, - 1, 1, 1, - 0, - 863, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM863TextEncodingData + = { { &aImplIBM863ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_863, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 863 */ /* ======================================================================= */ @@ -1215,44 +1057,25 @@ static ImplByteConvertData const aImplIBM865ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplIBM865TextConverterData = -{ - &aImplIBM865ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplIBM865TextConverter = -{ - &aImplIBM865TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplIBM865TextEncodingData = -{ - &aImplIBM865TextConverter, - RTL_TEXTENCODING_IBM_865, - 1, 1, 1, - 
0, - 865, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplIBM865TextEncodingData + = { { &aImplIBM865ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_IBM_865, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII }; + /* DOS/OS2, SCRIPT_LATIN, pc code page 865 */ /* ======================================================================= */ @@ -1340,45 +1163,25 @@ static ImplByteConvertData const aImplMS1252ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS1252TextConverterData = -{ - &aImplMS1252ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS1252TextConverter = -{ - &aImplMS1252TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1252TextEncodingData = -{ - &aImplMS1252TextConverter, - RTL_TEXTENCODING_MS_1252, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"windows-1252", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_WIN, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplMS1252TextEncodingData + = { { &aImplMS1252ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_1252, + 1, + 1, + 1, + 0, + "iso8859-1", + "windows-1252", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_LATIN, pc code page 850 */ /* 
======================================================================= */ @@ -1426,44 +1229,25 @@ static ImplByteConvertData const aImplISO88591ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO88591TextConverterData = -{ - &aImplISO88591ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO88591TextConverter = -{ - &aImplISO88591TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88591TextEncodingData = -{ - &aImplISO88591TextConverter, - RTL_TEXTENCODING_ISO_8859_1, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplISO88591TextEncodingData + = { { &aImplISO88591ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_1, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -1574,44 +1358,25 @@ static ImplByteConvertData const aImplISO885914ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO885914TextConverterData = -{ - &aImplISO885914ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO885914TextConverter = -{ - &aImplISO885914TextConverterData, - 
ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO885914TextEncodingData = -{ - &aImplISO885914TextConverter, - RTL_TEXTENCODING_ISO_8859_14, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-14", - (const sal_Char*)"iso-8859-14", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplISO885914TextEncodingData + = { { &aImplISO885914ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_14, + 1, + 1, + 1, + 0, + "iso8859-14", + "iso-8859-14", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -1696,44 +1461,25 @@ static ImplByteConvertData const aImplISO885915ByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplISO885915TextConverterData = -{ - &aImplISO885915ByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplISO885915TextConverter = -{ - &aImplISO885915TextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO885915TextEncodingData = -{ - &aImplISO885915TextConverter, - RTL_TEXTENCODING_ISO_8859_15, - 1, 1, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-15", - (const sal_Char*)"iso-8859-15", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplISO885915TextEncodingData + = { { 
&aImplISO885915ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ISO_8859_15, + 1, + 1, + 1, + 0, + "iso8859-15", + "iso-8859-15", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -1856,44 +1602,25 @@ static ImplByteConvertData const aImplAPPLEICELANDByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLEICELANDTextConverterData = -{ - &aImplAPPLEICELANDByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEICELANDTextConverter = -{ - &aImplAPPLEICELANDTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEICELANDTextEncodingData = -{ - &aImplAPPLEICELANDTextConverter, - RTL_TEXTENCODING_APPLE_ICELAND, - 1, 1, 1, - 0, - 861, - 37, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplAPPLEICELANDTextEncodingData + = { { &aImplAPPLEICELANDByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_ICELAND, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII }; + /* MAC, SCRIPT_LATIN, pc code page 861, mac encoding 37 */ /* ======================================================================= */ @@ -2023,45 +1750,25 @@ static ImplByteConvertData const aImplAPPLEROMANByteCvtData = /* ----------------------------------------------------------------------- 
*/ -static ImplTextConverterData const aImplAPPLEROMANTextConverterData = -{ - &aImplAPPLEROMANByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLEROMANTextConverter = -{ - &aImplAPPLEROMANTextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData = -{ - &aImplAPPLEROMANTextConverter, - RTL_TEXTENCODING_APPLE_ROMAN, - 1, 1, 1, - 77, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"macintosh", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MIME | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_LATIN -}; +static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData + = { { &aImplAPPLEROMANByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_ROMAN, + 1, + 1, + 1, + 77, + "iso8859-1", + "macintosh", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* MAC, SCRIPT_LATIN, pc code page 850 */ /* ======================================================================= */ @@ -2092,43 +1799,24 @@ static ImplByteConvertData const aImplUSASCIIByteCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplUSASCIITextConverterData = -{ - &aImplUSASCIIByteCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplUSASCIITextConverter = -{ - &aImplUSASCIITextConverterData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData 
const aImplUSASCIITextEncodingData = -{ - &aImplUSASCIITextConverter, - RTL_TEXTENCODING_ASCII_US, - 1, 1, 1, - 0, - 437, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"us-ascii", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_7BIT | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_LATIN -}; - +static ImplTextEncodingData const aImplUSASCIITextEncodingData + = { { &aImplUSASCIIByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_ASCII_US, + 1, + 1, + 1, + 0, + "iso8859-1", + "us-ascii", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_7BIT + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 437 */ diff --git a/sal/textenc/tcvtmb.c b/sal/textenc/tcvtmb.c index 088db1548df1..e7fbea7299d2 100644 --- a/sal/textenc/tcvtmb.c +++ b/sal/textenc/tcvtmb.c @@ -2,9 +2,9 @@ * * $RCSfile: tcvtmb.c,v $ * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * - * last change: $Author: jp $ $Date: 2001-09-19 14:02:35 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,13 +59,12 @@ * ************************************************************************/ -#define _RTL_TCVTMB_C - -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif + #ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> +#include "rtl/textcvt.h" #endif /* ======================================================================= */ @@ -87,7 +86,7 @@ sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, sal_uChar cTrail; sal_Unicode cConv; const ImplDBCSToUniLeadTab* pLeadEntry; - const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)(pData->mpConvertTables); + const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab; 
sal_Unicode* pEndDestBuf; const sal_Char* pEndSrcBuf; @@ -120,7 +119,7 @@ sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, continue; } else - cConv = ImplGetUndefinedUnicodeChar( cLead, nFlags, pData ); + cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags); } } else @@ -259,13 +258,12 @@ sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ) { - int nAction; sal_uInt16 cConv; sal_Unicode c; sal_uChar nHighChar; sal_uChar nLowChar; const ImplUniToDBCSHighTab* pHighEntry; - const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)(pData->mpConvertTables); + const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab; sal_Char* pEndDestBuf; const sal_Unicode* pEndSrcBuf; @@ -321,7 +319,8 @@ sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, pEUDCTab++; } - if ( (c >= 0xF100) && (c <= 0xF1FF) ) + if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START + && c <= RTL_TEXTCVT_BYTE_PRIVATE_END) { if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) cConv = (sal_Char)(sal_uChar)(c & 0xFF); @@ -342,14 +341,16 @@ sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, /* Handle undefined and surrogates characters */ /* (all surrogates characters are undefined) */ - nAction = ImplHandleUndefinedUnicodeToTextChar( pData, - &pSrcBuf, pEndSrcBuf, - &pDestBuf, pEndDestBuf, - nFlags, pInfo ); - if ( nAction == IMPL_TEXTCVT_BREAK ) - break; - else + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) continue; + else + break; } /* SingleByte */ @@ -405,7 +406,7 @@ sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, sal_Unicode cConv; const ImplDBCSToUniLeadTab* pLeadEntry; const ImplDBCSToUniLeadTab* pLeadTab; - const ImplEUCJPConvertData* 
pConvertData = (const ImplEUCJPConvertData*)(pData->mpConvertTables); + const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; sal_Unicode* pEndDestBuf; const sal_Char* pEndSrcBuf; @@ -562,14 +563,13 @@ sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ) { - int nAction; sal_uInt32 cConv; sal_Unicode c; sal_uChar nHighChar; sal_uChar nLowChar; const ImplUniToDBCSHighTab* pHighEntry; const ImplUniToDBCSHighTab* pHighTab; - const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)(pData->mpConvertTables); + const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; sal_Char* pEndDestBuf; const sal_Unicode* pEndSrcBuf; @@ -627,14 +627,16 @@ sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, /* Handle undefined and surrogates characters */ /* (all surrogates characters are undefined) */ - nAction = ImplHandleUndefinedUnicodeToTextChar( pData, - &pSrcBuf, pEndSrcBuf, - &pDestBuf, pEndDestBuf, - nFlags, pInfo ); - if ( nAction == IMPL_TEXTCVT_BREAK ) - break; - else + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) continue; + else + break; } } } diff --git a/sal/textenc/tcvtscn6.tab b/sal/textenc/tcvtscn6.tab index 7e3bc169e45c..29681258099b 100644 --- a/sal/textenc/tcvtscn6.tab +++ b/sal/textenc/tcvtscn6.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtscn6.tab,v $ * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * - * last change: $Author: th $ $Date: 2001-07-16 11:42:06 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -629,62 +629,49 @@ static ImplDBCSConvertData const aImplGB2312DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplGB2312TextConverterData = -{ - 
&aImplGB2312DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplGB2312TextConverter = -{ - &aImplGB2312TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplGB2312TextEncodingData = -{ - &aImplGB2312TextConverter, - RTL_TEXTENCODING_GB_2312, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"gb2312", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplGB2312TextEncodingData + = { { &aImplGB2312DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_GB_2312, + 1, + 2, + 2, + 134, + "euc-cn", + "gb2312", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_CHINESE_SIMPLIFIED, pc code page 936, mac encoding 25 */ /* ======================================================================= */ -static ImplTextEncodingData aImplEUCCNTextEncodingData = -{ - &aImplGB2312TextConverter, - RTL_TEXTENCODING_EUC_CN, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"gb2312", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplEUCCNTextEncodingData + = { { &aImplGB2312DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_EUC_CN, + 1, + 2, + 2, + 134, + "euc-cn", + "gb2312", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_CHINESE_SIMPLIFIED, pc code page 936, mac encoding 25 */ /* 
======================================================================= */ @@ -1223,45 +1210,27 @@ static ImplDBCSConvertData const aImplGBT12345DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplGBT12345TextConverterData = -{ - &aImplGBT12345DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplGBT12345TextConverter = -{ - &aImplGBT12345TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplGBT12345TextEncodingData = -{ - &aImplGBT12345TextConverter, - RTL_TEXTENCODING_GBT_12345, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"cn-gb-12345", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplGBT12345TextEncodingData + = { { &aImplGBT12345DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_GBT_12345, + 1, + 2, + 2, + 134, + "euc-cn", + "cn-gb-12345", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_CHINESE_SIMPLIFIED, pc code page 936, mac encoding 25 */ /* ======================================================================= */ @@ -1800,44 +1769,25 @@ static ImplDBCSConvertData const aImplGBKDBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplGBKTextConverterData = -{ - &aImplGBKDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static 
ImplTextConverter const aImplGBKTextConverter = -{ - &aImplGBKTextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplGBKTextEncodingData = -{ - &aImplGBKTextConverter, - RTL_TEXTENCODING_GBK, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"gb2312", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplGBKTextEncodingData + = { { &aImplGBKDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_GBK, + 1, + 2, + 2, + 134, + "euc-cn", + "gb2312", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* SCRIPT_CHINESE_SIMPLIFIED, pc code page 936, mac encoding 25 */ /* ======================================================================= */ @@ -2376,46 +2326,26 @@ static ImplDBCSConvertData const aImplMS936DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS936TextConverterData = -{ - &aImplMS936DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS936TextConverter = -{ - &aImplMS936TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS936TextEncodingData = -{ - &aImplMS936TextConverter, - RTL_TEXTENCODING_MS_936, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"gb2312", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_WIN | - 
RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplMS936TextEncodingData + = { { &aImplMS936DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_936, + 1, + 2, + 2, + 134, + "euc-cn", + "gb2312", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* WIN/DOS/OS2, SCRIPT_CHINESE_SIMPLIFIED, pc code page 936, mac encoding + 25 */ /* ======================================================================= */ @@ -2954,42 +2884,22 @@ static ImplDBCSConvertData const aImplAPPLECHINSIMPDBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLECHINSIMPTextConverterData = -{ - &aImplAPPLECHINSIMPDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplAPPLECHINSIMPTextConverter = -{ - &aImplAPPLECHINSIMPTextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLECHINSIMPTextEncodingData = -{ - &aImplAPPLECHINSIMPTextConverter, - RTL_TEXTENCODING_APPLE_CHINSIMP, - 1, 2, 2, - 134, - 936, - 25, - (const sal_Char*)"euc-cn", - (const sal_Char*)"gb2312", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_CHINESE_SIMPLIFIED -}; +static ImplTextEncodingData const aImplAPPLECHINSIMPTextEncodingData + = { { &aImplAPPLECHINSIMPDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_CHINSIMP, + 1, + 2, + 2, + 134, + "euc-cn", + "gb2312", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* MAC, SCRIPT_CHINESE_SIMPLIFIED, pc code page 
936, mac encoding 25 */ diff --git a/sal/textenc/tcvtsym1.tab b/sal/textenc/tcvtsym1.tab index bd3d20af4d8d..4752ae6c29f0 100644 --- a/sal/textenc/tcvtsym1.tab +++ b/sal/textenc/tcvtsym1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtsym1.tab,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:29 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,50 +59,24 @@ * ************************************************************************/ -/* ======================================================================= */ +/* Symbol Font Encodings */ -/* This file contain the tables for 1 byte symbol charsets */ - -/* ======================================================================= */ - -/* Symbol */ - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverterData const aImplSYMBOLTextConverterData = -{ - NULL, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplSYMBOLTextConverter = -{ - &aImplSYMBOLTextConverterData, - ImplSymbolToUnicode, - ImplUnicodeToSymbol, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplSYMBOLTextEncodingData = -{ - &aImplSYMBOLTextConverter, - RTL_TEXTENCODING_SYMBOL, - 1, 1, 1, - 2, - 65400, - 32, - (const sal_Char*)"adobe-fontspecific", - (const sal_Char*)"invariant", - RTL_TEXTENCODING_INFO_SYMBOL, - SCRIPT_SYMBOL -}; +static ImplTextEncodingData const aImplSYMBOLTextEncodingData + = { { NULL, + ImplSymbolToUnicode, + ImplUnicodeToSymbol, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_SYMBOL, + 1, + 1, + 1, + 2, + "adobe-fontspecific", + "invariant", + 
RTL_TEXTENCODING_INFO_SYMBOL }; + /* SCRIPT_SYMBOL, pc code page 65400, mac encoding 32 */ diff --git a/sal/textenc/tcvttcn6.tab b/sal/textenc/tcvttcn6.tab index 3fec9200f685..92d5b01560ce 100644 --- a/sal/textenc/tcvttcn6.tab +++ b/sal/textenc/tcvttcn6.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvttcn6.tab,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: th $ $Date: 2000-12-13 22:23:46 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -627,45 +627,27 @@ static ImplDBCSConvertData const aImplBIG5DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplBIG5TextConverterData = -{ - &aImplBIG5DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplBIG5TextConverter = -{ - &aImplBIG5TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplBIG5TextEncodingData = -{ - &aImplBIG5TextConverter, - RTL_TEXTENCODING_BIG5, - 1, 2, 2, - 136, - 950, - 2, - (const sal_Char*)"euc-tw", - (const sal_Char*)"big5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_CHINESE_TRADITIONAL -}; +static ImplTextEncodingData const aImplBIG5TextEncodingData + = { { &aImplBIG5DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_BIG5, + 1, + 2, + 2, + 136, + "euc-tw", + "big5", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_CHINESE_TRADITIONAL, pc code page 950, mac encoding 2 */ /* 
======================================================================= */ @@ -1204,46 +1186,26 @@ static ImplDBCSConvertData const aImplMS950DBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplMS950TextConverterData = -{ - &aImplMS950DBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplMS950TextConverter = -{ - &aImplMS950TextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS950TextEncodingData = -{ - &aImplMS950TextConverter, - RTL_TEXTENCODING_MS_950, - 1, 2, 2, - 136, - 950, - 2, - (const sal_Char*)"euc-tw", - (const sal_Char*)"big5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_WIN | - RTL_TEXTENCODING_INFO_DOS_OS2, - SCRIPT_CHINESE_TRADITIONAL -}; +static ImplTextEncodingData const aImplMS950TextEncodingData + = { { &aImplMS950DBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_MS_950, + 1, + 2, + 2, + 136, + "euc-tw", + "big5", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* WIN/DOS/OS2, SCRIPT_CHINESE_TRADITIONAL, pc code page 950, mac encoding + 2 */ /* ======================================================================= */ @@ -1782,42 +1744,22 @@ static ImplDBCSConvertData const aImplAPPLECHINTRADDBCSCvtData = /* ----------------------------------------------------------------------- */ -static ImplTextConverterData const aImplAPPLECHINTRADTextConverterData = -{ - &aImplAPPLECHINTRADDBCSCvtData, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static 
ImplTextConverter const aImplAPPLECHINTRADTextConverter = -{ - &aImplAPPLECHINTRADTextConverterData, - ImplDBCSToUnicode, - ImplUnicodeToDBCS, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplAPPLECHINTRADTextEncodingData = -{ - &aImplAPPLECHINTRADTextConverter, - RTL_TEXTENCODING_APPLE_CHINTRAD, - 1, 2, 2, - 136, - 950, - 2, - (const sal_Char*)"euc-tw", - (const sal_Char*)"big5", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MAC, - SCRIPT_CHINESE_TRADITIONAL -}; +static ImplTextEncodingData const aImplAPPLECHINTRADTextEncodingData + = { { &aImplAPPLECHINTRADDBCSCvtData, + ImplDBCSToUnicode, + ImplUnicodeToDBCS, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_APPLE_CHINTRAD, + 1, + 2, + 2, + 136, + "euc-tw", + "big5", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE }; + /* MAC, SCRIPT_CHINESE_TRADITIONAL, pc code page 950, mac encoding 2 */ diff --git a/sal/textenc/tcvtuni1.tab b/sal/textenc/tcvtuni1.tab index a0c4a14bb84e..e09388e31537 100644 --- a/sal/textenc/tcvtuni1.tab +++ b/sal/textenc/tcvtuni1.tab @@ -2,9 +2,9 @@ * * $RCSfile: tcvtuni1.tab,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:30 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,101 +59,51 @@ * ************************************************************************/ -/* ======================================================================= */ - -/* This file contain the tables for all unicode enabled text encodings */ - -/* ======================================================================= */ - -/* UTF7 */ - -/* ----------------------------------------------------------------------- */ - -static 
ImplTextConverterData const aImplUTF7TextConverterData = -{ - NULL, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplUTF7TextConverter = -{ - &aImplUTF7TextConverterData, - ImplUTF7ToUnicode, - ImplUnicodeToUTF7, - ImplUTF7CreateUTF7TextToUnicodeContext, - ImplUTF7DestroyTextToUnicodeContext, - ImplUTF7ResetTextToUnicodeContext, - ImplUTF7CreateUnicodeToTextContext, - ImplUTF7DestroyUnicodeToTextContext, - ImplUTF7ResetUnicodeToTextContext -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplUTF7TextEncodingData = -{ - &aImplUTF7TextConverter, - RTL_TEXTENCODING_UTF7, - 1, 6, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"utf-7", - RTL_TEXTENCODING_INFO_CONTEXT | - RTL_TEXTENCODING_INFO_UNICODE | - RTL_TEXTENCODING_INFO_7BIT | - RTL_TEXTENCODING_INFO_MULTIBYTE | - RTL_TEXTENCODING_INFO_MIME, - SCRIPT_UNICODE -}; - -/* ======================================================================= */ - -/* UTF8 */ - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverterData const aImplUTF8TextConverterData = -{ - NULL, - NULL, - RTL_UNICODE_CHAR_DEFAULT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextConverter const aImplUTF8TextConverter = -{ - &aImplUTF8TextConverterData, - ImplUTF8ToUnicode, - ImplUnicodeToUTF8, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplUTF8TextEncodingData = -{ - &aImplUTF8TextConverter, - RTL_TEXTENCODING_UTF8, - 1, 6, 1, - 0, - 850, - 0, - (const sal_Char*)"iso8859-1", - (const sal_Char*)"utf-8", - RTL_TEXTENCODING_INFO_ASCII | - RTL_TEXTENCODING_INFO_UNICODE | - RTL_TEXTENCODING_INFO_MULTIBYTE | - 
RTL_TEXTENCODING_INFO_MIME, - SCRIPT_UNICODE -}; +/* Unicode Encodings */ + +static ImplTextEncodingData const aImplUTF7TextEncodingData + = { { NULL, + ImplUTF7ToUnicode, + ImplUnicodeToUTF7, + ImplUTF7CreateUTF7TextToUnicodeContext, + ImplUTF7DestroyTextToUnicodeContext, + ImplUTF7ResetTextToUnicodeContext, + ImplUTF7CreateUnicodeToTextContext, + ImplUTF7DestroyUnicodeToTextContext, + ImplUTF7ResetUnicodeToTextContext }, + RTL_TEXTENCODING_UTF7, + 1, + 6, + 1, + 0, + "iso8859-1", + "utf-7", + RTL_TEXTENCODING_INFO_CONTEXT + | RTL_TEXTENCODING_INFO_UNICODE + | RTL_TEXTENCODING_INFO_7BIT + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_UNICODE, pc code page 850 */ + +static ImplTextEncodingData const aImplUTF8TextEncodingData + = { { NULL, + ImplUTF8ToUnicode, + ImplUnicodeToUTF8, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + RTL_TEXTENCODING_UTF8, + 1, + 6, + 1, + 0, + "iso8859-1", + "utf-8", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_UNICODE + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_UNICODE, pc code page 850 */ diff --git a/sal/textenc/tcvtutf7.c b/sal/textenc/tcvtutf7.c index 649d4c264632..bd7946ca3e69 100644 --- a/sal/textenc/tcvtutf7.c +++ b/sal/textenc/tcvtutf7.c @@ -2,9 +2,9 @@ * * $RCSfile: tcvtutf7.c,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:30 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,17 +59,15 @@ * ************************************************************************/ -#define _RTL_CCVTUTF7_C - -#ifndef _RTL_ALLOC_H -#include <rtl/alloc.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef _RTL_ALLOC_H +#include "rtl/alloc.h" #endif #ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> 
+#include "rtl/textcvt.h" #endif /* ======================================================================= */ @@ -139,7 +137,7 @@ static sal_uChar const aImplMustShiftTab[128] = /* ----------------------------------------------------------------------- */ -typedef struct _ImplUTF7ToUCContextData +typedef struct { int mbShifted; int mbFirst; @@ -439,7 +437,7 @@ sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext, /* ======================================================================= */ -typedef struct _ImplUTF7FromUCContextData +typedef struct { int mbShifted; sal_uInt32 mnBitBuffer; diff --git a/sal/textenc/tcvtutf8.c b/sal/textenc/tcvtutf8.c index 9ae71ee1c78a..f7ec9a0ba1c5 100644 --- a/sal/textenc/tcvtutf8.c +++ b/sal/textenc/tcvtutf8.c @@ -2,9 +2,9 @@ * * $RCSfile: tcvtutf8.c,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:30 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,18 +59,17 @@ * ************************************************************************/ -#define _RTL_TCVTUTF8_C - -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif + #ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> +#include "rtl/textcvt.h" #endif -/* ======================================================================= */ - -#define TEXTCVT_MAX_UNICHAR 0x0010FFFF +#define RTL_UNICODE_SURROGATES_HALFMASK 0x3FFUL +#define RTL_UNICODE_SURROGATES_HALFBASE 0x10000UL +#define RTL_UNICODE_SURROGATES_HALFSHIFT 10 /* ----------------------------------------------------------------------- */ @@ -279,7 +278,7 @@ sal_Size ImplUTF8ToUnicode( const ImplTextConverterData* pData, void* pContext, else { pSrcBuf++; - if ( cConv > TEXTCVT_MAX_UNICHAR ) + if ( cConv > 0x10FFFF ) { *pInfo |= 
RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) diff --git a/sal/textenc/tenchelp.c b/sal/textenc/tenchelp.c index 1f00ccb9192c..88986c5c9122 100644 --- a/sal/textenc/tenchelp.c +++ b/sal/textenc/tenchelp.c @@ -2,9 +2,9 @@ * * $RCSfile: tenchelp.c,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:30 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,132 +59,116 @@ * ************************************************************************/ -#define _RTL_TENCHELP_C - -#include <string.h> - -#ifndef _RTL_MEMORY_H -#include <rtl/memory.h> -#endif -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif + #ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> +#include "rtl/textcvt.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" #endif -/* ======================================================================= */ +static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, + sal_Char * pBuf, + sal_Size nMaxLen); -sal_Unicode ImplGetUndefinedUnicodeChar( sal_uChar c, sal_uInt32 nFlags, const ImplTextConverterData* pData ) -{ - sal_Unicode nUniChar; +static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, + sal_Char * pBuf, + sal_Size nMaxLen); - nFlags &= RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK; - if ( nFlags == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE ) - nUniChar = 0xF100+c; - else +static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); + +sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, + sal_Char * pBuf, + sal_Size nMaxLen) +{ + if (nMaxLen == 0) + return sal_False; + switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) { - if ( pData ) - nUniChar = pData->mcUnicodeDefChar; - else - 
nUniChar = RTL_UNICODE_CHAR_DEFAULT; + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: + *pBuf = 0x00; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: + default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ + *pBuf = 0x3F; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: + *pBuf = 0x5F; + break; } - - return nUniChar; + return sal_True; } -/* ----------------------------------------------------------------------- */ - -sal_Size ImplGetUndefinedAsciiMultiByte( sal_uInt32 nFlags, const ImplTextConverterData* pData, sal_Char* pBuf, sal_Size nMaxLen ) +sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, + sal_Char * pBuf, + sal_Size nMaxLen) { - sal_Size nLen = 0; - - if ( nMaxLen ) + if (nMaxLen == 0) + return sal_False; + switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) { - switch ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK ) - { - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: - nLen = 1; - *pBuf = (sal_Char)0x00; - break; - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: - nLen = 1; - *pBuf = (sal_Char)0x3F; - break; - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: - nLen = 1; - *pBuf = (sal_Char)0x5F; - break; - default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ - if ( pData && pData->mpTextDefChar ) - { - nLen = strlen( pData->mpTextDefChar ); - if ( nLen < nMaxLen ) - rtl_copyMemory( pBuf, pData->mpTextDefChar, nLen ); - else - nLen = 0; - } - else - { - nLen = 1; - *pBuf = (sal_Char)0x3F; - } - break; - } + case RTL_UNICODETOTEXT_FLAGS_INVALID_0: + *pBuf = 0x00; + break; + + case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: + default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ + *pBuf = 0x3F; + break; + + case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: + *pBuf = 0x5F; + break; } - - return nLen; + return sal_True; } -/* ----------------------------------------------------------------------- */ - -sal_Size ImplGetInvalidAsciiMultiByte( sal_uInt32 nFlags, const ImplTextConverterData* pData, sal_Char* 
pBuf, sal_Size nMaxLen ) +int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) { - sal_Size nLen = 0; + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE ) + { + /* !!! */ + } - if ( nMaxLen ) + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE ) { - switch ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK ) - { - case RTL_UNICODETOTEXT_FLAGS_INVALID_0: - nLen = 1; - *pBuf = (sal_Char)0x00; - break; - case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: - nLen = 1; - *pBuf = (sal_Char)0x3F; - break; - case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: - nLen = 1; - *pBuf = (sal_Char)0x5F; - break; - default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ - if ( pData && pData->mpTextDefChar ) - { - nLen = strlen( pData->mpTextDefChar ); - if ( nLen < nMaxLen ) - rtl_copyMemory( pBuf, pData->mpTextDefChar, nLen ); - else - nLen = 0; - } - else - { - nLen = 1; - *pBuf = (sal_Char)0x3F; - } - break; - } + /* !!! */ + } + + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE ) + { + if ( (c >= 0xE000) && (c <= 0xF8FF) ) + return sal_True; } - return nLen; + return sal_False; +} + +/* ======================================================================= */ + +sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags) +{ + return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) + == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? 
+ RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : + RTL_UNICODE_CHAR_DEFAULT; } /* ----------------------------------------------------------------------- */ -int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, - const sal_Unicode** ppSrcBuf, const sal_Unicode* pEndSrcBuf, - sal_Char** ppDestBuf, const sal_Char* pEndDestBuf, - sal_uInt32 nFlags, sal_uInt32* pInfo ) +sal_Bool +ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, + sal_Unicode const ** ppSrcBuf, + sal_Unicode const * pEndSrcBuf, + sal_Char ** ppDestBuf, + sal_Char const * pEndDestBuf, + sal_uInt32 nFlags, + sal_uInt32 * pInfo) { sal_Unicode c = **ppSrcBuf; @@ -196,7 +180,7 @@ int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); (*ppDestBuf)++; (*ppSrcBuf)++; - return IMPL_TEXTCVT_CONTINUE; + return sal_True; } } @@ -204,7 +188,7 @@ int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) { (*ppSrcBuf)++; - return IMPL_TEXTCVT_CONTINUE; + return sal_True; } /* Surrogates Characters should result in */ @@ -215,7 +199,7 @@ int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, if ( *ppSrcBuf == pEndSrcBuf ) { *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - return IMPL_TEXTCVT_BREAK; + return sal_False; } c = *((*ppSrcBuf)+1); @@ -228,28 +212,26 @@ int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) { *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; - return IMPL_TEXTCVT_BREAK; + return sal_False; } else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) { (*ppSrcBuf)++; - return IMPL_TEXTCVT_CONTINUE; + return sal_True; + } + else if (ImplGetInvalidAsciiMultiByte(nFlags, + *ppDestBuf, + 
pEndDestBuf - *ppDestBuf)) + { + ++*ppSrcBuf; + ++*ppDestBuf; + return sal_True; } else { - sal_Size nDefLen = ImplGetInvalidAsciiMultiByte( nFlags, pData, - *ppDestBuf, pEndDestBuf-*ppDestBuf ); - if ( nDefLen ) - { - *ppDestBuf += nDefLen; - (*ppSrcBuf)++; - return IMPL_TEXTCVT_CONTINUE; - } - else - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - return IMPL_TEXTCVT_BREAK; - } + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + return sal_False; } } } @@ -258,475 +240,24 @@ int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) { *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; - return IMPL_TEXTCVT_BREAK; + return sal_False; } else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) (*ppSrcBuf)++; - else - { - sal_Size nDefLen = ImplGetUndefinedAsciiMultiByte( nFlags, pData, - *ppDestBuf, pEndDestBuf-*ppDestBuf ); - if ( nDefLen ) - { - *ppDestBuf += nDefLen; - (*ppSrcBuf)++; - } - else - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - return IMPL_TEXTCVT_BREAK; - } - } - - return IMPL_TEXTCVT_CONTINUE; -} - -/* ----------------------------------------------------------------------- */ - -int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) -{ - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE ) - { - /* !!! */ - } - - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE ) - { - /* !!! 
*/ - } - - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE ) + else if (ImplGetUndefinedAsciiMultiByte(nFlags, + *ppDestBuf, + pEndDestBuf - *ppDestBuf)) { - if ( (c >= 0xE000) && (c <= 0xF8FF) ) - return sal_True; + ++*ppSrcBuf; + ++*ppDestBuf; } - - return sal_False; -} - -/* ----------------------------------------------------------------------- */ - -typedef struct _ImplReplaceCharData -{ - sal_uInt16 mnUniChar; - sal_uInt16 mnReplaceChar; -} ImplReplaceCharData; - -static ImplReplaceCharData const aImplRepCharTab[] = -{ - { 0x00A0, 0x0020 }, /* NO-BREAK-SPACE */ - { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */ - { 0x00B7, 0x0045 }, /* MIDDLE DOT */ - { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */ - { 0x00D7, 0x002A }, /* MULTIPLIKATION SIGN */ - { 0x00F7, 0x002F }, /* DIVISION SIGN */ - { 0x2000, 0x0020 }, /* EN QUAD */ - { 0x2001, 0x0020 }, /* EM QUAD */ - { 0x2002, 0x0020 }, /* EN SPACE */ - { 0x2003, 0x0020 }, /* EM SPACE */ - { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */ - { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */ - { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */ - { 0x2007, 0x0020 }, /* FIGURE SPACE */ - { 0x2008, 0x0020 }, /* PUNCTATION SPACE */ - { 0x2009, 0x0020 }, /* THIN SPACE */ - { 0x200A, 0x0020 }, /* HAIR SPACE */ - { 0x2010, 0x002D }, /* HYPHEN */ - { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */ - { 0x2012, 0x002D }, /* FIGURE DASH */ - { 0x2013, 0x002D }, /* EN DASH */ - { 0x2014, 0x002D }, /* EM DASH */ - { 0x2015, 0x002D }, /* HORIZONTAL BAR */ - { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */ - { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */ - { 0x201A, 0x002C }, /* SINGLE LOW-9 QUOTATION MARK */ - { 0x201B, 0x0027 }, /* SINGLE HIGH-RESERVED-9 QUOTATION MARK */ - { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */ - { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */ - { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */ - { 0x201F, 0x0022 }, /* DOUBLE HIGH-RESERVED-9 QUOTATION MARK */ - { 0x2022, 0x002D }, /* BULLET */ 
- { 0x2023, 0x002D }, /* TRIANGULAR BULLET */ - { 0x2024, 0x002D }, /* ONE DOT LEADER */ - { 0x2027, 0x002D }, /* HYPHENATION POINT */ - { 0x2028, 0x000A }, /* LINE SEPARATOR */ - { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */ - { 0x2032, 0x0027 }, /* PRIME */ - { 0x2033, 0x0022 }, /* DOUBLE PRIME */ - { 0x2035, 0x0027 }, /* RESERVED PRIME */ - { 0x2036, 0x0022 }, /* RESERVED DOUBLE PRIME */ - { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ - { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ - { 0x2043, 0x002D }, /* HYPHEN BULLET */ - { 0x2044, 0x002F }, /* FRACTION SLASH */ - { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */ - { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */ - { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */ - { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */ - { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */ - { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */ - { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */ - { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */ - { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */ - { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */ - { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */ - { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */ - { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */ - { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */ - { 0x2215, 0x002F }, /* DIVISION SLASH */ - { 0x2217, 0x002A }, /* ASTERIX OPERATOR */ - { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF03, 0x0023 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF05, 0x0025 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF09, 0x0029 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/ - 
{ 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2B, 0x004B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF31, 0x0051 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF33, 0x0053 
}, /* FULLWIDTH ASCII FORMS */ - { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5B, 0x007B }, /* FULLWIDTH 
ASCII FORMS */ - { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7B, 0x30B5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7D, 0x30B9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF82, 0x30C4 }, /* HALFWIDTH 
KATAKANA FORMS */ - { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA1, 0x3131 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA3, 0x3133 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA8, 0x3138 }, /* HALFWIDTH 
HANGUL FORMS */ - { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCD, 0x3158 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCF, 0x315A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD5, 0x315E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL 
FORMS */ - { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */ - { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */ - { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */ - { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */ - { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */ - { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */ - { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */ - { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */ - { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */ - { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */ - { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */ - { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */ - { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */ - { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */ - { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */ -}; - -sal_uInt16 ImplGetReplaceChar( sal_Unicode c ) -{ - sal_uInt16 nLow; - sal_uInt16 nHigh; - sal_uInt16 nMid; - sal_uInt16 nCompareChar; - const ImplReplaceCharData* pCharData; - - nLow = 0; - nHigh = (sizeof( aImplRepCharTab )/sizeof( ImplReplaceCharData ))-1; - do + else { - nMid = (nLow+nHigh)/2; - pCharData = aImplRepCharTab+nMid; - nCompareChar = pCharData->mnUniChar; - if ( c < nCompareChar ) - { - if ( !nMid ) - break; - nHigh = nMid-1; - } - else - { - if ( c > nCompareChar ) - nLow = nMid+1; - else - return pCharData->mnReplaceChar; - } + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + return sal_False; } - while ( nLow <= nHigh ); - return 0; + return sal_True; } -/* ----------------------------------------------------------------------- */ - -typedef struct _ImplReplaceCharStrData -{ - sal_uInt16 mnUniChar; - sal_uInt16 maReplaceChars[IMPL_MAX_REPLACECHAR]; -} ImplReplaceCharStrData; - -static ImplReplaceCharStrData const aImplRepCharStrTab[] = -{ - { 0x00A9, 
{ 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */ - { 0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING-DOUBLE ANGLE QUOTATION MARK */ - { 0x0AE0, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN */ - { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING-DOUBLE ANGLE QUOTATION MARK */ - { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */ - { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */ - { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */ - { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */ - { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */ - { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */ - { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */ - { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */ - { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSES */ - { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPPLE PRIME */ - { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* RESERVED TRIPPLE PRIME */ - { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */ - { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */ - { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */ - { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRD */ - { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE FIFTH */ - { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTH */ - { 0x2157, { 0x0033, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTH */ - { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION 
FOUR FIFTH */ - { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE SIXTH */ - { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTH */ - { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */ - { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTH */ - { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTH */ - { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTH */ - { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */ - { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */ - { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */ - { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */ - { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */ - { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */ - { 0x2168, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT */ - { 0x2169, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE */ - { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */ - { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */ - { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */ - { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */ - { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */ - { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SIX */ - { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */ - { 0x2178, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT */ - { 0x2179, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* 
SMALL ROMAN NUMERAL NINE */ - { 0x217A, { 0x0078, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */ - { 0x217B, { 0x0058, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE */ -}; - -const sal_uInt16* ImplGetReplaceString( sal_Unicode c ) -{ - sal_uInt16 nLow; - sal_uInt16 nHigh; - sal_uInt16 nMid; - sal_uInt16 nCompareChar; - const ImplReplaceCharStrData* pCharData; - - nLow = 0; - nHigh = (sizeof( aImplRepCharStrTab )/sizeof( ImplReplaceCharStrData ))-1; - do - { - nMid = (nLow+nHigh)/2; - pCharData = aImplRepCharStrTab+nMid; - nCompareChar = pCharData->mnUniChar; - if ( c < nCompareChar ) - { - if ( !nMid ) - break; - nHigh = nMid-1; - } - else - { - if ( c > nCompareChar ) - nLow = nMid+1; - else - return pCharData->maReplaceChars; - } - } - while ( nLow <= nHigh ); - - return 0; -} diff --git a/sal/textenc/tenchelp.h b/sal/textenc/tenchelp.h index 2a5c2c85a23e..96e14f8c2dea 100644 --- a/sal/textenc/tenchelp.h +++ b/sal/textenc/tenchelp.h @@ -2,9 +2,9 @@ * * $RCSfile: tenchelp.h,v $ * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * - * last change: $Author: th $ $Date: 2001-05-18 13:53:18 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,159 +59,114 @@ * ************************************************************************/ -#ifndef _RTL_TENCHELP_H -#define _RTL_TENCHELP_H +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#define INCLUDED_RTL_TEXTENC_TENCHELP_H -#ifndef _RTL_TEXTENC_H -#include <rtl/textenc.h> -#endif #ifndef _RTL_TENCINFO_H -#include <rtl/tencinfo.h> +#include "rtl/tencinfo.h" #endif - -/* ----------- */ -/* - Scripts - */ -/* ----------- */ - -typedef sal_uInt16 rtl_Script; -#define SCRIPT_DONTKNOW ((rtl_Script)0) -#define SCRIPT_UNICODE ((rtl_Script)1) -#define SCRIPT_SYMBOL ((rtl_Script)2) -#define SCRIPT_LATIN ((rtl_Script)3) -#define SCRIPT_EASTEUROPE ((rtl_Script)4) -#define 
SCRIPT_CYRILLIC ((rtl_Script)5) -#define SCRIPT_BALTIC ((rtl_Script)6) -#define SCRIPT_TURKISH ((rtl_Script)7) -#define SCRIPT_GREEK ((rtl_Script)8) -#define SCRIPT_JAPANESE ((rtl_Script)9) -#define SCRIPT_CHINESE_SIMPLIFIED ((rtl_Script)10) -#define SCRIPT_CHINESE_TRADITIONAL ((rtl_Script)11) -#define SCRIPT_KOREAN ((rtl_Script)12) -#define SCRIPT_ARABIC ((rtl_Script)13) -#define SCRIPT_HEBREW ((rtl_Script)14) -#define SCRIPT_ARMENIAN ((rtl_Script)15) -#define SCRIPT_DEVANAGARI ((rtl_Script)16) -#define SCRIPT_BENGALI ((rtl_Script)17) -#define SCRIPT_GURMUKHI ((rtl_Script)18) -#define SCRIPT_GUJARATI ((rtl_Script)19) -#define SCRIPT_ORIYA ((rtl_Script)20) -#define SCRIPT_TAMIL ((rtl_Script)21) -#define SCRIPT_TELUGU ((rtl_Script)22) -#define SCRIPT_KANNADA ((rtl_Script)23) -#define SCRIPT_MALAYALAM ((rtl_Script)24) -#define SCRIPT_THAI ((rtl_Script)25) -#define SCRIPT_VIETNAMESE ((rtl_Script)26) -#define SCRIPT_LAO ((rtl_Script)27) -#define SCRIPT_GEORGIEN ((rtl_Script)28) - -/* -------- */ -/* - Help - */ -/* -------- */ - -#ifndef NULL -#define NULL ((void*)0) +#ifndef _RTL_TEXTENC_H +#include "rtl/textenc.h" +#endif +#ifndef _SAL_TYPES_H_ +#include "sal/types.h" #endif -#define RTL_UNICODE_START_HIGH_SURROGATES 0xD800 -#define RTL_UNICODE_END_HIGH_SURROGATES 0xDBFF -#define RTL_UNICODE_START_LOW_SURROGATES 0xDC00 -#define RTL_UNICODE_END_LOW_SURROGATES 0xDFFF +#if defined __cplusplus +extern "C" { +#endif /* __cplusplus */ -#define RTL_UNICODE_SURROGATES_HALFMASK 0x03FFUL -#define RTL_UNICODE_SURROGATES_HALFBASE 0x0010000UL -#define RTL_UNICODE_SURROGATES_HALFSHIFT 10 +#define RTL_UNICODE_START_HIGH_SURROGATES 0xD800 +#define RTL_UNICODE_END_HIGH_SURROGATES 0xDBFF +#define RTL_UNICODE_START_LOW_SURROGATES 0xDC00 +#define RTL_UNICODE_END_LOW_SURROGATES 0xDFFF -#define RTL_UNICODE_CHAR_DEFAULT 0xFFFD +#define RTL_UNICODE_CHAR_DEFAULT 0xFFFD -#define RTL_TEXTCVT_BYTE_PRIVATE_START 0xF100 -#define RTL_TEXTCVT_BYTE_PRIVATE_END 0xF1FF +#define 
RTL_TEXTCVT_BYTE_PRIVATE_START 0xF100 +#define RTL_TEXTCVT_BYTE_PRIVATE_END 0xF1FF /* ----------------- */ /* - TextConverter - */ /* ----------------- */ -typedef struct _ImplTextConverterData -{ - const void* mpConvertTables; - sal_Char* mpTextDefChar; - sal_Unicode mcUnicodeDefChar; -} ImplTextConverterData; - -typedef sal_Size (*ImplConvertToUnicodeProc)( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ); -typedef sal_Size (*ImplConvertToTextProc)( const ImplTextConverterData* pData, - void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ); -typedef void* (*ImplCreateTextContextProc)( void ); -typedef void (*ImplDestroyTextContextProc)( void* pContext ); -typedef void (*ImplResetTextContextProc)( void* pContext ); -typedef void* (*ImplCreateUnicodeContextProc)( void ); -typedef void (*ImplDestroyUnicodeContextProc)( void* pContext ); -typedef void (*ImplResetUnicodeContextProc)( void* pContext ); - -typedef struct _ImplTextConverter -{ - const ImplTextConverterData* mpConvertData; - ImplConvertToUnicodeProc mpConvertTextToUnicodeProc; - ImplConvertToTextProc mpConvertUnicodeToTextProc; - ImplCreateTextContextProc mpCreateTextToUnicodeContext; - ImplDestroyTextContextProc mpDestroyTextToUnicodeContext; - ImplResetTextContextProc mpResetTextToUnicodeContext; - ImplCreateUnicodeContextProc mpCreateUnicodeToTextContext; - ImplDestroyUnicodeContextProc mpDestroyUnicodeToTextContext; - ImplResetUnicodeContextProc mpResetUnicodeToTextContext; -} ImplTextConverter; +typedef void ImplTextConverterData; + +typedef +sal_Size (* ImplConvertToUnicodeProc)(ImplTextConverterData const * pData, + void * pContext, + sal_Char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + 
sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +typedef +sal_Size (* ImplConvertToTextProc)(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + sal_Char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +typedef void * (* ImplCreateTextContextProc)(void); -#define RTL_TEXTTOUNICODECONTEXT_NOTUSED ((rtl_TextToUnicodeContext)1) -#define RTL_UNICODETOTEXTCONTEXT_NOTUSED ((rtl_UnicodeToTextContext)1) +typedef void (* ImplDestroyTextContextProc)(void * pContext); -/* ---------------------------- */ -/* - TextEncoding - InfoFlags - */ -/* ---------------------------- */ +typedef void (* ImplResetTextContextProc)(void * pContext); -#define RTL_TEXTENCODING_INFO_WIN ((sal_uInt32)0x0100) -#define RTL_TEXTENCODING_INFO_DOS_OS2 ((sal_uInt32)0x0200) -#define RTL_TEXTENCODING_INFO_MAC ((sal_uInt32)0x0400) +typedef void * (* ImplCreateUnicodeContextProc)(void); + +typedef void (* ImplDestroyUnicodeContextProc)(void * pContext); + +typedef void (* ImplResetUnicodeContextProc)(void * pContext); + +typedef struct +{ + ImplTextConverterData const * mpConvertData; + ImplConvertToUnicodeProc mpConvertTextToUnicodeProc; + ImplConvertToTextProc mpConvertUnicodeToTextProc; + ImplCreateTextContextProc mpCreateTextToUnicodeContext; + ImplDestroyTextContextProc mpDestroyTextToUnicodeContext; + ImplResetTextContextProc mpResetTextToUnicodeContext; + ImplCreateUnicodeContextProc mpCreateUnicodeToTextContext; + ImplDestroyUnicodeContextProc mpDestroyUnicodeToTextContext; + ImplResetUnicodeContextProc mpResetUnicodeToTextContext; +} ImplTextConverter; /* ----------------------------- */ /* - TextEncoding - Structures - */ /* ----------------------------- */ -typedef struct _ImplTextEncodingData +typedef struct { - const ImplTextConverter* mpConverter; - rtl_TextEncoding meTextEncoding; - sal_uInt8 mnMinCharSize; - sal_uInt8 
mnMaxCharSize; - sal_uInt8 mnAveCharSize; - sal_uInt8 mnBestWindowsCharset; - sal_uInt32 mnBestPCCodePage; - sal_uInt32 mnBestMacTextEncoding; - const sal_Char* mpBestUnixCharset; - const sal_Char* mpBestMimeCharset; - sal_uInt32 mnInfoFlags; - rtl_Script mnScript; + ImplTextConverter maConverter; + rtl_TextEncoding meTextEncoding; + sal_uInt8 mnMinCharSize; + sal_uInt8 mnMaxCharSize; + sal_uInt8 mnAveCharSize; + sal_uInt8 mnBestWindowsCharset; + char const * mpBestUnixCharset; + char const * mpBestMimeCharset; + sal_uInt32 mnInfoFlags; } ImplTextEncodingData; - /* ----------------------------------- */ /* - TextConverter - Byte-Structures - */ /* ----------------------------------- */ -typedef struct _ImplUniCharTabData +typedef struct { sal_uInt16 mnUniChar; sal_uChar mnChar; } ImplUniCharTabData; -typedef struct _ImplByteConvertData +typedef struct { const sal_uInt16* mpToUniTab1; const sal_uInt16* mpToUniTab2; @@ -233,7 +188,7 @@ typedef struct _ImplByteConvertData /* - TextConverter - DBCS-Structures - */ /* ----------------------------------- */ -typedef struct _ImplDBCSEUDCData +typedef struct { sal_uChar mnLeadStart; sal_uChar mnLeadEnd; @@ -249,7 +204,7 @@ typedef struct _ImplDBCSEUDCData sal_uInt16 mnUniEnd; } ImplDBCSEUDCData; -typedef struct _ImplDBCSToUniLeadTab +typedef struct { sal_uInt16 mnUniChar; sal_uInt8 mnTrailStart; @@ -257,14 +212,14 @@ typedef struct _ImplDBCSToUniLeadTab const sal_uInt16* mpToUniTrailTab; } ImplDBCSToUniLeadTab; -typedef struct _ImplUniToDBCSHighTab +typedef struct { sal_uInt8 mnLowStart; sal_uInt8 mnLowEnd; const sal_uInt16* mpToUniTrailTab; } ImplUniToDBCSHighTab; -typedef struct _ImplDBCSConvertData +typedef struct { const ImplDBCSToUniLeadTab* mpToUniLeadTab; const ImplUniToDBCSHighTab* mpToDBCSHighTab; @@ -278,7 +233,7 @@ typedef struct _ImplDBCSConvertData /* - TextConverter - EUC-Structures - */ /* ---------------------------------- */ -typedef struct _ImplEUCJPConvertData +typedef struct { const 
ImplDBCSToUniLeadTab* mpJIS0208ToUniLeadTab; const ImplDBCSToUniLeadTab* mpJIS0212ToUniLeadTab; @@ -286,32 +241,21 @@ typedef struct _ImplEUCJPConvertData const ImplUniToDBCSHighTab* mpUniToJIS0212HighTab; } ImplEUCJPConvertData; -/* -------------------------------------- */ -/* - TextConverter - ISO2022-Structures - */ -/* -------------------------------------- */ - -/* ---------------------------- */ -/* - TextEncoding - Functions - */ -/* ---------------------------- */ - -const ImplTextEncodingData* Impl_getTextEncodingData( rtl_TextEncoding eTextEncoding ); - /* --------------------------------- */ /* - TextConverter - HelpFunctions - */ /* --------------------------------- */ -sal_Unicode ImplGetUndefinedUnicodeChar( sal_uChar c, sal_uInt32 nFlags, const ImplTextConverterData* pData ); -sal_Size ImplGetUndefinedAsciiMultiByte( sal_uInt32 nFlags, const ImplTextConverterData* pData, sal_Char* pBuf, sal_Size nMaxLen ); -sal_Size ImplGetInvalidAsciiMultiByte( sal_uInt32 nFlags, const ImplTextConverterData* pData, sal_Char* pBuf, sal_Size nMaxLen ); +sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags); -#define IMPL_TEXTCVT_BREAK 1 -#define IMPL_TEXTCVT_CONTINUE 2 -int ImplHandleUndefinedUnicodeToTextChar( const ImplTextConverterData* pData, - const sal_Unicode** ppSrcBuf, const sal_Unicode* pEndSrcBuf, - sal_Char** ppDestBuf, const sal_Char* pEndDestBuf, - sal_uInt32 nFlags, sal_uInt32* pInfo ); - -int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ); +sal_Bool +ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, + sal_Unicode const ** ppSrcBuf, + sal_Unicode const * pEndSrcBuf, + sal_Char ** ppDestBuf, + sal_Char const * pEndDestBuf, + sal_uInt32 nFlags, + sal_uInt32 * pInfo); + /* sal_True means 'continue,' sal_False means 'break' */ /* ----------------------------- */ /* - TextConverter - Functions - */ @@ -382,12 +326,8 @@ sal_Size ImplUnicodeToUTF8( const ImplTextConverterData* pData, void* pContext, 
sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ); -/* ------------------------------------ */ -/* - TextConverter - ReplaceFunctions - */ -/* ------------------------------------ */ - -#define IMPL_MAX_REPLACECHAR 5 -sal_uInt16 ImplGetReplaceChar( sal_Unicode c ); -const sal_uInt16* ImplGetReplaceString( sal_Unicode c ); +#if defined __cplusplus +} +#endif /* __cplusplus */ -#endif /* _RTL_CCVTHELP_HXX */ +#endif /* INCLUDED_RTL_TEXTENC_TENCHELP_H */ diff --git a/sal/textenc/tencinfo.c b/sal/textenc/tencinfo.c index af056e4a15c6..4e8ff018dcb0 100644 --- a/sal/textenc/tencinfo.c +++ b/sal/textenc/tencinfo.c @@ -2,9 +2,9 @@ * * $RCSfile: tencinfo.c,v $ * - * $Revision: 1.11 $ + * $Revision: 1.12 $ * - * last change: $Author: th $ $Date: 2001-08-22 09:52:54 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,18 +59,28 @@ * ************************************************************************/ -#define _RTL_TENCINFO_C +#ifndef _RTL_TENCINFO_H +#include "rtl/tencinfo.h" +#endif -#include <string.h> +#ifndef INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H +#include "gettextencodingdata.h" +#endif +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" +#endif #ifndef _RTL_ALLOC_H -#include <rtl/alloc.h> +#include "rtl/alloc.h" #endif -#ifndef _RTL_TENCINFO_H -#include <rtl/tencinfo.h> + +#ifndef INCLUDED_STDDEF_H +#include <stddef.h> +#define INCLUDED_STDDEF_H #endif -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_STRING_H +#include <string.h> +#define INCLUDED_STRING_H #endif /* ======================================================================= */ @@ -139,13 +149,13 @@ static sal_Bool Impl_matchString( const sal_Char* pCompStr, const sal_Char* pMat /* ======================================================================= */ -typedef struct _ImplStrCharsetDef +typedef struct { const sal_Char* 
mpCharsetStr; rtl_TextEncoding meTextEncoding; } ImplStrCharsetDef; -typedef struct _ImplStrFirstPartCharsetDef +typedef struct { const sal_Char* mpCharsetStr; const ImplStrCharsetDef* mpSecondPartTab; @@ -954,36 +964,14 @@ sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding e } /* ----------------------------------------------------------------------- */ -/* -sal_uInt32 SAL_CALL rtl_getBestPCCodePageFromTextEncoding( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return pData->mnBestPCCodePage; - else - return 0; -} -*/ -/* ----------------------------------------------------------------------- */ -/* -sal_uInt32 SAL_CALL rtl_getBestMacTextEncodingFromTextEncoding( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return pData->mnBestMacTextEncoding; - else - return 0xFFFFFFFF; -} -*/ -/* ----------------------------------------------------------------------- */ const sal_Char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) { const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); if ( pData ) - return pData->mpBestUnixCharset; + return (sal_Char const *) pData->mpBestUnixCharset; else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) - return "iso10646-1"; + return (sal_Char const *) "iso10646-1"; else return 0; } @@ -994,7 +982,7 @@ const sal_Char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncodin { const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); if ( pData ) - return pData->mpBestMimeCharset; + return (sal_Char const *) pData->mpBestMimeCharset; else return 0; } diff --git a/sal/textenc/textcvt.c b/sal/textenc/textcvt.c index 20780f7188e4..385b42ab5700 100644 --- a/sal/textenc/textcvt.c +++ b/sal/textenc/textcvt.c @@ -2,9 +2,9 @@ * * $RCSfile: textcvt.c,v $ * - * 
$Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-09-18 15:17:30 $ + * last change: $Author: sb $ $Date: 2001-10-12 10:44:53 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,13 +59,15 @@ * ************************************************************************/ -#define _RTL_TEXTCVT_C +#ifndef _RTL_TEXTCVT_H +#include "rtl/textcvt.h" +#endif -#ifndef _RTL_TENCHELP_H -#include <tenchelp.h> +#ifndef INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H +#include "gettextencodingdata.h" #endif -#ifndef _RTL_TEXTCVT_H -#include <rtl/textcvt.h> +#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H +#include "tenchelp.h" #endif /* ======================================================================= */ @@ -150,7 +152,7 @@ rtl_TextToUnicodeConverter SAL_CALL rtl_createTextToUnicodeConverter( rtl_TextEn { const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); if ( pData ) - return (rtl_TextToUnicodeConverter)pData->mpConverter; + return (rtl_TextToUnicodeConverter) &pData->maConverter; else return 0; } @@ -168,10 +170,10 @@ rtl_TextToUnicodeContext SAL_CALL rtl_createTextToUnicodeContext( rtl_TextToUnic const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; if ( !pConverter ) return 0; - if ( pConverter->mpCreateTextToUnicodeContext ) + else if ( pConverter->mpCreateTextToUnicodeContext ) return (rtl_TextToUnicodeContext)pConverter->mpCreateTextToUnicodeContext(); else - return RTL_TEXTTOUNICODECONTEXT_NOTUSED; + return (rtl_TextToUnicodeContext)1; } /* ----------------------------------------------------------------------- */ @@ -228,7 +230,7 @@ rtl_UnicodeToTextConverter SAL_CALL rtl_createUnicodeToTextConverter( rtl_TextEn { const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); if ( pData ) - return (rtl_TextToUnicodeConverter)pData->mpConverter; + return (rtl_TextToUnicodeConverter) &pData->maConverter; else return 0; 
} @@ -246,10 +248,10 @@ rtl_UnicodeToTextContext SAL_CALL rtl_createUnicodeToTextContext( rtl_UnicodeToT const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; if ( !pConverter ) return 0; - if ( pConverter->mpCreateUnicodeToTextContext ) + else if ( pConverter->mpCreateUnicodeToTextContext ) return (rtl_UnicodeToTextContext)pConverter->mpCreateUnicodeToTextContext(); else - return RTL_UNICODETOTEXTCONTEXT_NOTUSED; + return (rtl_UnicodeToTextContext)1; } /* ----------------------------------------------------------------------- */ |