From b234008ba5096f251fffec4c467f2103f4ba3cc0 Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Thu, 5 Jan 2012 23:18:19 +0100 Subject: Changed C files to C++. --- sal/Library_sal.mk | 8 +- sal/Library_sal_textenc.mk | 7 +- sal/textenc/context.c | 50 -- sal/textenc/context.cxx | 52 ++ sal/textenc/context.h | 55 -- sal/textenc/context.hxx | 49 ++ sal/textenc/convertadobe.tab | 10 +- sal/textenc/convertbig5hkscs.c | 494 ------------ sal/textenc/convertbig5hkscs.cxx | 501 +++++++++++++ sal/textenc/convertbig5hkscs.h | 84 --- sal/textenc/convertbig5hkscs.hxx | 75 ++ sal/textenc/convertbig5hkscs.tab | 14 +- sal/textenc/converter.c | 167 ----- sal/textenc/converter.cxx | 170 +++++ sal/textenc/converter.h | 69 -- sal/textenc/converter.hxx | 61 ++ sal/textenc/converteuctw.c | 455 ----------- sal/textenc/converteuctw.cxx | 460 ++++++++++++ sal/textenc/converteuctw.h | 81 -- sal/textenc/converteuctw.hxx | 74 ++ sal/textenc/converteuctw.tab | 14 +- sal/textenc/convertgb18030.c | 471 ------------ sal/textenc/convertgb18030.cxx | 475 ++++++++++++ sal/textenc/convertgb18030.h | 95 --- sal/textenc/convertgb18030.hxx | 88 +++ sal/textenc/convertgb18030.tab | 13 +- sal/textenc/convertiscii.tab | 6 +- sal/textenc/convertiso2022cn.c | 865 --------------------- sal/textenc/convertiso2022cn.cxx | 868 +++++++++++++++++++++ sal/textenc/convertiso2022cn.h | 87 --- sal/textenc/convertiso2022cn.hxx | 80 ++ sal/textenc/convertiso2022cn.tab | 18 +- sal/textenc/convertiso2022jp.c | 572 -------------- sal/textenc/convertiso2022jp.cxx | 577 ++++++++++++++ sal/textenc/convertiso2022jp.h | 81 -- sal/textenc/convertiso2022jp.hxx | 74 ++ sal/textenc/convertiso2022jp.tab | 18 +- sal/textenc/convertiso2022kr.c | 530 ------------- sal/textenc/convertiso2022kr.cxx | 534 +++++++++++++ sal/textenc/convertiso2022kr.h | 81 -- sal/textenc/convertiso2022kr.hxx | 74 ++ sal/textenc/convertiso2022kr.tab | 18 +- sal/textenc/convertsinglebytetobmpunicode.cxx | 11 +- 
sal/textenc/convertsinglebytetobmpunicode.hxx | 10 +- sal/textenc/generate/gb180302000.tab | 8 +- sal/textenc/gettextencodingdata.h | 49 -- sal/textenc/gettextencodingdata.hxx | 43 ++ sal/textenc/tables.cxx | 15 +- sal/textenc/tcvtbyte.c | 858 --------------------- sal/textenc/tcvtbyte.cxx | 848 +++++++++++++++++++++ sal/textenc/tcvtmb.c | 695 ----------------- sal/textenc/tcvtmb.cxx | 690 +++++++++++++++++ sal/textenc/tcvtutf7.c | 591 --------------- sal/textenc/tcvtutf7.cxx | 583 ++++++++++++++ sal/textenc/tcvtutf8.c | 422 ----------- sal/textenc/tcvtutf8.cxx | 422 +++++++++++ sal/textenc/tenchelp.c | 215 ------ sal/textenc/tenchelp.cxx | 216 ++++++ sal/textenc/tenchelp.h | 305 -------- sal/textenc/tenchelp.hxx | 297 ++++++++ sal/textenc/tencinfo.c | 1002 ------------------------- sal/textenc/tencinfo.cxx | 994 ++++++++++++++++++++++++ sal/textenc/textcvt.c | 267 ------- sal/textenc/textcvt.cxx | 266 +++++++ sal/textenc/textenc.cxx | 20 +- sal/textenc/unichars.c | 139 ---- sal/textenc/unichars.cxx | 140 ++++ sal/textenc/unichars.h | 65 -- sal/textenc/unichars.hxx | 58 ++ 69 files changed, 8840 insertions(+), 8964 deletions(-) delete mode 100644 sal/textenc/context.c create mode 100644 sal/textenc/context.cxx delete mode 100644 sal/textenc/context.h create mode 100644 sal/textenc/context.hxx delete mode 100644 sal/textenc/convertbig5hkscs.c create mode 100644 sal/textenc/convertbig5hkscs.cxx delete mode 100644 sal/textenc/convertbig5hkscs.h create mode 100644 sal/textenc/convertbig5hkscs.hxx delete mode 100644 sal/textenc/converter.c create mode 100644 sal/textenc/converter.cxx delete mode 100644 sal/textenc/converter.h create mode 100644 sal/textenc/converter.hxx delete mode 100644 sal/textenc/converteuctw.c create mode 100644 sal/textenc/converteuctw.cxx delete mode 100644 sal/textenc/converteuctw.h create mode 100644 sal/textenc/converteuctw.hxx delete mode 100644 sal/textenc/convertgb18030.c create mode 100644 sal/textenc/convertgb18030.cxx delete mode 100644 
sal/textenc/convertgb18030.h create mode 100644 sal/textenc/convertgb18030.hxx delete mode 100644 sal/textenc/convertiso2022cn.c create mode 100644 sal/textenc/convertiso2022cn.cxx delete mode 100644 sal/textenc/convertiso2022cn.h create mode 100644 sal/textenc/convertiso2022cn.hxx delete mode 100644 sal/textenc/convertiso2022jp.c create mode 100644 sal/textenc/convertiso2022jp.cxx delete mode 100644 sal/textenc/convertiso2022jp.h create mode 100644 sal/textenc/convertiso2022jp.hxx delete mode 100644 sal/textenc/convertiso2022kr.c create mode 100644 sal/textenc/convertiso2022kr.cxx delete mode 100644 sal/textenc/convertiso2022kr.h create mode 100644 sal/textenc/convertiso2022kr.hxx delete mode 100644 sal/textenc/gettextencodingdata.h create mode 100644 sal/textenc/gettextencodingdata.hxx delete mode 100644 sal/textenc/tcvtbyte.c create mode 100644 sal/textenc/tcvtbyte.cxx delete mode 100644 sal/textenc/tcvtmb.c create mode 100644 sal/textenc/tcvtmb.cxx delete mode 100644 sal/textenc/tcvtutf7.c create mode 100644 sal/textenc/tcvtutf7.cxx delete mode 100644 sal/textenc/tcvtutf8.c create mode 100644 sal/textenc/tcvtutf8.cxx delete mode 100644 sal/textenc/tenchelp.c create mode 100644 sal/textenc/tenchelp.cxx delete mode 100644 sal/textenc/tenchelp.h create mode 100644 sal/textenc/tenchelp.hxx delete mode 100644 sal/textenc/tencinfo.c create mode 100644 sal/textenc/tencinfo.cxx delete mode 100644 sal/textenc/textcvt.c create mode 100644 sal/textenc/textcvt.cxx delete mode 100644 sal/textenc/unichars.c create mode 100644 sal/textenc/unichars.cxx delete mode 100644 sal/textenc/unichars.h create mode 100644 sal/textenc/unichars.hxx diff --git a/sal/Library_sal.mk b/sal/Library_sal.mk index f039ca749d59..c8446349e948 100644 --- a/sal/Library_sal.mk +++ b/sal/Library_sal.mk @@ -119,10 +119,6 @@ $(eval $(call gb_Library_add_exception_objects,sal,\ sal/rtl/source/ustrbuf \ sal/rtl/source/ustring \ sal/rtl/source/uuid \ - sal/textenc/textenc \ -)) -$(eval $(call 
gb_Library_add_cobjects,sal,\ - sal/osl/all/filepath \ sal/textenc/context \ sal/textenc/converter \ sal/textenc/tcvtbyte \ @@ -130,8 +126,12 @@ $(eval $(call gb_Library_add_cobjects,sal,\ sal/textenc/tenchelp \ sal/textenc/tencinfo \ sal/textenc/textcvt \ + sal/textenc/textenc \ sal/textenc/unichars \ )) +$(eval $(call gb_Library_add_cobjects,sal,\ + sal/osl/all/filepath \ +)) ifeq ($(GUI),UNX) $(eval $(call gb_Library_add_exception_objects,sal,\ diff --git a/sal/Library_sal_textenc.mk b/sal/Library_sal_textenc.mk index 1280e872cb83..27ff5c08fa33 100644 --- a/sal/Library_sal_textenc.mk +++ b/sal/Library_sal_textenc.mk @@ -39,11 +39,6 @@ $(eval $(call gb_Library_add_defs,sal_textenc,\ )) $(eval $(call gb_Library_add_exception_objects,sal_textenc,\ - sal/textenc/convertsinglebytetobmpunicode \ - sal/textenc/tables \ -)) - -$(eval $(call gb_Library_add_cobjects,sal_textenc,\ sal/textenc/context \ sal/textenc/convertbig5hkscs \ sal/textenc/converter \ @@ -52,6 +47,8 @@ $(eval $(call gb_Library_add_cobjects,sal_textenc,\ sal/textenc/convertiso2022cn \ sal/textenc/convertiso2022jp \ sal/textenc/convertiso2022kr \ + sal/textenc/convertsinglebytetobmpunicode \ + sal/textenc/tables \ sal/textenc/tcvtbyte \ sal/textenc/tcvtmb \ sal/textenc/tcvtutf7 \ diff --git a/sal/textenc/context.c b/sal/textenc/context.c deleted file mode 100644 index 26232cbd89c8..000000000000 --- a/sal/textenc/context.c +++ /dev/null @@ -1,50 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. 
- * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "context.h" -#include "rtl/alloc.h" - -void * ImplCreateUnicodeToTextContext(void) -{ - void * pContext = rtl_allocateMemory(sizeof (ImplUnicodeToTextContext)); - ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate = 0; - return pContext; -} - -void ImplResetUnicodeToTextContext(void * pContext) -{ - if (pContext) - ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate = 0; -} - -void ImplDestroyContext(void * pContext) -{ - rtl_freeMemory(pContext); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/context.cxx b/sal/textenc/context.cxx new file mode 100644 index 000000000000..4770727b45e6 --- /dev/null +++ b/sal/textenc/context.cxx @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. 
+ * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "context.hxx" + +void * ImplCreateUnicodeToTextContext() +{ + ImplUnicodeToTextContext * pContext = new ImplUnicodeToTextContext; + pContext->m_nHighSurrogate = 0; + return pContext; +} + +void ImplResetUnicodeToTextContext(void * pContext) +{ + if (pContext) + static_cast< ImplUnicodeToTextContext * >(pContext)->m_nHighSurrogate = + 0; +} + +void ImplDestroyContext(void * pContext) +{ + delete static_cast< ImplUnicodeToTextContext * >(pContext); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/context.h b/sal/textenc/context.h deleted file mode 100644 index 6b7740ebadbb..000000000000 --- a/sal/textenc/context.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. 
- * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#define INCLUDED_RTL_TEXTENC_CONTEXT_H - -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - sal_Unicode m_nHighSurrogate; -} ImplUnicodeToTextContext; - -void * ImplCreateUnicodeToTextContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetUnicodeToTextContext(void * pContext) SAL_THROW_EXTERN_C(); - -void ImplDestroyContext(void * pContext) SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONTEXT_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/context.hxx b/sal/textenc/context.hxx new file mode 100644 index 000000000000..a83006ee3548 --- /dev/null +++ b/sal/textenc/context.hxx @@ -0,0 +1,49 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONTEXT_HXX +#define INCLUDED_SAL_TEXTENC_CONTEXT_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +struct ImplUnicodeToTextContext +{ + sal_Unicode m_nHighSurrogate; +}; + +void * ImplCreateUnicodeToTextContext(); + +void ImplResetUnicodeToTextContext(void * pContext); + +void ImplDestroyContext(void * pContext); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertadobe.tab b/sal/textenc/convertadobe.tab index 4267aa100811..98633bb55588 100644 --- a/sal/textenc/convertadobe.tab +++ b/sal/textenc/convertadobe.tab @@ -25,12 +25,14 @@ * ************************************************************************/ -#include "context.h" -#include "convertsinglebytetobmpunicode.hxx" -#include "tenchelp.h" +#include "sal/config.h" #include "rtl/tencinfo.h" -#include +#include "sal/macros.h" + +#include "context.hxx" +#include "convertsinglebytetobmpunicode.hxx" +#include "tenchelp.hxx" namespace { diff --git a/sal/textenc/convertbig5hkscs.c b/sal/textenc/convertbig5hkscs.c deleted file 
mode 100644 index 0839e56acee1..000000000000 --- a/sal/textenc/convertbig5hkscs.c +++ /dev/null @@ -1,494 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#include "convertbig5hkscs.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "osl/diagnose.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef struct -{ - sal_Int32 m_nRow; /* 0--255; 0 means none */ -} ImplBig5HkscsToUnicodeContext; - -void * ImplCreateBig5HkscsToUnicodeContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext)); - ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; - return pContext; -} - -void ImplResetBig5HkscsToUnicodeContext(void * pContext) -{ - if (pContext) - ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; -} - -sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - sal_uInt16 const * pBig5Hkscs2001Data - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pBig5Hkscs2001ToUnicodeData; - sal_Int32 const * pBig5Hkscs2001RowOffsets - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pBig5Hkscs2001ToUnicodeRowOffsets; - ImplDBCSToUniLeadTab const * pBig5Data - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pBig5ToUnicodeData; - sal_Int32 nRow = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow; - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - if (nRow == 0) - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else if (nChar >= 0x81 && nChar <= 0xFE) - nRow = nChar; - 
else - { - bUndefined = sal_False; - goto bad_input; - } - else - if ((nChar >= 0x40 && nChar <= 0x7E) - || (nChar >= 0xA1 && nChar <= 0xFE)) - { - sal_uInt32 nUnicode = 0xFFFF; - sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow]; - sal_uInt32 nFirst=0; - sal_uInt32 nLast=0; - if (nOffset != -1) - { - sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; - nFirst = nFirstLast & 0xFF; - nLast = nFirstLast >> 8; - if (nChar >= nFirst && nChar <= nLast) - nUnicode - = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)]; - } - if (nUnicode == 0xFFFF) - { - sal_uInt32 n = pBig5Data[nRow].mnTrailStart; - if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd) - { - nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n]; - if (nUnicode == 0) - nUnicode = 0xFFFF; - OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); - } - } - if (nUnicode == 0xFFFF) - { - ImplDBCSEUDCData const * p - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pEudcData; - sal_uInt32 nCount - = ((ImplBig5HkscsConverterData const *) pData)-> - m_nEudcCount; - sal_uInt32 i; - for (i = 0; i < nCount; ++i) - { - if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd) - { - if (nChar < p->mnTrail1Start) - break; - if (nChar <= p->mnTrail1End) - { - nUnicode - = p->mnUniStart - + (nRow - p->mnLeadStart) - * p->mnTrailRangeCount - + (nChar - p->mnTrail1Start); - break; - } - if (p->mnTrailCount < 2 - || nChar < p->mnTrail2Start) - break; - if (nChar <= p->mnTrail2End) - { - nUnicode - = p->mnUniStart - + (nRow - p->mnLeadStart) - * p->mnTrailRangeCount - + (nChar - p->mnTrail2Start) - + (p->mnTrail1End - p->mnTrail1Start - + 1); - break; - } - if (p->mnTrailCount < 3 - || nChar < p->mnTrail3Start) - break; - if (nChar <= p->mnTrail3End) - { - nUnicode - = p->mnUniStart - + (nRow - p->mnLeadStart) - * p->mnTrailRangeCount - + (nChar - p->mnTrail3Start) - + (p->mnTrail1End - p->mnTrail1Start - + 1) - + (p->mnTrail2End - p->mnTrail2Start - + 1); - break; - } - break; - } - ++p; - } - 
OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); - } - if (nUnicode == 0xFFFF) - goto bad_input; - if (ImplIsHighSurrogate(nUnicode)) - if (pDestBufEnd - pDestBufPtr >= 2) - { - nOffset += nLast - nFirst + 1; - nFirst = pBig5Hkscs2001Data[nOffset++]; - *pDestBufPtr++ = (sal_Unicode) nUnicode; - *pDestBufPtr++ - = (sal_Unicode) pBig5Hkscs2001Data[ - nOffset + (nChar - nFirst)]; - } - else - goto no_output; - else - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nUnicode; - else - goto no_output; - nRow = 0; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - continue; - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - nRow = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nRow = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nRow != 0 - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, - pDestBufEnd, &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nRow = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow; - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, 
- sal_Size * pSrcCvtChars) -{ - sal_uInt16 const * pBig5Hkscs2001Data - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pUnicodeToBig5Hkscs2001Data; - sal_Int32 const * pBig5Hkscs2001PageOffsets - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pUnicodeToBig5Hkscs2001PageOffsets; - sal_Int32 const * pBig5Hkscs2001PlaneOffsets - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pUnicodeToBig5Hkscs2001PlaneOffsets; - ImplUniToDBCSHighTab const * pBig5Data - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pUnicodeToBig5Data; - sal_Unicode nHighSurrogate = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - - if (pContext) - nHighSurrogate - = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; - - for (; nConverted < nSrcChars; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - else - { - sal_uInt32 nBytes = 0; - sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16]; - if (nOffset != -1) - { - nOffset - = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00) - >> 8)]; - if (nOffset != -1) - { - sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; - sal_uInt32 nFirst = nFirstLast & 0xFF; - sal_uInt32 nLast = nFirstLast >> 8; - sal_uInt32 nIndex = nChar & 0xFF; - if (nIndex >= nFirst && nIndex <= nLast) - { - nBytes - = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)]; - } - } - } - if (nBytes == 0) - { - 
sal_uInt32 nIndex1 = nChar >> 8; - if (nIndex1 < 0x100) - { - sal_uInt32 nIndex2 = nChar & 0xFF; - sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart; - if (nIndex2 >= nFirst - && nIndex2 <= pBig5Data[nIndex1].mnLowEnd) - nBytes = pBig5Data[nIndex1]. - mpToUniTrailTab[nIndex2 - nFirst]; - } - } - if (nBytes == 0) - { - ImplDBCSEUDCData const * p - = ((ImplBig5HkscsConverterData const *) pData)-> - m_pEudcData; - sal_uInt32 nCount - = ((ImplBig5HkscsConverterData const *) pData)-> - m_nEudcCount; - sal_uInt32 i; - for (i = 0; i < nCount; ++i) { - if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd) - { - sal_uInt32 nIndex = nChar - p->mnUniStart; - sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount; - sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount; - sal_uInt32 nSize; - nBytes = (p->mnLeadStart + nLeadOff) << 8; - nSize = p->mnTrail1End - p->mnTrail1Start + 1; - if (nTrailOff < nSize) - { - nBytes |= p->mnTrail1Start + nTrailOff; - break; - } - nTrailOff -= nSize; - nSize = p->mnTrail2End - p->mnTrail2Start + 1; - if (nTrailOff < nSize) - { - nBytes |= p->mnTrail2Start + nTrailOff; - break; - } - nTrailOff -= nSize; - nBytes |= p->mnTrail3Start + nTrailOff; - break; - } - ++p; - } - } - if (nBytes == 0) - goto bad_input; - if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Char) (nBytes >> 8); - *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF); - } - else - goto no_output; - } - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nHighSurrogate != 0 - && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | 
RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertbig5hkscs.cxx b/sal/textenc/convertbig5hkscs.cxx new file mode 100644 index 000000000000..113a90e2dccb --- /dev/null +++ b/sal/textenc/convertbig5hkscs.cxx @@ -0,0 +1,501 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include <cassert> + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "context.hxx" +#include "convertbig5hkscs.hxx" +#include "converter.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +namespace { + +struct ImplBig5HkscsToUnicodeContext +{ + sal_Int32 m_nRow; // 0--255; 0 means none +}; + +} + +void * ImplCreateBig5HkscsToUnicodeContext() +{ + ImplBig5HkscsToUnicodeContext * pContext = + new ImplBig5HkscsToUnicodeContext; + pContext->m_nRow = 0; + return pContext; +} + +void ImplResetBig5HkscsToUnicodeContext(void * pContext) +{ + if (pContext) + static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0; +} + +sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + sal_uInt16 const * pBig5Hkscs2001Data + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pBig5Hkscs2001ToUnicodeData; + sal_Int32 const * pBig5Hkscs2001RowOffsets + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pBig5Hkscs2001ToUnicodeRowOffsets; + ImplDBCSToUniLeadTab const * pBig5Data + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pBig5ToUnicodeData; + sal_Int32 nRow = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow; + + for (; nConverted < nSrcBytes; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; + if 
(nRow == 0) + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else if (nChar >= 0x81 && nChar <= 0xFE) + nRow = nChar; + else + { + bUndefined = false; + goto bad_input; + } + else + if ((nChar >= 0x40 && nChar <= 0x7E) + || (nChar >= 0xA1 && nChar <= 0xFE)) + { + sal_uInt32 nUnicode = 0xFFFF; + sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow]; + sal_uInt32 nFirst=0; + sal_uInt32 nLast=0; + if (nOffset != -1) + { + sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; + nFirst = nFirstLast & 0xFF; + nLast = nFirstLast >> 8; + if (nChar >= nFirst && nChar <= nLast) + nUnicode + = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)]; + } + if (nUnicode == 0xFFFF) + { + sal_uInt32 n = pBig5Data[nRow].mnTrailStart; + if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd) + { + nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n]; + if (nUnicode == 0) + nUnicode = 0xFFFF; + assert(!ImplIsHighSurrogate(nUnicode)); + } + } + if (nUnicode == 0xFFFF) + { + ImplDBCSEUDCData const * p + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pEudcData; + sal_uInt32 nCount + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_nEudcCount; + sal_uInt32 i; + for (i = 0; i < nCount; ++i) + { + if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd) + { + if (nChar < p->mnTrail1Start) + break; + if (nChar <= p->mnTrail1End) + { + nUnicode + = p->mnUniStart + + (nRow - p->mnLeadStart) + * p->mnTrailRangeCount + + (nChar - p->mnTrail1Start); + break; + } + if (p->mnTrailCount < 2 + || nChar < p->mnTrail2Start) + break; + if (nChar <= p->mnTrail2End) + { + nUnicode + = p->mnUniStart + + (nRow - p->mnLeadStart) + * p->mnTrailRangeCount + + (nChar - p->mnTrail2Start) + + (p->mnTrail1End - p->mnTrail1Start + + 1); + break; + } + if (p->mnTrailCount < 3 + || nChar < p->mnTrail3Start) + break; + if (nChar <= p->mnTrail3End) + { + nUnicode + = p->mnUniStart + + (nRow - p->mnLeadStart) + * 
p->mnTrailRangeCount + + (nChar - p->mnTrail3Start) + + (p->mnTrail1End - p->mnTrail1Start + + 1) + + (p->mnTrail2End - p->mnTrail2Start + + 1); + break; + } + break; + } + ++p; + } + assert(!ImplIsHighSurrogate(nUnicode)); + } + if (nUnicode == 0xFFFF) + goto bad_input; + if (ImplIsHighSurrogate(nUnicode)) + if (pDestBufEnd - pDestBufPtr >= 2) + { + nOffset += nLast - nFirst + 1; + nFirst = pBig5Hkscs2001Data[nOffset++]; + *pDestBufPtr++ = (sal_Unicode) nUnicode; + *pDestBufPtr++ + = (sal_Unicode) pBig5Hkscs2001Data[ + nOffset + (nChar - nFirst)]; + } + else + goto no_output; + else + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nUnicode; + else + goto no_output; + nRow = 0; + } + else + { + bUndefined = false; + goto bad_input; + } + continue; + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + nRow = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nRow = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nRow != 0 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nRow = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow; + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData 
const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + sal_uInt16 const * pBig5Hkscs2001Data + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pUnicodeToBig5Hkscs2001Data; + sal_Int32 const * pBig5Hkscs2001PageOffsets + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pUnicodeToBig5Hkscs2001PageOffsets; + sal_Int32 const * pBig5Hkscs2001PlaneOffsets + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pUnicodeToBig5Hkscs2001PlaneOffsets; + ImplUniToDBCSHighTab const * pBig5Data + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pUnicodeToBig5Data; + sal_Unicode nHighSurrogate = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBuf + nDestBytes; + + if (pContext) + nHighSurrogate + = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; + + for (; nConverted < nSrcChars; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar)) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar)) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + { + bUndefined = false; + goto bad_input; + } + + if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) + { + bUndefined = false; + goto bad_input; + } + + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + else + { + sal_uInt32 nBytes = 0; + sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16]; + if (nOffset != -1) + { + nOffset + = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00) + >> 8)]; + if (nOffset != -1) + { + sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; + sal_uInt32 nFirst = nFirstLast & 0xFF; + sal_uInt32 
nLast = nFirstLast >> 8; + sal_uInt32 nIndex = nChar & 0xFF; + if (nIndex >= nFirst && nIndex <= nLast) + { + nBytes + = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)]; + } + } + } + if (nBytes == 0) + { + sal_uInt32 nIndex1 = nChar >> 8; + if (nIndex1 < 0x100) + { + sal_uInt32 nIndex2 = nChar & 0xFF; + sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart; + if (nIndex2 >= nFirst + && nIndex2 <= pBig5Data[nIndex1].mnLowEnd) + nBytes = pBig5Data[nIndex1]. + mpToUniTrailTab[nIndex2 - nFirst]; + } + } + if (nBytes == 0) + { + ImplDBCSEUDCData const * p + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_pEudcData; + sal_uInt32 nCount + = static_cast< ImplBig5HkscsConverterData const * >(pData)-> + m_nEudcCount; + sal_uInt32 i; + for (i = 0; i < nCount; ++i) { + if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd) + { + sal_uInt32 nIndex = nChar - p->mnUniStart; + sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount; + sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount; + sal_uInt32 nSize; + nBytes = (p->mnLeadStart + nLeadOff) << 8; + nSize = p->mnTrail1End - p->mnTrail1Start + 1; + if (nTrailOff < nSize) + { + nBytes |= p->mnTrail1Start + nTrailOff; + break; + } + nTrailOff -= nSize; + nSize = p->mnTrail2End - p->mnTrail2Start + 1; + if (nTrailOff < nSize) + { + nBytes |= p->mnTrail2Start + nTrailOff; + break; + } + nTrailOff -= nSize; + nBytes |= p->mnTrail3Start + nTrailOff; + break; + } + ++p; + } + } + if (nBytes == 0) + goto bad_input; + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = static_cast< char >(nBytes >> 8); + *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF); + } + else + goto no_output; + } + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, + nChar, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + NULL, + 0, + NULL)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + continue; + + case 
IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nHighSurrogate != 0 + && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion(false, + 0, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + NULL, + 0, + NULL)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate + = nHighSurrogate; + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertbig5hkscs.h b/sal/textenc/convertbig5hkscs.h deleted file mode 100644 index e7e66434b9c9..000000000000 --- a/sal/textenc/convertbig5hkscs.h +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. 
- * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTBIG5HKSCS_H -#define INCLUDED_RTL_TEXTENC_CONVERTBIG5HKSCS_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - sal_uInt16 const * m_pBig5Hkscs2001ToUnicodeData; - sal_Int32 const * m_pBig5Hkscs2001ToUnicodeRowOffsets; - ImplDBCSToUniLeadTab const * m_pBig5ToUnicodeData; - sal_uInt16 const * m_pUnicodeToBig5Hkscs2001Data; - sal_Int32 const * m_pUnicodeToBig5Hkscs2001PageOffsets; - sal_Int32 const * m_pUnicodeToBig5Hkscs2001PlaneOffsets; - ImplUniToDBCSHighTab const * m_pUnicodeToBig5Data; - ImplDBCSEUDCData const * m_pEudcData; - int m_nEudcCount; -} ImplBig5HkscsConverterData; - -void * ImplCreateBig5HkscsToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetBig5HkscsToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, 
- sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTBIG5HKSCS_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertbig5hkscs.hxx b/sal/textenc/convertbig5hkscs.hxx new file mode 100644 index 000000000000..92fc3724d07e --- /dev/null +++ b/sal/textenc/convertbig5hkscs.hxx @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTBIG5HKSCS_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTBIG5HKSCS_HXX + +#include "sal/types.h" + +#include "tenchelp.hxx" + +struct ImplBig5HkscsConverterData +{ + sal_uInt16 const * m_pBig5Hkscs2001ToUnicodeData; + sal_Int32 const * m_pBig5Hkscs2001ToUnicodeRowOffsets; + ImplDBCSToUniLeadTab const * m_pBig5ToUnicodeData; + sal_uInt16 const * m_pUnicodeToBig5Hkscs2001Data; + sal_Int32 const * m_pUnicodeToBig5Hkscs2001PageOffsets; + sal_Int32 const * m_pUnicodeToBig5Hkscs2001PlaneOffsets; + ImplUniToDBCSHighTab const * m_pUnicodeToBig5Data; + ImplDBCSEUDCData const * m_pEudcData; + int m_nEudcCount; +}; + +void * ImplCreateBig5HkscsToUnicodeContext(); + +void ImplResetBig5HkscsToUnicodeContext(void * pContext); + +sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertbig5hkscs.tab b/sal/textenc/convertbig5hkscs.tab index 9fe2a47b5156..29720a842b59 100644 --- a/sal/textenc/convertbig5hkscs.tab +++ b/sal/textenc/convertbig5hkscs.tab @@ -25,19 +25,13 @@ * ************************************************************************/ -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#include "context.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_CONVERTBIG5HKSCS_H -#include "convertbig5hkscs.h" -#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include 
"rtl/textenc.h" -#endif + +#include "context.hxx" +#include "convertbig5hkscs.hxx" #include "generate/big5hkscs2001.tab" diff --git a/sal/textenc/converter.c b/sal/textenc/converter.c deleted file mode 100644 index 07c1e04c9ba3..000000000000 --- a/sal/textenc/converter.c +++ /dev/null @@ -1,167 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion( - sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags, - sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo) -{ - *pInfo |= bUndefined - ? (bMultiByte - ? 
RTL_TEXTTOUNICODE_INFO_MBUNDEFINED - : RTL_TEXTTOUNICODE_INFO_UNDEFINED) - : RTL_TEXTTOUNICODE_INFO_INVALID; - switch (nFlags - & (bUndefined - ? (bMultiByte - ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK - : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) - : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK)) - { - case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR: - case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR: - case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR: - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - return IMPL_BAD_INPUT_STOP; - - case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE: - case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE: - case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE: - return IMPL_BAD_INPUT_CONTINUE; - - case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE: - if (*pDestBufPtr != pDestBufEnd) - { - *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START - | ((sal_uChar) cByte); - return IMPL_BAD_INPUT_CONTINUE; - } - else - return IMPL_BAD_INPUT_NO_OUTPUT; - - default: /* RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT, - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT, - RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */ - if (*pDestBufPtr != pDestBufEnd) - { - *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - return IMPL_BAD_INPUT_CONTINUE; - } - else - return IMPL_BAD_INPUT_NO_OUTPUT; - } -} - -ImplBadInputConversionAction -ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined, - sal_uInt32 nUtf32, - sal_uInt32 nFlags, - sal_Char ** pDestBufPtr, - sal_Char * pDestBufEnd, - sal_uInt32 * pInfo, - sal_Char const * pPrefix, - sal_Size nPrefixLen, - sal_Bool * pPrefixWritten) -{ - /* TODO! 
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE - RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR */ - - sal_Char cReplace; - - if (bUndefined) - { - if (ImplIsControlOrFormat(nUtf32)) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0) - nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; - } - else if (ImplIsPrivateUse(nUtf32)) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0) - nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; - else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0) - nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0; - } - else if (ImplIsZeroWidth(nUtf32)) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0) - nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; - } - } - *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED : - RTL_UNICODETOTEXT_INFO_INVALID; - switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK : - RTL_UNICODETOTEXT_FLAGS_INVALID_MASK)) - { - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR: - case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR: - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; - return IMPL_BAD_INPUT_STOP; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE: - case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE: - if (pPrefixWritten) - *pPrefixWritten = sal_False; - return IMPL_BAD_INPUT_CONTINUE; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: - case RTL_UNICODETOTEXT_FLAGS_INVALID_0: - cReplace = 0; - break; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: - case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: - default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT, - RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ - cReplace = '?'; - break; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: - case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: - cReplace = '_'; - break; - } - if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen) - { - while (nPrefixLen-- > 0) - *(*pDestBufPtr)++ = *pPrefix++; - *(*pDestBufPtr)++ = cReplace; - if (pPrefixWritten) - *pPrefixWritten = sal_True; 
- return IMPL_BAD_INPUT_CONTINUE; - } - else - return IMPL_BAD_INPUT_NO_OUTPUT; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converter.cxx b/sal/textenc/converter.cxx new file mode 100644 index 000000000000..cde368c133ae --- /dev/null +++ b/sal/textenc/converter.cxx @@ -0,0 +1,170 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "converter.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion( + bool bUndefined, bool bMultiByte, char cByte, sal_uInt32 nFlags, + sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo) +{ + *pInfo |= bUndefined + ? (bMultiByte + ? 
RTL_TEXTTOUNICODE_INFO_MBUNDEFINED + : RTL_TEXTTOUNICODE_INFO_UNDEFINED) + : RTL_TEXTTOUNICODE_INFO_INVALID; + switch (nFlags + & (bUndefined + ? (bMultiByte + ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK + : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) + : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK)) + { + case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR: + case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR: + case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR: + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + return IMPL_BAD_INPUT_STOP; + + case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE: + case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE: + case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE: + return IMPL_BAD_INPUT_CONTINUE; + + case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE: + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START + | ((sal_uChar) cByte); + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + + default: // RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT, + // RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT, + // RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT + if (*pDestBufPtr != pDestBufEnd) + { + *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + return IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; + } +} + +ImplBadInputConversionAction +ImplHandleBadInputUnicodeToTextConversion(bool bUndefined, + sal_uInt32 nUtf32, + sal_uInt32 nFlags, + char ** pDestBufPtr, + char * pDestBufEnd, + sal_uInt32 * pInfo, + char const * pPrefix, + sal_Size nPrefixLen, + bool * pPrefixWritten) +{ + // TODO! 
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE + // RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR + + char cReplace; + + if (bUndefined) + { + if (ImplIsControlOrFormat(nUtf32)) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0) + nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; + } + else if (ImplIsPrivateUse(nUtf32)) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0) + nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; + else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0) + nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0; + } + else if (ImplIsZeroWidth(nUtf32)) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0) + nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; + } + } + *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED : + RTL_UNICODETOTEXT_INFO_INVALID; + switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK : + RTL_UNICODETOTEXT_FLAGS_INVALID_MASK)) + { + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR: + case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR: + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; + return IMPL_BAD_INPUT_STOP; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE: + case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE: + if (pPrefixWritten) + *pPrefixWritten = false; + return IMPL_BAD_INPUT_CONTINUE; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: + case RTL_UNICODETOTEXT_FLAGS_INVALID_0: + cReplace = 0; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: + case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: + default: // RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT, + // RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT + cReplace = '?'; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: + case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: + cReplace = '_'; + break; + } + if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen) + { + while (nPrefixLen-- > 0) + *(*pDestBufPtr)++ = *pPrefix++; + *(*pDestBufPtr)++ = cReplace; + if (pPrefixWritten) + *pPrefixWritten = true; + return 
IMPL_BAD_INPUT_CONTINUE; + } + else + return IMPL_BAD_INPUT_NO_OUTPUT; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converter.h b/sal/textenc/converter.h deleted file mode 100644 index c74652704781..000000000000 --- a/sal/textenc/converter.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTER_H -#define INCLUDED_RTL_TEXTENC_CONVERTER_H - -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef enum -{ - IMPL_BAD_INPUT_STOP, - IMPL_BAD_INPUT_CONTINUE, - IMPL_BAD_INPUT_NO_OUTPUT -} ImplBadInputConversionAction; - -ImplBadInputConversionAction -ImplHandleBadInputTextToUnicodeConversion( - sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags, - sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo) - SAL_THROW_EXTERN_C(); - -ImplBadInputConversionAction -ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined, - sal_uInt32 nUtf32, - sal_uInt32 nFlags, - sal_Char ** pDestBufPtr, - sal_Char * pDestBufEnd, - sal_uInt32 * pInfo, - sal_Char const * pPrefix, - sal_Size nPrefixLen, - sal_Bool * pPrefixWritten) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTER_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converter.hxx b/sal/textenc/converter.hxx new file mode 100644 index 000000000000..50338c2efaa5 --- /dev/null +++ b/sal/textenc/converter.hxx @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTER_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTER_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +enum ImplBadInputConversionAction +{ + IMPL_BAD_INPUT_STOP, + IMPL_BAD_INPUT_CONTINUE, + IMPL_BAD_INPUT_NO_OUTPUT +}; + +ImplBadInputConversionAction +ImplHandleBadInputTextToUnicodeConversion( + bool bUndefined, bool bMultiByte, char cByte, sal_uInt32 nFlags, + sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo); + +ImplBadInputConversionAction +ImplHandleBadInputUnicodeToTextConversion(bool bUndefined, + sal_uInt32 nUtf32, + sal_uInt32 nFlags, + char ** pDestBufPtr, + char * pDestBufEnd, + sal_uInt32 * pInfo, + char const * pPrefix, + sal_Size nPrefixLen, + bool * pPrefixWritten); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converteuctw.c b/sal/textenc/converteuctw.c deleted file mode 100644 index 062862abf76a..000000000000 --- a/sal/textenc/converteuctw.c +++ /dev/null @@ -1,455 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. 
- * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "converteuctw.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef enum -{ - IMPL_EUC_TW_TO_UNICODE_STATE_0, - IMPL_EUC_TW_TO_UNICODE_STATE_1, - IMPL_EUC_TW_TO_UNICODE_STATE_2_1, - IMPL_EUC_TW_TO_UNICODE_STATE_2_2, - IMPL_EUC_TW_TO_UNICODE_STATE_2_3 -} ImplEucTwToUnicodeState; - -typedef struct -{ - ImplEucTwToUnicodeState m_eState; - sal_Int32 m_nPlane; /* 0--15 */ - sal_Int32 m_nRow; /* 0--93 */ -} ImplEucTwToUnicodeContext; - -void * ImplCreateEucTwToUnicodeContext(void) -{ - void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext)); - ((ImplEucTwToUnicodeContext *) pContext)->m_eState - = IMPL_EUC_TW_TO_UNICODE_STATE_0; - return pContext; -} - -void ImplResetEucTwToUnicodeContext(void * pContext) -{ - if (pContext) - ((ImplEucTwToUnicodeContext *) pContext)->m_eState - = IMPL_EUC_TW_TO_UNICODE_STATE_0; -} - -sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, - void * pContext, - 
sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - sal_uInt16 const * pCns116431992Data - = ((ImplEucTwConverterData const *) pData)-> - m_pCns116431992ToUnicodeData; - sal_Int32 const * pCns116431992RowOffsets - = ((ImplEucTwConverterData const *) pData)-> - m_pCns116431992ToUnicodeRowOffsets; - sal_Int32 const * pCns116431992PlaneOffsets - = ((ImplEucTwConverterData const *) pData)-> - m_pCns116431992ToUnicodePlaneOffsets; - ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; - sal_Int32 nPlane = 0; - sal_Int32 nRow = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - { - eState = ((ImplEucTwToUnicodeContext *) pContext)->m_eState; - nPlane = ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane; - nRow = ((ImplEucTwToUnicodeContext *) pContext)->m_nRow; - } - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - switch (eState) - { - case IMPL_EUC_TW_TO_UNICODE_STATE_0: - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else if (nChar >= 0xA1 && nChar <= 0xFE) - { - nRow = nChar - 0xA1; - eState = IMPL_EUC_TW_TO_UNICODE_STATE_1; - } - else if (nChar == 0x8E) - eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_EUC_TW_TO_UNICODE_STATE_1: - if (nChar >= 0xA1 && nChar <= 0xFE) - { - nPlane = 0; - goto transform; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_EUC_TW_TO_UNICODE_STATE_2_1: - if (nChar >= 0xA1 && nChar <= 0xB0) - { - nPlane = nChar - 0xA1; - ++eState; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case 
IMPL_EUC_TW_TO_UNICODE_STATE_2_2: - if (nChar >= 0xA1 && nChar <= 0xFE) - { - nRow = nChar - 0xA1; - ++eState; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_EUC_TW_TO_UNICODE_STATE_2_3: - if (nChar >= 0xA1 && nChar <= 0xFE) - goto transform; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - } - continue; - - transform: - { - sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; - if (nPlaneOffset == -1) - goto bad_input; - else - { - sal_Int32 nOffset - = pCns116431992RowOffsets[nPlaneOffset + nRow]; - if (nOffset == -1) - goto bad_input; - else - { - sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; - sal_uInt32 nFirst = nFirstLast & 0xFF; - sal_uInt32 nLast = nFirstLast >> 8; - nChar -= 0xA0; - if (nChar >= nFirst && nChar <= nLast) - { - sal_uInt32 nUnicode - = pCns116431992Data[nOffset + (nChar - nFirst)]; - if (nUnicode == 0xFFFF) - goto bad_input; - else if (ImplIsHighSurrogate(nUnicode)) - if (pDestBufEnd - pDestBufPtr >= 2) - { - nOffset += nLast - nFirst + 1; - nFirst = pCns116431992Data[nOffset++]; - *pDestBufPtr++ = (sal_Unicode) nUnicode; - *pDestBufPtr++ - = (sal_Unicode) - pCns116431992Data[ - nOffset + (nChar - nFirst)]; - } - else - goto no_output; - else - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nUnicode; - else - goto no_output; - } - else - goto bad_input; - eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; - } - } - continue; - } - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0 - && (nInfo & 
(RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, - pDestBufEnd, &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - { - ((ImplEucTwToUnicodeContext *) pContext)->m_eState = eState; - ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane = nPlane; - ((ImplEucTwToUnicodeContext *) pContext)->m_nRow = nRow; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) -{ - sal_uInt8 const * pCns116431992Data - = ((ImplEucTwConverterData const *) pData)-> - m_pUnicodeToCns116431992Data; - sal_Int32 const * pCns116431992PageOffsets - = ((ImplEucTwConverterData const *) pData)-> - m_pUnicodeToCns116431992PageOffsets; - sal_Int32 const * pCns116431992PlaneOffsets - = ((ImplEucTwConverterData const *) pData)-> - m_pUnicodeToCns116431992PlaneOffsets; - sal_Unicode nHighSurrogate = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - - if (pContext) - nHighSurrogate - = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; - - for (; nConverted < nSrcChars; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate 
= (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - else - { - sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; - sal_uInt32 nFirst; - sal_uInt32 nLast; - sal_uInt32 nPlane; - if (nOffset == -1) - goto bad_input; - nOffset - = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; - if (nOffset == -1) - goto bad_input; - nFirst = pCns116431992Data[nOffset++]; - nLast = pCns116431992Data[nOffset++]; - nChar &= 0xFF; - if (nChar < nFirst || nChar > nLast) - goto bad_input; - nOffset += 3 * (nChar - nFirst); - nPlane = pCns116431992Data[nOffset++]; - if (nPlane == 0) - goto bad_input; - if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 
2 : 4)) - goto no_output; - if (nPlane != 1) - { - *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E; - *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane); - } - *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]); - *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]); - } - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nHighSurrogate != 0 - && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converteuctw.cxx b/sal/textenc/converteuctw.cxx new file mode 100644 index 000000000000..0b6b41bb52d4 --- /dev/null +++ b/sal/textenc/converteuctw.cxx @@ -0,0 +1,460 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT 
ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "context.hxx" +#include "converter.hxx" +#include "converteuctw.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +namespace { + +enum ImplEucTwToUnicodeState +{ + IMPL_EUC_TW_TO_UNICODE_STATE_0, + IMPL_EUC_TW_TO_UNICODE_STATE_1, + IMPL_EUC_TW_TO_UNICODE_STATE_2_1, + IMPL_EUC_TW_TO_UNICODE_STATE_2_2, + IMPL_EUC_TW_TO_UNICODE_STATE_2_3 +}; + +struct ImplEucTwToUnicodeContext +{ + ImplEucTwToUnicodeState m_eState; + sal_Int32 m_nPlane; // 0--15 + sal_Int32 m_nRow; // 0--93 +}; + +} + +void * ImplCreateEucTwToUnicodeContext() +{ + ImplEucTwToUnicodeContext * pContext = new ImplEucTwToUnicodeContext; + pContext->m_eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + return pContext; +} + +void ImplResetEucTwToUnicodeContext(void * pContext) +{ + if (pContext) + static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState + = IMPL_EUC_TW_TO_UNICODE_STATE_0; +} + 
+sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + sal_uInt16 const * pCns116431992Data + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pCns116431992ToUnicodeData; + sal_Int32 const * pCns116431992RowOffsets + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pCns116431992ToUnicodePlaneOffsets; + ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + sal_Int32 nPlane = 0; + sal_Int32 nRow = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState; + nPlane = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane; + nRow = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; + switch (eState) + { + case IMPL_EUC_TW_TO_UNICODE_STATE_0: + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else if (nChar >= 0xA1 && nChar <= 0xFE) + { + nRow = nChar - 0xA1; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_1; + } + else if (nChar == 0x8E) + eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_1: + if (nChar >= 0xA1 && nChar <= 0xFE) + { + nPlane = 0; + goto transform; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_1: + if (nChar >= 0xA1 && nChar 
<= 0xB0) + { + nPlane = nChar - 0xA1; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_2: + if (nChar >= 0xA1 && nChar <= 0xFE) + { + nRow = nChar - 0xA1; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_EUC_TW_TO_UNICODE_STATE_2_3: + if (nChar >= 0xA1 && nChar <= 0xFE) + goto transform; + else + { + bUndefined = false; + goto bad_input; + } + break; + } + continue; + + transform: + { + sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; + if (nPlaneOffset == -1) + goto bad_input; + else + { + sal_Int32 nOffset + = pCns116431992RowOffsets[nPlaneOffset + nRow]; + if (nOffset == -1) + goto bad_input; + else + { + sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; + sal_uInt32 nFirst = nFirstLast & 0xFF; + sal_uInt32 nLast = nFirstLast >> 8; + nChar -= 0xA0; + if (nChar >= nFirst && nChar <= nLast) + { + sal_uInt32 nUnicode + = pCns116431992Data[nOffset + (nChar - nFirst)]; + if (nUnicode == 0xFFFF) + goto bad_input; + else if (ImplIsHighSurrogate(nUnicode)) + if (pDestBufEnd - pDestBufPtr >= 2) + { + nOffset += nLast - nFirst + 1; + nFirst = pCns116431992Data[nOffset++]; + *pDestBufPtr++ = (sal_Unicode) nUnicode; + *pDestBufPtr++ + = (sal_Unicode) + pCns116431992Data[ + nOffset + (nChar - nFirst)]; + } + else + goto no_output; + else + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nUnicode; + else + goto no_output; + } + else + goto bad_input; + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + } + } + continue; + } + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto 
no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + { + static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState = eState; + static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane = nPlane; + static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow = nRow; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + sal_uInt8 const * pCns116431992Data + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pUnicodeToCns116431992Data; + sal_Int32 const * pCns116431992PageOffsets + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = static_cast< ImplEucTwConverterData const * >(pData)-> + m_pUnicodeToCns116431992PlaneOffsets; + sal_Unicode nHighSurrogate = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBuf + nDestBytes; + + if (pContext) + nHighSurrogate + = ((ImplUnicodeToTextContext *) 
pContext)->m_nHighSurrogate; + + for (; nConverted < nSrcChars; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar)) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar)) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + { + bUndefined = false; + goto bad_input; + } + + if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) + { + bUndefined = false; + goto bad_input; + } + + if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + else + { + sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; + sal_uInt32 nFirst; + sal_uInt32 nLast; + sal_uInt32 nPlane; + if (nOffset == -1) + goto bad_input; + nOffset + = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; + if (nOffset == -1) + goto bad_input; + nFirst = pCns116431992Data[nOffset++]; + nLast = pCns116431992Data[nOffset++]; + nChar &= 0xFF; + if (nChar < nFirst || nChar > nLast) + goto bad_input; + nOffset += 3 * (nChar - nFirst); + nPlane = pCns116431992Data[nOffset++]; + if (nPlane == 0) + goto bad_input; + if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 
2 : 4)) + goto no_output; + if (nPlane != 1) + { + *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0x8E)); + *pDestBufPtr++ = static_cast< char >(0xA0 + nPlane); + } + *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset++]); + *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset]); + } + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, + nChar, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + NULL, + 0, + NULL)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nHighSurrogate != 0 + && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion(false, + 0, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + NULL, + 0, + NULL)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate + = nHighSurrogate; + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converteuctw.h b/sal/textenc/converteuctw.h deleted file mode 100644 index 6d30fced354a..000000000000 --- a/sal/textenc/converteuctw.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 
-/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H -#define INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - sal_uInt16 const * m_pCns116431992ToUnicodeData; - sal_Int32 const * m_pCns116431992ToUnicodeRowOffsets; - sal_Int32 const * m_pCns116431992ToUnicodePlaneOffsets; - sal_uInt8 const * m_pUnicodeToCns116431992Data; - sal_Int32 const * m_pUnicodeToCns116431992PageOffsets; - sal_Int32 const * m_pUnicodeToCns116431992PlaneOffsets; -} ImplEucTwConverterData; - -void * ImplCreateEucTwToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetEucTwToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converteuctw.hxx b/sal/textenc/converteuctw.hxx new file mode 100644 index 000000000000..72c24953ba32 --- /dev/null +++ b/sal/textenc/converteuctw.hxx @@ -0,0 +1,74 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTEUCTW_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTEUCTW_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +#include "tenchelp.hxx" + +struct ImplEucTwConverterData +{ + sal_uInt16 const * m_pCns116431992ToUnicodeData; + sal_Int32 const * m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * m_pCns116431992ToUnicodePlaneOffsets; + sal_uInt8 const * m_pUnicodeToCns116431992Data; + sal_Int32 const * m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * m_pUnicodeToCns116431992PlaneOffsets; +}; + +void * ImplCreateEucTwToUnicodeContext(); + +void ImplResetEucTwToUnicodeContext(void * pContext); + +sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode 
const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/converteuctw.tab b/sal/textenc/converteuctw.tab index c9bfeea697a9..398120bd0696 100644 --- a/sal/textenc/converteuctw.tab +++ b/sal/textenc/converteuctw.tab @@ -25,19 +25,13 @@ * ************************************************************************/ -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#include "context.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_CONVERTEUCTW_H -#include "converteuctw.h" -#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif + +#include "context.hxx" +#include "converteuctw.hxx" #include "generate/cns116431992.tab" diff --git a/sal/textenc/convertgb18030.c b/sal/textenc/convertgb18030.c deleted file mode 100644 index c9cac4bf7693..000000000000 --- a/sal/textenc/convertgb18030.c +++ /dev/null @@ -1,471 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "convertgb18030.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef enum -{ - IMPL_GB_18030_TO_UNICODE_STATE_0, - IMPL_GB_18030_TO_UNICODE_STATE_1, - IMPL_GB_18030_TO_UNICODE_STATE_2, - IMPL_GB_18030_TO_UNICODE_STATE_3 -} ImplGb18030ToUnicodeState; - -typedef struct -{ - ImplGb18030ToUnicodeState m_eState; - sal_uInt32 m_nCode; -} ImplGb18030ToUnicodeContext; - -void * ImplCreateGb18030ToUnicodeContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplGb18030ToUnicodeContext)); - ((ImplGb18030ToUnicodeContext *) pContext)->m_eState - = IMPL_GB_18030_TO_UNICODE_STATE_0; - return pContext; -} - -void ImplResetGb18030ToUnicodeContext(void * pContext) -{ - if (pContext) - ((ImplGb18030ToUnicodeContext *) pContext)->m_eState - = IMPL_GB_18030_TO_UNICODE_STATE_0; -} - -sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - sal_Unicode const * pGb18030Data - = ((ImplGb18030ConverterData const *) pData)->m_pGb18030ToUnicodeData; - ImplGb180302000ToUnicodeRange const * pGb18030Ranges - = ((ImplGb18030ConverterData const *) pData)-> - m_pGb18030ToUnicodeRanges; - ImplGb18030ToUnicodeState eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - sal_uInt32 nCode = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Unicode * 
pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - { - eState = ((ImplGb18030ToUnicodeContext *) pContext)->m_eState; - nCode = ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode; - } - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - switch (eState) - { - case IMPL_GB_18030_TO_UNICODE_STATE_0: - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else if (nChar == 0x80) - goto bad_input; - else if (nChar <= 0xFE) - { - nCode = nChar - 0x81; - eState = IMPL_GB_18030_TO_UNICODE_STATE_1; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_GB_18030_TO_UNICODE_STATE_1: - if (nChar >= 0x30 && nChar <= 0x39) - { - nCode = nCode * 10 + (nChar - 0x30); - eState = IMPL_GB_18030_TO_UNICODE_STATE_2; - } - else if ((nChar >= 0x40 && nChar <= 0x7E) - || (nChar >= 0x80 && nChar <= 0xFE)) - { - nCode = nCode * 190 + (nChar <= 0x7E ? 
nChar - 0x40 : - nChar - 0x80 + 63); - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = pGb18030Data[nCode]; - else - goto no_output; - eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_GB_18030_TO_UNICODE_STATE_2: - if (nChar >= 0x81 && nChar <= 0xFE) - { - nCode = nCode * 126 + (nChar - 0x81); - eState = IMPL_GB_18030_TO_UNICODE_STATE_3; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_GB_18030_TO_UNICODE_STATE_3: - if (nChar >= 0x30 && nChar <= 0x39) - { - nCode = nCode * 10 + (nChar - 0x30); - - /* 90 30 81 30 to E3 32 9A 35 maps to U+10000 to U+10FFFF: */ - if (nCode >= 189000 && nCode <= 1237575) - if (pDestBufEnd - pDestBufPtr >= 2) - { - nCode -= 189000 - 0x10000; - *pDestBufPtr++ - = (sal_Unicode) ImplGetHighSurrogate(nCode); - *pDestBufPtr++ - = (sal_Unicode) ImplGetLowSurrogate(nCode); - } - else - goto no_output; - else - { - ImplGb180302000ToUnicodeRange const * pRange - = pGb18030Ranges; - sal_uInt32 nFirstNonRange = 0; - for (;;) - { - if (pRange->m_nNonRangeDataIndex == -1) - goto bad_input; - else if (nCode < pRange->m_nFirstLinear) - { - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ - = pGb18030Data[ - pRange->m_nNonRangeDataIndex - + (nCode - nFirstNonRange)]; - else - goto no_output; - break; - } - else if (nCode < pRange->m_nPastLinear) - { - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ - = (sal_Unicode) - (pRange->m_nFirstUnicode - + (nCode - - pRange-> - m_nFirstLinear)); - else - goto no_output; - break; - } - nFirstNonRange = (pRange++)->m_nPastLinear; - } - } - eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - } - continue; - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - break; - - 
case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (eState != IMPL_GB_18030_TO_UNICODE_STATE_0 - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, - pDestBufEnd, &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_GB_18030_TO_UNICODE_STATE_0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - { - ((ImplGb18030ToUnicodeContext *) pContext)->m_eState = eState; - ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode = nCode; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) -{ - sal_uInt32 const * pGb18030Data - = ((ImplGb18030ConverterData const *) pData)-> - m_pUnicodeToGb18030Data; - ImplUnicodeToGb180302000Range const * pGb18030Ranges - = ((ImplGb18030ConverterData const *) pData)-> - m_pUnicodeToGb18030Ranges; - sal_Unicode nHighSurrogate = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - - if (pContext) - nHighSurrogate - = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; - - for (; nConverted < nSrcChars; ++nConverted) - { - sal_Bool bUndefined = 
sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - else if (nChar < 0x10000) - { - ImplUnicodeToGb180302000Range const * pRange = pGb18030Ranges; - sal_Unicode nFirstNonRange = 0x80; - for (;;) - { - if (nChar < pRange->m_nFirstUnicode) - { - sal_uInt32 nCode - = pGb18030Data[pRange->m_nNonRangeDataIndex - + (nChar - nFirstNonRange)]; - if (pDestBufEnd - pDestBufPtr - >= (nCode <= 0xFFFF ? 2 : 4)) - { - if (nCode > 0xFFFF) - { - *pDestBufPtr++ = (sal_Char) (nCode >> 24); - *pDestBufPtr++ = (sal_Char) (nCode >> 16 & 0xFF); - } - *pDestBufPtr++ = (sal_Char) (nCode >> 8 & 0xFF); - *pDestBufPtr++ = (sal_Char) (nCode & 0xFF); - } - else - goto no_output; - break; - } - else if (nChar <= pRange->m_nLastUnicode) - { - if (pDestBufEnd - pDestBufPtr >= 4) - { - sal_uInt32 nCode - = pRange->m_nFirstLinear - + (nChar - pRange->m_nFirstUnicode); - *pDestBufPtr++ = (sal_Char) (nCode / 12600 + 0x81); - *pDestBufPtr++ - = (sal_Char) (nCode / 1260 % 10 + 0x30); - *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + 0x81); - *pDestBufPtr++ = (sal_Char) (nCode % 10 + 0x30); - } - else - goto no_output; - break; - } - nFirstNonRange - = (sal_Unicode) ((pRange++)->m_nLastUnicode + 1); - } - } - else - if (pDestBufEnd - pDestBufPtr >= 4) - { - sal_uInt32 nCode = nChar - 0x10000; - *pDestBufPtr++ = (sal_Char) (nCode / 12600 + 0x90); - *pDestBufPtr++ = (sal_Char) (nCode / 1260 % 10 + 0x30); - *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + 0x81); - *pDestBufPtr++ = (sal_Char) (nCode % 10 + 
0x30); - } - else - goto no_output; - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nHighSurrogate != 0 - && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - NULL, - 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertgb18030.cxx b/sal/textenc/convertgb18030.cxx new file mode 100644 index 000000000000..61daa30c5295 --- /dev/null +++ b/sal/textenc/convertgb18030.cxx @@ -0,0 +1,475 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. 
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+#include "sal/config.h"
+
+#include "rtl/textcvt.h"
+#include "sal/types.h"
+
+#include "context.hxx"
+#include "converter.hxx"
+#include "convertgb18030.hxx"
+#include "tenchelp.hxx"
+#include "unichars.hxx"
+
+namespace {
+
+// Position of the decoder inside a GB 18030 byte sequence: STATE_n means that
+// n bytes of the current multi-byte sequence have already been consumed.
+enum ImplGb18030ToUnicodeState
+{
+    IMPL_GB_18030_TO_UNICODE_STATE_0,
+    IMPL_GB_18030_TO_UNICODE_STATE_1,
+    IMPL_GB_18030_TO_UNICODE_STATE_2,
+    IMPL_GB_18030_TO_UNICODE_STATE_3
+};
+
+// Decoder state carried from one ImplConvertGb18030ToUnicode call to the next.
+struct ImplGb18030ToUnicodeContext
+{
+    ImplGb18030ToUnicodeState m_eState;
+    sal_uInt32 m_nCode; // value accumulated from the bytes consumed so far
+};
+
+}
+
+// Allocates a decoder context positioned at the start of a byte sequence.
+// The result is handed out as void * and is read back by the functions below.
+void * ImplCreateGb18030ToUnicodeContext()
+{
+    ImplGb18030ToUnicodeContext * pContext = new ImplGb18030ToUnicodeContext;
+    pContext->m_eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+    // The converter loads m_nCode unconditionally on entry, so give it a
+    // defined value instead of leaving it indeterminate after plain "new".
+    pContext->m_nCode = 0;
+    return pContext;
+}
+
+// Puts an existing decoder context back to the start of a byte sequence.
+// A null pContext is accepted and ignored.
+void ImplResetGb18030ToUnicodeContext(void * pContext)
+{
+    if (pContext)
+        static_cast< ImplGb18030ToUnicodeContext * >(pContext)->m_eState
+            = IMPL_GB_18030_TO_UNICODE_STATE_0;
+}
+
+// Decodes GB 18030 bytes into UTF-16 code units.
+//
+// pData         converter tables (an ImplGb18030ConverterData)
+// pContext      optional ImplGb18030ToUnicodeContext; when non-null the
+//               decoder state is loaded from it and stored back on return, so
+//               a multi-byte sequence may be split across calls
+// pSrcBuf       source bytes, nSrcBytes of them
+// pDestBuf      destination buffer with room for nDestChars code units
+// nFlags        RTL_TEXTTOUNICODE_FLAGS_* (passed on to the bad-input handler;
+//               FLUSH is also tested here)
+// pInfo         optional; receives the accumulated RTL_TEXTTOUNICODE_INFO_*
+// pSrcCvtBytes  optional; receives the number of source bytes consumed
+//
+// Returns the number of code units written to pDestBuf.
+sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData,
+                                     void * pContext,
+                                     char const * pSrcBuf,
+                                     sal_Size nSrcBytes,
+                                     sal_Unicode * pDestBuf,
+                                     sal_Size nDestChars,
+                                     sal_uInt32 nFlags,
+                                     sal_uInt32 * pInfo,
+                                     sal_Size * pSrcCvtBytes)
+{
+    sal_Unicode const * pGb18030Data
+        = static_cast< ImplGb18030ConverterData const * >(pData)->m_pGb18030ToUnicodeData;
+    ImplGb180302000ToUnicodeRange const * pGb18030Ranges
+        = static_cast< ImplGb18030ConverterData const * >(pData)->
+            m_pGb18030ToUnicodeRanges;
+    ImplGb18030ToUnicodeState eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+    sal_uInt32 nCode = 0;
+    sal_uInt32 nInfo = 0;
+    sal_Size nConverted = 0;
+    sal_Unicode * pDestBufPtr = pDestBuf;
+    sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
+
+    if (pContext)
+    {
+        eState = static_cast< ImplGb18030ToUnicodeContext * >(pContext)->m_eState;
+        nCode = static_cast< ImplGb18030ToUnicodeContext * >(pContext)->m_nCode;
+    }
+
+    for (; nConverted < nSrcBytes; ++nConverted)
+    {
+        bool bUndefined = true;
+        sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
+        switch (eState)
+        {
+        case IMPL_GB_18030_TO_UNICODE_STATE_0:
+            // First byte: ASCII is copied through, 0x81..0xFE starts a
+            // multi-byte sequence, 0x80 is undefined and 0xFF is invalid.
+            if (nChar < 0x80)
+                if (pDestBufPtr != pDestBufEnd)
+                    *pDestBufPtr++ = (sal_Unicode) nChar;
+                else
+                    goto no_output;
+            else if (nChar == 0x80)
+                goto bad_input;
+            else if (nChar <= 0xFE)
+            {
+                nCode = nChar - 0x81;
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_1;
+            }
+            else
+            {
+                bUndefined = false;
+                goto bad_input;
+            }
+            break;
+
+        case IMPL_GB_18030_TO_UNICODE_STATE_1:
+            if (nChar >= 0x30 && nChar <= 0x39)
+            {
+                // A digit as second byte announces a four-byte sequence.
+                nCode = nCode * 10 + (nChar - 0x30);
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_2;
+            }
+            else if ((nChar >= 0x40 && nChar <= 0x7E)
+                     || (nChar >= 0x80 && nChar <= 0xFE))
+            {
+                // Two-byte sequence complete.  The table index is computed
+                // into a local so that nCode is left untouched when there is
+                // no room for the output: this byte is then pushed back and
+                // replayed on the next call, which must start from the same
+                // nCode again.
+                sal_uInt32 nIndex
+                    = nCode * 190 + (nChar <= 0x7E ? nChar - 0x40 :
+                                                     nChar - 0x80 + 63);
+                if (pDestBufPtr != pDestBufEnd)
+                    *pDestBufPtr++ = pGb18030Data[nIndex];
+                else
+                    goto no_output;
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+            }
+            else
+            {
+                bUndefined = false;
+                goto bad_input;
+            }
+            break;
+
+        case IMPL_GB_18030_TO_UNICODE_STATE_2:
+            if (nChar >= 0x81 && nChar <= 0xFE)
+            {
+                nCode = nCode * 126 + (nChar - 0x81);
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_3;
+            }
+            else
+            {
+                bUndefined = false;
+                goto bad_input;
+            }
+            break;
+
+        case IMPL_GB_18030_TO_UNICODE_STATE_3:
+            if (nChar >= 0x30 && nChar <= 0x39)
+            {
+                // As in STATE_1, work on a local copy so that a push-back of
+                // this byte (no_output) does not leave a half-updated nCode
+                // behind in the context.
+                sal_uInt32 nLinear = nCode * 10 + (nChar - 0x30);
+
+                // 90 30 81 30 to E3 32 9A 35 maps to U+10000 to U+10FFFF:
+                if (nLinear >= 189000 && nLinear <= 1237575)
+                    if (pDestBufEnd - pDestBufPtr >= 2)
+                    {
+                        nLinear -= 189000 - 0x10000;
+                        *pDestBufPtr++
+                            = (sal_Unicode) ImplGetHighSurrogate(nLinear);
+                        *pDestBufPtr++
+                            = (sal_Unicode) ImplGetLowSurrogate(nLinear);
+                    }
+                    else
+                        goto no_output;
+                else
+                {
+                    // Walk the range table: values below a range are looked
+                    // up in the data table, values inside a range map
+                    // linearly; an entry with index -1 ends the table.
+                    ImplGb180302000ToUnicodeRange const * pRange
+                        = pGb18030Ranges;
+                    sal_uInt32 nFirstNonRange = 0;
+                    for (;;)
+                    {
+                        if (pRange->m_nNonRangeDataIndex == -1)
+                            goto bad_input;
+                        else if (nLinear < pRange->m_nFirstLinear)
+                        {
+                            if (pDestBufPtr != pDestBufEnd)
+                                *pDestBufPtr++
+                                    = pGb18030Data[
+                                        pRange->m_nNonRangeDataIndex
+                                        + (nLinear - nFirstNonRange)];
+                            else
+                                goto no_output;
+                            break;
+                        }
+                        else if (nLinear < pRange->m_nPastLinear)
+                        {
+                            if (pDestBufPtr != pDestBufEnd)
+                                *pDestBufPtr++
+                                    = (sal_Unicode)
+                                        (pRange->m_nFirstUnicode
+                                         + (nLinear
+                                            - pRange->m_nFirstLinear));
+                            else
+                                goto no_output;
+                            break;
+                        }
+                        nFirstNonRange = (pRange++)->m_nPastLinear;
+                    }
+                }
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+            }
+            else
+            {
+                bUndefined = false;
+                goto bad_input;
+            }
+            break;
+        }
+        continue;
+
+    bad_input:
+        switch (ImplHandleBadInputTextToUnicodeConversion(
+                    bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
+                    &nInfo))
+        {
+        case IMPL_BAD_INPUT_STOP:
+            eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+            break;
+
+        case IMPL_BAD_INPUT_CONTINUE:
+            eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+            continue;
+
+        case IMPL_BAD_INPUT_NO_OUTPUT:
+            goto no_output;
+        }
+        break;
+
+    no_output:
+        // Push the current byte back: it is not counted in nConverted and
+        // eState/nCode still describe the situation before it was read.
+        --pSrcBuf;
+        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+        break;
+    }
+
+    // Input ended in the middle of a sequence and nothing else was reported:
+    // without FLUSH ask for more input, with FLUSH treat it as bad input.
+    if (eState != IMPL_GB_18030_TO_UNICODE_STATE_0
+        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
+                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
+               == 0)
+    {
+        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
+            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
+        else
+            switch (ImplHandleBadInputTextToUnicodeConversion(
+                        false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
+                        &nInfo))
+            {
+            case IMPL_BAD_INPUT_STOP:
+            case IMPL_BAD_INPUT_CONTINUE:
+                eState = IMPL_GB_18030_TO_UNICODE_STATE_0;
+                break;
+
+            case IMPL_BAD_INPUT_NO_OUTPUT:
+                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+    }
+
+    if (pContext)
+    {
+        static_cast< ImplGb18030ToUnicodeContext * >(pContext)->m_eState = eState;
+        static_cast< ImplGb18030ToUnicodeContext * >(pContext)->m_nCode = nCode;
+    }
+    if (pInfo)
+        *pInfo = nInfo;
+    if (pSrcCvtBytes)
+        *pSrcCvtBytes = nConverted;
+
+    return pDestBufPtr - pDestBuf;
+}
+
+// Encodes UTF-16 code units as GB 18030 bytes.
+//
+// pData         converter tables (an ImplGb18030ConverterData)
+// pContext      optional ImplUnicodeToTextContext; when non-null a pending
+//               high surrogate is loaded from it and stored back on return
+// pSrcBuf       source code units, nSrcChars of them
+// pDestBuf      destination buffer with room for nDestBytes bytes
+// nFlags        RTL_UNICODETOTEXT_FLAGS_* (passed on to the bad-input handler;
+//               FLUSH is also tested here)
+// pInfo         optional; receives the accumulated RTL_UNICODETOTEXT_INFO_*
+// pSrcCvtChars  optional; receives the number of source code units consumed
+//
+// Returns the number of bytes written to pDestBuf.
+sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData,
+                                     void * pContext,
+                                     sal_Unicode const * pSrcBuf,
+                                     sal_Size nSrcChars,
+                                     char * pDestBuf,
+                                     sal_Size nDestBytes,
+                                     sal_uInt32 nFlags,
+                                     sal_uInt32 * pInfo,
+                                     sal_Size * pSrcCvtChars)
+{
+    sal_uInt32 const * pGb18030Data
+        = static_cast< ImplGb18030ConverterData const * >(pData)->
+              m_pUnicodeToGb18030Data;
+    ImplUnicodeToGb180302000Range const * pGb18030Ranges
+        = static_cast< ImplGb18030ConverterData const * >(pData)->
+              m_pUnicodeToGb18030Ranges;
+    sal_Unicode nHighSurrogate = 0;
+    sal_uInt32 nInfo = 0;
+    sal_Size nConverted = 0;
+    char * pDestBufPtr = pDestBuf;
+    char * pDestBufEnd = pDestBuf + nDestBytes;
+
+    if (pContext)
+        nHighSurrogate
+            = static_cast< ImplUnicodeToTextContext * >(pContext)->
+                  m_nHighSurrogate;
+
+    for (; nConverted < nSrcChars; ++nConverted)
+    {
+        bool bUndefined = true;
+        sal_uInt32 nChar = *pSrcBuf++;
+        // Surrogate handling: remember a high surrogate and wait for its
+        // partner; anything but a low surrogate after it is invalid input.
+        if (nHighSurrogate == 0)
+        {
+            if (ImplIsHighSurrogate(nChar))
+            {
+                nHighSurrogate = (sal_Unicode) nChar;
+                continue;
+            }
+        }
+        else if (ImplIsLowSurrogate(nChar))
+            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else
+        {
+            bUndefined = false;
+            goto bad_input;
+        }
+
+        if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
+        {
+            bUndefined = false;
+            goto bad_input;
+        }
+
+        if (nChar < 0x80)
+            if (pDestBufPtr != pDestBufEnd)
+                *pDestBufPtr++ = static_cast< char >(nChar);
+            else
+                goto no_output;
+        else if (nChar < 0x10000)
+        {
+            // Walk the range table: characters below a range are looked up
+            // in the data table (two or four bytes packed into one value),
+            // characters inside a range map linearly to a four-byte code.
+            ImplUnicodeToGb180302000Range const * pRange = pGb18030Ranges;
+            sal_Unicode nFirstNonRange = 0x80;
+            for (;;)
+            {
+                if (nChar < pRange->m_nFirstUnicode)
+                {
+                    sal_uInt32 nCode
+                        = pGb18030Data[pRange->m_nNonRangeDataIndex
+                                           + (nChar - nFirstNonRange)];
+                    if (pDestBufEnd - pDestBufPtr
+                            >= (nCode <= 0xFFFF ? 2 : 4))
+                    {
+                        if (nCode > 0xFFFF)
+                        {
+                            *pDestBufPtr++ = static_cast< char >(nCode >> 24);
+                            *pDestBufPtr++ = static_cast< char >(nCode >> 16 & 0xFF);
+                        }
+                        *pDestBufPtr++ = static_cast< char >(nCode >> 8 & 0xFF);
+                        *pDestBufPtr++ = static_cast< char >(nCode & 0xFF);
+                    }
+                    else
+                        goto no_output;
+                    break;
+                }
+                else if (nChar <= pRange->m_nLastUnicode)
+                {
+                    if (pDestBufEnd - pDestBufPtr >= 4)
+                    {
+                        sal_uInt32 nCode
+                            = pRange->m_nFirstLinear
+                                  + (nChar - pRange->m_nFirstUnicode);
+                        *pDestBufPtr++ = static_cast< char >(nCode / 12600 + 0x81);
+                        *pDestBufPtr++
+                            = static_cast< char >(nCode / 1260 % 10 + 0x30);
+                        *pDestBufPtr++ = static_cast< char >(nCode / 10 % 126 + 0x81);
+                        *pDestBufPtr++ = static_cast< char >(nCode % 10 + 0x30);
+                    }
+                    else
+                        goto no_output;
+                    break;
+                }
+                nFirstNonRange
+                    = (sal_Unicode) ((pRange++)->m_nLastUnicode + 1);
+            }
+        }
+        else
+            // U+10000 and above: four-byte codes starting at lead byte 0x90.
+            if (pDestBufEnd - pDestBufPtr >= 4)
+            {
+                sal_uInt32 nCode = nChar - 0x10000;
+                *pDestBufPtr++ = static_cast< char >(nCode / 12600 + 0x90);
+                *pDestBufPtr++ = static_cast< char >(nCode / 1260 % 10 + 0x30);
+                *pDestBufPtr++ = static_cast< char >(nCode / 10 % 126 + 0x81);
+                *pDestBufPtr++ = static_cast< char >(nCode % 10 + 0x30);
+            }
+            else
+                goto no_output;
+        nHighSurrogate = 0;
+        continue;
+
+    bad_input:
+        switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
+                                                          nChar,
+                                                          nFlags,
+                                                          &pDestBufPtr,
+                                                          pDestBufEnd,
+                                                          &nInfo,
+                                                          NULL,
+                                                          0,
+                                                          NULL))
+        {
+        case IMPL_BAD_INPUT_STOP:
+            nHighSurrogate = 0;
+            break;
+
+        case IMPL_BAD_INPUT_CONTINUE:
+            nHighSurrogate = 0;
+            continue;
+
+        case IMPL_BAD_INPUT_NO_OUTPUT:
+            goto no_output;
+        }
+        break;
+
+    no_output:
+        // Push the current code unit back; a pending high surrogate stays in
+        // nHighSurrogate so the pair is recombined when the unit is replayed.
+        --pSrcBuf;
+        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+        break;
+    }
+
+    // Input ended after an unpaired high surrogate and nothing else was
+    // reported.
+    // NOTE(review): the FLUSH test below has the opposite polarity to the one
+    // in ImplConvertGb18030ToUnicode (which reports SRCBUFFERTOSMALL when
+    // FLUSH is *not* set).  Behaviour is deliberately left unchanged here;
+    // confirm against the rtl/textcvt.h contract before touching it.
+    if (nHighSurrogate != 0
+        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
+                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
+               == 0)
+    {
+        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
+            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
+        else
+            switch (ImplHandleBadInputUnicodeToTextConversion(false,
+                                                              0,
+                                                              nFlags,
+                                                              &pDestBufPtr,
+                                                              pDestBufEnd,
+                                                              &nInfo,
+                                                              NULL,
+                                                              0,
+                                                              NULL))
+            {
+            case IMPL_BAD_INPUT_STOP:
+            case IMPL_BAD_INPUT_CONTINUE:
+                nHighSurrogate = 0;
+                break;
+
+            case IMPL_BAD_INPUT_NO_OUTPUT:
+                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+    }
+
+    if (pContext)
+        static_cast< ImplUnicodeToTextContext * >(pContext)->m_nHighSurrogate
+            = nHighSurrogate;
+    if (pInfo)
+        *pInfo = nInfo;
+    if (pSrcCvtChars)
+        *pSrcCvtChars = nConverted;
+
+    return pDestBufPtr - pDestBuf;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/textenc/convertgb18030.h b/sal/textenc/convertgb18030.h
deleted file mode 100644
index b80224d62d35..000000000000
--- a/sal/textenc/convertgb18030.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTGB18030_H -#define INCLUDED_RTL_TEXTENC_CONVERTGB18030_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - sal_Int32 m_nNonRangeDataIndex; - sal_uInt32 m_nFirstLinear; - sal_uInt32 m_nPastLinear; - sal_Unicode m_nFirstUnicode; -} ImplGb180302000ToUnicodeRange; - -typedef struct -{ - sal_Int32 m_nNonRangeDataIndex; - sal_Unicode m_nFirstUnicode; - sal_Unicode m_nLastUnicode; - sal_uInt32 m_nFirstLinear; -} ImplUnicodeToGb180302000Range; - -typedef struct -{ - sal_Unicode const * m_pGb18030ToUnicodeData; - ImplGb180302000ToUnicodeRange const * m_pGb18030ToUnicodeRanges; - sal_uInt32 const * m_pUnicodeToGb18030Data; - ImplUnicodeToGb180302000Range const * m_pUnicodeToGb18030Ranges; -} ImplGb18030ConverterData; - -void * ImplCreateGb18030ToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetGb18030ToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData, - 
void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTGB18030_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertgb18030.hxx b/sal/textenc/convertgb18030.hxx new file mode 100644 index 000000000000..cbdc4f3430ce --- /dev/null +++ b/sal/textenc/convertgb18030.hxx @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ *
+ ************************************************************************/
+
+#ifndef INCLUDED_SAL_TEXTENC_CONVERTGB18030_HXX
+#define INCLUDED_SAL_TEXTENC_CONVERTGB18030_HXX
+
+#include "sal/config.h"
+
+#include "sal/types.h"
+
+#include "tenchelp.hxx"
+
+// One entry of the table used when decoding four-byte GB 18030 codes.  The
+// decoder compares the linearised four-byte value against
+// [m_nFirstLinear, m_nPastLinear): values inside map linearly onto Unicode
+// starting at m_nFirstUnicode; values below m_nFirstLinear are looked up in
+// m_pGb18030ToUnicodeData starting at m_nNonRangeDataIndex.  An entry with
+// m_nNonRangeDataIndex == -1 terminates the table.
+struct ImplGb180302000ToUnicodeRange
+{
+    sal_Int32 m_nNonRangeDataIndex;
+    sal_uInt32 m_nFirstLinear;
+    sal_uInt32 m_nPastLinear;
+    sal_Unicode m_nFirstUnicode;
+};
+
+// One entry of the table used when encoding BMP characters.  Characters in
+// [m_nFirstUnicode, m_nLastUnicode] map linearly onto four-byte codes starting
+// at m_nFirstLinear; characters below m_nFirstUnicode are looked up in
+// m_pUnicodeToGb18030Data starting at m_nNonRangeDataIndex.
+struct ImplUnicodeToGb180302000Range
+{
+    sal_Int32 m_nNonRangeDataIndex;
+    sal_Unicode m_nFirstUnicode;
+    sal_Unicode m_nLastUnicode;
+    sal_uInt32 m_nFirstLinear;
+};
+
+// The four tables read by the GB 18030 converter functions declared below.
+struct ImplGb18030ConverterData
+{
+    sal_Unicode const * m_pGb18030ToUnicodeData;
+    ImplGb180302000ToUnicodeRange const * m_pGb18030ToUnicodeRanges;
+    sal_uInt32 const * m_pUnicodeToGb18030Data;
+    ImplUnicodeToGb180302000Range const * m_pUnicodeToGb18030Ranges;
+};
+
+// Creates the context object expected as pContext by
+// ImplConvertGb18030ToUnicode.
+void * ImplCreateGb18030ToUnicodeContext();
+
+// Resets a context created by ImplCreateGb18030ToUnicodeContext to its
+// initial state; a null pContext is ignored.
+void ImplResetGb18030ToUnicodeContext(void * pContext);
+
+// Decodes nSrcBytes bytes of GB 18030 from pSrcBuf into at most nDestChars
+// UTF-16 code units in pDestBuf.  Returns the number of code units written;
+// *pInfo and *pSrcCvtBytes (both optional) receive the status flags and the
+// number of source bytes consumed.
+sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData,
+                                     void * pContext,
+                                     char const * pSrcBuf,
+                                     sal_Size nSrcBytes,
+                                     sal_Unicode * pDestBuf,
+                                     sal_Size nDestChars,
+                                     sal_uInt32 nFlags,
+                                     sal_uInt32 * pInfo,
+                                     sal_Size * pSrcCvtBytes);
+
+// Encodes nSrcChars UTF-16 code units from pSrcBuf as GB 18030 into at most
+// nDestBytes bytes in pDestBuf.  Returns the number of bytes written; *pInfo
+// and *pSrcCvtChars (both optional) receive the status flags and the number
+// of source code units consumed.
+sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData,
+                                     void * pContext,
+                                     sal_Unicode const * pSrcBuf,
+                                     sal_Size nSrcChars,
+                                     char * pDestBuf,
+                                     sal_Size nDestBytes,
+                                     sal_uInt32 nFlags,
+                                     sal_uInt32 * pInfo,
+                                     sal_Size * pSrcCvtChars);
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/textenc/convertgb18030.tab b/sal/textenc/convertgb18030.tab
index d904d4298389..2bc290c69578 100644
--- a/sal/textenc/convertgb18030.tab
+++ b/sal/textenc/convertgb18030.tab
@@ -25,20 +25,13 @@
  *
  ************************************************************************/
 
-#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H
-#include "context.h"
-#endif
-#ifndef INCLUDED_RTL_TEXTENC_CONVERTGB18030_H
-#include "convertgb18030.h"
-#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif +#include "context.hxx" +#include "convertgb18030.hxx" #include "generate/gb180302000.tab" static ImplGb18030ConverterData const aImplGb18030ConvertData diff --git a/sal/textenc/convertiscii.tab b/sal/textenc/convertiscii.tab index 96f5dc8582c5..57e5bbdd3707 100644 --- a/sal/textenc/convertiscii.tab +++ b/sal/textenc/convertiscii.tab @@ -25,12 +25,14 @@ * ************************************************************************/ -#include "tenchelp.h" +#include "sal/config.h" + +#include #include "rtl/tencinfo.h" #include "sal/types.h" -#include +#include "tenchelp.hxx" /* Conversion tables for the Devanagari version of ISCII (IS 13194:1991). * diff --git a/sal/textenc/convertiso2022cn.c b/sal/textenc/convertiso2022cn.c deleted file mode 100644 index 40a0039aa3f0..000000000000 --- a/sal/textenc/convertiso2022cn.c +++ /dev/null @@ -1,865 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "convertiso2022cn.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef enum /* order is important: */ -{ - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN, - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK -} ImplIso2022CnToUnicodeState; - -typedef struct -{ - ImplIso2022CnToUnicodeState m_eState; - sal_uInt32 m_nRow; - sal_Bool m_bSo; - sal_Bool m_b116431; -} ImplIso2022CnToUnicodeContext; - -typedef enum -{ - IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE, - IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312, - IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431 -} ImplUnicodeToIso2022CnDesignator; - -typedef struct -{ - sal_Unicode m_nHighSurrogate; - ImplUnicodeToIso2022CnDesignator m_eSoDesignator; - sal_Bool m_b116432Designator; - sal_Bool m_bSo; -} ImplUnicodeToIso2022CnContext; - -void * ImplCreateIso2022CnToUnicodeContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplIso2022CnToUnicodeContext)); - ((ImplIso2022CnToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo = sal_False; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431 = sal_False; - return pContext; -} - -void ImplResetIso2022CnToUnicodeContext(void * pContext) -{ - if (pContext) - { - 
((ImplIso2022CnToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo = sal_False; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431 = sal_False; - } -} - -sal_Size ImplConvertIso2022CnToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - ImplDBCSToUniLeadTab const * pGb2312Data - = ((ImplIso2022CnConverterData const *) pData)-> - m_pGb2312ToUnicodeData; - sal_uInt16 const * pCns116431992Data - = ((ImplIso2022CnConverterData const *) pData)-> - m_pCns116431992ToUnicodeData; - sal_Int32 const * pCns116431992RowOffsets - = ((ImplIso2022CnConverterData const *) pData)-> - m_pCns116431992ToUnicodeRowOffsets; - sal_Int32 const * pCns116431992PlaneOffsets - = ((ImplIso2022CnConverterData const *) pData)-> - m_pCns116431992ToUnicodePlaneOffsets; - ImplIso2022CnToUnicodeState eState - = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - sal_uInt32 nRow = 0; - sal_Bool bSo = sal_False; - sal_Bool b116431 = sal_False; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - { - eState = ((ImplIso2022CnToUnicodeContext *) pContext)->m_eState; - nRow = ((ImplIso2022CnToUnicodeContext *) pContext)->m_nRow; - bSo = ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo; - b116431 = ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431; - } - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - sal_uInt32 nPlane; - switch (eState) - { - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII: - if (nChar == 0x0E) /* SO */ - { - bSo = sal_True; - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO; - } - else if (nChar == 0x1B) /* ESC */ - 
eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC; - else if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO: - if (nChar == 0x0F) /* SI */ - { - bSo = sal_False; - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - } - else if (nChar == 0x1B) /* ESC */ - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC; - else if (nChar >= 0x21 && nChar <= 0x7E) - { - nRow = nChar; - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2: - if (nChar >= 0x21 && nChar <= 0x7E) - if (b116431) - { - nPlane = 0; - goto transform; - } - else - { - sal_uInt16 nUnicode = 0; - sal_uInt32 nFirst; - nRow += 0x80; - nChar += 0x80; - nFirst = pGb2312Data[nRow].mnTrailStart; - if (nChar >= nFirst - && nChar <= pGb2312Data[nRow].mnTrailEnd) - nUnicode = pGb2312Data[nRow]. 
- mpToUniTrailTab[nChar - nFirst]; - if (nUnicode != 0) - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = (sal_Unicode) nUnicode; - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO; - } - else - goto no_output; - else - goto bad_input; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432: - if (nChar >= 0x21 && nChar <= 0x7E) - { - nRow = nChar; - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2: - if (nChar >= 0x21 && nChar <= 0x7E) - { - nPlane = 1; - goto transform; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC: - if (nChar == 0x24) /* $ */ - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR; - else if (nChar == 0x4E) /* N */ - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR: - if (nChar == 0x29) /* ) */ - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN; - else if (nChar == 0x2A) /* * */ - eState - = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN: - if (nChar == 0x41) /* A */ - { - b116431 = sal_False; - eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - } - else if (nChar == 0x47) /* G */ - { - b116431 = sal_True; - eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK: - if (nChar == 0x48) /* H */ - eState = bSo ? 
IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - } - continue; - - transform: - { - sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; - if (nPlaneOffset == -1) - goto bad_input; - else - { - sal_Int32 nOffset - = pCns116431992RowOffsets[nPlaneOffset + (nRow - 0x21)]; - if (nOffset == -1) - goto bad_input; - else - { - sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; - sal_uInt32 nFirst = nFirstLast & 0xFF; - sal_uInt32 nLast = nFirstLast >> 8; - nChar -= 0x20; - if (nChar >= nFirst && nChar <= nLast) - { - sal_uInt32 nUnicode - = pCns116431992Data[nOffset + (nChar - nFirst)]; - if (nUnicode == 0xFFFF) - goto bad_input; - else if (ImplIsHighSurrogate(nUnicode)) - if (pDestBufEnd - pDestBufPtr >= 2) - { - nOffset += nLast - nFirst + 1; - nFirst = pCns116431992Data[nOffset++]; - *pDestBufPtr++ = (sal_Unicode) nUnicode; - *pDestBufPtr++ - = (sal_Unicode) - pCns116431992Data[ - nOffset + (nChar - nFirst)]; - } - else - goto no_output; - else - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nUnicode; - else - goto no_output; - } - else - goto bad_input; - eState = bSo ? 
IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : - IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - } - } - continue; - } - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - b116431 = sal_False; - break; - - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - b116431 = sal_False; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (eState > IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; - b116431 = sal_False; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - { - ((ImplIso2022CnToUnicodeContext *) pContext)->m_eState = eState; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_nRow = nRow; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_bSo = bSo; - ((ImplIso2022CnToUnicodeContext *) pContext)->m_b116431 = b116431; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -void * ImplCreateUnicodeToIso2022CnContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022CnContext)); - ((ImplUnicodeToIso2022CnContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_eSoDesignator - = 
IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_b116432Designator - = sal_False; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_bSo = sal_False; - return pContext; -} - -void ImplResetUnicodeToIso2022CnContext(void * pContext) -{ - if (pContext) - { - ((ImplUnicodeToIso2022CnContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_eSoDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_b116432Designator - = sal_False; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_bSo = sal_False; - } -} - -static sal_uInt32 ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const * - pGb2312Data, - sal_uInt32 nChar) -{ - sal_uInt32 nIndex1 = nChar >> 8; - if (nIndex1 < 0x100) - { - sal_uInt32 nIndex2 = nChar & 0xFF; - sal_uInt32 nFirst = pGb2312Data[nIndex1].mnLowStart; - if (nIndex2 >= nFirst && nIndex2 <= pGb2312Data[nIndex1].mnLowEnd) - return pGb2312Data[nIndex1].mpToUniTrailTab[nIndex2 - nFirst] - & 0x7F7F; - } - return 0; -} - -static sal_uInt32 -ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data, - sal_Int32 const * pCns116431992PageOffsets, - sal_Int32 const * pCns116431992PlaneOffsets, - sal_uInt32 nChar) -{ - sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; - sal_uInt32 nFirst; - sal_uInt32 nLast; - sal_uInt32 nPlane; - if (nOffset == -1) - return 0; - nOffset = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; - if (nOffset == -1) - return 0; - nFirst = pCns116431992Data[nOffset++]; - nLast = pCns116431992Data[nOffset++]; - nChar &= 0xFF; - if (nChar < nFirst || nChar > nLast) - return 0; - nOffset += 3 * (nChar - nFirst); - nPlane = pCns116431992Data[nOffset++]; - if (nPlane != 1) - return 0; - return (0x20 + pCns116431992Data[nOffset]) << 8 - | (0x20 + pCns116431992Data[nOffset + 1]); -} - -sal_Size ImplConvertUnicodeToIso2022Cn(ImplTextConverterData const * pData, - void * pContext, - 
sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) -{ - ImplUniToDBCSHighTab const * pGb2312Data - = ((ImplIso2022CnConverterData const *) pData)-> - m_pUnicodeToGb2312Data; - sal_uInt8 const * pCns116431992Data - = ((ImplIso2022CnConverterData const *) pData)-> - m_pUnicodeToCns116431992Data; - sal_Int32 const * pCns116431992PageOffsets - = ((ImplIso2022CnConverterData const *) pData)-> - m_pUnicodeToCns116431992PageOffsets; - sal_Int32 const * pCns116431992PlaneOffsets - = ((ImplIso2022CnConverterData const *) pData)-> - m_pUnicodeToCns116431992PlaneOffsets; - sal_Unicode nHighSurrogate = 0; - ImplUnicodeToIso2022CnDesignator eSoDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - sal_Bool b116432Designator = sal_False; - sal_Bool bSo = sal_False; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - sal_Bool bWritten; - - if (pContext) - { - nHighSurrogate - = ((ImplUnicodeToIso2022CnContext *) pContext)->m_nHighSurrogate; - eSoDesignator - = ((ImplUnicodeToIso2022CnContext *) pContext)->m_eSoDesignator; - b116432Designator = ((ImplUnicodeToIso2022CnContext *) pContext)-> - m_b116432Designator; - bSo = ((ImplUnicodeToIso2022CnContext *) pContext)->m_bSo; - } - - for (; nConverted < nSrcChars; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */ - { - if (bSo) - { - if (pDestBufPtr 
!= pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - bSo = sal_False; - eSoDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - b116432Designator = sal_False; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B) - goto bad_input; - else if (nChar < 0x80) - { - if (bSo) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - bSo = sal_False; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else - { - sal_uInt32 nBytes = 0; - ImplUnicodeToIso2022CnDesignator eNewDesignator = - IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - switch (eSoDesignator) - { - case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE: - nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); - if (nBytes != 0) - { - eNewDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312; - break; - } - nBytes = ImplIso2022CnTranslateTo116431( - pCns116431992Data, - pCns116431992PageOffsets, - pCns116431992PlaneOffsets, - nChar); - if (nBytes != 0) - { - eNewDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431; - break; - } - break; - - case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312: - nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); - if (nBytes != 0) - { - eNewDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - break; - } - nBytes = ImplIso2022CnTranslateTo116431( - pCns116431992Data, - pCns116431992PageOffsets, - pCns116431992PlaneOffsets, - nChar); - if (nBytes != 0) - { - eNewDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431; - break; - } - break; - - case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431: - nBytes = ImplIso2022CnTranslateTo116431( - pCns116431992Data, - pCns116431992PageOffsets, - pCns116431992PlaneOffsets, - nChar); - if (nBytes != 0) - { - eNewDesignator - = 
IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; - break; - } - nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); - if (nBytes != 0) - { - eNewDesignator - = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312; - break; - } - break; - } - if (nBytes != 0) - { - if (eNewDesignator - != IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE) - { - if (bSo) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - bSo = sal_False; - } - else - goto no_output; - } - if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x24; /* $ */ - *pDestBufPtr++ = 0x29; /* ) */ - *pDestBufPtr++ - = eNewDesignator - == IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312 ? - 0x41 : 0x47; /* A, G */ - eSoDesignator = eNewDesignator; - } - else - goto no_output; - } - if (!bSo) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0E; /* SO */ - bSo = sal_True; - } - else - goto no_output; - } - if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = (sal_Char) (nBytes >> 8); - *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF); - } - else - goto no_output; - } - else - { - sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; - sal_uInt32 nFirst; - sal_uInt32 nLast; - sal_uInt32 nPlane; - if (nOffset == -1) - goto bad_input; - nOffset - = pCns116431992PageOffsets[nOffset - + ((nChar & 0xFF00) >> 8)]; - if (nOffset == -1) - goto bad_input; - nFirst = pCns116431992Data[nOffset++]; - nLast = pCns116431992Data[nOffset++]; - nChar &= 0xFF; - if (nChar < nFirst || nChar > nLast) - goto bad_input; - nOffset += 3 * (nChar - nFirst); - nPlane = pCns116431992Data[nOffset++]; - if (nPlane != 2) - goto bad_input; - if (!b116432Designator) - { - if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x24; /* $ */ - *pDestBufPtr++ = 0x2A; /* * */ - *pDestBufPtr++ = 0x48; /* H */ - b116432Designator = sal_True; - } - else - goto no_output; - } - if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = 0x1B; /* 
ESC */ - *pDestBufPtr++ = 0x4E; /* N */ - *pDestBufPtr++ - = (sal_Char) (0x20 + pCns116431992Data[nOffset++]); - *pDestBufPtr++ - = (sal_Char) (0x20 + pCns116431992Data[nOffset]); - } - else - goto no_output; - } - } - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x0F", /* SI */ - bSo ? 1 : 0, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - bSo = sal_False; - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - sal_Bool bFlush = sal_True; - if (nHighSurrogate != 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion( - sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x0F", /* SI */ - bSo ? 
1 : 0, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - bFlush = sal_False; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - bSo = sal_False; - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - if (bFlush && bSo && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - bSo = sal_False; - } - else - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - } - } - - if (pContext) - { - ((ImplUnicodeToIso2022CnContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_eSoDesignator - = eSoDesignator; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_b116432Designator - = b116432Designator; - ((ImplUnicodeToIso2022CnContext *) pContext)->m_bSo = bSo; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022cn.cxx b/sal/textenc/convertiso2022cn.cxx new file mode 100644 index 000000000000..6c317225ae67 --- /dev/null +++ b/sal/textenc/convertiso2022cn.cxx @@ -0,0 +1,868 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "context.hxx" +#include "converter.hxx" +#include "convertiso2022cn.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +namespace { + +enum ImplIso2022CnToUnicodeState // order is important: +{ + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN, + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK +}; + +struct ImplIso2022CnToUnicodeContext +{ + ImplIso2022CnToUnicodeState m_eState; + sal_uInt32 m_nRow; + bool m_bSo; + bool m_b116431; +}; + +enum ImplUnicodeToIso2022CnDesignator +{ + IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE, + IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312, + IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431 +}; + +struct ImplUnicodeToIso2022CnContext +{ + sal_Unicode m_nHighSurrogate; + ImplUnicodeToIso2022CnDesignator m_eSoDesignator; + bool m_b116432Designator; + bool m_bSo; +}; + +} + +void * ImplCreateIso2022CnToUnicodeContext() +{ + ImplIso2022CnToUnicodeContext * pContext = + new ImplIso2022CnToUnicodeContext; + pContext->m_eState = 
IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + pContext->m_bSo = false; + pContext->m_b116431 = false; + return pContext; +} + +void ImplResetIso2022CnToUnicodeContext(void * pContext) +{ + if (pContext) + { + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState + = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = false; + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = false; + } +} + +sal_Size ImplConvertIso2022CnToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + ImplDBCSToUniLeadTab const * pGb2312Data + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pGb2312ToUnicodeData; + sal_uInt16 const * pCns116431992Data + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pCns116431992ToUnicodeData; + sal_Int32 const * pCns116431992RowOffsets + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pCns116431992ToUnicodePlaneOffsets; + ImplIso2022CnToUnicodeState eState + = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + sal_uInt32 nRow = 0; + bool bSo = false; + bool b116431 = false; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState; + nRow = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow; + bSo = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo; + b116431 = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + bool 
bUndefined = true; + sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; + sal_uInt32 nPlane; + switch (eState) + { + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII: + if (nChar == 0x0E) // SO + { + bSo = true; + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO; + } + else if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC; + else if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO: + if (nChar == 0x0F) // SI + { + bSo = false; + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + } + else if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC; + else if (nChar >= 0x21 && nChar <= 0x7E) + { + nRow = nChar; + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2: + if (nChar >= 0x21 && nChar <= 0x7E) + if (b116431) + { + nPlane = 0; + goto transform; + } + else + { + sal_uInt16 nUnicode = 0; + sal_uInt32 nFirst; + nRow += 0x80; + nChar += 0x80; + nFirst = pGb2312Data[nRow].mnTrailStart; + if (nChar >= nFirst + && nChar <= pGb2312Data[nRow].mnTrailEnd) + nUnicode = pGb2312Data[nRow]. 
+ mpToUniTrailTab[nChar - nFirst]; + if (nUnicode != 0) + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = (sal_Unicode) nUnicode; + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO; + } + else + goto no_output; + else + goto bad_input; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432: + if (nChar >= 0x21 && nChar <= 0x7E) + { + nRow = nChar; + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2: + if (nChar >= 0x21 && nChar <= 0x7E) + { + nPlane = 1; + goto transform; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC: + if (nChar == 0x24) // $ + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR; + else if (nChar == 0x4E) // N + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR: + if (nChar == 0x29) // ) + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN; + else if (nChar == 0x2A) // * + eState + = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN: + if (nChar == 0x41) // A + { + b116431 = false; + eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + } + else if (nChar == 0x47) // G + { + b116431 = true; + eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK: + if (nChar == 0x48) // H + eState = bSo ? 
IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + else + { + bUndefined = false; + goto bad_input; + } + break; + } + continue; + + transform: + { + sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; + if (nPlaneOffset == -1) + goto bad_input; + else + { + sal_Int32 nOffset + = pCns116431992RowOffsets[nPlaneOffset + (nRow - 0x21)]; + if (nOffset == -1) + goto bad_input; + else + { + sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; + sal_uInt32 nFirst = nFirstLast & 0xFF; + sal_uInt32 nLast = nFirstLast >> 8; + nChar -= 0x20; + if (nChar >= nFirst && nChar <= nLast) + { + sal_uInt32 nUnicode + = pCns116431992Data[nOffset + (nChar - nFirst)]; + if (nUnicode == 0xFFFF) + goto bad_input; + else if (ImplIsHighSurrogate(nUnicode)) + if (pDestBufEnd - pDestBufPtr >= 2) + { + nOffset += nLast - nFirst + 1; + nFirst = pCns116431992Data[nOffset++]; + *pDestBufPtr++ = (sal_Unicode) nUnicode; + *pDestBufPtr++ + = (sal_Unicode) + pCns116431992Data[ + nOffset + (nChar - nFirst)]; + } + else + goto no_output; + else + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nUnicode; + else + goto no_output; + } + else + goto bad_input; + eState = bSo ? 
IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO : + IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + } + } + continue; + } + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + b116431 = false; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + b116431 = false; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState > IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII; + b116431 = false; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + { + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState = eState; + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow = nRow; + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = bSo; + static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = b116431; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +void * ImplCreateUnicodeToIso2022CnContext(void) +{ + ImplUnicodeToIso2022CnContext * pContext = + new ImplUnicodeToIso2022CnContext; + pContext->m_nHighSurrogate = 0; + pContext->m_eSoDesignator = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + pContext->m_b116432Designator = 
false; + pContext->m_bSo = false; + return pContext; +} + +void ImplResetUnicodeToIso2022CnContext(void * pContext) +{ + if (pContext) + { + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate = 0; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator + = false; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = false; + } +} + +static sal_uInt32 ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const * + pGb2312Data, + sal_uInt32 nChar) +{ + sal_uInt32 nIndex1 = nChar >> 8; + if (nIndex1 < 0x100) + { + sal_uInt32 nIndex2 = nChar & 0xFF; + sal_uInt32 nFirst = pGb2312Data[nIndex1].mnLowStart; + if (nIndex2 >= nFirst && nIndex2 <= pGb2312Data[nIndex1].mnLowEnd) + return pGb2312Data[nIndex1].mpToUniTrailTab[nIndex2 - nFirst] + & 0x7F7F; + } + return 0; +} + +static sal_uInt32 +ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data, + sal_Int32 const * pCns116431992PageOffsets, + sal_Int32 const * pCns116431992PlaneOffsets, + sal_uInt32 nChar) +{ + sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; + sal_uInt32 nFirst; + sal_uInt32 nLast; + sal_uInt32 nPlane; + if (nOffset == -1) + return 0; + nOffset = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; + if (nOffset == -1) + return 0; + nFirst = pCns116431992Data[nOffset++]; + nLast = pCns116431992Data[nOffset++]; + nChar &= 0xFF; + if (nChar < nFirst || nChar > nLast) + return 0; + nOffset += 3 * (nChar - nFirst); + nPlane = pCns116431992Data[nOffset++]; + if (nPlane != 1) + return 0; + return (0x20 + pCns116431992Data[nOffset]) << 8 + | (0x20 + pCns116431992Data[nOffset + 1]); +} + +sal_Size ImplConvertUnicodeToIso2022Cn(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + 
sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + ImplUniToDBCSHighTab const * pGb2312Data + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pUnicodeToGb2312Data; + sal_uInt8 const * pCns116431992Data + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pUnicodeToCns116431992Data; + sal_Int32 const * pCns116431992PageOffsets + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * pCns116431992PlaneOffsets + = static_cast< ImplIso2022CnConverterData const * >(pData)-> + m_pUnicodeToCns116431992PlaneOffsets; + sal_Unicode nHighSurrogate = 0; + ImplUnicodeToIso2022CnDesignator eSoDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + bool b116432Designator = false; + bool bSo = false; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBuf + nDestBytes; + bool bWritten; + + if (pContext) + { + nHighSurrogate + = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate; + eSoDesignator + = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator; + b116432Designator = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)-> + m_b116432Designator; + bSo = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo; + } + + for (; nConverted < nSrcChars; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar)) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar)) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + { + bUndefined = false; + goto bad_input; + } + + if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) + { + bUndefined = false; + goto bad_input; + } + + if (nChar == 0x0A || nChar == 0x0D) // LF, CR + { + if (bSo) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + bSo = false; + 
eSoDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + b116432Designator = false; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B) + goto bad_input; + else if (nChar < 0x80) + { + if (bSo) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + bSo = false; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else + { + sal_uInt32 nBytes = 0; + ImplUnicodeToIso2022CnDesignator eNewDesignator = + IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + switch (eSoDesignator) + { + case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE: + nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312; + break; + } + nBytes = ImplIso2022CnTranslateTo116431( + pCns116431992Data, + pCns116431992PageOffsets, + pCns116431992PlaneOffsets, + nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431; + break; + } + break; + + case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312: + nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + break; + } + nBytes = ImplIso2022CnTranslateTo116431( + pCns116431992Data, + pCns116431992PageOffsets, + pCns116431992PlaneOffsets, + nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431; + break; + } + break; + + case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431: + nBytes = ImplIso2022CnTranslateTo116431( + pCns116431992Data, + pCns116431992PageOffsets, + pCns116431992PlaneOffsets, + nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE; + break; + } + nBytes = 
ImplIso2022CnTranslateTo2312(pGb2312Data, nChar); + if (nBytes != 0) + { + eNewDesignator + = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312; + break; + } + break; + } + if (nBytes != 0) + { + if (eNewDesignator + != IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE) + { + if (bSo) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + bSo = false; + } + else + goto no_output; + } + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x24; // $ + *pDestBufPtr++ = 0x29; // ) + *pDestBufPtr++ + = eNewDesignator + == IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312 ? + 0x41 : 0x47; // A, G + eSoDesignator = eNewDesignator; + } + else + goto no_output; + } + if (!bSo) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0E; // SO + bSo = true; + } + else + goto no_output; + } + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = static_cast< char >(nBytes >> 8); + *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF); + } + else + goto no_output; + } + else + { + sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; + sal_uInt32 nFirst; + sal_uInt32 nLast; + sal_uInt32 nPlane; + if (nOffset == -1) + goto bad_input; + nOffset + = pCns116431992PageOffsets[nOffset + + ((nChar & 0xFF00) >> 8)]; + if (nOffset == -1) + goto bad_input; + nFirst = pCns116431992Data[nOffset++]; + nLast = pCns116431992Data[nOffset++]; + nChar &= 0xFF; + if (nChar < nFirst || nChar > nLast) + goto bad_input; + nOffset += 3 * (nChar - nFirst); + nPlane = pCns116431992Data[nOffset++]; + if (nPlane != 2) + goto bad_input; + if (!b116432Designator) + { + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x24; // $ + *pDestBufPtr++ = 0x2A; // * + *pDestBufPtr++ = 0x48; // H + b116432Designator = true; + } + else + goto no_output; + } + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x4E; // N + *pDestBufPtr++ + = static_cast< char >(0x20 + 
pCns116431992Data[nOffset++]); + *pDestBufPtr++ + = static_cast< char >(0x20 + pCns116431992Data[nOffset]); + } + else + goto no_output; + } + } + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, + nChar, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x0F", // SI + bSo ? 1 : 0, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + bSo = false; + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + bool bFlush = true; + if (nHighSurrogate != 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion( + false, + 0, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x0F", // SI + bSo ? 
1 : 0, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + bFlush = false; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + bSo = false; + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + if (bFlush && bSo && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + bSo = false; + } + else + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + } + } + + if (pContext) + { + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate + = nHighSurrogate; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator + = eSoDesignator; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator + = b116432Designator; + static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = bSo; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022cn.h b/sal/textenc/convertiso2022cn.h deleted file mode 100644 index bf5186d66e1f..000000000000 --- a/sal/textenc/convertiso2022cn.h +++ /dev/null @@ -1,87 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. 
- * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022CN_H -#define INCLUDED_RTL_TEXTENC_CONVERTISO2022CN_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - ImplDBCSToUniLeadTab const * m_pGb2312ToUnicodeData; - sal_uInt16 const * m_pCns116431992ToUnicodeData; - sal_Int32 const * m_pCns116431992ToUnicodeRowOffsets; - sal_Int32 const * m_pCns116431992ToUnicodePlaneOffsets; - ImplUniToDBCSHighTab const * m_pUnicodeToGb2312Data; - sal_uInt8 const * m_pUnicodeToCns116431992Data; - sal_Int32 const * m_pUnicodeToCns116431992PageOffsets; - sal_Int32 const * m_pUnicodeToCns116431992PlaneOffsets; -} ImplIso2022CnConverterData; - -void * ImplCreateIso2022CnToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetIso2022CnToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertIso2022CnToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -void * ImplCreateUnicodeToIso2022CnContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetUnicodeToIso2022CnContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToIso2022Cn(ImplTextConverterData const * pData, - void * 
pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTISO2022CN_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022cn.hxx b/sal/textenc/convertiso2022cn.hxx new file mode 100644 index 000000000000..dc2cccf62d3d --- /dev/null +++ b/sal/textenc/convertiso2022cn.hxx @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTISO2022CN_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTISO2022CN_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +#include "tenchelp.hxx" + +struct ImplIso2022CnConverterData +{ + ImplDBCSToUniLeadTab const * m_pGb2312ToUnicodeData; + sal_uInt16 const * m_pCns116431992ToUnicodeData; + sal_Int32 const * m_pCns116431992ToUnicodeRowOffsets; + sal_Int32 const * m_pCns116431992ToUnicodePlaneOffsets; + ImplUniToDBCSHighTab const * m_pUnicodeToGb2312Data; + sal_uInt8 const * m_pUnicodeToCns116431992Data; + sal_Int32 const * m_pUnicodeToCns116431992PageOffsets; + sal_Int32 const * m_pUnicodeToCns116431992PlaneOffsets; +}; + +void * ImplCreateIso2022CnToUnicodeContext(); + +void ImplResetIso2022CnToUnicodeContext(void * pContext); + +sal_Size ImplConvertIso2022CnToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +void * ImplCreateUnicodeToIso2022CnContext(); + +void ImplResetUnicodeToIso2022CnContext(void * pContext); + +sal_Size ImplConvertUnicodeToIso2022Cn(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022cn.tab b/sal/textenc/convertiso2022cn.tab index 937437693209..319325f3a243 100644 --- a/sal/textenc/convertiso2022cn.tab +++ b/sal/textenc/convertiso2022cn.tab @@ -25,22 +25,14 @@ * ************************************************************************/ -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#include "context.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022CN_H -#include 
"convertiso2022cn.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H -#include "tenchelp.h" -#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif + +#include "context.hxx" +#include "convertiso2022cn.hxx" +#include "tenchelp.hxx" static ImplIso2022CnConverterData const aImplIso2022CnConvertData = { aGB2312UniLeadTab, /* from tcvtscn6.tab */ diff --git a/sal/textenc/convertiso2022jp.c b/sal/textenc/convertiso2022jp.c deleted file mode 100644 index e4792b573373..000000000000 --- a/sal/textenc/convertiso2022jp.c +++ /dev/null @@ -1,572 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#include "convertiso2022jp.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef enum /* order is important: */ -{ - IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN, - IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR -} ImplIso2022JpToUnicodeState; - -typedef struct -{ - ImplIso2022JpToUnicodeState m_eState; - sal_uInt32 m_nRow; -} ImplIso2022JpToUnicodeContext; - -typedef struct -{ - sal_Unicode m_nHighSurrogate; - sal_Bool m_b0208; -} ImplUnicodeToIso2022JpContext; - -void * ImplCreateIso2022JpToUnicodeContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplIso2022JpToUnicodeContext)); - ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - return pContext; -} - -void ImplResetIso2022JpToUnicodeContext(void * pContext) -{ - if (pContext) - ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; -} - -sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - ImplDBCSToUniLeadTab const * pJisX0208Data - = ((ImplIso2022JpConverterData const *) pData)-> - m_pJisX0208ToUnicodeData; - ImplIso2022JpToUnicodeState eState - = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - sal_uInt32 nRow = 0; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - 
{ - eState = ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState; - nRow = ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow; - } - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - switch (eState) - { - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII: - if (nChar == 0x1B) /* ESC */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; - else if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN: - if (nChar == 0x1B) /* ESC */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; - else if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - { - switch (nChar) - { - case 0x5C: /* \ */ - nChar = 0xA5; /* YEN SIGN */ - break; - - case 0x7E: /* ~ */ - nChar = 0xAF; /* MACRON */ - break; - } - *pDestBufPtr++ = (sal_Unicode) nChar; - } - else - goto no_output; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208: - if (nChar == 0x1B) /* ESC */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; - else if (nChar >= 0x21 && nChar <= 0x7E) - { - nRow = nChar; - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2: - if (nChar >= 0x21 && nChar <= 0x7E) - { - sal_uInt16 nUnicode = 0; - sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart; - if (nChar >= nFirst - && nChar <= pJisX0208Data[nRow].mnTrailEnd) - nUnicode = pJisX0208Data[nRow]. 
- mpToUniTrailTab[nChar - nFirst]; - if (nUnicode != 0) - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = (sal_Unicode) nUnicode; - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208; - } - else - goto no_output; - else - goto bad_input; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC: - switch (nChar) - { - case 0x24: /* $ */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR; - break; - - case 0x28: /* ( */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN; - break; - - default: - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN: - switch (nChar) - { - case 0x42: /* A */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - break; - - case 0x4A: /* J */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN; - break; - - default: - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR: - switch (nChar) - { - case 0x40: /* @ */ - case 0x42: /* B */ - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208; - break; - - default: - bUndefined = sal_False; - goto bad_input; - } - break; - } - continue; - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - break; - - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch 
(ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - { - ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState = eState; - ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow = nRow; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -void * ImplCreateUnicodeToIso2022JpContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022JpContext)); - ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False; - return pContext; -} - -void ImplResetUnicodeToIso2022JpContext(void * pContext) -{ - if (pContext) - { - ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False; - } -} - -sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) -{ - ImplUniToDBCSHighTab const * pJisX0208Data - = ((ImplIso2022JpConverterData const *) pData)-> - m_pUnicodeToJisX0208Data; - sal_Unicode nHighSurrogate = 0; - sal_Bool b0208 = sal_False; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - sal_Bool bWritten; - - if (pContext) - { - nHighSurrogate - = ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate; - b0208 = ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208; - } - - for (; nConverted < nSrcChars; ++nConverted) - { - 
sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */ - { - if (b0208) - { - if (pDestBufEnd - pDestBufPtr >= 3) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x28; /* ( */ - *pDestBufPtr++ = 0x42; /* B */ - b0208 = sal_False; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else if (nChar == 0x1B) - goto bad_input; - else if (nChar < 0x80) - { - if (b0208) - { - if (pDestBufEnd - pDestBufPtr >= 3) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x28; /* ( */ - *pDestBufPtr++ = 0x42; /* B */ - b0208 = sal_False; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else - { - sal_uInt16 nBytes = 0; - sal_uInt32 nIndex1 = nChar >> 8; - if (nIndex1 < 0x100) - { - sal_uInt32 nIndex2 = nChar & 0xFF; - sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart; - if (nIndex2 >= nFirst - && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd) - { - nBytes = pJisX0208Data[nIndex1]. 
- mpToUniTrailTab[nIndex2 - nFirst]; - if (nBytes == 0) - /* For some reason, the tables in tcvtjp4.tab do not - include these two conversions: */ - switch (nChar) - { - case 0xA5: /* YEN SIGN */ - nBytes = 0x216F; - break; - - case 0xAF: /* MACRON */ - nBytes = 0x2131; - break; - } - } - } - if (nBytes != 0) - { - if (!b0208) - { - if (pDestBufEnd - pDestBufPtr >= 3) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x24; /* $ */ - *pDestBufPtr++ = 0x42; /* B */ - b0208 = sal_True; - } - else - goto no_output; - } - if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Char) (nBytes >> 8); - *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF); - } - else - goto no_output; - } - else - goto bad_input; - } - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion( - bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x1B(B", - b0208 ? 3 : 0, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - b0208 = sal_False; - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - sal_Bool bFlush = sal_True; - if (nHighSurrogate != 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion( - sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x1B(B", - b0208 ? 
3 : 0, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - bFlush = sal_False; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - b0208 = sal_False; - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - if (bFlush - && b0208 - && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - { - if (pDestBufEnd - pDestBufPtr >= 3) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x28; /* ( */ - *pDestBufPtr++ = 0x42; /* B */ - b0208 = sal_False; - } - else - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - } - } - - if (pContext) - { - ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = b0208; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022jp.cxx b/sal/textenc/convertiso2022jp.cxx new file mode 100644 index 000000000000..7961f150e124 --- /dev/null +++ b/sal/textenc/convertiso2022jp.cxx @@ -0,0 +1,577 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "context.hxx" +#include "converter.hxx" +#include "convertiso2022jp.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +namespace { + +enum ImplIso2022JpToUnicodeState // order is important: +{ + IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN, + IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR +}; + +struct ImplIso2022JpToUnicodeContext +{ + ImplIso2022JpToUnicodeState m_eState; + sal_uInt32 m_nRow; +}; + +struct ImplUnicodeToIso2022JpContext +{ + sal_Unicode m_nHighSurrogate; + bool m_b0208; +}; + +} + +void * ImplCreateIso2022JpToUnicodeContext() +{ + ImplIso2022JpToUnicodeContext * pContext = + new ImplIso2022JpToUnicodeContext; + pContext->m_eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + return pContext; +} + +void ImplResetIso2022JpToUnicodeContext(void * pContext) +{ + if (pContext) + static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState + = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; +} + +sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + ImplDBCSToUniLeadTab const * pJisX0208Data + = static_cast< ImplIso2022JpConverterData const * 
>(pData)-> + m_pJisX0208ToUnicodeData; + ImplIso2022JpToUnicodeState eState + = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + sal_uInt32 nRow = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState; + nRow = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; + switch (eState) + { + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII: + if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; + else if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN: + if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; + else if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + { + switch (nChar) + { + case 0x5C: // REVERSE SOLIDUS (\) + nChar = 0xA5; // YEN SIGN + break; + + case 0x7E: // ~ + nChar = 0xAF; // MACRON + break; + } + *pDestBufPtr++ = (sal_Unicode) nChar; + } + else + goto no_output; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208: + if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC; + else if (nChar >= 0x21 && nChar <= 0x7E) + { + nRow = nChar; + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2: + if (nChar >= 0x21 && nChar <= 0x7E) + { + sal_uInt16 nUnicode = 0; + sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart; + if (nChar >= nFirst + && nChar <= pJisX0208Data[nRow].mnTrailEnd) + nUnicode = 
pJisX0208Data[nRow]. + mpToUniTrailTab[nChar - nFirst]; + if (nUnicode != 0) + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = (sal_Unicode) nUnicode; + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208; + } + else + goto no_output; + else + goto bad_input; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC: + switch (nChar) + { + case 0x24: // $ + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR; + break; + + case 0x28: // ( + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN; + break; + + default: + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN: + switch (nChar) + { + case 0x42: // B + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + break; + + case 0x4A: // J + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN; + break; + + default: + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR: + switch (nChar) + { + case 0x40: // @ + case 0x42: // B + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208; + break; + + default: + bUndefined = false; + goto bad_input; + } + break; + } + continue; + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch
(ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + { + static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState = eState; + static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow = nRow; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +void * ImplCreateUnicodeToIso2022JpContext() +{ + ImplUnicodeToIso2022JpContext * pContext = + new ImplUnicodeToIso2022JpContext; + pContext->m_nHighSurrogate = 0; + pContext->m_b0208 = false; + return pContext; +} + +void ImplResetUnicodeToIso2022JpContext(void * pContext) +{ + if (pContext) + { + static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate = 0; + static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = false; + } +} + +sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + ImplUniToDBCSHighTab const * pJisX0208Data + = static_cast< ImplIso2022JpConverterData const * >(pData)-> + m_pUnicodeToJisX0208Data; + sal_Unicode nHighSurrogate = 0; + bool b0208 = false; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBuf + nDestBytes; + bool bWritten; + + if (pContext) + { + nHighSurrogate + = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate; + b0208 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208; + } + + for (; nConverted < nSrcChars; ++nConverted) + { + bool bUndefined = true; + 
sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar)) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar)) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + { + bUndefined = false; + goto bad_input; + } + + if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) + { + bUndefined = false; + goto bad_input; + } + + if (nChar == 0x0A || nChar == 0x0D) // LF, CR + { + if (b0208) + { + if (pDestBufEnd - pDestBufPtr >= 3) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x28; // ( + *pDestBufPtr++ = 0x42; // B + b0208 = false; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else if (nChar == 0x1B) + goto bad_input; + else if (nChar < 0x80) + { + if (b0208) + { + if (pDestBufEnd - pDestBufPtr >= 3) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x28; // ( + *pDestBufPtr++ = 0x42; // B + b0208 = false; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else + { + sal_uInt16 nBytes = 0; + sal_uInt32 nIndex1 = nChar >> 8; + if (nIndex1 < 0x100) + { + sal_uInt32 nIndex2 = nChar & 0xFF; + sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart; + if (nIndex2 >= nFirst + && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd) + { + nBytes = pJisX0208Data[nIndex1]. 
+ mpToUniTrailTab[nIndex2 - nFirst]; + if (nBytes == 0) + // For some reason, the tables in tcvtjp4.tab do not + // include these two conversions: + switch (nChar) + { + case 0xA5: // YEN SIGN + nBytes = 0x216F; + break; + + case 0xAF: // MACRON + nBytes = 0x2131; + break; + } + } + } + if (nBytes != 0) + { + if (!b0208) + { + if (pDestBufEnd - pDestBufPtr >= 3) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x24; // $ + *pDestBufPtr++ = 0x42; // B + b0208 = true; + } + else + goto no_output; + } + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = static_cast< char >(nBytes >> 8); + *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF); + } + else + goto no_output; + } + else + goto bad_input; + } + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion( + bUndefined, + nChar, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x1B(B", + b0208 ? 3 : 0, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + b0208 = false; + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + bool bFlush = true; + if (nHighSurrogate != 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion( + false, + 0, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x1B(B", + b0208 ? 
3 : 0, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + bFlush = false; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + b0208 = false; + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + if (bFlush + && b0208 + && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + { + if (pDestBufEnd - pDestBufPtr >= 3) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x28; // ( + *pDestBufPtr++ = 0x42; // B + b0208 = false; + } + else + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + } + } + + if (pContext) + { + static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate + = nHighSurrogate; + static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = b0208; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022jp.h b/sal/textenc/convertiso2022jp.h deleted file mode 100644 index 257558387456..000000000000 --- a/sal/textenc/convertiso2022jp.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022JP_H -#define INCLUDED_RTL_TEXTENC_CONVERTISO2022JP_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - ImplDBCSToUniLeadTab const * m_pJisX0208ToUnicodeData; - ImplUniToDBCSHighTab const * m_pUnicodeToJisX0208Data; -} ImplIso2022JpConverterData; - -void * ImplCreateIso2022JpToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetIso2022JpToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -void * ImplCreateUnicodeToIso2022JpContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetUnicodeToIso2022JpContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTISO2022JP_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022jp.hxx b/sal/textenc/convertiso2022jp.hxx new file mode 100644 index 000000000000..e7f05e8dd517 --- /dev/null +++ 
b/sal/textenc/convertiso2022jp.hxx @@ -0,0 +1,74 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTISO2022JP_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTISO2022JP_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +#include "tenchelp.hxx" + +struct ImplIso2022JpConverterData +{ + ImplDBCSToUniLeadTab const * m_pJisX0208ToUnicodeData; + ImplUniToDBCSHighTab const * m_pUnicodeToJisX0208Data; +}; + +void * ImplCreateIso2022JpToUnicodeContext(); + +void ImplResetIso2022JpToUnicodeContext(void * pContext); + +sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +void * ImplCreateUnicodeToIso2022JpContext(); + +void ImplResetUnicodeToIso2022JpContext(void * pContext); + +sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022jp.tab b/sal/textenc/convertiso2022jp.tab index 5775491c87f5..a3c97fa7e9fa 100644 --- a/sal/textenc/convertiso2022jp.tab +++ b/sal/textenc/convertiso2022jp.tab @@ -25,22 +25,14 @@ * ************************************************************************/ -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#include "context.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022JP_H -#include "convertiso2022jp.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H -#include "tenchelp.h" -#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif + +#include "context.hxx" +#include "convertiso2022jp.hxx" +#include "tenchelp.hxx" static 
ImplIso2022JpConverterData const aImplIso2022JpConvertData = { aJIS0208UniLeadTab, /* from tcvtjp6.tab */ diff --git a/sal/textenc/convertiso2022kr.c b/sal/textenc/convertiso2022kr.c deleted file mode 100644 index 8354b312ab87..000000000000 --- a/sal/textenc/convertiso2022kr.c +++ /dev/null @@ -1,530 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#include "convertiso2022kr.h" -#include "context.h" -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -typedef enum /* order is important: */ -{ - IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII, - IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001, - IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2, - IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC, - IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR, - IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN -} ImplIso2022KrToUnicodeState; - -typedef struct -{ - ImplIso2022KrToUnicodeState m_eState; - sal_uInt32 m_nRow; -} ImplIso2022KrToUnicodeContext; - -typedef enum -{ - IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE, - IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII, - IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 -} ImplUnicodeToIso2022KrSet; - -typedef struct -{ - sal_Unicode m_nHighSurrogate; - ImplUnicodeToIso2022KrSet m_eSet; -} ImplUnicodeToIso2022KrContext; - -void * ImplCreateIso2022KrToUnicodeContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext)); - ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - return pContext; -} - -void ImplResetIso2022KrToUnicodeContext(void * pContext) -{ - if (pContext) - ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState - = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; -} - -sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) -{ - ImplDBCSToUniLeadTab const * pKsX1001Data - = ((ImplIso2022KrConverterData const *) pData)-> - m_pKsX1001ToUnicodeData; - ImplIso2022KrToUnicodeState eState - = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - sal_uInt32 nRow = 0; - sal_uInt32 nInfo = 
0; - sal_Size nConverted = 0; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; - - if (pContext) - { - eState = ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState; - nRow = ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow; - } - - for (; nConverted < nSrcBytes; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; - switch (eState) - { - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII: - if (nChar == 0x0E) /* SO */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; - else if (nChar == 0x1B) /* ESC */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC; - else if (nChar < 0x80) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) nChar; - else - goto no_output; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001: - if (nChar == 0x0F) /* SI */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - else if (nChar >= 0x21 && nChar <= 0x7E) - { - nRow = nChar + 0x80; - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2: - if (nChar >= 0x21 && nChar <= 0x7E) - { - sal_uInt16 nUnicode = 0; - sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart; - nChar += 0x80; - if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd) - nUnicode = pKsX1001Data[nRow]. 
- mpToUniTrailTab[nChar - nFirst]; - if (nUnicode != 0) - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = (sal_Unicode) nUnicode; - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; - } - else - goto no_output; - else - goto bad_input; - } - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC: - if (nChar == 0x24) /* $ */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR: - if (nChar == 0x29) /* ) */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - - case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN: - if (nChar == 0x43) /* C */ - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - else - { - bUndefined = sal_False; - goto bad_input; - } - break; - } - continue; - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - break; - - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; - break; - - case 
IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext) - { - ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState = eState; - ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow = nRow; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtBytes) - *pSrcCvtBytes = nConverted; - - return pDestBufPtr - pDestBuf; -} - -void * ImplCreateUnicodeToIso2022KrContext(void) -{ - void * pContext - = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext)); - ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet - = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; - return pContext; -} - -void ImplResetUnicodeToIso2022KrContext(void * pContext) -{ - if (pContext) - { - ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0; - ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet - = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; - } -} - -sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) -{ - ImplUniToDBCSHighTab const * pKsX1001Data - = ((ImplIso2022KrConverterData const *) pData)-> - m_pUnicodeToKsX1001Data; - sal_Unicode nHighSurrogate = 0; - ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; - sal_uInt32 nInfo = 0; - sal_Size nConverted = 0; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBuf + nDestBytes; - sal_Bool bWritten; - - if (pContext) - { - nHighSurrogate - = ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate; - eSet = ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet; - } - - if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE) - { - if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = 0x1B; /* ESC */ - *pDestBufPtr++ = 0x24; /* $ */ - *pDestBufPtr++ = 0x29; /* ) */ - 
*pDestBufPtr++ = 0x43; /* C */ - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - } - else - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - } - - if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) - for (; nConverted < nSrcChars; ++nConverted) - { - sal_Bool bUndefined = sal_True; - sal_uInt32 nChar = *pSrcBuf++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar)) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar)) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - { - bUndefined = sal_False; - goto bad_input; - } - - if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) - { - bUndefined = sal_False; - goto bad_input; - } - - if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */ - { - if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B) - goto bad_input; - else if (nChar < 0x80) - { - if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - } - else - goto no_output; - } - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - } - else - { - sal_uInt16 nBytes = 0; - sal_uInt32 nIndex1 = nChar >> 8; - if (nIndex1 < 0x100) - { - sal_uInt32 nIndex2 = nChar & 0xFF; - sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart; - if (nIndex2 >= nFirst - && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd) - nBytes = pKsX1001Data[nIndex1]. 
- mpToUniTrailTab[nIndex2 - nFirst]; - } - if (nBytes != 0) - { - if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0E; /* SO */ - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001; - } - else - goto no_output; - } - if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Char) ((nBytes >> 8) & 0x7F); - *pDestBufPtr++ = (sal_Char) (nBytes & 0x7F); - } - else - goto no_output; - } - else - goto bad_input; - } - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion( - bUndefined, - nChar, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x0F", /* SI */ - eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBuf; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - sal_Bool bFlush = sal_True; - if (nHighSurrogate != 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion( - sal_False, - 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, - "\x0F", /* SI */ - eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 
- 0 : 1, - &bWritten)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - bFlush = sal_False; - break; - - case IMPL_BAD_INPUT_CONTINUE: - if (bWritten) - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - if (bFlush - && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 - && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - { - if (pDestBufPtr != pDestBufEnd) - { - *pDestBufPtr++ = 0x0F; /* SI */ - eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; - } - else - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - } - } - - if (pContext) - { - ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate - = nHighSurrogate; - ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet = eSet; - } - if (pInfo) - *pInfo = nInfo; - if (pSrcCvtChars) - *pSrcCvtChars = nConverted; - - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022kr.cxx b/sal/textenc/convertiso2022kr.cxx new file mode 100644 index 000000000000..d2b2e8731bbf --- /dev/null +++ b/sal/textenc/convertiso2022kr.cxx @@ -0,0 +1,534 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "context.hxx" +#include "converter.hxx" +#include "convertiso2022kr.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +namespace { + +enum ImplIso2022KrToUnicodeState // order is important: +{ + IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII, + IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001, + IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2, + IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC, + IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR, + IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN +}; + +struct ImplIso2022KrToUnicodeContext +{ + ImplIso2022KrToUnicodeState m_eState; + sal_uInt32 m_nRow; +}; + +enum ImplUnicodeToIso2022KrSet +{ + IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE, + IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII, + IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 +}; + +struct ImplUnicodeToIso2022KrContext +{ + sal_Unicode m_nHighSurrogate; + ImplUnicodeToIso2022KrSet m_eSet; +}; + +} + +void * ImplCreateIso2022KrToUnicodeContext() +{ + ImplIso2022KrToUnicodeContext * pContext = + new ImplIso2022KrToUnicodeContext; + pContext->m_eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + return pContext; +} + +void ImplResetIso2022KrToUnicodeContext(void * pContext) +{ + if (pContext) + static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState + = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; +} + +sal_Size 
ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes) +{ + ImplDBCSToUniLeadTab const * pKsX1001Data + = static_cast< ImplIso2022KrConverterData const * >(pData)-> + m_pKsX1001ToUnicodeData; + ImplIso2022KrToUnicodeState eState + = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + sal_uInt32 nRow = 0; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; + + if (pContext) + { + eState = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState; + nRow = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow; + } + + for (; nConverted < nSrcBytes; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; + switch (eState) + { + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII: + if (nChar == 0x0E) // SO + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; + else if (nChar == 0x1B) // ESC + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC; + else if (nChar < 0x80) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nChar; + else + goto no_output; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001: + if (nChar == 0x0F) // SI + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + else if (nChar >= 0x21 && nChar <= 0x7E) + { + nRow = nChar + 0x80; + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2: + if (nChar >= 0x21 && nChar <= 0x7E) + { + sal_uInt16 nUnicode = 0; + sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart; + nChar += 0x80; + if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd) + nUnicode = pKsX1001Data[nRow]. 
+ mpToUniTrailTab[nChar - nFirst]; + if (nUnicode != 0) + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = (sal_Unicode) nUnicode; + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; + } + else + goto no_output; + else + goto bad_input; + } + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC: + if (nChar == 0x24) // $ + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR: + if (nChar == 0x29) // ) + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN; + else + { + bUndefined = false; + goto bad_input; + } + break; + + case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN: + if (nChar == 0x43) // C + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + else + { + bUndefined = false; + goto bad_input; + } + break; + } + continue; + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + break; + + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= 
RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext) + { + static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState = eState; + static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow = nRow; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtBytes) + *pSrcCvtBytes = nConverted; + + return pDestBufPtr - pDestBuf; +} + +void * ImplCreateUnicodeToIso2022KrContext() +{ + ImplUnicodeToIso2022KrContext * pContext = + new ImplUnicodeToIso2022KrContext; + pContext->m_nHighSurrogate = 0; + pContext->m_eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; + return pContext; +} + +void ImplResetUnicodeToIso2022KrContext(void * pContext) +{ + if (pContext) + { + static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate = 0; + static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet + = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; + } +} + +sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars) +{ + ImplUniToDBCSHighTab const * pKsX1001Data + = static_cast< ImplIso2022KrConverterData const * >(pData)-> + m_pUnicodeToKsX1001Data; + sal_Unicode nHighSurrogate = 0; + ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; + sal_uInt32 nInfo = 0; + sal_Size nConverted = 0; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBuf + nDestBytes; + bool bWritten; + + if (pContext) + { + nHighSurrogate + = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate; + eSet = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet; + } + + if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE) + { + if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = 0x1B; // ESC + *pDestBufPtr++ = 0x24; // $ + *pDestBufPtr++ = 0x29; // ) + *pDestBufPtr++ = 0x43; // C + eSet = 
IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + } + else + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + } + + if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) + for (; nConverted < nSrcChars; ++nConverted) + { + bool bUndefined = true; + sal_uInt32 nChar = *pSrcBuf++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar)) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar)) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + { + bUndefined = false; + goto bad_input; + } + + if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) + { + bUndefined = false; + goto bad_input; + } + + if (nChar == 0x0A || nChar == 0x0D) // LF, CR + { + if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B) + goto bad_input; + else if (nChar < 0x80) + { + if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + } + else + goto no_output; + } + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else + goto no_output; + } + else + { + sal_uInt16 nBytes = 0; + sal_uInt32 nIndex1 = nChar >> 8; + if (nIndex1 < 0x100) + { + sal_uInt32 nIndex2 = nChar & 0xFF; + sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart; + if (nIndex2 >= nFirst + && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd) + nBytes = pKsX1001Data[nIndex1]. 
+ mpToUniTrailTab[nIndex2 - nFirst]; + } + if (nBytes != 0) + { + if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0E; // SO + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001; + } + else + goto no_output; + } + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = static_cast< char >((nBytes >> 8) & 0x7F); + *pDestBufPtr++ = static_cast< char >(nBytes & 0x7F); + } + else + goto no_output; + } + else + goto bad_input; + } + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion( + bUndefined, + nChar, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x0F", // SI + eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBuf; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + bool bFlush = true; + if (nHighSurrogate != 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion( + false, + 0, + nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, + "\x0F", // SI + eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 
+ 0 : 1, + &bWritten)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + bFlush = false; + break; + + case IMPL_BAD_INPUT_CONTINUE: + if (bWritten) + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + if (bFlush + && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 + && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + { + if (pDestBufPtr != pDestBufEnd) + { + *pDestBufPtr++ = 0x0F; // SI + eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; + } + else + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + } + } + + if (pContext) + { + static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate + = nHighSurrogate; + static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet = eSet; + } + if (pInfo) + *pInfo = nInfo; + if (pSrcCvtChars) + *pSrcCvtChars = nConverted; + + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022kr.h b/sal/textenc/convertiso2022kr.h deleted file mode 100644 index 7208bcf63bb0..000000000000 --- a/sal/textenc/convertiso2022kr.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. 
- * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022KR_H -#define INCLUDED_RTL_TEXTENC_CONVERTISO2022KR_H - -#include "tenchelp.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -typedef struct -{ - ImplDBCSToUniLeadTab const * m_pKsX1001ToUnicodeData; - ImplUniToDBCSHighTab const * m_pUnicodeToKsX1001Data; -} ImplIso2022KrConverterData; - -void * ImplCreateIso2022KrToUnicodeContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetIso2022KrToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); - -void * ImplCreateUnicodeToIso2022KrContext(void) SAL_THROW_EXTERN_C(); - -void ImplResetUnicodeToIso2022KrContext(void * pContext) SAL_THROW_EXTERN_C(); - -sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_CONVERTISO2022KR_H */ - -/* 
vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022kr.hxx b/sal/textenc/convertiso2022kr.hxx new file mode 100644 index 000000000000..c4a93c63786a --- /dev/null +++ b/sal/textenc/convertiso2022kr.hxx @@ -0,0 +1,74 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_CONVERTISO2022KR_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTISO2022KR_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +#include "tenchelp.hxx" + /* Pair of lookup-table pointers handed to the ISO-2022-KR converters. NOTE(review): the member names suggest KS X 1001 to Unicode and Unicode to KS X 1001 data; the tables themselves are defined outside this header. */ +struct ImplIso2022KrConverterData +{ + ImplDBCSToUniLeadTab const * m_pKsX1001ToUnicodeData; + ImplUniToDBCSHighTab const * m_pUnicodeToKsX1001Data; +}; + /* Context management and conversion for the ISO-2022-KR to Unicode direction. Contexts are passed around as untyped void pointers. NOTE(review): the implementation of this direction is not part of the visible hunk; presumably it mirrors the Unicode to ISO-2022-KR contract described below - confirm. */ +void * ImplCreateIso2022KrToUnicodeContext(); + +void ImplResetIso2022KrToUnicodeContext(void * pContext); + +sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + /* Context management and conversion for the Unicode to ISO-2022-KR direction. */ +void * ImplCreateUnicodeToIso2022KrContext(); + +void ImplResetUnicodeToIso2022KrContext(void * pContext); + /* As the end of the implementation in convertiso2022kr.cxx shows: when pContext is non-null the pending high surrogate and the active character set are stored back into it; pInfo and pSrcCvtChars are optional outputs written only when non-null; the return value is the number of bytes written to pDestBuf. With the FLUSH flag set and the KS X 1001 set active, a trailing SI byte (0x0F) is emitted if room remains, otherwise DESTBUFFERTOSMALL is reported. */ +sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/convertiso2022kr.tab b/sal/textenc/convertiso2022kr.tab index 87010f55590f..86cf889c7bf7 100644 --- a/sal/textenc/convertiso2022kr.tab +++ b/sal/textenc/convertiso2022kr.tab @@ -25,22 +25,14 @@ * ************************************************************************/ -#ifndef INCLUDED_RTL_TEXTENC_CONTEXT_H -#include "context.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_CONVERTISO2022KR_H -#include "convertiso2022kr.h" -#endif -#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H -#include "tenchelp.h" -#endif +#include "sal/config.h" -#ifndef _RTL_TENCINFO_H #include "rtl/tencinfo.h" -#endif -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif + +#include "context.hxx" +#include "convertiso2022kr.hxx" +#include "tenchelp.hxx" static
ImplIso2022KrConverterData const aImplIso2022KrConvertData = { aKSC5601UniLeadTab, /* from tcvtkr6.tab */ diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx index b563be83abaf..b485f88c7899 100644 --- a/sal/textenc/convertsinglebytetobmpunicode.cxx +++ b/sal/textenc/convertsinglebytetobmpunicode.cxx @@ -26,17 +26,18 @@ * ************************************************************************/ +#include "sal/config.h" -#include "context.h" -#include "converter.h" -#include "convertsinglebytetobmpunicode.hxx" -#include "unichars.h" +#include #include "osl/diagnose.h" #include "rtl/textcvt.h" #include "sal/types.h" -#include +#include "context.hxx" +#include "converter.hxx" +#include "convertsinglebytetobmpunicode.hxx" +#include "unichars.hxx" sal_Size rtl_textenc_convertSingleByteToBmpUnicode( ImplTextConverterData const * data, void *, sal_Char const * srcBuf, diff --git a/sal/textenc/convertsinglebytetobmpunicode.hxx b/sal/textenc/convertsinglebytetobmpunicode.hxx index 39ebed9eabca..a55ca53c8b20 100644 --- a/sal/textenc/convertsinglebytetobmpunicode.hxx +++ b/sal/textenc/convertsinglebytetobmpunicode.hxx @@ -26,14 +26,16 @@ * ************************************************************************/ -#ifndef INCLUDED_sal_textenc_convertsinglebytetobmpunicode_hxx -#define INCLUDED_sal_textenc_convertsinglebytetobmpunicode_hxx +#ifndef INCLUDED_SAL_TEXTENC_CONVERTSINGLEBYTETOBMPUNICODE_HXX +#define INCLUDED_SAL_TEXTENC_CONVERTSINGLEBYTETOBMPUNICODE_HXX -#include "tenchelp.h" +#include "sal/config.h" + +#include #include "sal/types.h" -#include +#include "tenchelp.hxx" /// @HTML diff --git a/sal/textenc/generate/gb180302000.tab b/sal/textenc/generate/gb180302000.tab index 00b58b3cff88..1fece262db2e 100644 --- a/sal/textenc/generate/gb180302000.tab +++ b/sal/textenc/generate/gb180302000.tab @@ -25,13 +25,11 @@ * ************************************************************************/ -#ifndef 
INCLUDED_RTL_TEXTENC_CONVERTGB18030_H -#include "convertgb18030.h" -#endif +#include "sal/config.h" -#ifndef _SAL_TYPES_H_ #include "sal/types.h" -#endif + +#include "convertgb18030.hxx" static sal_Unicode const aImplGb180302000ToUnicodeData[] = { 0x4E02,0x4E04,0x4E05,0x4E06,0x4E0F,0x4E12,0x4E17,0x4E1F, diff --git a/sal/textenc/gettextencodingdata.h b/sal/textenc/gettextencodingdata.h deleted file mode 100644 index f2d31be82902..000000000000 --- a/sal/textenc/gettextencodingdata.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H -#define INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H - -#include "tenchelp.h" -#include "rtl/textenc.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cplusplus */ - -ImplTextEncodingData const * -Impl_getTextEncodingData(rtl_TextEncoding nEncoding) SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cplusplus */ - -#endif /* INCLUDED_RTL_TEXTENC_GETTEXTENCODINGDATA_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/gettextencodingdata.hxx b/sal/textenc/gettextencodingdata.hxx new file mode 100644 index 000000000000..ab8656967f06 --- /dev/null +++ b/sal/textenc/gettextencodingdata.hxx @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_GETTEXTENCODINGDATA_HXX +#define INCLUDED_SAL_TEXTENC_GETTEXTENCODINGDATA_HXX + +#include "sal/config.h" + +#include "rtl/textenc.h" + +#include "tenchelp.hxx" + +ImplTextEncodingData const * +Impl_getTextEncodingData(rtl_TextEncoding nEncoding); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tables.cxx b/sal/textenc/tables.cxx index 096876119a20..01f91905dea1 100644 --- a/sal/textenc/tables.cxx +++ b/sal/textenc/tables.cxx @@ -29,21 +29,12 @@ #include "sal/config.h" #include +#include -#include "sal/types.h" - -#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H -#include "tenchelp.h" -#endif - -#ifndef _RTL_TEXTENC_H #include "rtl/textenc.h" -#endif +#include "sal/types.h" -#ifndef INCLUDED_STDDEF_H -#include -#define INCLUDED_STDDEF_H -#endif +#include "tenchelp.hxx" #define NOTABUNI_START 0xFF #define NOTABUNI_END 0x00 diff --git a/sal/textenc/tcvtbyte.c b/sal/textenc/tcvtbyte.c deleted file mode 100644 index b8e3ed222f7c..000000000000 --- a/sal/textenc/tcvtbyte.c +++ /dev/null @@ -1,858 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "tenchelp.h" -#include "rtl/textcvt.h" - -/* ======================================================================= */ - /* Number of sal_uInt16 slots in each maReplaceChars array of the replacement-string table below. */ -#define IMPL_MAX_REPLACECHAR 5 - /* Look up a one-code-unit replacement for c; yields 0 when c has no entry. */ -sal_uInt16 ImplGetReplaceChar(sal_Unicode c); - /* Look up a multi-code-unit replacement for c; yields a null pointer when c has no entry. */ -sal_uInt16 const * ImplGetReplaceString(sal_Unicode c); - -/* ----------------------------------------------------------------------- */ - /* One row of the single-character replacement table: mnUniChar is the search key, mnReplaceChar the value returned for it. */ -typedef struct -{ - sal_uInt16 mnUniChar; - sal_uInt16 mnReplaceChar; -} ImplReplaceCharData; - /* Searched by ImplGetReplaceChar with a binary search, so rows MUST stay sorted by ascending mnUniChar. */ -static ImplReplaceCharData const aImplRepCharTab[] = -{ - { 0x00A0, 0x0020 }, /* NO-BREAK SPACE */ - { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */ - { 0x00B7, 0x0045 }, /* MIDDLE DOT; NOTE(review): 0x0045 is the capital letter E - possibly a typo for 0x002E (full stop), confirm */ - { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */ - { 0x00D7, 0x002A }, /* MULTIPLICATION SIGN */ - { 0x00F7, 0x002F }, /* DIVISION SIGN */ - { 0x2000, 0x0020 }, /* EN QUAD */ - { 0x2001, 0x0020 }, /* EM QUAD */ - { 0x2002, 0x0020 }, /* EN SPACE */ - { 0x2003, 0x0020 }, /* EM SPACE */ - { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */ - { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */ - { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */ - { 0x2007, 0x0020 }, /* FIGURE SPACE */ - { 0x2008, 0x0020 }, /* PUNCTUATION SPACE */ - { 0x2009, 0x0020 }, /* THIN SPACE */ - { 0x200A, 0x0020 }, /* HAIR SPACE */ - { 0x2010, 0x002D }, /* HYPHEN */ - { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */ - { 0x2012, 0x002D }, /* FIGURE DASH */ - { 0x2013, 0x002D }, /* EN DASH */ - { 0x2014, 0x002D }, /* EM DASH */ - { 0x2015, 0x002D }, /* HORIZONTAL BAR */ - { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */ - { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */ - { 0x201A, 
0x002C }, /* SINGLE LOW-9 QUOTATION MARK */ - { 0x201B, 0x0027 }, /* SINGLE HIGH-REVERSED-9 QUOTATION MARK */ - { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */ - { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */ - { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */ - { 0x201F, 0x0022 }, /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK */ - { 0x2022, 0x002D }, /* BULLET */ - { 0x2023, 0x002D }, /* TRIANGULAR BULLET */ - { 0x2024, 0x002D }, /* ONE DOT LEADER */ - { 0x2027, 0x002D }, /* HYPHENATION POINT */ - { 0x2028, 0x000A }, /* LINE SEPARATOR */ - { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */ - { 0x2032, 0x0027 }, /* PRIME */ - { 0x2033, 0x0022 }, /* DOUBLE PRIME */ - { 0x2035, 0x0027 }, /* REVERSED PRIME */ - { 0x2036, 0x0022 }, /* REVERSED DOUBLE PRIME */ - { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ - { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ - { 0x2043, 0x002D }, /* HYPHEN BULLET */ - { 0x2044, 0x002F }, /* FRACTION SLASH */ - { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */ - { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */ - { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */ - { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */ - { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */ - { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */ - { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */ - { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */ - { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */ - { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */ - { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */ - { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */ - { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */ - { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */ - { 0x2215, 0x002F }, /* DIVISION SLASH */ - { 0x2217, 0x002A }, /* ASTERISK OPERATOR */ - { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS; NOTE(review): the Halfwidth and Fullwidth Forms block starts at U+FF01, so U+FF00 is not an assigned character - confirm this row is intended */ - { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF03, 0x0023 
}, /* FULLWIDTH ASCII FORMS */ - { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF05, 0x0025 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF09, 0x0029 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2B, 0x004B }, /* FULLWIDTH 
ASCII FORMS */ - { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF31, 0x0051 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF33, 0x0053 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */ - 
{ 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */ - { 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5B, 0x007B }, /* FULLWIDTH ASCII FORMS */ - { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */ - { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/ - { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */ - { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7B, 0x30B5 }, /* 
HALFWIDTH KATAKANA FORMS */ - { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7D, 0x30B9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF82, 0x30C4 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */ - { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA1, 
0x3131 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA3, 0x3133 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA8, 0x3138 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCD, 0x3158 }, 
/* HALFWIDTH HANGUL FORMS */ - { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFCF, 0x315A }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD5, 0x315E }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */ - { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */ - { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */ - { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */ - { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */ - { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */ - { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */ - { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */ - { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */ - { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */ - { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */ - { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */ - { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */ - { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */ - { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */ - { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */ -}; - /* Binary search of aImplRepCharTab for the key c. Returns the mnReplaceChar of the matching row, or 0 when no row has mnUniChar equal to c. Correct results depend on the table being sorted by ascending mnUniChar. */ -sal_uInt16 ImplGetReplaceChar( sal_Unicode c ) -{ - sal_uInt16 nLow; - sal_uInt16 nHigh; - sal_uInt16 nMid; - sal_uInt16 nCompareChar; - const ImplReplaceCharData* pCharData; - - nLow = 0; - nHigh = (sizeof( aImplRepCharTab )/sizeof( ImplReplaceCharData ))-1; /* index of the last row; the search range is inclusive on both ends */ - do - { - nMid = (nLow+nHigh)/2; - pCharData = aImplRepCharTab+nMid; - nCompareChar = pCharData->mnUniChar; - if ( c < nCompareChar ) - { - if ( !nMid ) /* already at row 0: stop here because nMid-1 would wrap around in the unsigned index */ - break; - nHigh = nMid-1; - } - else - { - if ( c > nCompareChar ) - nLow = nMid+1; - else - return pCharData->mnReplaceChar; /* exact key match */ - } - } - while ( nLow <= nHigh ); - - return 0; /* not found */ -} - -/* 
----------------------------------------------------------------------- */ - /* One row of the replacement-string table: mnUniChar is the search key, maReplaceChars holds the replacement code units padded with 0x0000 up to IMPL_MAX_REPLACECHAR entries. */ -typedef struct -{ - sal_uInt16 mnUniChar; - sal_uInt16 maReplaceChars[IMPL_MAX_REPLACECHAR]; -} ImplReplaceCharStrData; - /* Searched by ImplGetReplaceString with a binary search, so rows MUST stay sorted by ascending mnUniChar. Keys follow the Unicode code charts: REGISTERED SIGN is U+00AE, ROMAN NUMERAL EIGHT and NINE are U+2167 and U+2168, SMALL ROMAN NUMERAL EIGHT and NINE are U+2177 and U+2178. U+2169 and U+2179 (TEN) are single-letter replacements and live in aImplRepCharTab only. */ -static ImplReplaceCharStrData const aImplRepCharStrTab[] = -{ - { 0x00A9, { 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */ - { 0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ - { 0x00AE, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN (key was mistyped as 0x0AE0, which also broke the sort order and hid the two rows that follow) */ - { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ - { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */ - { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */ - { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */ - { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */ - { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */ - { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */ - { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */ - { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */ - { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSIS */ - { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPLE PRIME */ - { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* REVERSED TRIPLE PRIME */ - { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */ - { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */ - { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */ - { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRDS */ - { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR 
FRACTION ONE FIFTH */ - { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTHS */ - { 0x2157, { 0x0033, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTHS */ - { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION FOUR FIFTHS */ - { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE SIXTH */ - { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTHS */ - { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */ - { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTHS */ - { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTHS */ - { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTHS */ - { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */ - { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */ - { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */ - { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */ - { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */ - { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */ - { 0x2167, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT (key was 0x2168) */ - { 0x2168, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE (key was 0x2169, which is ROMAN NUMERAL TEN) */ - { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */ - { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */ - { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */ - { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */ - { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */ - { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 
0x0000 } }, /* SMALL ROMAN NUMERAL SIX */ - { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */ - { 0x2177, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT (key was 0x2178) */ - { 0x2178, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL NINE (key was 0x2179, which is SMALL ROMAN NUMERAL TEN) */ - { 0x217A, { 0x0078, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */ - { 0x217B, { 0x0078, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE (first unit was the capital X, 0x0058) */ -}; - /* Binary search of aImplRepCharStrTab for the key c. Returns a pointer to the maReplaceChars array of the matching row (IMPL_MAX_REPLACECHAR code units, unused slots are 0x0000), or a null pointer when no row has mnUniChar equal to c. Correct results depend on the table being sorted by ascending mnUniChar. */ -const sal_uInt16* ImplGetReplaceString( sal_Unicode c ) -{ - sal_uInt16 nLow; - sal_uInt16 nHigh; - sal_uInt16 nMid; - sal_uInt16 nCompareChar; - const ImplReplaceCharStrData* pCharData; - - nLow = 0; - nHigh = (sizeof( aImplRepCharStrTab )/sizeof( ImplReplaceCharStrData ))-1; /* index of the last row; the search range is inclusive on both ends */ - do - { - nMid = (nLow+nHigh)/2; - pCharData = aImplRepCharStrTab+nMid; - nCompareChar = pCharData->mnUniChar; - if ( c < nCompareChar ) - { - if ( !nMid ) /* already at row 0: stop here because nMid-1 would wrap around in the unsigned index */ - break; - nHigh = nMid-1; - } - else - { - if ( c > nCompareChar ) - nLow = nMid+1; - else - return pCharData->maReplaceChars; /* exact key match */ - } - } - while ( nLow <= nHigh ); - - return 0; /* not found */ -} - -/* ======================================================================= */ - -sal_Size ImplSymbolToUnicode( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_uChar c; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pData; /* unused */ - (void) pContext; /* unused */ - (void) nFlags; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - while ( pSrcBuf < pEndSrcBuf ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - /* 0-31 (all Control-Character get the same Unicode value) */ - c = (sal_uChar)*pSrcBuf; - if ( c <= 0x1F ) - *pDestBuf = 
(sal_Unicode)c; - else - *pDestBuf = ((sal_Unicode)c)+0xF000; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, - void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - sal_Unicode c; - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - while ( pSrcBuf < pEndSrcBuf ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - c = *pSrcBuf; - if ( (c >= 0xF000) && (c <= 0xF0FF) ) - { - *pDestBuf = (sal_Char)(sal_uChar)(c-0xF000); - pDestBuf++; - pSrcBuf++; - } - // Normally 0x001F, but in many cases also symbol characters - // are stored in the first 256 bytes, so that we don't change - // these values - else if ( c <= 0x00FF ) - { - *pDestBuf = (sal_Char)(sal_uChar)c; - pDestBuf++; - pSrcBuf++; - } - else - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) - { - /* !!! 
*/ - /* Only ascii characters < 0x1F */ - } - - /* Handle undefined and surrogates characters */ - /* (all surrogates characters are undefined) */ - if (!ImplHandleUndefinedUnicodeToTextChar(pData, - &pSrcBuf, - pEndSrcBuf, - &pDestBuf, - pEndDestBuf, - nFlags, - pInfo)) - break; - } - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* ======================================================================= */ - -sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_uChar c; - sal_Unicode cConv; - const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - while ( pSrcBuf < pEndSrcBuf ) - { - c = (sal_uChar)*pSrcBuf; - if ( c < 0x80 ) - cConv = c; - else - { - if ( (c >= pConvertData->mnToUniStart1) && (c <= pConvertData->mnToUniEnd1) ) - cConv = pConvertData->mpToUniTab1[c-pConvertData->mnToUniStart1]; - else if ( (c >= pConvertData->mnToUniStart2) && (c <= pConvertData->mnToUniEnd2) ) - cConv = pConvertData->mpToUniTab2[c-pConvertData->mnToUniStart2]; - else - cConv = 0; - if ( !cConv ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = ImplGetUndefinedUnicodeChar(c, nFlags); - } - } - - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | 
RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = cConv; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_uChar c; - sal_Unicode cConv; - const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pContext; /* unused */ - (void) nFlags; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - *pSrcCvtBytes = 0; - return 0; - } - while ( pSrcBuf < pEndSrcBuf ) - { - c = (sal_uChar)*pSrcBuf; - if (c < 0x80) - cConv = c; - else - // c <= 0xFF is implied. 
- cConv = pConvertData->mpToUniTab1[c - 0x80]; - - *pDestBuf = cConv; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -// Writes 0--2 characters to dest: -static int ImplConvertUnicodeCharToChar( - const ImplByteConvertData* pConvertData, sal_Unicode c, sal_Char * dest ) -{ - const ImplUniCharTabData* pToCharExTab; - - if ( c < 0x80 ) - { - dest[0] = (sal_Char)c; - return 1; - } - if ( (c >= pConvertData->mnToCharStart1) && (c <= pConvertData->mnToCharEnd1) ) - { - dest[0] = (sal_Char)pConvertData->mpToCharTab1[c-pConvertData->mnToCharStart1]; - if ( dest[0] != 0 ) - return 1; - } - else if ( (c >= pConvertData->mnToCharStart2) && (c <= pConvertData->mnToCharEnd2) ) - { - dest[0] = (sal_Char)pConvertData->mpToCharTab2[c-pConvertData->mnToCharStart2]; - if ( dest[0] != 0 ) - return 1; - } - pToCharExTab = pConvertData->mpToCharExTab; - if ( pToCharExTab ) - { - sal_uInt16 nLow; - sal_uInt16 nHigh; - sal_uInt16 nMid; - sal_uInt16 nCompareChar; - const ImplUniCharTabData* pCharExData; - - nLow = 0; - nHigh = pConvertData->mnToCharExCount-1; - do - { - nMid = (nLow+nHigh)/2; - pCharExData = pToCharExTab+nMid; - nCompareChar = pCharExData->mnUniChar; - if ( c < nCompareChar ) - { - if ( !nMid ) - break; - nHigh = nMid-1; - } - else - { - if ( c > nCompareChar ) - nLow = nMid+1; - else - { - dest[0] = (sal_Char)pCharExData->mnChar; - if ( pCharExData->mnChar2 == 0 ) - return 1; - else - { - dest[1] = (sal_Char)pCharExData->mnChar2; - return 2; - } - } - } - } - while ( nLow <= nHigh ); - } - return 0; -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, - void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - 
sal_Size* pSrcCvtChars ) -{ - sal_Unicode c; - const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - int i; - int n; - sal_uInt16 cTemp; - sal_Char aTempBuf[IMPL_MAX_REPLACECHAR+2]; - const sal_uInt16* pReplace; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - while ( pSrcBuf < pEndSrcBuf ) - { - c = *pSrcBuf; - if ( c < 0x80 ) - { - aTempBuf[0] = (sal_Char)c; - n = 1; - } - else - { - n = ImplConvertUnicodeCharToChar( pConvertData, c, aTempBuf ); - - if ( n == 0 ) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) - { - cTemp = ImplGetReplaceChar( c ); - if ( cTemp ) - n = ImplConvertUnicodeCharToChar( - pConvertData, cTemp, aTempBuf ); - } - - if ( n == 0 ) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) - { - pReplace = ImplGetReplaceString( c ); - if ( pReplace ) - { - while ( *pReplace && (n < IMPL_MAX_REPLACECHAR) ) - { - i = ImplConvertUnicodeCharToChar( - pConvertData, *pReplace, aTempBuf + n ); - if ( i == 0 ) - { - n = 0; - break; - } - pReplace++; - n += i; - } - } - } - - /* Handle undefined and surrogates characters */ - /* (all surrogates characters are undefined) */ - if ( n == 0 ) - { - if (ImplHandleUndefinedUnicodeToTextChar(pData, - &pSrcBuf, - pEndSrcBuf, - &pDestBuf, - pEndDestBuf, - nFlags, - pInfo)) - continue; - else - break; - } - } - } - } - - if ( pEndDestBuf - pDestBuf < n ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - for ( i = 0; i < n; ++i ) - *pDestBuf++ = aTempBuf[i]; - pSrcBuf++; - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtbyte.cxx b/sal/textenc/tcvtbyte.cxx new file mode 100644 index 000000000000..9296d68f2c0d --- /dev/null +++ 
b/sal/textenc/tcvtbyte.cxx @@ -0,0 +1,848 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" + +#include "tenchelp.hxx" + +/* ======================================================================= */ + +#define IMPL_MAX_REPLACECHAR 5 + +sal_uInt16 ImplGetReplaceChar(sal_Unicode c); + +sal_uInt16 const * ImplGetReplaceString(sal_Unicode c); + +/* ----------------------------------------------------------------------- */ + +struct ImplReplaceCharData +{ + sal_uInt16 mnUniChar; + sal_uInt16 mnReplaceChar; +}; + +static ImplReplaceCharData const aImplRepCharTab[] = +{ + { 0x00A0, 0x0020 }, /* NO-BREAK-SPACE */ + { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */ + { 0x00B7, 0x0045 }, /* MIDDLE DOT */ + { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */ + { 0x00D7, 0x002A }, /* MULTIPLIKATION SIGN */ + { 0x00F7, 0x002F }, /* DIVISION SIGN */ + { 0x2000, 0x0020 }, /* EN QUAD */ + { 0x2001, 0x0020 }, /* EM QUAD */ + { 0x2002, 0x0020 }, /* EN SPACE */ + { 0x2003, 0x0020 }, /* EM SPACE */ + { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */ + { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */ + { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */ + { 0x2007, 0x0020 }, /* FIGURE SPACE */ + { 0x2008, 0x0020 }, /* PUNCTATION SPACE */ + { 0x2009, 0x0020 }, /* THIN SPACE */ + { 0x200A, 0x0020 }, /* HAIR SPACE */ + { 0x2010, 0x002D }, /* HYPHEN */ + { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */ + { 0x2012, 0x002D }, /* FIGURE DASH */ + { 0x2013, 0x002D }, /* EN DASH */ + { 0x2014, 0x002D }, /* EM DASH */ + { 0x2015, 0x002D }, /* HORIZONTAL BAR */ + { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */ + { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */ + { 0x201A, 0x002C }, /* SINGLE LOW-9 QUOTATION MARK */ + { 0x201B, 0x0027 }, /* SINGLE HIGH-RESERVED-9 QUOTATION MARK */ + { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */ + { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */ + { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */ + { 0x201F, 0x0022 }, 
/* DOUBLE HIGH-RESERVED-9 QUOTATION MARK */ + { 0x2022, 0x002D }, /* BULLET */ + { 0x2023, 0x002D }, /* TRIANGULAR BULLET */ + { 0x2024, 0x002D }, /* ONE DOT LEADER */ + { 0x2027, 0x002D }, /* HYPHENATION POINT */ + { 0x2028, 0x000A }, /* LINE SEPARATOR */ + { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */ + { 0x2032, 0x0027 }, /* PRIME */ + { 0x2033, 0x0022 }, /* DOUBLE PRIME */ + { 0x2035, 0x0027 }, /* RESERVED PRIME */ + { 0x2036, 0x0022 }, /* RESERVED DOUBLE PRIME */ + { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ + { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ + { 0x2043, 0x002D }, /* HYPHEN BULLET */ + { 0x2044, 0x002F }, /* FRACTION SLASH */ + { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */ + { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */ + { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */ + { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */ + { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */ + { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */ + { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */ + { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */ + { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */ + { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */ + { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */ + { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */ + { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */ + { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */ + { 0x2215, 0x002F }, /* DIVISION SLASH */ + { 0x2217, 0x002A }, /* ASTERIX OPERATOR */ + { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF03, 0x0023 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF05, 0x0025 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF09, 0x0029 }, 
/* FULLWIDTH ASCII FORMS */ + { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2B, 0x004B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF31, 0x0051 }, /* FULLWIDTH 
ASCII FORMS */ + { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF33, 0x0053 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */ + { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */ + 
{ 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5B, 0x007B }, /* FULLWIDTH ASCII FORMS */ + { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */ + { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/ + { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */ + { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7B, 0x30B5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7D, 0x30B9 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */ + { 
0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF82, 0x30C4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */ + { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA1, 0x3131 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA3, 0x3133 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */ + { 
0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA8, 0x3138 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCD, 0x3158 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFCF, 0x315A }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD5, 
0x315E }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */ + { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */ + { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */ + { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */ + { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */ + { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */ + { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */ + { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */ + { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */ + { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */ + { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */ + { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */ + { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */ + { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */ + { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */ + { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */ +}; + +sal_uInt16 ImplGetReplaceChar( sal_Unicode c ) +{ + sal_uInt16 nLow; + sal_uInt16 nHigh; + sal_uInt16 nMid; + sal_uInt16 nCompareChar; + const ImplReplaceCharData* pCharData; + + nLow = 0; + nHigh = (sizeof( aImplRepCharTab )/sizeof( ImplReplaceCharData ))-1; + do + { + nMid = (nLow+nHigh)/2; + pCharData = aImplRepCharTab+nMid; + nCompareChar = pCharData->mnUniChar; + if ( c < nCompareChar ) + { + if ( !nMid ) + break; + nHigh = nMid-1; + } + else + { + if ( c > nCompareChar ) + nLow = nMid+1; + else + return pCharData->mnReplaceChar; + } + } + while ( nLow <= nHigh ); + + return 0; +} + +/* ----------------------------------------------------------------------- */ + +struct ImplReplaceCharStrData +{ + sal_uInt16 mnUniChar; + sal_uInt16 maReplaceChars[IMPL_MAX_REPLACECHAR]; +}; + +static ImplReplaceCharStrData const aImplRepCharStrTab[] = +{ + { 0x00A9, { 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */ + { 
0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING-DOUBLE ANGLE QUOTATION MARK */ + { 0x0AE0, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN */ + { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING-DOUBLE ANGLE QUOTATION MARK */ + { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */ + { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */ + { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */ + { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */ + { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */ + { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */ + { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */ + { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */ + { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSES */ + { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPPLE PRIME */ + { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* RESERVED TRIPPLE PRIME */ + { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */ + { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */ + { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */ + { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRD */ + { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE FIFTH */ + { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTH */ + { 0x2157, { 0x0033, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTH */ + { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION FOUR FIFTH */ + { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, 
/* VULGAR FRACTION ONE SIXTH */ + { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTH */ + { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */ + { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTH */ + { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTH */ + { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTH */ + { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */ + { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */ + { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */ + { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */ + { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */ + { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */ + { 0x2168, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT */ + { 0x2169, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE */ + { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */ + { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */ + { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */ + { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */ + { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */ + { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SIX */ + { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */ + { 0x2178, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT */ + { 0x2179, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL NINE */ + { 0x217A, { 0x0078, 0x0069, 0x0000, 
0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */ + { 0x217B, { 0x0058, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE */ +}; + +const sal_uInt16* ImplGetReplaceString( sal_Unicode c ) +{ + sal_uInt16 nLow; + sal_uInt16 nHigh; + sal_uInt16 nMid; + sal_uInt16 nCompareChar; + const ImplReplaceCharStrData* pCharData; + + nLow = 0; + nHigh = (sizeof( aImplRepCharStrTab )/sizeof( ImplReplaceCharStrData ))-1; + do + { + nMid = (nLow+nHigh)/2; + pCharData = aImplRepCharStrTab+nMid; + nCompareChar = pCharData->mnUniChar; + if ( c < nCompareChar ) + { + if ( !nMid ) + break; + nHigh = nMid-1; + } + else + { + if ( c > nCompareChar ) + nLow = nMid+1; + else + return pCharData->maReplaceChars; + } + } + while ( nLow <= nHigh ); + + return 0; +} + +/* ======================================================================= */ + +sal_Size ImplSymbolToUnicode( const ImplTextConverterData*, + void*, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_uChar c; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + while ( pSrcBuf < pEndSrcBuf ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + /* 0-31 (all Control-Character get the same Unicode value) */ + c = (sal_uChar)*pSrcBuf; + if ( c <= 0x1F ) + *pDestBuf = (sal_Unicode)c; + else + *pDestBuf = ((sal_Unicode)c)+0xF000; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, + void*, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, 
sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + sal_Unicode c; + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + while ( pSrcBuf < pEndSrcBuf ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + c = *pSrcBuf; + if ( (c >= 0xF000) && (c <= 0xF0FF) ) + { + *pDestBuf = static_cast< char >(static_cast< unsigned char >(c-0xF000)); + pDestBuf++; + pSrcBuf++; + } + // Normally 0x001F, but in many cases also symbol characters + // are stored in the first 256 bytes, so that we don't change + // these values + else if ( c <= 0x00FF ) + { + *pDestBuf = static_cast< char >(static_cast< unsigned char >(c)); + pDestBuf++; + pSrcBuf++; + } + else + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) + { + /* !!! */ + /* Only ascii characters < 0x1F */ + } + + /* Handle undefined and surrogates characters */ + /* (all surrogates characters are undefined) */ + if (!ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) + break; + } + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* ======================================================================= */ + +sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, + void*, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_uChar c; + sal_Unicode cConv; + const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + while ( pSrcBuf < pEndSrcBuf ) + { + c = (sal_uChar)*pSrcBuf; + if ( c < 0x80 ) + cConv = c; + else + { + if ( (c >= 
pConvertData->mnToUniStart1) && (c <= pConvertData->mnToUniEnd1) ) + cConv = pConvertData->mpToUniTab1[c-pConvertData->mnToUniStart1]; + else if ( (c >= pConvertData->mnToUniStart2) && (c <= pConvertData->mnToUniEnd2) ) + cConv = pConvertData->mpToUniTab2[c-pConvertData->mnToUniStart2]; + else + cConv = 0; + if ( !cConv ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = ImplGetUndefinedUnicodeChar(c, nFlags); + } + } + + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = cConv; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, + void*, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_uChar c; + sal_Unicode cConv; + const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + *pSrcCvtBytes = 0; + return 0; + } + while ( pSrcBuf < pEndSrcBuf ) + { + c = (sal_uChar)*pSrcBuf; + if (c < 0x80) + cConv = c; + else + // c <= 0xFF is implied. 
+ cConv = pConvertData->mpToUniTab1[c - 0x80]; + + *pDestBuf = cConv; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +// Writes 0--2 characters to dest: +static int ImplConvertUnicodeCharToChar( + const ImplByteConvertData* pConvertData, sal_Unicode c, char * dest ) +{ + const ImplUniCharTabData* pToCharExTab; + + if ( c < 0x80 ) + { + dest[0] = static_cast< char >(c); + return 1; + } + if ( (c >= pConvertData->mnToCharStart1) && (c <= pConvertData->mnToCharEnd1) ) + { + dest[0] = static_cast< char >(pConvertData->mpToCharTab1[c-pConvertData->mnToCharStart1]); + if ( dest[0] != 0 ) + return 1; + } + else if ( (c >= pConvertData->mnToCharStart2) && (c <= pConvertData->mnToCharEnd2) ) + { + dest[0] = static_cast< char >(pConvertData->mpToCharTab2[c-pConvertData->mnToCharStart2]); + if ( dest[0] != 0 ) + return 1; + } + pToCharExTab = pConvertData->mpToCharExTab; + if ( pToCharExTab ) + { + sal_uInt16 nLow; + sal_uInt16 nHigh; + sal_uInt16 nMid; + sal_uInt16 nCompareChar; + const ImplUniCharTabData* pCharExData; + + nLow = 0; + nHigh = pConvertData->mnToCharExCount-1; + do + { + nMid = (nLow+nHigh)/2; + pCharExData = pToCharExTab+nMid; + nCompareChar = pCharExData->mnUniChar; + if ( c < nCompareChar ) + { + if ( !nMid ) + break; + nHigh = nMid-1; + } + else + { + if ( c > nCompareChar ) + nLow = nMid+1; + else + { + dest[0] = static_cast< char >(pCharExData->mnChar); + if ( pCharExData->mnChar2 == 0 ) + return 1; + else + { + dest[1] = static_cast< char >(pCharExData->mnChar2); + return 2; + } + } + } + } + while ( nLow <= nHigh ); + } + return 0; +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, + void*, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + 
sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + sal_Unicode c; + const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + int i; + int n; + sal_uInt16 cTemp; + char aTempBuf[IMPL_MAX_REPLACECHAR+2]; + const sal_uInt16* pReplace; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + while ( pSrcBuf < pEndSrcBuf ) + { + c = *pSrcBuf; + if ( c < 0x80 ) + { + aTempBuf[0] = static_cast< char >(c); + n = 1; + } + else + { + n = ImplConvertUnicodeCharToChar( pConvertData, c, aTempBuf ); + + if ( n == 0 ) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) + { + cTemp = ImplGetReplaceChar( c ); + if ( cTemp ) + n = ImplConvertUnicodeCharToChar( + pConvertData, cTemp, aTempBuf ); + } + + if ( n == 0 ) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) + { + pReplace = ImplGetReplaceString( c ); + if ( pReplace ) + { + while ( *pReplace && (n < IMPL_MAX_REPLACECHAR) ) + { + i = ImplConvertUnicodeCharToChar( + pConvertData, *pReplace, aTempBuf + n ); + if ( i == 0 ) + { + n = 0; + break; + } + pReplace++; + n += i; + } + } + } + + /* Handle undefined and surrogates characters */ + /* (all surrogates characters are undefined) */ + if ( n == 0 ) + { + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) + continue; + else + break; + } + } + } + } + + if ( pEndDestBuf - pDestBuf < n ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + for ( i = 0; i < n; ++i ) + *pDestBuf++ = aTempBuf[i]; + pSrcBuf++; + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtmb.c b/sal/textenc/tcvtmb.c deleted file mode 100644 index e0936979eb19..000000000000 --- 
a/sal/textenc/tcvtmb.c +++ /dev/null @@ -1,695 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/textcvt.h" - -/* ======================================================================= */ - -/* DBCS to Unicode conversion routine use a lead table for the first byte, */ -/* where we determine the trail table or for single byte chars the unicode */ -/* value. We have for all lead byte a separate table, because we can */ -/* then share many tables for diffrent charset encodings. 
*/ - -/* ======================================================================= */ - -sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_uChar cLead; - sal_uChar cTrail; - sal_Unicode cConv; - const ImplDBCSToUniLeadTab* pLeadEntry; - const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; - const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - while ( pSrcBuf < pEndSrcBuf ) - { - cLead = (sal_uChar)*pSrcBuf; - - /* get entry for the lead byte */ - pLeadEntry = pLeadTab+cLead; - - /* SingleByte char? */ - if (pLeadEntry->mpToUniTrailTab == NULL - || cLead < pConvertData->mnLeadStart - || cLead > pConvertData->mnLeadEnd) - { - cConv = pLeadEntry->mnUniChar; - if ( !cConv && (cLead != 0) ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags); - } - } - else - { - /* Source buffer to small */ - if ( pSrcBuf +1 == pEndSrcBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - break; - } - - pSrcBuf++; - cTrail = (sal_uChar)*pSrcBuf; - if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) - cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; - else - cConv = 0; - - if ( !cConv ) - { - /* EUDC Ranges */ - sal_uInt16 i; - const 
ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab; - for ( i = 0; i < pConvertData->mnEUDCCount; i++ ) - { - if ( (cLead >= pEUDCTab->mnLeadStart) && - (cLead <= pEUDCTab->mnLeadEnd) ) - { - if ( (cTrail >= pEUDCTab->mnTrail1Start) && - (cTrail <= pEUDCTab->mnTrail1End) ) - { - cConv = pEUDCTab->mnUniStart+ - ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ - (cTrail-pEUDCTab->mnTrail1Start); - break; - } - else - { - sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1; - if ( (pEUDCTab->mnTrailCount >= 2) && - (cTrail >= pEUDCTab->mnTrail2Start) && - (cTrail <= pEUDCTab->mnTrail2End) ) - { - cConv = pEUDCTab->mnUniStart+ - ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ - nTrailCount+ - (cTrail-pEUDCTab->mnTrail2Start); - break; - } - else - { - nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1; - if ( (pEUDCTab->mnTrailCount >= 3) && - (cTrail >= pEUDCTab->mnTrail3Start) && - (cTrail <= pEUDCTab->mnTrail3End) ) - { - cConv = pEUDCTab->mnUniStart+ - ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ - nTrailCount+ - (cTrail-pEUDCTab->mnTrail3Start); - break; - } - } - } - } - - pEUDCTab++; - } - - if ( !cConv ) - { - /* Wir vergleichen den kompletten Trailbereich den wir */ - /* definieren, der normalerweise groesser sein kann als */ - /* der definierte. Dies machen wir, damit Erweiterungen von */ - /* uns nicht beruecksichtigten Encodings so weit wie */ - /* moeglich auch richtig zu behandeln, das double byte */ - /* characters auch als ein einzelner Character behandelt */ - /* wird. 
*/ - if (cLead < pConvertData->mnLeadStart - || cLead > pConvertData->mnLeadEnd - || cTrail < pConvertData->mnTrailStart - || cTrail > pConvertData->mnTrailEnd) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - else - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - } - } - } - - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = cConv; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - sal_uInt16 cConv; - sal_Unicode c; - sal_uChar nHighChar; - sal_uChar nLowChar; - const ImplUniToDBCSHighTab* pHighEntry; - const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; - const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab; - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - - sal_Bool bCheckRange = 
(pConvertData->mnLeadStart != 0 - || pConvertData->mnLeadEnd != 0xFF); - /* this statement has the effect that this extra check is only done for - EUC-KR, which uses the MS-949 tables, but does not support the full - range of MS-949 */ - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - while ( pSrcBuf < pEndSrcBuf ) - { - c = *pSrcBuf; - nHighChar = (sal_uChar)((c >> 8) & 0xFF); - nLowChar = (sal_uChar)(c & 0xFF); - - /* get entry for the high byte */ - pHighEntry = pHighTab+nHighChar; - - /* is low byte in the table range */ - if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) - { - cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; - if (bCheckRange && cConv > 0x7F - && ((cConv >> 8) < pConvertData->mnLeadStart - || (cConv >> 8) > pConvertData->mnLeadEnd - || (cConv & 0xFF) < pConvertData->mnTrailStart - || (cConv & 0xFF) > pConvertData->mnTrailEnd)) - cConv = 0; - } - else - cConv = 0; - - if (cConv == 0 && c != 0) - { - /* Map to EUDC ranges: */ - ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab; - sal_uInt32 i; - for (i = 0; i < pConvertData->mnEUDCCount; ++i) - { - if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd) - { - sal_uInt32 nIndex = c - pEUDCTab->mnUniStart; - sal_uInt32 nLeadOff - = nIndex / pEUDCTab->mnTrailRangeCount; - sal_uInt32 nTrailOff - = nIndex % pEUDCTab->mnTrailRangeCount; - sal_uInt32 nSize; - cConv = (sal_uInt16) - ((pEUDCTab->mnLeadStart + nLeadOff) << 8); - nSize - = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1; - if (nTrailOff < nSize) - { - cConv |= pEUDCTab->mnTrail1Start + nTrailOff; - break; - } - nTrailOff -= nSize; - nSize - = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1; - if (nTrailOff < nSize) - { - cConv |= pEUDCTab->mnTrail2Start + nTrailOff; - break; - } - nTrailOff -= nSize; - cConv |= pEUDCTab->mnTrail3Start + nTrailOff; - break; - } - pEUDCTab++; - } - - /* FIXME - * 
SB: Not sure why this is in here. Plus, it does not work as - * intended when (c & 0xFF) == 0, because the next !cConv check - * will then think c has not yet been converted... - */ - if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START - && c <= RTL_TEXTCVT_BYTE_PRIVATE_END) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) - cConv = (sal_Char)(sal_uChar)(c & 0xFF); - } - } - - if ( !cConv ) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) - { - /* !!! */ - } - - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) - { - /* !!! */ - } - - /* Handle undefined and surrogates characters */ - /* (all surrogates characters are undefined) */ - if (ImplHandleUndefinedUnicodeToTextChar(pData, - &pSrcBuf, - pEndSrcBuf, - &pDestBuf, - pEndDestBuf, - nFlags, - pInfo)) - continue; - else - break; - } - - /* SingleByte */ - if ( !(cConv & 0xFF00) ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); - pDestBuf++; - } - else - { - if ( pDestBuf+1 >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); - pDestBuf++; - *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); - pDestBuf++; - } - - pSrcBuf++; - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* ======================================================================= */ - -#define JIS_EUC_LEAD_OFF 0x80 -#define JIS_EUC_TRAIL_OFF 0x80 - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_uChar c; - sal_uChar 
cLead = '\0'; - sal_uChar cTrail = '\0'; - sal_Unicode cConv; - const ImplDBCSToUniLeadTab* pLeadEntry; - const ImplDBCSToUniLeadTab* pLeadTab; - const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - while ( pSrcBuf < pEndSrcBuf ) - { - c = (sal_uChar)*pSrcBuf; - - /* ASCII */ - if ( c <= 0x7F ) - cConv = c; - else - { - /* SS2 - Half-width katakana */ - /* 8E + A1-DF */ - if ( c == 0x8E ) - { - /* Source buffer to small */ - if ( pSrcBuf + 1 == pEndSrcBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - break; - } - - pSrcBuf++; - c = (sal_uChar)*pSrcBuf; - if ( (c >= 0xA1) && (c <= 0xDF) ) - cConv = 0xFF61+(c-0xA1); - else - { - cConv = 0; - cLead = 0x8E; - cTrail = c; - } - } - else - { - /* SS3 - JIS 0212-1990 */ - /* 8F + A1-FE + A1-FE */ - if ( c == 0x8F ) - { - /* Source buffer to small */ - if (pEndSrcBuf - pSrcBuf < 3) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - break; - } - - pSrcBuf++; - cLead = (sal_uChar)*pSrcBuf; - pSrcBuf++; - cTrail = (sal_uChar)*pSrcBuf; - pLeadTab = pConvertData->mpJIS0212ToUniLeadTab; - } - /* CodeSet 2 JIS 0208-1997 */ - /* A1-FE + A1-FE */ - else - { - /* Source buffer to small */ - if ( pSrcBuf + 1 == pEndSrcBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - break; - } - - cLead = c; - pSrcBuf++; - cTrail = (sal_uChar)*pSrcBuf; - pLeadTab = pConvertData->mpJIS0208ToUniLeadTab; - } - - /* Undefined Range */ - if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) - cConv = 0; - else - { - cLead -= JIS_EUC_LEAD_OFF; - cTrail -= JIS_EUC_TRAIL_OFF; - pLeadEntry = pLeadTab+cLead; - if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) - cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; - else - cConv = 0; - } - } - - if ( !cConv ) 
- { - /* Wir vergleichen den kompletten Trailbereich den wir */ - /* definieren, der normalerweise groesser sein kann als */ - /* der definierte. Dies machen wir, damit Erweiterungen von */ - /* uns nicht beruecksichtigten Encodings so weit wie */ - /* moeglich auch richtig zu behandeln, das double byte */ - /* characters auch als ein einzelner Character behandelt */ - /* wird. */ - if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - else - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) - { - pSrcBuf++; - continue; - } - else - cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - } - } - - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = cConv; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, - void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - sal_uInt32 cConv; - sal_Unicode c; - sal_uChar nHighChar; - sal_uChar 
nLowChar; - const ImplUniToDBCSHighTab* pHighEntry; - const ImplUniToDBCSHighTab* pHighTab; - const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - - (void) pContext; /* unused */ - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - while ( pSrcBuf < pEndSrcBuf ) - { - c = *pSrcBuf; - - /* ASCII */ - if ( c <= 0x7F ) - cConv = c; - /* Half-width katakana */ - else if ( (c >= 0xFF61) && (c <= 0xFF9F) ) - cConv = 0x8E00+0xA1+(c-0xFF61); - else - { - nHighChar = (sal_uChar)((c >> 8) & 0xFF); - nLowChar = (sal_uChar)(c & 0xFF); - - /* JIS 0208 */ - pHighTab = pConvertData->mpUniToJIS0208HighTab; - pHighEntry = pHighTab+nHighChar; - if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) - { - cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; - if (cConv != 0) - cConv |= 0x8080; - } - else - cConv = 0; - - /* JIS 0212 */ - if ( !cConv ) - { - pHighTab = pConvertData->mpUniToJIS0212HighTab; - pHighEntry = pHighTab+nHighChar; - if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) - { - cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; - if (cConv != 0) - cConv |= 0x8F8080; - } - - if ( !cConv ) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) - { - /* !!! */ - } - - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) - { - /* !!! 
*/ - } - - /* Handle undefined and surrogates characters */ - /* (all surrogates characters are undefined) */ - if (ImplHandleUndefinedUnicodeToTextChar(pData, - &pSrcBuf, - pEndSrcBuf, - &pDestBuf, - pEndDestBuf, - nFlags, - pInfo)) - continue; - else - break; - } - } - } - - /* SingleByte */ - if ( !(cConv & 0xFFFF00) ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); - pDestBuf++; - } - /* DoubleByte */ - else if ( !(cConv & 0xFF0000) ) - { - if ( pDestBuf+1 >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); - pDestBuf++; - *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); - pDestBuf++; - } - else - { - if ( pDestBuf+2 >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF); - pDestBuf++; - *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); - pDestBuf++; - *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); - pDestBuf++; - } - - pSrcBuf++; - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtmb.cxx b/sal/textenc/tcvtmb.cxx new file mode 100644 index 000000000000..fcaba0914ec2 --- /dev/null +++ b/sal/textenc/tcvtmb.cxx @@ -0,0 +1,690 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. 
+ * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" + +#include "tenchelp.hxx" +#include "unichars.hxx" + +/* ======================================================================= */ + +/* DBCS to Unicode conversion routine use a lead table for the first byte, */ +/* where we determine the trail table or for single byte chars the unicode */ +/* value. We have for all lead byte a separate table, because we can */ +/* then share many tables for diffrent charset encodings. 
*/ + +/* ======================================================================= */ + +sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void*, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_uChar cLead; + sal_uChar cTrail; + sal_Unicode cConv; + const ImplDBCSToUniLeadTab* pLeadEntry; + const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; + const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + while ( pSrcBuf < pEndSrcBuf ) + { + cLead = (sal_uChar)*pSrcBuf; + + /* get entry for the lead byte */ + pLeadEntry = pLeadTab+cLead; + + /* SingleByte char? */ + if (pLeadEntry->mpToUniTrailTab == NULL + || cLead < pConvertData->mnLeadStart + || cLead > pConvertData->mnLeadEnd) + { + cConv = pLeadEntry->mnUniChar; + if ( !cConv && (cLead != 0) ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags); + } + } + else + { + /* Source buffer to small */ + if ( pSrcBuf +1 == pEndSrcBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + break; + } + + pSrcBuf++; + cTrail = (sal_uChar)*pSrcBuf; + if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) + cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; + else + cConv = 0; + + if ( !cConv ) + { + /* EUDC Ranges */ + sal_uInt16 i; + const ImplDBCSEUDCData* pEUDCTab = 
pConvertData->mpEUDCTab; + for ( i = 0; i < pConvertData->mnEUDCCount; i++ ) + { + if ( (cLead >= pEUDCTab->mnLeadStart) && + (cLead <= pEUDCTab->mnLeadEnd) ) + { + if ( (cTrail >= pEUDCTab->mnTrail1Start) && + (cTrail <= pEUDCTab->mnTrail1End) ) + { + cConv = pEUDCTab->mnUniStart+ + ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ + (cTrail-pEUDCTab->mnTrail1Start); + break; + } + else + { + sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1; + if ( (pEUDCTab->mnTrailCount >= 2) && + (cTrail >= pEUDCTab->mnTrail2Start) && + (cTrail <= pEUDCTab->mnTrail2End) ) + { + cConv = pEUDCTab->mnUniStart+ + ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ + nTrailCount+ + (cTrail-pEUDCTab->mnTrail2Start); + break; + } + else + { + nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1; + if ( (pEUDCTab->mnTrailCount >= 3) && + (cTrail >= pEUDCTab->mnTrail3Start) && + (cTrail <= pEUDCTab->mnTrail3End) ) + { + cConv = pEUDCTab->mnUniStart+ + ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ + nTrailCount+ + (cTrail-pEUDCTab->mnTrail3Start); + break; + } + } + } + } + + pEUDCTab++; + } + + if ( !cConv ) + { + /* Wir vergleichen den kompletten Trailbereich den wir */ + /* definieren, der normalerweise groesser sein kann als */ + /* der definierte. Dies machen wir, damit Erweiterungen von */ + /* uns nicht beruecksichtigten Encodings so weit wie */ + /* moeglich auch richtig zu behandeln, das double byte */ + /* characters auch als ein einzelner Character behandelt */ + /* wird. 
*/ + if (cLead < pConvertData->mnLeadStart + || cLead > pConvertData->mnLeadEnd + || cTrail < pConvertData->mnTrailStart + || cTrail > pConvertData->mnTrailEnd) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + else + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + } + } + } + + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = cConv; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void*, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + sal_uInt16 cConv; + sal_Unicode c; + sal_uChar nHighChar; + sal_uChar nLowChar; + const ImplUniToDBCSHighTab* pHighEntry; + const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; + const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab; + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + + bool bCheckRange = + 
pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF; + /* this statement has the effect that this extra check is only done for + EUC-KR, which uses the MS-949 tables, but does not support the full + range of MS-949 */ + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + while ( pSrcBuf < pEndSrcBuf ) + { + c = *pSrcBuf; + nHighChar = (sal_uChar)((c >> 8) & 0xFF); + nLowChar = (sal_uChar)(c & 0xFF); + + /* get entry for the high byte */ + pHighEntry = pHighTab+nHighChar; + + /* is low byte in the table range */ + if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) + { + cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; + if (bCheckRange && cConv > 0x7F + && ((cConv >> 8) < pConvertData->mnLeadStart + || (cConv >> 8) > pConvertData->mnLeadEnd + || (cConv & 0xFF) < pConvertData->mnTrailStart + || (cConv & 0xFF) > pConvertData->mnTrailEnd)) + cConv = 0; + } + else + cConv = 0; + + if (cConv == 0 && c != 0) + { + /* Map to EUDC ranges: */ + ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab; + sal_uInt32 i; + for (i = 0; i < pConvertData->mnEUDCCount; ++i) + { + if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd) + { + sal_uInt32 nIndex = c - pEUDCTab->mnUniStart; + sal_uInt32 nLeadOff + = nIndex / pEUDCTab->mnTrailRangeCount; + sal_uInt32 nTrailOff + = nIndex % pEUDCTab->mnTrailRangeCount; + sal_uInt32 nSize; + cConv = (sal_uInt16) + ((pEUDCTab->mnLeadStart + nLeadOff) << 8); + nSize + = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1; + if (nTrailOff < nSize) + { + cConv |= pEUDCTab->mnTrail1Start + nTrailOff; + break; + } + nTrailOff -= nSize; + nSize + = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1; + if (nTrailOff < nSize) + { + cConv |= pEUDCTab->mnTrail2Start + nTrailOff; + break; + } + nTrailOff -= nSize; + cConv |= pEUDCTab->mnTrail3Start + nTrailOff; + break; + } + pEUDCTab++; + } + + /* FIXME + * SB: Not sure why this is in here. 
Plus, it does not work as + * intended when (c & 0xFF) == 0, because the next !cConv check + * will then think c has not yet been converted... + */ + if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START + && c <= RTL_TEXTCVT_BYTE_PRIVATE_END) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) + cConv = static_cast< char >(static_cast< unsigned char >(c & 0xFF)); + } + } + + if ( !cConv ) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) + { + /* !!! */ + } + + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) + { + /* !!! */ + } + + /* Handle undefined and surrogates characters */ + /* (all surrogates characters are undefined) */ + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) + continue; + else + break; + } + + /* SingleByte */ + if ( !(cConv & 0xFF00) ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF)); + pDestBuf++; + } + else + { + if ( pDestBuf+1 >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF)); + pDestBuf++; + *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF)); + pDestBuf++; + } + + pSrcBuf++; + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* ======================================================================= */ + +#define JIS_EUC_LEAD_OFF 0x80 +#define JIS_EUC_TRAIL_OFF 0x80 + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, + void*, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* 
pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_uChar c; + sal_uChar cLead = '\0'; + sal_uChar cTrail = '\0'; + sal_Unicode cConv; + const ImplDBCSToUniLeadTab* pLeadEntry; + const ImplDBCSToUniLeadTab* pLeadTab; + const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + while ( pSrcBuf < pEndSrcBuf ) + { + c = (sal_uChar)*pSrcBuf; + + /* ASCII */ + if ( c <= 0x7F ) + cConv = c; + else + { + /* SS2 - Half-width katakana */ + /* 8E + A1-DF */ + if ( c == 0x8E ) + { + /* Source buffer to small */ + if ( pSrcBuf + 1 == pEndSrcBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + break; + } + + pSrcBuf++; + c = (sal_uChar)*pSrcBuf; + if ( (c >= 0xA1) && (c <= 0xDF) ) + cConv = 0xFF61+(c-0xA1); + else + { + cConv = 0; + cLead = 0x8E; + cTrail = c; + } + } + else + { + /* SS3 - JIS 0212-1990 */ + /* 8F + A1-FE + A1-FE */ + if ( c == 0x8F ) + { + /* Source buffer to small */ + if (pEndSrcBuf - pSrcBuf < 3) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + break; + } + + pSrcBuf++; + cLead = (sal_uChar)*pSrcBuf; + pSrcBuf++; + cTrail = (sal_uChar)*pSrcBuf; + pLeadTab = pConvertData->mpJIS0212ToUniLeadTab; + } + /* CodeSet 2 JIS 0208-1997 */ + /* A1-FE + A1-FE */ + else + { + /* Source buffer to small */ + if ( pSrcBuf + 1 == pEndSrcBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + break; + } + + cLead = c; + pSrcBuf++; + cTrail = (sal_uChar)*pSrcBuf; + pLeadTab = pConvertData->mpJIS0208ToUniLeadTab; + } + + /* Undefined Range */ + if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) + cConv = 0; + else + { + cLead -= JIS_EUC_LEAD_OFF; + cTrail -= JIS_EUC_TRAIL_OFF; + pLeadEntry = pLeadTab+cLead; + if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) + cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; + else + cConv = 0; 
+ } + } + + if ( !cConv ) + { + /* Wir vergleichen den kompletten Trailbereich den wir */ + /* definieren, der normalerweise groesser sein kann als */ + /* der definierte. Dies machen wir, damit Erweiterungen von */ + /* uns nicht beruecksichtigten Encodings so weit wie */ + /* moeglich auch richtig zu behandeln, das double byte */ + /* characters auch als ein einzelner Character behandelt */ + /* wird. */ + if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + else + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) + { + pSrcBuf++; + continue; + } + else + cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + } + } + + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = cConv; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, + void*, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + sal_uInt32 cConv; + sal_Unicode c; + sal_uChar nHighChar; + 
sal_uChar nLowChar; + const ImplUniToDBCSHighTab* pHighEntry; + const ImplUniToDBCSHighTab* pHighTab; + const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + while ( pSrcBuf < pEndSrcBuf ) + { + c = *pSrcBuf; + + /* ASCII */ + if ( c <= 0x7F ) + cConv = c; + /* Half-width katakana */ + else if ( (c >= 0xFF61) && (c <= 0xFF9F) ) + cConv = 0x8E00+0xA1+(c-0xFF61); + else + { + nHighChar = (sal_uChar)((c >> 8) & 0xFF); + nLowChar = (sal_uChar)(c & 0xFF); + + /* JIS 0208 */ + pHighTab = pConvertData->mpUniToJIS0208HighTab; + pHighEntry = pHighTab+nHighChar; + if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) + { + cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; + if (cConv != 0) + cConv |= 0x8080; + } + else + cConv = 0; + + /* JIS 0212 */ + if ( !cConv ) + { + pHighTab = pConvertData->mpUniToJIS0212HighTab; + pHighEntry = pHighTab+nHighChar; + if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) + { + cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; + if (cConv != 0) + cConv |= 0x8F8080; + } + + if ( !cConv ) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) + { + /* !!! */ + } + + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) + { + /* !!! 
*/ + } + + /* Handle undefined and surrogates characters */ + /* (all surrogates characters are undefined) */ + if (ImplHandleUndefinedUnicodeToTextChar(pData, + &pSrcBuf, + pEndSrcBuf, + &pDestBuf, + pEndDestBuf, + nFlags, + pInfo)) + continue; + else + break; + } + } + } + + /* SingleByte */ + if ( !(cConv & 0xFFFF00) ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF)); + pDestBuf++; + } + /* DoubleByte */ + else if ( !(cConv & 0xFF0000) ) + { + if ( pDestBuf+1 >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF)); + pDestBuf++; + *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF)); + pDestBuf++; + } + else + { + if ( pDestBuf+2 >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 16) & 0xFF)); + pDestBuf++; + *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF)); + pDestBuf++; + *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF)); + pDestBuf++; + } + + pSrcBuf++; + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtutf7.c b/sal/textenc/tcvtutf7.c deleted file mode 100644 index 8093a866f0c4..000000000000 --- a/sal/textenc/tcvtutf7.c +++ /dev/null @@ -1,591 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "tenchelp.h" -#include "unichars.h" - -#ifndef _RTL_ALLOC_H -#include "rtl/alloc.h" -#endif -#include "rtl/textcvt.h" - -/* ======================================================================= */ - -static sal_uChar const aImplBase64Tab[64] = -{ - /* A-Z */ - 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5A, - /* a-z */ - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, - /* 0-9,+,/ */ - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x2B, 0x2F -}; - -/* Index in Base64Tab or 0xFF, when is a invalid character */ -static sal_uChar const aImplBase64IndexTab[128] = -{ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 
0x10-0x17 */ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */ - 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */ - 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */ - 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */ - 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */ - 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */ - 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */ - 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */ - 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */ - 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */ - 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */ - 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */ -}; - -static sal_uChar const aImplMustShiftTab[128] = -{ - 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */ - 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */ - 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */ - 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */ - 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */ - 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */ - 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? 
*/ - 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */ - 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */ - 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */ - 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */ -}; - -/* + */ -#define IMPL_SHIFT_IN_CHAR 0x2B -/* - */ -#define IMPL_SHIFT_OUT_CHAR 0x2D - -/* ----------------------------------------------------------------------- */ - -typedef struct -{ - int mbShifted; - int mbFirst; - int mbWroteOne; - sal_uInt32 mnBitBuffer; - sal_uInt32 mnBufferBits; -} ImplUTF7ToUCContextData; - -/* ----------------------------------------------------------------------- */ - -void* ImplUTF7CreateUTF7TextToUnicodeContext( void ) -{ - ImplUTF7ToUCContextData* pContextData; - pContextData = (ImplUTF7ToUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7ToUCContextData ) ); - pContextData->mbShifted = sal_False; - pContextData->mbFirst = sal_False; - pContextData->mbWroteOne = sal_False; - pContextData->mnBitBuffer = 0; - pContextData->mnBufferBits = 0; - return (void*)pContextData; -} - -/* ----------------------------------------------------------------------- */ - -void ImplUTF7DestroyTextToUnicodeContext( void* pContext ) -{ - rtl_freeMemory( pContext ); -} - -/* ----------------------------------------------------------------------- */ - -void ImplUTF7ResetTextToUnicodeContext( void* pContext ) -{ - ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; - pContextData->mbShifted = sal_False; - pContextData->mbFirst = sal_False; - pContextData->mbWroteOne = sal_False; - pContextData->mnBitBuffer = 0; - pContextData->mnBufferBits = 0; -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* 
pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; - sal_uChar c ='\0'; - sal_uChar nBase64Value = 0; - int bEnd = sal_False; - int bShifted; - int bFirst; - int bWroteOne; - int bBase64End; - sal_uInt32 nBitBuffer; - sal_uInt32 nBitBufferTemp; - sal_uInt32 nBufferBits; - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - (void) pData; /* unused */ - -/* !!! Implementation not finnished !!! - if ( pContextData ) - { - bShifted = pContextData->mbShifted; - bFirst = pContextData->mbFirst; - bWroteOne = pContextData->mbWroteOne; - nBitBuffer = pContextData->mnBitBuffer; - nBufferBits = pContextData->mnBufferBits; - } - else -*/ - { - bShifted = sal_False; - bFirst = sal_False; - bWroteOne = sal_False; - nBitBuffer = 0; - nBufferBits = 0; - } - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - do - { - if ( pSrcBuf < pEndSrcBuf ) - { - c = (sal_uChar)*pSrcBuf; - - /* End, when not a base64 character */ - bBase64End = sal_False; - if ( c <= 0x7F ) - { - nBase64Value = aImplBase64IndexTab[c]; - if ( nBase64Value == 0xFF ) - bBase64End = sal_True; - } - } - else - { - bEnd = sal_True; - bBase64End = sal_True; - } - - if ( bShifted ) - { - if ( bBase64End ) - { - bShifted = sal_False; - - /* If the character causing us to drop out was SHIFT_IN */ - /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */ - /* The test for SHIFT_IN is not necessary, but allows */ - /* an alternate form of UTF-7 where SHIFT_IN is escaped */ - /* by SHIFT_IN. This only works for some values of */ - /* SHIFT_IN. It is so implemented, because this comes */ - /* from the officel unicode book (The Unicode Standard, */ - /* Version 2.0) and so I think, that someone of the */ - /* world has used this feature. 
*/ - if ( !bEnd ) - { - if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) ) - { - /* If no base64 character, and the terminating */ - /* character of the shift sequence was the */ - /* SHIFT_OUT_CHAR, then it't a special escape */ - /* for SHIFT_IN_CHAR. */ - if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = IMPL_SHIFT_IN_CHAR; - pDestBuf++; - bWroteOne = sal_True; - } - - /* Skip character */ - pSrcBuf++; - if ( pSrcBuf < pEndSrcBuf ) - c = (sal_uChar)*pSrcBuf; - else - bEnd = sal_True; - } - } - - /* Empty sequence not allowed, so when we don't write one */ - /* valid char, then the sequence is corrupt */ - if ( !bWroteOne ) - { - /* When no more bytes in the source buffer, then */ - /* this buffer may be to small */ - if ( bEnd ) - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - /* We insert here no default char, because I think */ - /* this is better to ignore this */ - } - } - } - else - { - /* Add 6 Bits from character to the bit buffer */ - nBufferBits += 6; - nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits); - bFirst = sal_False; - } - - /* Extract as many full 16 bit characters as possible from the */ - /* bit buffer. 
*/ - while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) ) - { - nBitBufferTemp = nBitBuffer >> (32-16); - *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF); - pDestBuf++; - nBitBuffer <<= 16; - nBufferBits -= 16; - bWroteOne = sal_True; - } - - if ( nBufferBits >= 16 ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if ( bBase64End ) - { - /* Sequence ended and we have some bits, then the */ - /* sequence is corrupted */ - if ( nBufferBits && nBitBuffer ) - { - /* When no more bytes in the source buffer, then */ - /* this buffer may be to small */ - if ( bEnd ) - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf++ - = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - } - - } - - nBitBuffer = 0; - nBufferBits = 0; - } - } - - if ( !bEnd ) - { - if ( !bShifted ) - { - if ( c == IMPL_SHIFT_IN_CHAR ) - { - bShifted = sal_True; - bFirst = sal_True; - bWroteOne = sal_False; - } - else - { - /* No direct encoded charcater, then the buffer is */ - /* corrupt */ - if ( c > 0x7F ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; - if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; - break; - } - else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - 
*pDestBuf++ - = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; - } - } - - /* Write char to unicode buffer */ - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = c; - pDestBuf++; - } - } - - pSrcBuf++; - } - } - while ( !bEnd ); - - if ( pContextData ) - { - pContextData->mbShifted = bShifted; - pContextData->mbFirst = bFirst; - pContextData->mbWroteOne = bWroteOne; - pContextData->mnBitBuffer = nBitBuffer; - pContextData->mnBufferBits = nBufferBits; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars - (pEndDestBuf-pDestBuf)); -} - -/* ======================================================================= */ - -typedef struct -{ - int mbShifted; - sal_uInt32 mnBitBuffer; - sal_uInt32 mnBufferBits; -} ImplUTF7FromUCContextData; - -/* ----------------------------------------------------------------------- */ - -void* ImplUTF7CreateUnicodeToTextContext( void ) -{ - ImplUTF7FromUCContextData* pContextData; - pContextData = (ImplUTF7FromUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7FromUCContextData ) ); - pContextData->mbShifted = sal_False; - pContextData->mnBitBuffer = 0; - pContextData->mnBufferBits = 0; - return (void*)pContextData; -} - -/* ----------------------------------------------------------------------- */ - -void ImplUTF7DestroyUnicodeToTextContext( void* pContext ) -{ - rtl_freeMemory( pContext ); -} - -/* ----------------------------------------------------------------------- */ - -void ImplUTF7ResetUnicodeToTextContext( void* pContext ) -{ - ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; - pContextData->mbShifted = sal_False; - pContextData->mnBitBuffer = 0; - pContextData->mnBufferBits = 0; -} - -/* ----------------------------------------------------------------------- */ - -sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size 
nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; - sal_Unicode c = '\0'; - int bEnd = sal_False; - int bShifted; - int bNeedShift; - sal_uInt32 nBitBuffer; - sal_uInt32 nBitBufferTemp; - sal_uInt32 nBufferBits; - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - - (void) pData; /* unused */ - (void) nFlags; /* unused */ - -/* !!! Implementation not finnished !!! - if ( pContextData ) - { - bShifted = pContextData->mbShifted; - nBitBuffer = pContextData->mnBitBuffer; - nBufferBits = pContextData->mnBufferBits; - } - else -*/ - { - bShifted = sal_False; - nBitBuffer = 0; - nBufferBits = 0; - } - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - do - { - if ( pSrcBuf < pEndSrcBuf ) - { - c = *pSrcBuf; - - bNeedShift = (c > 0x7F) || aImplMustShiftTab[c]; - if ( bNeedShift && !bShifted ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = IMPL_SHIFT_IN_CHAR; - pDestBuf++; - /* Special case handling for SHIFT_IN_CHAR */ - if ( c == IMPL_SHIFT_IN_CHAR ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = IMPL_SHIFT_OUT_CHAR; - pDestBuf++; - } - else - bShifted = sal_True; - } - } - else - { - bEnd = sal_True; - bNeedShift = sal_False; - } - - if ( bShifted ) - { - /* Write the character to the bit buffer, or pad the bit */ - /* buffer out to a full base64 character */ - if ( bNeedShift ) - { - nBufferBits += 16; - nBitBuffer |= ((sal_uInt32)c) << (32-nBufferBits); - } - else - nBufferBits += (6-(nBufferBits%6))%6; - - /* Flush out as many full base64 characters as possible */ - while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) ) - { - nBitBufferTemp = nBitBuffer >> (32-6); - *pDestBuf = aImplBase64Tab[nBitBufferTemp]; - pDestBuf++; 
- nBitBuffer <<= 6; - nBufferBits -= 6; - } - - if ( nBufferBits >= 6 ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - /* Write SHIFT_OUT_CHAR, when needed */ - if ( !bNeedShift ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = IMPL_SHIFT_OUT_CHAR; - pDestBuf++; - bShifted = sal_False; - } - } - - if ( !bEnd ) - { - /* Character can be directly endcoded */ - if ( !bNeedShift ) - { - if ( pDestBuf >= pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - *pDestBuf = (sal_Char)(sal_uChar)c; - pDestBuf++; - } - - pSrcBuf++; - } - } - while ( !bEnd ); - - if ( pContextData ) - { - pContextData->mbShifted = bShifted; - pContextData->mnBitBuffer = nBitBuffer; - pContextData->mnBufferBits = nBufferBits; - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtutf7.cxx b/sal/textenc/tcvtutf7.cxx new file mode 100644 index 000000000000..c01e20240eeb --- /dev/null +++ b/sal/textenc/tcvtutf7.cxx @@ -0,0 +1,583 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" + +#include "tenchelp.hxx" +#include "unichars.hxx" + +/* ======================================================================= */ + +static sal_uChar const aImplBase64Tab[64] = +{ + /* A-Z */ + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, + /* a-z */ + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, + /* 0-9,+,/ */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x2B, 0x2F +}; + +/* Index in Base64Tab or 0xFF, when is a invalid character */ +static sal_uChar const aImplBase64IndexTab[128] = +{ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */ + 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */ + 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */ + 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? 
*/ + 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */ + 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */ + 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */ + 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */ + 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */ + 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */ + 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */ + 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */ +}; + +static sal_uChar const aImplMustShiftTab[128] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */ + 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */ + 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */ + 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */ + 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */ + 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */ + 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? */ + 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */ + 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */ + 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */ + 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */ +}; + +/* + */ +#define IMPL_SHIFT_IN_CHAR 0x2B +/* - */ +#define IMPL_SHIFT_OUT_CHAR 0x2D + +/* ----------------------------------------------------------------------- */ + +struct ImplUTF7ToUCContextData +{ + int mbShifted; + int mbFirst; + int mbWroteOne; + sal_uInt32 mnBitBuffer; + sal_uInt32 mnBufferBits; +}; + +/* ----------------------------------------------------------------------- */ + +void* ImplUTF7CreateUTF7TextToUnicodeContext() +{ + ImplUTF7ToUCContextData* pContextData = new ImplUTF7ToUCContextData; + pContextData->mbShifted = sal_False; + pContextData->mbFirst = sal_False; + pContextData->mbWroteOne = sal_False; + 
pContextData->mnBitBuffer = 0; + pContextData->mnBufferBits = 0; + return pContextData; +} + +/* ----------------------------------------------------------------------- */ + +void ImplUTF7DestroyTextToUnicodeContext( void* pContext ) +{ + delete static_cast< ImplUTF7ToUCContextData * >(pContext); +} + +/* ----------------------------------------------------------------------- */ + +void ImplUTF7ResetTextToUnicodeContext( void* pContext ) +{ + ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; + pContextData->mbShifted = sal_False; + pContextData->mbFirst = sal_False; + pContextData->mbWroteOne = sal_False; + pContextData->mnBitBuffer = 0; + pContextData->mnBufferBits = 0; +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUTF7ToUnicode( const ImplTextConverterData*, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; + sal_uChar c ='\0'; + sal_uChar nBase64Value = 0; + int bEnd = sal_False; + int bShifted; + int bFirst; + int bWroteOne; + int bBase64End; + sal_uInt32 nBitBuffer; + sal_uInt32 nBitBufferTemp; + sal_uInt32 nBufferBits; + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + +/* !!! Implementation not finnished !!! 
+ if ( pContextData ) + { + bShifted = pContextData->mbShifted; + bFirst = pContextData->mbFirst; + bWroteOne = pContextData->mbWroteOne; + nBitBuffer = pContextData->mnBitBuffer; + nBufferBits = pContextData->mnBufferBits; + } + else +*/ + { + bShifted = sal_False; + bFirst = sal_False; + bWroteOne = sal_False; + nBitBuffer = 0; + nBufferBits = 0; + } + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + do + { + if ( pSrcBuf < pEndSrcBuf ) + { + c = (sal_uChar)*pSrcBuf; + + /* End, when not a base64 character */ + bBase64End = sal_False; + if ( c <= 0x7F ) + { + nBase64Value = aImplBase64IndexTab[c]; + if ( nBase64Value == 0xFF ) + bBase64End = sal_True; + } + } + else + { + bEnd = sal_True; + bBase64End = sal_True; + } + + if ( bShifted ) + { + if ( bBase64End ) + { + bShifted = sal_False; + + /* If the character causing us to drop out was SHIFT_IN */ + /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */ + /* The test for SHIFT_IN is not necessary, but allows */ + /* an alternate form of UTF-7 where SHIFT_IN is escaped */ + /* by SHIFT_IN. This only works for some values of */ + /* SHIFT_IN. It is so implemented, because this comes */ + /* from the officel unicode book (The Unicode Standard, */ + /* Version 2.0) and so I think, that someone of the */ + /* world has used this feature. */ + if ( !bEnd ) + { + if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) ) + { + /* If no base64 character, and the terminating */ + /* character of the shift sequence was the */ + /* SHIFT_OUT_CHAR, then it't a special escape */ + /* for SHIFT_IN_CHAR. 
*/ + if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = IMPL_SHIFT_IN_CHAR; + pDestBuf++; + bWroteOne = sal_True; + } + + /* Skip character */ + pSrcBuf++; + if ( pSrcBuf < pEndSrcBuf ) + c = (sal_uChar)*pSrcBuf; + else + bEnd = sal_True; + } + } + + /* Empty sequence not allowed, so when we don't write one */ + /* valid char, then the sequence is corrupt */ + if ( !bWroteOne ) + { + /* When no more bytes in the source buffer, then */ + /* this buffer may be to small */ + if ( bEnd ) + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + /* We insert here no default char, because I think */ + /* this is better to ignore this */ + } + } + } + else + { + /* Add 6 Bits from character to the bit buffer */ + nBufferBits += 6; + nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits); + bFirst = sal_False; + } + + /* Extract as many full 16 bit characters as possible from the */ + /* bit buffer. 
*/ + while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) ) + { + nBitBufferTemp = nBitBuffer >> (32-16); + *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF); + pDestBuf++; + nBitBuffer <<= 16; + nBufferBits -= 16; + bWroteOne = sal_True; + } + + if ( nBufferBits >= 16 ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if ( bBase64End ) + { + /* Sequence ended and we have some bits, then the */ + /* sequence is corrupted */ + if ( nBufferBits && nBitBuffer ) + { + /* When no more bytes in the source buffer, then */ + /* this buffer may be to small */ + if ( bEnd ) + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf++ + = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + } + + } + + nBitBuffer = 0; + nBufferBits = 0; + } + } + + if ( !bEnd ) + { + if ( !bShifted ) + { + if ( c == IMPL_SHIFT_IN_CHAR ) + { + bShifted = sal_True; + bFirst = sal_True; + bWroteOne = sal_False; + } + else + { + /* No direct encoded charcater, then the buffer is */ + /* corrupt */ + if ( c > 0x7F ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; + if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; + break; + } + else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + 
*pDestBuf++ + = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; + } + } + + /* Write char to unicode buffer */ + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = c; + pDestBuf++; + } + } + + pSrcBuf++; + } + } + while ( !bEnd ); + + if ( pContextData ) + { + pContextData->mbShifted = bShifted; + pContextData->mbFirst = bFirst; + pContextData->mbWroteOne = bWroteOne; + pContextData->mnBitBuffer = nBitBuffer; + pContextData->mnBufferBits = nBufferBits; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ======================================================================= */ + +struct ImplUTF7FromUCContextData +{ + int mbShifted; + sal_uInt32 mnBitBuffer; + sal_uInt32 mnBufferBits; +}; + +/* ----------------------------------------------------------------------- */ + +void* ImplUTF7CreateUnicodeToTextContext() +{ + ImplUTF7FromUCContextData* pContextData = new ImplUTF7FromUCContextData; + pContextData->mbShifted = sal_False; + pContextData->mnBitBuffer = 0; + pContextData->mnBufferBits = 0; + return pContextData; +} + +/* ----------------------------------------------------------------------- */ + +void ImplUTF7DestroyUnicodeToTextContext( void* pContext ) +{ + delete static_cast< ImplUTF7FromUCContextData * >(pContext); +} + +/* ----------------------------------------------------------------------- */ + +void ImplUTF7ResetUnicodeToTextContext( void* pContext ) +{ + ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; + pContextData->mbShifted = sal_False; + pContextData->mnBitBuffer = 0; + pContextData->mnBufferBits = 0; +} + +/* ----------------------------------------------------------------------- */ + +sal_Size ImplUnicodeToUTF7( const ImplTextConverterData*, void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32, 
sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; + sal_Unicode c = '\0'; + int bEnd = sal_False; + int bShifted; + int bNeedShift; + sal_uInt32 nBitBuffer; + sal_uInt32 nBitBufferTemp; + sal_uInt32 nBufferBits; + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + +/* !!! Implementation not finnished !!! + if ( pContextData ) + { + bShifted = pContextData->mbShifted; + nBitBuffer = pContextData->mnBitBuffer; + nBufferBits = pContextData->mnBufferBits; + } + else +*/ + { + bShifted = sal_False; + nBitBuffer = 0; + nBufferBits = 0; + } + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + do + { + if ( pSrcBuf < pEndSrcBuf ) + { + c = *pSrcBuf; + + bNeedShift = (c > 0x7F) || aImplMustShiftTab[c]; + if ( bNeedShift && !bShifted ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = IMPL_SHIFT_IN_CHAR; + pDestBuf++; + /* Special case handling for SHIFT_IN_CHAR */ + if ( c == IMPL_SHIFT_IN_CHAR ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = IMPL_SHIFT_OUT_CHAR; + pDestBuf++; + } + else + bShifted = sal_True; + } + } + else + { + bEnd = sal_True; + bNeedShift = sal_False; + } + + if ( bShifted ) + { + /* Write the character to the bit buffer, or pad the bit */ + /* buffer out to a full base64 character */ + if ( bNeedShift ) + { + nBufferBits += 16; + nBitBuffer |= ((sal_uInt32)c) << (32-nBufferBits); + } + else + nBufferBits += (6-(nBufferBits%6))%6; + + /* Flush out as many full base64 characters as possible */ + while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) ) + { + nBitBufferTemp = nBitBuffer >> (32-6); + *pDestBuf = aImplBase64Tab[nBitBufferTemp]; + pDestBuf++; + nBitBuffer <<= 6; + nBufferBits -= 6; + } + + if ( nBufferBits >= 6 ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; 
+ } + + /* Write SHIFT_OUT_CHAR, when needed */ + if ( !bNeedShift ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = IMPL_SHIFT_OUT_CHAR; + pDestBuf++; + bShifted = sal_False; + } + } + + if ( !bEnd ) + { + /* Character can be directly endcoded */ + if ( !bNeedShift ) + { + if ( pDestBuf >= pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + *pDestBuf = static_cast< char >(static_cast< unsigned char >(c)); + pDestBuf++; + } + + pSrcBuf++; + } + } + while ( !bEnd ); + + if ( pContextData ) + { + pContextData->mbShifted = bShifted; + pContextData->mnBitBuffer = nBitBuffer; + pContextData->mnBufferBits = nBufferBits; + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtutf8.c b/sal/textenc/tcvtutf8.c deleted file mode 100644 index 3023c3e754e6..000000000000 --- a/sal/textenc/tcvtutf8.c +++ /dev/null @@ -1,422 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "sal/types.h" -#include "rtl/alloc.h" -#include "rtl/textcvt.h" - -#include "converter.h" -#include "tenchelp.h" -#include "unichars.h" - -struct ImplUtf8ToUnicodeContext -{ - sal_uInt32 nUtf32; - int nShift; - sal_Bool bCheckBom; -}; - -struct ImplUnicodeToUtf8Context -{ - sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */ -}; - -void * ImplCreateUtf8ToUnicodeContext(void) -{ - void * p = rtl_allocateMemory(sizeof (struct ImplUtf8ToUnicodeContext)); - ImplResetUtf8ToUnicodeContext(p); - return p; -} - -void ImplResetUtf8ToUnicodeContext(void * pContext) -{ - if (pContext != NULL) - { - ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = -1; - ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = sal_True; - } -} - -sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData, - void * pContext, sal_Char const * pSrcBuf, - sal_Size nSrcBytes, sal_Unicode * pDestBuf, - sal_Size nDestChars, sal_uInt32 nFlags, - sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes) -{ - /* - This function is very liberal with the UTF-8 input. Accepted are: - - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041) - - surrogates (e.g., ED A0 80 to represent U+D800) - - encodings with up to six bytes (everything outside the range - U+0000..10FFFF is considered "undefined") - The first two of these points allow this routine to translate from both - RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8. 
- */ - - int bJavaUtf8 = pData != NULL; - sal_uInt32 nUtf32 = 0; - int nShift = -1; - sal_Bool bCheckBom = sal_True; - sal_uInt32 nInfo = 0; - sal_uChar const * pSrcBufPtr = (sal_uChar const *) pSrcBuf; - sal_uChar const * pSrcBufEnd = pSrcBufPtr + nSrcBytes; - sal_Unicode * pDestBufPtr = pDestBuf; - sal_Unicode * pDestBufEnd = pDestBufPtr + nDestChars; - - if (pContext != NULL) - { - nUtf32 = ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32; - nShift = ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift; - bCheckBom = ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom; - } - - while (pSrcBufPtr < pSrcBufEnd) - { - sal_Bool bUndefined = sal_False; - int bConsume = sal_True; - sal_uInt32 nChar = *pSrcBufPtr++; - if (nShift < 0) - if (nChar <= 0x7F) - { - nUtf32 = nChar; - goto transform; - } - else if (nChar <= 0xBF) - goto bad_input; - else if (nChar <= 0xDF) - { - nUtf32 = (nChar & 0x1F) << 6; - nShift = 0; - } - else if (nChar <= 0xEF) - { - nUtf32 = (nChar & 0x0F) << 12; - nShift = 6; - } - else if (nChar <= 0xF7) - { - nUtf32 = (nChar & 0x07) << 18; - nShift = 12; - } - else if (nChar <= 0xFB) - { - nUtf32 = (nChar & 0x03) << 24; - nShift = 18; - } - else if (nChar <= 0xFD) - { - nUtf32 = (nChar & 0x01) << 30; - nShift = 24; - } - else - goto bad_input; - else if ((nChar & 0xC0) == 0x80) - { - nUtf32 |= (nChar & 0x3F) << nShift; - if (nShift == 0) - goto transform; - else - nShift -= 6; - } - else - { - /* - This byte is preceeded by a broken UTF-8 sequence; if this byte - is neither in the range [0x80..0xBF] nor in the range - [0xFE..0xFF], assume that this byte does not belong to that - broken sequence, but instead starts a new, legal UTF-8 sequence: - */ - bConsume = nChar >= 0xFE; - goto bad_input; - } - continue; - - transform: - if (!bCheckBom || nUtf32 != 0xFEFF - || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0 - || bJavaUtf8) - { - if (nUtf32 <= 0xFFFF) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Unicode) 
nUtf32; - else - goto no_output; - else if (nUtf32 <= 0x10FFFF) - if (pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32); - *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32); - } - else - goto no_output; - else - { - bUndefined = sal_True; - goto bad_input; - } - } - nShift = -1; - bCheckBom = sal_False; - continue; - - bad_input: - switch (ImplHandleBadInputTextToUnicodeConversion( - bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, - &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - nShift = -1; - bCheckBom = sal_False; - if (!bConsume) - --pSrcBufPtr; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nShift = -1; - bCheckBom = sal_False; - if (!bConsume) - --pSrcBufPtr; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBufPtr; - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nShift >= 0 - && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR - | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) - nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputTextToUnicodeConversion( - sal_False, sal_True, 0, nFlags, &pDestBufPtr, - pDestBufEnd, &nInfo)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nShift = -1; - bCheckBom = sal_False; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - } - - if (pContext != NULL) - { - ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32 = nUtf32; - ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = nShift; - ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = bCheckBom; - } - if (pInfo != NULL) - *pInfo = nInfo; - if (pSrcCvtBytes != NULL) - *pSrcCvtBytes = (sal_Char const *) pSrcBufPtr - pSrcBuf; - return pDestBufPtr - pDestBuf; -} - -void * ImplCreateUnicodeToUtf8Context(void) -{ - void * p = rtl_allocateMemory(sizeof (struct 
ImplUnicodeToUtf8Context)); - ImplResetUnicodeToUtf8Context(p); - return p; -} - -void ImplResetUnicodeToUtf8Context(void * pContext) -{ - if (pContext != NULL) - ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate = 0xFFFF; -} - -sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData, - void * pContext, sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, sal_Char * pDestBuf, - sal_Size nDestBytes, sal_uInt32 nFlags, - sal_uInt32 * pInfo, sal_Size* pSrcCvtChars) -{ - int bJavaUtf8 = pData != NULL; - sal_Unicode nHighSurrogate = 0xFFFF; - sal_uInt32 nInfo = 0; - sal_Unicode const * pSrcBufPtr = pSrcBuf; - sal_Unicode const * pSrcBufEnd = pSrcBufPtr + nSrcChars; - sal_Char * pDestBufPtr = pDestBuf; - sal_Char * pDestBufEnd = pDestBufPtr + nDestBytes; - - if (pContext != NULL) - nHighSurrogate - = ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate; - - if (nHighSurrogate == 0xFFFF) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0 - && !bJavaUtf8) - { - if (pDestBufEnd - pDestBufPtr >= 3) - { - /* Write BOM (U+FEFF) as UTF-8: */ - *pDestBufPtr++ = (sal_Char) (unsigned char) 0xEF; - *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBB; - *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBF; - } - else - { - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - goto done; - } - } - nHighSurrogate = 0; - } - - while (pSrcBufPtr < pSrcBufEnd) - { - sal_uInt32 nChar = *pSrcBufPtr++; - if (nHighSurrogate == 0) - { - if (ImplIsHighSurrogate(nChar) && !bJavaUtf8) - { - nHighSurrogate = (sal_Unicode) nChar; - continue; - } - } - else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8) - nChar = ImplCombineSurrogates(nHighSurrogate, nChar); - else - goto bad_input; - - if ((ImplIsLowSurrogate(nChar) && !bJavaUtf8) - || ImplIsNoncharacter(nChar)) - goto bad_input; - - if (nChar <= 0x7F && (!bJavaUtf8 || nChar != 0)) - if (pDestBufPtr != pDestBufEnd) - *pDestBufPtr++ = (sal_Char) nChar; - else - goto no_output; - else if (nChar <= 0x7FF) - if 
(pDestBufEnd - pDestBufPtr >= 2) - { - *pDestBufPtr++ = (sal_Char) (0xC0 | (nChar >> 6)); - *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F)); - } - else - goto no_output; - else if (nChar <= 0xFFFF) - if (pDestBufEnd - pDestBufPtr >= 3) - { - *pDestBufPtr++ = (sal_Char) (0xE0 | (nChar >> 12)); - *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F)); - *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F)); - } - else - goto no_output; - else if (pDestBufEnd - pDestBufPtr >= 4) - { - *pDestBufPtr++ = (sal_Char) (0xF0 | (nChar >> 18)); - *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 12) & 0x3F)); - *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F)); - *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F)); - } - else - goto no_output; - nHighSurrogate = 0; - continue; - - bad_input: - switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, nFlags, - &pDestBufPtr, - pDestBufEnd, &nInfo, - NULL, 0, NULL)) - { - case IMPL_BAD_INPUT_STOP: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - continue; - - case IMPL_BAD_INPUT_NO_OUTPUT: - goto no_output; - } - break; - - no_output: - --pSrcBufPtr; - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - if (nHighSurrogate != 0 - && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - == 0) - { - if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) - nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - else - switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, - nFlags, - &pDestBufPtr, - pDestBufEnd, - &nInfo, NULL, 0, - NULL)) - { - case IMPL_BAD_INPUT_STOP: - case IMPL_BAD_INPUT_CONTINUE: - nHighSurrogate = 0; - break; - - case IMPL_BAD_INPUT_NO_OUTPUT: - nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - } - - done: - if (pContext != NULL) - ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate - = nHighSurrogate; - if (pInfo != NULL) - *pInfo = nInfo; - if (pSrcCvtChars != NULL) - 
*pSrcCvtChars = pSrcBufPtr - pSrcBuf; - return pDestBufPtr - pDestBuf; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx new file mode 100644 index 000000000000..18c6c62dacce --- /dev/null +++ b/sal/textenc/tcvtutf8.cxx @@ -0,0 +1,422 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#include "sal/config.h" + +#include "sal/types.h" +#include "rtl/textcvt.h" + +#include "converter.hxx" +#include "tenchelp.hxx" +#include "unichars.hxx" + +struct ImplUtf8ToUnicodeContext +{ + sal_uInt32 nUtf32; + int nShift; + bool bCheckBom; +}; + +struct ImplUnicodeToUtf8Context +{ + sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */ +}; + +void * ImplCreateUtf8ToUnicodeContext() +{ + ImplUtf8ToUnicodeContext * p = new ImplUtf8ToUnicodeContext; + ImplResetUtf8ToUnicodeContext(p); + return p; +} + +void ImplResetUtf8ToUnicodeContext(void * pContext) +{ + if (pContext != NULL) + { + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nShift = -1; + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->bCheckBom = true; + } +} + +sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData, + void * pContext, char const * pSrcBuf, + sal_Size nSrcBytes, sal_Unicode * pDestBuf, + sal_Size nDestChars, sal_uInt32 nFlags, + sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes) +{ + /* + This function is very liberal with the UTF-8 input. Accepted are: + - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041) + - surrogates (e.g., ED A0 80 to represent U+D800) + - encodings with up to six bytes (everything outside the range + U+0000..10FFFF is considered "undefined") + The first two of these points allow this routine to translate from both + RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8. 
+ */ + + int bJavaUtf8 = pData != NULL; + sal_uInt32 nUtf32 = 0; + int nShift = -1; + bool bCheckBom = true; + sal_uInt32 nInfo = 0; + sal_uChar const * pSrcBufPtr = (sal_uChar const *) pSrcBuf; + sal_uChar const * pSrcBufEnd = pSrcBufPtr + nSrcBytes; + sal_Unicode * pDestBufPtr = pDestBuf; + sal_Unicode * pDestBufEnd = pDestBufPtr + nDestChars; + + if (pContext != NULL) + { + nUtf32 = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nUtf32; + nShift = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nShift; + bCheckBom = static_cast< ImplUtf8ToUnicodeContext * >(pContext)->bCheckBom; + } + + while (pSrcBufPtr < pSrcBufEnd) + { + bool bUndefined = false; + int bConsume = true; + sal_uInt32 nChar = *pSrcBufPtr++; + if (nShift < 0) + if (nChar <= 0x7F) + { + nUtf32 = nChar; + goto transform; + } + else if (nChar <= 0xBF) + goto bad_input; + else if (nChar <= 0xDF) + { + nUtf32 = (nChar & 0x1F) << 6; + nShift = 0; + } + else if (nChar <= 0xEF) + { + nUtf32 = (nChar & 0x0F) << 12; + nShift = 6; + } + else if (nChar <= 0xF7) + { + nUtf32 = (nChar & 0x07) << 18; + nShift = 12; + } + else if (nChar <= 0xFB) + { + nUtf32 = (nChar & 0x03) << 24; + nShift = 18; + } + else if (nChar <= 0xFD) + { + nUtf32 = (nChar & 0x01) << 30; + nShift = 24; + } + else + goto bad_input; + else if ((nChar & 0xC0) == 0x80) + { + nUtf32 |= (nChar & 0x3F) << nShift; + if (nShift == 0) + goto transform; + else + nShift -= 6; + } + else + { + /* + This byte is preceeded by a broken UTF-8 sequence; if this byte + is neither in the range [0x80..0xBF] nor in the range + [0xFE..0xFF], assume that this byte does not belong to that + broken sequence, but instead starts a new, legal UTF-8 sequence: + */ + bConsume = nChar >= 0xFE; + goto bad_input; + } + continue; + + transform: + if (!bCheckBom || nUtf32 != 0xFEFF + || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0 + || bJavaUtf8) + { + if (nUtf32 <= 0xFFFF) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = (sal_Unicode) nUtf32; + 
else + goto no_output; + else if (nUtf32 <= 0x10FFFF) + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32); + *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32); + } + else + goto no_output; + else + { + bUndefined = true; + goto bad_input; + } + } + nShift = -1; + bCheckBom = false; + continue; + + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + nShift = -1; + bCheckBom = false; + if (!bConsume) + --pSrcBufPtr; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nShift = -1; + bCheckBom = false; + if (!bConsume) + --pSrcBufPtr; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBufPtr; + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nShift >= 0 + && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR + | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) + nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputTextToUnicodeConversion( + false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd, + &nInfo)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nShift = -1; + bCheckBom = false; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + } + + if (pContext != NULL) + { + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nUtf32 = nUtf32; + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->nShift = nShift; + static_cast< ImplUtf8ToUnicodeContext * >(pContext)->bCheckBom = bCheckBom; + } + if (pInfo != NULL) + *pInfo = nInfo; + if (pSrcCvtBytes != NULL) + *pSrcCvtBytes = reinterpret_cast< char const * >(pSrcBufPtr) - pSrcBuf; + return pDestBufPtr - pDestBuf; +} + +void * ImplCreateUnicodeToUtf8Context() +{ + ImplUnicodeToUtf8Context * p = new ImplUnicodeToUtf8Context; + 
ImplResetUnicodeToUtf8Context(p); + return p; +} + +void ImplResetUnicodeToUtf8Context(void * pContext) +{ + if (pContext != NULL) + static_cast< ImplUnicodeToUtf8Context * >(pContext)->nHighSurrogate = 0xFFFF; +} + +sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData, + void * pContext, sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, char * pDestBuf, + sal_Size nDestBytes, sal_uInt32 nFlags, + sal_uInt32 * pInfo, sal_Size* pSrcCvtChars) +{ + int bJavaUtf8 = pData != NULL; + sal_Unicode nHighSurrogate = 0xFFFF; + sal_uInt32 nInfo = 0; + sal_Unicode const * pSrcBufPtr = pSrcBuf; + sal_Unicode const * pSrcBufEnd = pSrcBufPtr + nSrcChars; + char * pDestBufPtr = pDestBuf; + char * pDestBufEnd = pDestBufPtr + nDestBytes; + + if (pContext != NULL) + nHighSurrogate + = static_cast< ImplUnicodeToUtf8Context * >(pContext)->nHighSurrogate; + + if (nHighSurrogate == 0xFFFF) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0 + && !bJavaUtf8) + { + if (pDestBufEnd - pDestBufPtr >= 3) + { + /* Write BOM (U+FEFF) as UTF-8: */ + *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0xEF)); + *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0xBB)); + *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0xBF)); + } + else + { + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + goto done; + } + } + nHighSurrogate = 0; + } + + while (pSrcBufPtr < pSrcBufEnd) + { + sal_uInt32 nChar = *pSrcBufPtr++; + if (nHighSurrogate == 0) + { + if (ImplIsHighSurrogate(nChar) && !bJavaUtf8) + { + nHighSurrogate = (sal_Unicode) nChar; + continue; + } + } + else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8) + nChar = ImplCombineSurrogates(nHighSurrogate, nChar); + else + goto bad_input; + + if ((ImplIsLowSurrogate(nChar) && !bJavaUtf8) + || ImplIsNoncharacter(nChar)) + goto bad_input; + + if (nChar <= 0x7F && (!bJavaUtf8 || nChar != 0)) + if (pDestBufPtr != pDestBufEnd) + *pDestBufPtr++ = static_cast< char >(nChar); + else 
+ goto no_output; + else if (nChar <= 0x7FF) + if (pDestBufEnd - pDestBufPtr >= 2) + { + *pDestBufPtr++ = static_cast< char >(0xC0 | (nChar >> 6)); + *pDestBufPtr++ = static_cast< char >(0x80 | (nChar & 0x3F)); + } + else + goto no_output; + else if (nChar <= 0xFFFF) + if (pDestBufEnd - pDestBufPtr >= 3) + { + *pDestBufPtr++ = static_cast< char >(0xE0 | (nChar >> 12)); + *pDestBufPtr++ = static_cast< char >(0x80 | ((nChar >> 6) & 0x3F)); + *pDestBufPtr++ = static_cast< char >(0x80 | (nChar & 0x3F)); + } + else + goto no_output; + else if (pDestBufEnd - pDestBufPtr >= 4) + { + *pDestBufPtr++ = static_cast< char >(0xF0 | (nChar >> 18)); + *pDestBufPtr++ = static_cast< char >(0x80 | ((nChar >> 12) & 0x3F)); + *pDestBufPtr++ = static_cast< char >(0x80 | ((nChar >> 6) & 0x3F)); + *pDestBufPtr++ = static_cast< char >(0x80 | (nChar & 0x3F)); + } + else + goto no_output; + nHighSurrogate = 0; + continue; + + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion(false, 0, nFlags, + &pDestBufPtr, + pDestBufEnd, &nInfo, + NULL, 0, NULL)) + { + case IMPL_BAD_INPUT_STOP: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + + no_output: + --pSrcBufPtr; + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + if (nHighSurrogate != 0 + && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0) + { + if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) + nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + else + switch (ImplHandleBadInputUnicodeToTextConversion(false, 0, nFlags, + &pDestBufPtr, + pDestBufEnd, + &nInfo, NULL, 0, + NULL)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + nHighSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + + done: + if (pContext != NULL) + static_cast< ImplUnicodeToUtf8Context * 
>(pContext)->nHighSurrogate + = nHighSurrogate; + if (pInfo != NULL) + *pInfo = nInfo; + if (pSrcCvtChars != NULL) + *pSrcCvtChars = pSrcBufPtr - pSrcBuf; + return pDestBufPtr - pDestBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tenchelp.c b/sal/textenc/tenchelp.c deleted file mode 100644 index ed924336fa17..000000000000 --- a/sal/textenc/tenchelp.c +++ /dev/null @@ -1,215 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#include "tenchelp.h" -#include "unichars.h" -#include "rtl/textcvt.h" -#include "sal/types.h" - -static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, - sal_Char * pBuf, - sal_Size nMaxLen); - -static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, - sal_Char * pBuf, - sal_Size nMaxLen); - -static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); - -sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, - sal_Char * pBuf, - sal_Size nMaxLen) -{ - if (nMaxLen == 0) - return sal_False; - switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) - { - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: - *pBuf = 0x00; - break; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: - default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ - *pBuf = 0x3F; - break; - - case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: - *pBuf = 0x5F; - break; - } - return sal_True; -} - -sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, - sal_Char * pBuf, - sal_Size nMaxLen) -{ - if (nMaxLen == 0) - return sal_False; - switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) - { - case RTL_UNICODETOTEXT_FLAGS_INVALID_0: - *pBuf = 0x00; - break; - - case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: - default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ - *pBuf = 0x3F; - break; - - case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: - *pBuf = 0x5F; - break; - } - return sal_True; -} - -int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) -{ - return - ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 - && ImplIsZeroWidth(c)) - || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 - && ImplIsControlOrFormat(c)) - || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 - && ImplIsPrivateUse(c)); -} - -/* ======================================================================= */ - -sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, 
sal_uInt32 nFlags) -{ - return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) - == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? - RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : - RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; -} - -/* ----------------------------------------------------------------------- */ - -sal_Bool -ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, - sal_Unicode const ** ppSrcBuf, - sal_Unicode const * pEndSrcBuf, - sal_Char ** ppDestBuf, - sal_Char const * pEndDestBuf, - sal_uInt32 nFlags, - sal_uInt32 * pInfo) -{ - sal_Unicode c = **ppSrcBuf; - - (void) pData; /* unused */ - - /* Should the private character map to one byte */ - if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) - { - if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) - { - **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); - (*ppDestBuf)++; - (*ppSrcBuf)++; - return sal_True; - } - } - - /* Should this character ignored (Private, Non Spacing, Control) */ - if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) - { - (*ppSrcBuf)++; - return sal_True; - } - - /* Surrogates Characters should result in */ - /* one replacement character */ - if (ImplIsHighSurrogate(c)) - { - if ( *ppSrcBuf == pEndSrcBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; - return sal_False; - } - - c = *((*ppSrcBuf)+1); - if (ImplIsLowSurrogate(c)) - (*ppSrcBuf)++; - else - { - *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; - if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; - return sal_False; - } - else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) - { - (*ppSrcBuf)++; - return sal_True; - } - else if (ImplGetInvalidAsciiMultiByte(nFlags, - *ppDestBuf, - pEndDestBuf - *ppDestBuf)) - { - ++*ppSrcBuf; - ++*ppDestBuf; - return sal_True; - } - else - { - 
*pInfo |= RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - return sal_False; - } - } - } - - *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; - if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; - return sal_False; - } - else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) - (*ppSrcBuf)++; - else if (ImplGetUndefinedAsciiMultiByte(nFlags, - *ppDestBuf, - pEndDestBuf - *ppDestBuf)) - { - ++*ppSrcBuf; - ++*ppDestBuf; - } - else - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR - | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - return sal_False; - } - - return sal_True; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tenchelp.cxx b/sal/textenc/tenchelp.cxx new file mode 100644 index 000000000000..ddc84dc18d70 --- /dev/null +++ b/sal/textenc/tenchelp.cxx @@ -0,0 +1,216 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include "tenchelp.hxx" +#include "unichars.hxx" + +static bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, + char * pBuf, + sal_Size nMaxLen); + +static bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, + char * pBuf, + sal_Size nMaxLen); + +static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); + +bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, + char * pBuf, + sal_Size nMaxLen) +{ + if (nMaxLen == 0) + return false; + switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) + { + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: + *pBuf = 0x00; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: + default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ + *pBuf = 0x3F; + break; + + case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: + *pBuf = 0x5F; + break; + } + return true; +} + +bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, + char * pBuf, + sal_Size nMaxLen) +{ /* Writes the one replacement byte chosen by the INVALID flag group of nFlags to *pBuf; returns false without writing when nMaxLen is 0. */ + if (nMaxLen == 0) + return false; + switch (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) /* the case labels are INVALID_* values, so nFlags must be masked with INVALID_MASK or only the default branch can ever be taken */ + { + case RTL_UNICODETOTEXT_FLAGS_INVALID_0: + *pBuf = 0x00; + break; + + case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: + default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ + *pBuf = 0x3F; + break; + + case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: + *pBuf = 0x5F; + break; + } + return true; +} + +int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) +{ + return + ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 + && ImplIsZeroWidth(c)) + || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 + && ImplIsControlOrFormat(c)) + || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 + &&
ImplIsPrivateUse(c)); +} + +/* ======================================================================= */ + +sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags) +{ + return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) + == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? + RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : + RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; +} + +/* ----------------------------------------------------------------------- */ /* Consumes the character at **ppSrcBuf as directed by nFlags, emitting at most one byte to *ppDestBuf; every false return also sets RTL_UNICODETOTEXT_INFO_ERROR in *pInfo. */ + +bool +ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const *, + sal_Unicode const ** ppSrcBuf, + sal_Unicode const * pEndSrcBuf, + char ** ppDestBuf, + char const * pEndDestBuf, + sal_uInt32 nFlags, + sal_uInt32 * pInfo) +{ + sal_Unicode c = **ppSrcBuf; + + /* A character in the byte-private range is written back as its single byte when PRIVATE_MAPTO0 is set. NOTE(review): this write is not checked against pEndDestBuf -- confirm callers guarantee room */ + if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) + { + if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) + { + **ppDestBuf = (char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); + (*ppDestBuf)++; + (*ppSrcBuf)++; + return true; + } + } + + /* Skip characters the flags ask to ignore (private use, non-spacing, control) without producing output */ + if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) + { + (*ppSrcBuf)++; + return true; + } + + /* A surrogate pair should result in */ + /* one replacement character. NOTE(review): the low half is read from (*ppSrcBuf)+1 after only *ppSrcBuf was compared with pEndSrcBuf -- confirm this cannot read past the end */ + if (ImplIsHighSurrogate(c)) + { + if ( *ppSrcBuf == pEndSrcBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + return false; + } + + c = *((*ppSrcBuf)+1); + if (ImplIsLowSurrogate(c)) + (*ppSrcBuf)++; + else + { + *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; + if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; + return false; + } + else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) + { + (*ppSrcBuf)++; + return true; + } + else if (ImplGetInvalidAsciiMultiByte(nFlags, + *ppDestBuf, +
pEndDestBuf - *ppDestBuf)) + { + ++*ppSrcBuf; + ++*ppDestBuf; + return true; + } + else + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + return false; + } + } + } + + *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; + if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; + return false; + } + else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) + (*ppSrcBuf)++; + else if (ImplGetUndefinedAsciiMultiByte(nFlags, + *ppDestBuf, + pEndDestBuf - *ppDestBuf)) + { + ++*ppSrcBuf; + ++*ppDestBuf; + } + else + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + return false; + } + + return true; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tenchelp.h b/sal/textenc/tenchelp.h deleted file mode 100644 index 7f6879c4bee1..000000000000 --- a/sal/textenc/tenchelp.h +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H -#define INCLUDED_RTL_TEXTENC_TENCHELP_H - -#include "rtl/tencinfo.h" -#include "rtl/textenc.h" -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#define RTL_TEXTCVT_BYTE_PRIVATE_START 0xF100 -#define RTL_TEXTCVT_BYTE_PRIVATE_END 0xF1FF - -/* ----------------- */ -/* - TextConverter - */ -/* ----------------- */ - -typedef void ImplTextConverterData; - -typedef -sal_Size (* ImplConvertToUnicodeProc)(ImplTextConverterData const * pData, - void * pContext, - sal_Char const * pSrcBuf, - sal_Size nSrcBytes, - sal_Unicode * pDestBuf, - sal_Size nDestChars, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtBytes); - -typedef -sal_Size (* ImplConvertToTextProc)(ImplTextConverterData const * pData, - void * pContext, - sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, - sal_Char * pDestBuf, - sal_Size nDestBytes, - sal_uInt32 nFlags, - sal_uInt32 * pInfo, - sal_Size * pSrcCvtChars); - -typedef void * (* ImplCreateTextContextProc)(void); - -typedef void (* ImplDestroyTextContextProc)(void * pContext); - -typedef void (* ImplResetTextContextProc)(void * pContext); - -typedef void * (* ImplCreateUnicodeContextProc)(void); - -typedef void (* ImplDestroyUnicodeContextProc)(void * pContext); - -typedef void (* ImplResetUnicodeContextProc)(void * pContext); - -typedef struct -{ - ImplTextConverterData const * mpConvertData; - ImplConvertToUnicodeProc mpConvertTextToUnicodeProc; - ImplConvertToTextProc mpConvertUnicodeToTextProc; - ImplCreateTextContextProc mpCreateTextToUnicodeContext; 
- ImplDestroyTextContextProc mpDestroyTextToUnicodeContext; - ImplResetTextContextProc mpResetTextToUnicodeContext; - ImplCreateUnicodeContextProc mpCreateUnicodeToTextContext; - ImplDestroyUnicodeContextProc mpDestroyUnicodeToTextContext; - ImplResetUnicodeContextProc mpResetUnicodeToTextContext; -} ImplTextConverter; - -/* ----------------------------- */ -/* - TextEncoding - Structures - */ -/* ----------------------------- */ - -typedef struct -{ - ImplTextConverter maConverter; - sal_uInt8 mnMinCharSize; - sal_uInt8 mnMaxCharSize; - sal_uInt8 mnAveCharSize; - sal_uInt8 mnBestWindowsCharset; - char const * mpBestUnixCharset; - char const * mpBestMimeCharset; - sal_uInt32 mnInfoFlags; -} ImplTextEncodingData; - -/* ----------------------------------- */ -/* - TextConverter - Byte-Structures - */ -/* ----------------------------------- */ - -typedef struct -{ - sal_uInt16 mnUniChar; - sal_uChar mnChar; - sal_uChar mnChar2; - // to cater for mappings like MS1258 with 1--2 bytes per Unicode char, - // 0 if unused -} ImplUniCharTabData; - -typedef struct -{ - const sal_uInt16* mpToUniTab1; - const sal_uInt16* mpToUniTab2; - sal_uChar mnToUniStart1; - sal_uChar mnToUniEnd1; - sal_uChar mnToUniStart2; - sal_uChar mnToUniEnd2; - const sal_uChar* mpToCharTab1; - const sal_uChar* mpToCharTab2; - const ImplUniCharTabData* mpToCharExTab; - sal_uInt16 mnToCharStart1; - sal_uInt16 mnToCharEnd1; - sal_uInt16 mnToCharStart2; - sal_uInt16 mnToCharEnd2; - sal_uInt16 mnToCharExCount; -} ImplByteConvertData; - -/* ----------------------------------- */ -/* - TextConverter - DBCS-Structures - */ -/* ----------------------------------- */ - -typedef struct -{ - sal_uChar mnLeadStart; - sal_uChar mnLeadEnd; - sal_uChar mnTrail1Start; - sal_uChar mnTrail1End; - sal_uChar mnTrail2Start; - sal_uChar mnTrail2End; - sal_uChar mnTrail3Start; - sal_uChar mnTrail3End; - sal_uChar mnTrailCount; - sal_uInt16 mnTrailRangeCount; - sal_uInt16 mnUniStart; - sal_uInt16 mnUniEnd; -} 
ImplDBCSEUDCData; - -typedef struct -{ - sal_uInt16 mnUniChar; - sal_uInt8 mnTrailStart; - sal_uInt8 mnTrailEnd; - const sal_uInt16* mpToUniTrailTab; -} ImplDBCSToUniLeadTab; - -typedef struct -{ - sal_uInt8 mnLowStart; - sal_uInt8 mnLowEnd; - const sal_uInt16* mpToUniTrailTab; -} ImplUniToDBCSHighTab; - -typedef struct -{ - const ImplDBCSToUniLeadTab* mpToUniLeadTab; - const ImplUniToDBCSHighTab* mpToDBCSHighTab; - sal_uChar mnLeadStart; - sal_uChar mnLeadEnd; - sal_uChar mnTrailStart; - sal_uChar mnTrailEnd; - const ImplDBCSEUDCData* mpEUDCTab; - sal_uInt16 mnEUDCCount; -} ImplDBCSConvertData; - -/* ---------------------------------- */ -/* - TextConverter - EUC-Structures - */ -/* ---------------------------------- */ - -typedef struct -{ - const ImplDBCSToUniLeadTab* mpJIS0208ToUniLeadTab; - const ImplDBCSToUniLeadTab* mpJIS0212ToUniLeadTab; - const ImplUniToDBCSHighTab* mpUniToJIS0208HighTab; - const ImplUniToDBCSHighTab* mpUniToJIS0212HighTab; -} ImplEUCJPConvertData; - -/* --------------------------------- */ -/* - TextConverter - HelpFunctions - */ -/* --------------------------------- */ - -sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags); - -sal_Bool -ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, - sal_Unicode const ** ppSrcBuf, - sal_Unicode const * pEndSrcBuf, - sal_Char ** ppDestBuf, - sal_Char const * pEndDestBuf, - sal_uInt32 nFlags, - sal_uInt32 * pInfo); - /* sal_True means 'continue,' sal_False means 'break' */ - -/* ----------------------------- */ -/* - TextConverter - Functions - */ -/* ----------------------------- */ - -sal_Size ImplSymbolToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); -sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - 
sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ); -sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); -/** For those encodings only with unicode range of 0x80 to 0xFF. */ -sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); -sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ); -sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ); -sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ); -sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, - void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ); -sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, - void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ); -void* ImplUTF7CreateUTF7TextToUnicodeContext( void ); -void ImplUTF7DestroyTextToUnicodeContext( void* pContext ); -void 
ImplUTF7ResetTextToUnicodeContext( void* pContext ); -sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ); -void* ImplUTF7CreateUnicodeToTextContext( void ); -void ImplUTF7DestroyUnicodeToTextContext( void* pContext ); -void ImplUTF7ResetUnicodeToTextContext( void* pContext ); -sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ); - -void * ImplCreateUtf8ToUnicodeContext(void) SAL_THROW_EXTERN_C(); -void ImplResetUtf8ToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C(); -sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData, - void * pContext, sal_Char const * pSrcBuf, - sal_Size nSrcBytes, sal_Unicode * pDestBuf, - sal_Size nDestChars, sal_uInt32 nFlags, - sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes) - SAL_THROW_EXTERN_C(); -void * ImplCreateUnicodeToUtf8Context(void) SAL_THROW_EXTERN_C(); -void ImplResetUnicodeToUtf8Context(void * pContext) SAL_THROW_EXTERN_C(); -sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData, - void * pContext, sal_Unicode const * pSrcBuf, - sal_Size nSrcChars, sal_Char * pDestBuf, - sal_Size nDestBytes, sal_uInt32 nFlags, - sal_uInt32 * pInfo, sal_Size* pSrcCvtChars) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cplusplus */ - -#endif /* INCLUDED_RTL_TEXTENC_TENCHELP_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tenchelp.hxx b/sal/textenc/tenchelp.hxx new file mode 100644 index 000000000000..5c069f90a7ea --- /dev/null +++ b/sal/textenc/tenchelp.hxx @@ -0,0 +1,297 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 
+/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_TENCHELP_HXX +#define INCLUDED_SAL_TEXTENC_TENCHELP_HXX + +#include "sal/config.h" + +#include "rtl/tencinfo.h" +#include "rtl/textenc.h" +#include "sal/types.h" + +#define RTL_TEXTCVT_BYTE_PRIVATE_START 0xF100 +#define RTL_TEXTCVT_BYTE_PRIVATE_END 0xF1FF + +/* ----------------- */ +/* - TextConverter - */ +/* ----------------- */ + +typedef void ImplTextConverterData; + +typedef +sal_Size (* ImplConvertToUnicodeProc)(ImplTextConverterData const * pData, + void * pContext, + char const * pSrcBuf, + sal_Size nSrcBytes, + sal_Unicode * pDestBuf, + sal_Size nDestChars, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtBytes); + +typedef +sal_Size (* ImplConvertToTextProc)(ImplTextConverterData const * pData, + void * pContext, + sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, + char * pDestBuf, + sal_Size nDestBytes, + sal_uInt32 nFlags, + sal_uInt32 * pInfo, + sal_Size * pSrcCvtChars); + +typedef void * (* ImplCreateTextContextProc)(void); + +typedef void (* ImplDestroyTextContextProc)(void * pContext); + +typedef void (* ImplResetTextContextProc)(void * pContext); + +typedef void * (* ImplCreateUnicodeContextProc)(void); + +typedef void (* ImplDestroyUnicodeContextProc)(void * pContext); + +typedef void (* ImplResetUnicodeContextProc)(void * pContext); + +struct ImplTextConverter +{ + ImplTextConverterData const * mpConvertData; + ImplConvertToUnicodeProc mpConvertTextToUnicodeProc; + ImplConvertToTextProc mpConvertUnicodeToTextProc; + ImplCreateTextContextProc mpCreateTextToUnicodeContext; + ImplDestroyTextContextProc mpDestroyTextToUnicodeContext; + ImplResetTextContextProc mpResetTextToUnicodeContext; + ImplCreateUnicodeContextProc mpCreateUnicodeToTextContext; + ImplDestroyUnicodeContextProc mpDestroyUnicodeToTextContext; + ImplResetUnicodeContextProc mpResetUnicodeToTextContext; +}; + +/* ----------------------------- */ +/* - 
TextEncoding - Structures - */ +/* ----------------------------- */ + +struct ImplTextEncodingData +{ + ImplTextConverter maConverter; + sal_uInt8 mnMinCharSize; + sal_uInt8 mnMaxCharSize; + sal_uInt8 mnAveCharSize; + sal_uInt8 mnBestWindowsCharset; + char const * mpBestUnixCharset; + char const * mpBestMimeCharset; + sal_uInt32 mnInfoFlags; +}; + +/* ----------------------------------- */ +/* - TextConverter - Byte-Structures - */ +/* ----------------------------------- */ + +struct ImplUniCharTabData +{ + sal_uInt16 mnUniChar; + sal_uChar mnChar; + sal_uChar mnChar2; + // to cater for mappings like MS1258 with 1--2 bytes per Unicode char, + // 0 if unused +}; + +struct ImplByteConvertData +{ + const sal_uInt16* mpToUniTab1; + const sal_uInt16* mpToUniTab2; + sal_uChar mnToUniStart1; + sal_uChar mnToUniEnd1; + sal_uChar mnToUniStart2; + sal_uChar mnToUniEnd2; + const sal_uChar* mpToCharTab1; + const sal_uChar* mpToCharTab2; + const ImplUniCharTabData* mpToCharExTab; + sal_uInt16 mnToCharStart1; + sal_uInt16 mnToCharEnd1; + sal_uInt16 mnToCharStart2; + sal_uInt16 mnToCharEnd2; + sal_uInt16 mnToCharExCount; +}; + +/* ----------------------------------- */ +/* - TextConverter - DBCS-Structures - */ +/* ----------------------------------- */ + +struct ImplDBCSEUDCData +{ + sal_uChar mnLeadStart; + sal_uChar mnLeadEnd; + sal_uChar mnTrail1Start; + sal_uChar mnTrail1End; + sal_uChar mnTrail2Start; + sal_uChar mnTrail2End; + sal_uChar mnTrail3Start; + sal_uChar mnTrail3End; + sal_uChar mnTrailCount; + sal_uInt16 mnTrailRangeCount; + sal_uInt16 mnUniStart; + sal_uInt16 mnUniEnd; +}; + +struct ImplDBCSToUniLeadTab +{ + sal_uInt16 mnUniChar; + sal_uInt8 mnTrailStart; + sal_uInt8 mnTrailEnd; + const sal_uInt16* mpToUniTrailTab; +}; + +struct ImplUniToDBCSHighTab +{ + sal_uInt8 mnLowStart; + sal_uInt8 mnLowEnd; + const sal_uInt16* mpToUniTrailTab; +}; + +struct ImplDBCSConvertData +{ + const ImplDBCSToUniLeadTab* mpToUniLeadTab; + const ImplUniToDBCSHighTab* mpToDBCSHighTab; 
+ sal_uChar mnLeadStart; + sal_uChar mnLeadEnd; + sal_uChar mnTrailStart; + sal_uChar mnTrailEnd; + const ImplDBCSEUDCData* mpEUDCTab; + sal_uInt16 mnEUDCCount; +}; + +/* ---------------------------------- */ +/* - TextConverter - EUC-Structures - */ +/* ---------------------------------- */ + +struct ImplEUCJPConvertData +{ + const ImplDBCSToUniLeadTab* mpJIS0208ToUniLeadTab; + const ImplDBCSToUniLeadTab* mpJIS0212ToUniLeadTab; + const ImplUniToDBCSHighTab* mpUniToJIS0208HighTab; + const ImplUniToDBCSHighTab* mpUniToJIS0212HighTab; +}; + +/* --------------------------------- */ +/* - TextConverter - HelpFunctions - */ +/* --------------------------------- */ + +sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags); + +bool +ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, + sal_Unicode const ** ppSrcBuf, + sal_Unicode const * pEndSrcBuf, + char ** ppDestBuf, + char const * pEndDestBuf, + sal_uInt32 nFlags, + sal_uInt32 * pInfo); + /* true means 'continue,' false means 'break' */ + +/* ----------------------------- */ +/* - TextConverter - Functions - */ +/* ----------------------------- */ + +sal_Size ImplSymbolToUnicode( const ImplTextConverterData* pData, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); +sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ); +sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); +/** For those encodings only with unicode range of 0x80 to 0xFF. 
*/ +sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes ); +sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars ); +sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ); +sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ); +sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, + void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ); +sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, + void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ); +void* ImplUTF7CreateUTF7TextToUnicodeContext( void ); +void ImplUTF7DestroyTextToUnicodeContext( void* pContext ); +void ImplUTF7ResetTextToUnicodeContext( void* pContext ); +sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ); +void* ImplUTF7CreateUnicodeToTextContext( void ); +void ImplUTF7DestroyUnicodeToTextContext( void* pContext ); +void 
ImplUTF7ResetUnicodeToTextContext( void* pContext ); +sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ); + +void * ImplCreateUtf8ToUnicodeContext(void); +void ImplResetUtf8ToUnicodeContext(void * pContext); +sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData, + void * pContext, char const * pSrcBuf, + sal_Size nSrcBytes, sal_Unicode * pDestBuf, + sal_Size nDestChars, sal_uInt32 nFlags, + sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes); +void * ImplCreateUnicodeToUtf8Context(void); +void ImplResetUnicodeToUtf8Context(void * pContext); +sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData, + void * pContext, sal_Unicode const * pSrcBuf, + sal_Size nSrcChars, char * pDestBuf, + sal_Size nDestBytes, sal_uInt32 nFlags, + sal_uInt32 * pInfo, sal_Size* pSrcCvtChars); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tencinfo.c b/sal/textenc/tencinfo.c deleted file mode 100644 index 2731880c66d2..000000000000 --- a/sal/textenc/tencinfo.c +++ /dev/null @@ -1,1002 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. 
- * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "rtl/tencinfo.h" -#include "gettextencodingdata.h" -#include "tenchelp.h" - -#ifndef _RTL_ALLOC_H -#include "rtl/alloc.h" -#endif - -#ifndef INCLUDED_STDDEF_H -#include -#define INCLUDED_STDDEF_H -#endif -#ifndef INCLUDED_STRING_H -#include -#define INCLUDED_STRING_H -#endif - -sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding) -{ - return (sal_Bool) - (nEncoding > RTL_TEXTENCODING_DONTKNOW - && (nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS) - /* always update this! 
*/ - && nEncoding != 9); /* RTL_TEXTENCODING_SYSTEM */ -} - -/* ======================================================================= */ - -static void Impl_toAsciiLower( const sal_Char* pName, sal_Char* pBuf ) -{ - while ( *pName ) - { - /* A-Z */ - if ( (*pName >= 0x41) && (*pName <= 0x5A) ) - *pBuf = (*pName)+0x20; /* toAsciiLower */ - else - *pBuf = *pName; - - pBuf++; - pName++; - } - - *pBuf = '\0'; -} - -/* ----------------------------------------------------------------------- */ - -static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const sal_Char* pName, sal_Char* pBuf ) -{ - while ( *pName ) - { - /* A-Z */ - if ( (*pName >= 0x41) && (*pName <= 0x5A) ) - { - *pBuf = (*pName)+0x20; /* toAsciiLower */ - pBuf++; - } - /* a-z, 0-9 */ - else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) || - ((*pName >= 0x30) && (*pName <= 0x39)) ) - { - *pBuf = *pName; - pBuf++; - } - - pName++; - } - - *pBuf = '\0'; -} - -/* ----------------------------------------------------------------------- */ - -/* pMatchStr must match with all characters in pCompStr */ -static sal_Bool Impl_matchString( const sal_Char* pCompStr, const sal_Char* pMatchStr ) -{ - /* We test only for end in MatchStr, because the last 0 character from */ - /* pCompStr is unequal a character in MatchStr, so the loop terminates */ - while ( *pMatchStr ) - { - if ( *pCompStr != *pMatchStr ) - return sal_False; - - pCompStr++; - pMatchStr++; - } - - return sal_True; -} - -/* ======================================================================= */ - -typedef struct -{ - const sal_Char* mpCharsetStr; - rtl_TextEncoding meTextEncoding; -} ImplStrCharsetDef; - -typedef struct -{ - const sal_Char* mpCharsetStr; - const ImplStrCharsetDef* mpSecondPartTab; -} ImplStrFirstPartCharsetDef; - -/* ======================================================================= */ - -sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo ) -{ - const ImplTextEncodingData* 
pData; - - pData = Impl_getTextEncodingData( eTextEncoding ); - if ( !pData ) - { - /* HACK: For not implemented encoding, because not all - calls handle the errors */ - if ( pEncInfo->StructSize < 5 ) - return sal_False; - pEncInfo->MinimumCharSize = 1; - - if ( pEncInfo->StructSize < 6 ) - return sal_True; - pEncInfo->MaximumCharSize = 1; - - if ( pEncInfo->StructSize < 7 ) - return sal_True; - pEncInfo->AverageCharSize = 1; - - if ( pEncInfo->StructSize < 12 ) - return sal_True; - pEncInfo->Flags = 0; - - return sal_False; - } - - if ( pEncInfo->StructSize < 5 ) - return sal_False; - pEncInfo->MinimumCharSize = pData->mnMinCharSize; - - if ( pEncInfo->StructSize < 6 ) - return sal_True; - pEncInfo->MaximumCharSize = pData->mnMaxCharSize; - - if ( pEncInfo->StructSize < 7 ) - return sal_True; - pEncInfo->AverageCharSize = pData->mnAveCharSize; - - if ( pEncInfo->StructSize < 12 ) - return sal_True; - pEncInfo->Flags = pData->mnInfoFlags; - - return sal_True; -} - -/* ======================================================================= */ - -rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset ) -{ - rtl_TextEncoding eTextEncoding; - - switch ( nWinCharset ) - { - case 0: eTextEncoding = RTL_TEXTENCODING_MS_1252; break; /* ANSI_CHARSET */ - case 2: eTextEncoding = RTL_TEXTENCODING_SYMBOL; break; /* SYMBOL_CHARSET */ - case 77: eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */ - case 128: eTextEncoding = RTL_TEXTENCODING_MS_932; break; /* SHIFTJIS_CHARSET */ - case 129: eTextEncoding = RTL_TEXTENCODING_MS_949; break; /* HANGEUL_CHARSET */ - case 130: eTextEncoding = RTL_TEXTENCODING_MS_1361; break; /* JOHAB_CHARSET */ - case 134: eTextEncoding = RTL_TEXTENCODING_MS_936; break; /* GB2312_CHARSET */ - case 136: eTextEncoding = RTL_TEXTENCODING_MS_950; break; /* CHINESEBIG5_CHARSET */ - case 161: eTextEncoding = RTL_TEXTENCODING_MS_1253; break; /* GREEK_CHARSET */ - case 162: eTextEncoding = 
RTL_TEXTENCODING_MS_1254; break; /* TURKISH_CHARSET */ - case 163: eTextEncoding = RTL_TEXTENCODING_MS_1258; break; /* VIETNAMESE_CHARSET !!! */ - case 177: eTextEncoding = RTL_TEXTENCODING_MS_1255; break; /* HEBREW_CHARSET */ - case 178: eTextEncoding = RTL_TEXTENCODING_MS_1256; break; /* ARABIC_CHARSET */ - case 186: eTextEncoding = RTL_TEXTENCODING_MS_1257; break; /* BALTIC_CHARSET */ - case 204: eTextEncoding = RTL_TEXTENCODING_MS_1251; break; /* RUSSIAN_CHARSET */ - case 222: eTextEncoding = RTL_TEXTENCODING_MS_874; break; /* THAI_CHARSET */ - case 238: eTextEncoding = RTL_TEXTENCODING_MS_1250; break; /* EASTEUROPE_CHARSET */ - case 255: eTextEncoding = RTL_TEXTENCODING_IBM_850; break; /* OEM_CHARSET */ - default: eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break; - }; - - return eTextEncoding; -} - -/* ----------------------------------------------------------------------- */ - -rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const sal_Char* pUnixCharset ) -{ - /* See , section 14 ("Font Charset - * (Registry and Encoding) Names"). - */ - - /* All Identifiers in the tables are lower case The function search */ - /* for the first matching string in the tables. 
*/ - /* Sort order: unique (first 14, than 1), important */ - - static ImplStrCharsetDef const aUnixCharsetISOTab[] = - { - { "15", RTL_TEXTENCODING_ISO_8859_15 }, - { "14", RTL_TEXTENCODING_ISO_8859_14 }, - { "13", RTL_TEXTENCODING_ISO_8859_13 }, - { "11", RTL_TEXTENCODING_TIS_620 }, - { "10", RTL_TEXTENCODING_ISO_8859_10 }, - { "1", RTL_TEXTENCODING_ISO_8859_1 }, - { "2", RTL_TEXTENCODING_ISO_8859_2 }, - { "3", RTL_TEXTENCODING_ISO_8859_3 }, - { "4", RTL_TEXTENCODING_ISO_8859_4 }, - { "5", RTL_TEXTENCODING_ISO_8859_5 }, - { "6", RTL_TEXTENCODING_ISO_8859_6 }, - { "7", RTL_TEXTENCODING_ISO_8859_7 }, - { "8", RTL_TEXTENCODING_ISO_8859_8 }, - { "9", RTL_TEXTENCODING_ISO_8859_9 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrCharsetDef const aUnixCharsetADOBETab[] = - { - { "fontspecific", RTL_TEXTENCODING_SYMBOL }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrCharsetDef const aUnixCharsetMSTab[] = - { - { "1252", RTL_TEXTENCODING_MS_1252 }, - { "1250", RTL_TEXTENCODING_MS_1250 }, - { "1251", RTL_TEXTENCODING_MS_1251 }, - { "1253", RTL_TEXTENCODING_MS_1253 }, - { "1254", RTL_TEXTENCODING_MS_1254 }, - { "1255", RTL_TEXTENCODING_MS_1255 }, - { "1256", RTL_TEXTENCODING_MS_1256 }, - { "1257", RTL_TEXTENCODING_MS_1257 }, - { "1258", RTL_TEXTENCODING_MS_1258 }, - { "932", RTL_TEXTENCODING_MS_932 }, - { "936", RTL_TEXTENCODING_MS_936 }, - { "949", RTL_TEXTENCODING_MS_949 }, - { "950", RTL_TEXTENCODING_MS_950 }, - { "1361", RTL_TEXTENCODING_MS_1361 }, - { "cp1252", RTL_TEXTENCODING_MS_1252 }, - { "cp1250", RTL_TEXTENCODING_MS_1250 }, - { "cp1251", RTL_TEXTENCODING_MS_1251 }, - { "cp1253", RTL_TEXTENCODING_MS_1253 }, - { "cp1254", RTL_TEXTENCODING_MS_1254 }, - { "cp1255", RTL_TEXTENCODING_MS_1255 }, - { "cp1256", RTL_TEXTENCODING_MS_1256 }, - { "cp1257", RTL_TEXTENCODING_MS_1257 }, - { "cp1258", RTL_TEXTENCODING_MS_1258 }, - { "cp932", RTL_TEXTENCODING_MS_932 }, - { "cp936", RTL_TEXTENCODING_MS_936 }, - { "cp949", RTL_TEXTENCODING_MS_949 }, - { 
"cp950", RTL_TEXTENCODING_MS_950 }, - { "cp1361", RTL_TEXTENCODING_MS_1361 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrCharsetDef const aUnixCharsetIBMTab[] = - { - { "437", RTL_TEXTENCODING_IBM_437 }, - { "850", RTL_TEXTENCODING_IBM_850 }, - { "860", RTL_TEXTENCODING_IBM_860 }, - { "861", RTL_TEXTENCODING_IBM_861 }, - { "863", RTL_TEXTENCODING_IBM_863 }, - { "865", RTL_TEXTENCODING_IBM_865 }, - { "737", RTL_TEXTENCODING_IBM_737 }, - { "775", RTL_TEXTENCODING_IBM_775 }, - { "852", RTL_TEXTENCODING_IBM_852 }, - { "855", RTL_TEXTENCODING_IBM_855 }, - { "857", RTL_TEXTENCODING_IBM_857 }, - { "862", RTL_TEXTENCODING_IBM_862 }, - { "864", RTL_TEXTENCODING_IBM_864 }, - { "866", RTL_TEXTENCODING_IBM_866 }, - { "869", RTL_TEXTENCODING_IBM_869 }, - { "874", RTL_TEXTENCODING_MS_874 }, - { "1004", RTL_TEXTENCODING_MS_1252 }, - { "65400", RTL_TEXTENCODING_SYMBOL }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] = - { - { "r", RTL_TEXTENCODING_KOI8_R }, - { "u", RTL_TEXTENCODING_KOI8_U }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrCharsetDef aUnixCharsetJISX0208Tab[] = - { - { NULL, RTL_TEXTENCODING_JIS_X_0208 } - }; - - static ImplStrCharsetDef aUnixCharsetJISX0201Tab[] = - { - { NULL, RTL_TEXTENCODING_JIS_X_0201 } - }; - - static ImplStrCharsetDef aUnixCharsetJISX0212Tab[] = - { - { NULL, RTL_TEXTENCODING_JIS_X_0212 } - }; - - static ImplStrCharsetDef aUnixCharsetGBTab[] = - { - { NULL, RTL_TEXTENCODING_GB_2312 } - }; - - static ImplStrCharsetDef aUnixCharsetGBKTab[] = - { - { NULL, RTL_TEXTENCODING_GBK } - }; - - static ImplStrCharsetDef aUnixCharsetBIG5Tab[] = - { - { NULL, RTL_TEXTENCODING_BIG5 } - }; - - static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] = - { - { NULL, RTL_TEXTENCODING_EUC_KR } - }; - - static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] = - { - { NULL, RTL_TEXTENCODING_MS_1361 } - }; - - static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] = - 
{ - { NULL, RTL_TEXTENCODING_UNICODE } - }; - - static ImplStrCharsetDef const aUnixCharsetUNICODETab[] = - { -/* Currently every Unicode Encoding is for us Unicode */ -/* { "fontspecific", RTL_TEXTENCODING_UNICODE }, */ - { NULL, RTL_TEXTENCODING_UNICODE } - }; - - static ImplStrCharsetDef const aUnixCharsetSymbolTab[] = - { - { NULL, RTL_TEXTENCODING_SYMBOL } - }; - - /* See : */ - static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] = - { - { "0", RTL_TEXTENCODING_TIS_620 }, - { "2529", RTL_TEXTENCODING_TIS_620 }, - { "2533", RTL_TEXTENCODING_TIS_620 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] = - { - { "1", RTL_TEXTENCODING_TIS_620 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] = - { - { "0", RTL_TEXTENCODING_TIS_620 }, - { "1", RTL_TEXTENCODING_TIS_620 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] = - { - { "iso8859", aUnixCharsetISOTab }, - { "adobe", aUnixCharsetADOBETab }, - { "ansi", aUnixCharsetMSTab }, - { "microsoft", aUnixCharsetMSTab }, - { "ibm", aUnixCharsetIBMTab }, - { "koi8", aUnixCharsetKOI8Tab }, - { "jisx0208", aUnixCharsetJISX0208Tab }, - { "jisx0208.1983", aUnixCharsetJISX0208Tab }, - { "jisx0201", aUnixCharsetJISX0201Tab }, - { "jisx0201.1976", aUnixCharsetJISX0201Tab }, - { "jisx0212", aUnixCharsetJISX0212Tab }, - { "jisx0212.1990", aUnixCharsetJISX0212Tab }, - { "gb2312", aUnixCharsetGBTab }, - { "gbk", aUnixCharsetGBKTab }, - { "big5", aUnixCharsetBIG5Tab }, - { "iso10646", aUnixCharsetISO10646Tab }, -/* { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */ - { "sunolcursor", aUnixCharsetSymbolTab }, - { "sunolglyph", aUnixCharsetSymbolTab }, - { "iso10646", aUnixCharsetUNICODETab }, - { "ksc5601.1987", aUnixCharsetKSC56011987Tab }, - { "ksc5601.1992", aUnixCharsetKSC56011992Tab }, - { "tis620.2529", 
aUnixCharsetTIS6202529Tab }, - { "tis620.2533", aUnixCharsetTIS6202533Tab }, - { "tis620", aUnixCharsetTIS620Tab }, -/* { "sunudcja.1997", }, */ -/* { "sunudcko.1997", }, */ -/* { "sunudczh.1997", }, */ -/* { "sunudczhtw.1997", }, */ - { NULL, NULL } - }; - - rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; - sal_Char* pBuf; - sal_Char* pTempBuf; - sal_uInt32 nBufLen = strlen( pUnixCharset )+1; - const sal_Char* pFirstPart; - const sal_Char* pSecondPart; - - /* Alloc Buffer and map to lower case */ - pBuf = (char*)rtl_allocateMemory( nBufLen ); - Impl_toAsciiLower( pUnixCharset, pBuf ); - - /* Search FirstPart */ - pFirstPart = pBuf; - pSecondPart = NULL; - pTempBuf = pBuf; - while ( *pTempBuf ) - { - if ( *pTempBuf == '-' ) - { - *pTempBuf = '\0'; - pSecondPart = pTempBuf+1; - break; - } - - pTempBuf++; - } - - /* Parttrenner gefunden */ - if ( pSecondPart ) - { - /* Search for the part tab */ - const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab; - while ( pFirstPartData->mpCharsetStr ) - { - if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) ) - { - /* Search for the charset in the second part tab */ - const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab; - while ( pData->mpCharsetStr ) - { - if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) ) - { - eEncoding = pData->meTextEncoding; - break; - } - - pData++; - } - - /* use default encoding for first part */ - eEncoding = pData->meTextEncoding; - break; - } - - pFirstPartData++; - } - } - - rtl_freeMemory( pBuf ); - - return eEncoding; -} - -/* ----------------------------------------------------------------------- */ - -rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const sal_Char* pMimeCharset ) -{ - /* All Identifiers are in lower case and contain only alphanumeric */ - /* characters. The function search for the first equal string in */ - /* the table. In this table are only the most used mime types. 
*/ - /* Sort order: important */ - static ImplStrCharsetDef const aVIPMimeCharsetTab[] = - { - { "usascii", RTL_TEXTENCODING_ASCII_US }, - { "utf8", RTL_TEXTENCODING_UTF8 }, - { "utf7", RTL_TEXTENCODING_UTF7 }, - { "iso88591", RTL_TEXTENCODING_ISO_8859_1 }, - { "iso88592", RTL_TEXTENCODING_ISO_8859_2 }, - { "iso88593", RTL_TEXTENCODING_ISO_8859_3 }, - { "iso88594", RTL_TEXTENCODING_ISO_8859_4 }, - { "iso88595", RTL_TEXTENCODING_ISO_8859_5 }, - { "iso88596", RTL_TEXTENCODING_ISO_8859_6 }, - { "iso88597", RTL_TEXTENCODING_ISO_8859_7 }, - { "iso88598", RTL_TEXTENCODING_ISO_8859_8 }, - { "iso88599", RTL_TEXTENCODING_ISO_8859_9 }, - { "iso885910", RTL_TEXTENCODING_ISO_8859_10 }, - { "iso885913", RTL_TEXTENCODING_ISO_8859_13 }, - { "iso885914", RTL_TEXTENCODING_ISO_8859_14 }, - { "iso885915", RTL_TEXTENCODING_ISO_8859_15 }, - { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP }, - { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP }, - { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN }, - { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN }, - { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR }, - { "eucjp", RTL_TEXTENCODING_EUC_JP }, - { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS }, - { "mskanji", RTL_TEXTENCODING_MS_932 }, - { "gb2312", RTL_TEXTENCODING_GB_2312 }, - { "cngb", RTL_TEXTENCODING_GB_2312 }, - { "big5", RTL_TEXTENCODING_BIG5 }, - { "cnbig5", RTL_TEXTENCODING_BIG5 }, - { "cngb12345", RTL_TEXTENCODING_GBT_12345 }, - { "euckr", RTL_TEXTENCODING_EUC_KR }, - { "koi8r", RTL_TEXTENCODING_KOI8_R }, - { "windows1252", RTL_TEXTENCODING_MS_1252 }, - { "windows1250", RTL_TEXTENCODING_MS_1250 }, - { "windows1251", RTL_TEXTENCODING_MS_1251 }, - { "windows1253", RTL_TEXTENCODING_MS_1253 }, - { "windows1254", RTL_TEXTENCODING_MS_1254 }, - { "windows1255", RTL_TEXTENCODING_MS_1255 }, - { "windows1256", RTL_TEXTENCODING_MS_1256 }, - { "windows1257", RTL_TEXTENCODING_MS_1257 }, - { "windows1258", RTL_TEXTENCODING_MS_1258 }, - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - /* All Identifiers are in lower case 
and contain only alphanumeric */ - /* characters. The function search for the first matching string in */ - /* the table. */ - /* Sort order: unique (first iso885914, than iso88591), important */ - static ImplStrCharsetDef const aMimeCharsetTab[] = - { - { "unicode11utf7", RTL_TEXTENCODING_UTF7 }, - { "caunicode11utf7", RTL_TEXTENCODING_UTF7 }, - { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 }, - { "iso88591win", RTL_TEXTENCODING_MS_1252 }, - { "iso88592win", RTL_TEXTENCODING_MS_1250 }, - { "iso88599win", RTL_TEXTENCODING_MS_1254 }, - { "iso885915", RTL_TEXTENCODING_ISO_8859_15 }, - { "iso885914", RTL_TEXTENCODING_ISO_8859_14 }, - { "iso885913", RTL_TEXTENCODING_ISO_8859_13 }, - { "iso885911", RTL_TEXTENCODING_TIS_620 }, - /* This is no official MIME character set name, but it might be in - use in Thailand. */ - { "iso885910", RTL_TEXTENCODING_ISO_8859_10 }, - { "iso88591", RTL_TEXTENCODING_ISO_8859_1 }, - { "iso88592", RTL_TEXTENCODING_ISO_8859_2 }, - { "iso88593", RTL_TEXTENCODING_ISO_8859_3 }, - { "iso88594", RTL_TEXTENCODING_ISO_8859_4 }, - { "iso88595", RTL_TEXTENCODING_ISO_8859_5 }, - { "iso88596", RTL_TEXTENCODING_ISO_8859_6 }, - { "iso88597", RTL_TEXTENCODING_ISO_8859_7 }, - { "iso88598", RTL_TEXTENCODING_ISO_8859_8 }, - { "iso88599", RTL_TEXTENCODING_ISO_8859_9 }, - { "isoir100", RTL_TEXTENCODING_ISO_8859_1 }, - { "latin1", RTL_TEXTENCODING_ISO_8859_1 }, - { "l1", RTL_TEXTENCODING_ISO_8859_1 }, - { "cp819", RTL_TEXTENCODING_ISO_8859_1 }, - { "ibm819", RTL_TEXTENCODING_ISO_8859_1 }, - { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 }, - { "isoir101", RTL_TEXTENCODING_ISO_8859_2 }, - { "latin2", RTL_TEXTENCODING_ISO_8859_2 }, - { "l2", RTL_TEXTENCODING_ISO_8859_2 }, - { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 }, - { "isoir109", RTL_TEXTENCODING_ISO_8859_3 }, - { "latin3", RTL_TEXTENCODING_ISO_8859_3 }, - { "l3", RTL_TEXTENCODING_ISO_8859_3 }, - { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 }, - { "isoir110", RTL_TEXTENCODING_ISO_8859_4 }, - { "latin4", 
RTL_TEXTENCODING_ISO_8859_4 }, - { "l4", RTL_TEXTENCODING_ISO_8859_4 }, - { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 }, - { "isoir144", RTL_TEXTENCODING_ISO_8859_5 }, - { "cyrillicasian", RTL_TEXTENCODING_PT154 }, - { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 }, - { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 }, - { "isoir127", RTL_TEXTENCODING_ISO_8859_6 }, - { "arabic", RTL_TEXTENCODING_ISO_8859_6 }, - { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 }, - { "ecma114", RTL_TEXTENCODING_ISO_8859_6 }, - { "asmo708", RTL_TEXTENCODING_ISO_8859_6 }, - { "isoir126", RTL_TEXTENCODING_ISO_8859_7 }, - { "greek", RTL_TEXTENCODING_ISO_8859_7 }, - { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 }, - { "elot928", RTL_TEXTENCODING_ISO_8859_7 }, - { "ecma118", RTL_TEXTENCODING_ISO_8859_7 }, - { "isoir138", RTL_TEXTENCODING_ISO_8859_8 }, - { "hebrew", RTL_TEXTENCODING_ISO_8859_8 }, - { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 }, - { "isoir148", RTL_TEXTENCODING_ISO_8859_9 }, - { "latin5", RTL_TEXTENCODING_ISO_8859_9 }, - { "l5", RTL_TEXTENCODING_ISO_8859_9 }, - { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 }, - { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 }, - { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 }, - { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 }, - { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 }, - { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 }, - { "iso10646us", RTL_TEXTENCODING_ASCII_US }, - { "iso646irv", RTL_TEXTENCODING_ASCII_US }, - { "cskoi8r", RTL_TEXTENCODING_KOI8_R }, - { "ibm437", RTL_TEXTENCODING_IBM_437 }, - { "cp437", RTL_TEXTENCODING_IBM_437 }, - { "437", RTL_TEXTENCODING_IBM_437 }, - { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 }, - { "ansix34", RTL_TEXTENCODING_ASCII_US }, - { "ibm367", RTL_TEXTENCODING_ASCII_US }, - { "cp367", RTL_TEXTENCODING_ASCII_US }, - { "csascii", RTL_TEXTENCODING_ASCII_US }, - { "ibm775", RTL_TEXTENCODING_IBM_775 }, - { "cp775", RTL_TEXTENCODING_IBM_775 }, - { "cspc775baltic", 
RTL_TEXTENCODING_IBM_775 }, - { "ibm850", RTL_TEXTENCODING_IBM_850 }, - { "cp850", RTL_TEXTENCODING_IBM_850 }, - { "850", RTL_TEXTENCODING_IBM_850 }, - { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 }, -/* { "ibm851", RTL_TEXTENCODING_IBM_851 }, */ -/* { "cp851", RTL_TEXTENCODING_IBM_851 }, */ -/* { "851", RTL_TEXTENCODING_IBM_851 }, */ -/* { "csibm851", RTL_TEXTENCODING_IBM_851 }, */ - { "ibm852", RTL_TEXTENCODING_IBM_852 }, - { "cp852", RTL_TEXTENCODING_IBM_852 }, - { "852", RTL_TEXTENCODING_IBM_852 }, - { "cspcp852", RTL_TEXTENCODING_IBM_852 }, - { "ibm855", RTL_TEXTENCODING_IBM_855 }, - { "cp855", RTL_TEXTENCODING_IBM_855 }, - { "855", RTL_TEXTENCODING_IBM_855 }, - { "csibm855", RTL_TEXTENCODING_IBM_855 }, - { "ibm857", RTL_TEXTENCODING_IBM_857 }, - { "cp857", RTL_TEXTENCODING_IBM_857 }, - { "857", RTL_TEXTENCODING_IBM_857 }, - { "csibm857", RTL_TEXTENCODING_IBM_857 }, - { "ibm860", RTL_TEXTENCODING_IBM_860 }, - { "cp860", RTL_TEXTENCODING_IBM_860 }, - { "860", RTL_TEXTENCODING_IBM_860 }, - { "csibm860", RTL_TEXTENCODING_IBM_860 }, - { "ibm861", RTL_TEXTENCODING_IBM_861 }, - { "cp861", RTL_TEXTENCODING_IBM_861 }, - { "861", RTL_TEXTENCODING_IBM_861 }, - { "csis", RTL_TEXTENCODING_IBM_861 }, - { "csibm861", RTL_TEXTENCODING_IBM_861 }, - { "ibm862", RTL_TEXTENCODING_IBM_862 }, - { "cp862", RTL_TEXTENCODING_IBM_862 }, - { "862", RTL_TEXTENCODING_IBM_862 }, - { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 }, - { "ibm863", RTL_TEXTENCODING_IBM_863 }, - { "cp863", RTL_TEXTENCODING_IBM_863 }, - { "863", RTL_TEXTENCODING_IBM_863 }, - { "csibm863", RTL_TEXTENCODING_IBM_863 }, - { "ibm864", RTL_TEXTENCODING_IBM_864 }, - { "cp864", RTL_TEXTENCODING_IBM_864 }, - { "864", RTL_TEXTENCODING_IBM_864 }, - { "csibm864", RTL_TEXTENCODING_IBM_864 }, - { "ibm865", RTL_TEXTENCODING_IBM_865 }, - { "cp865", RTL_TEXTENCODING_IBM_865 }, - { "865", RTL_TEXTENCODING_IBM_865 }, - { "csibm865", RTL_TEXTENCODING_IBM_865 }, - { "ibm866", RTL_TEXTENCODING_IBM_866 }, - { "cp866", 
RTL_TEXTENCODING_IBM_866 }, - { "866", RTL_TEXTENCODING_IBM_866 }, - { "csibm866", RTL_TEXTENCODING_IBM_866 }, -/* { "ibm868", RTL_TEXTENCODING_IBM_868 }, */ -/* { "cp868", RTL_TEXTENCODING_IBM_868 }, */ -/* { "cpar", RTL_TEXTENCODING_IBM_868 }, */ -/* { "csibm868", RTL_TEXTENCODING_IBM_868 }, */ - { "ibm869", RTL_TEXTENCODING_IBM_869 }, - { "cp869", RTL_TEXTENCODING_IBM_869 }, - { "869", RTL_TEXTENCODING_IBM_869 }, - { "cpgr", RTL_TEXTENCODING_IBM_869 }, - { "csibm869", RTL_TEXTENCODING_IBM_869 }, - { "ibm869", RTL_TEXTENCODING_IBM_869 }, - { "cp869", RTL_TEXTENCODING_IBM_869 }, - { "869", RTL_TEXTENCODING_IBM_869 }, - { "cpgr", RTL_TEXTENCODING_IBM_869 }, - { "csibm869", RTL_TEXTENCODING_IBM_869 }, - { "mac", RTL_TEXTENCODING_APPLE_ROMAN }, - { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN }, - { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS }, - { "mskanji", RTL_TEXTENCODING_MS_932 }, - { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS }, - { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 }, - { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 }, - { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 }, - { "isoir86", RTL_TEXTENCODING_JIS_X_0208 }, - { "x0208", RTL_TEXTENCODING_JIS_X_0208 }, - { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 }, - { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 }, - { "x0201", RTL_TEXTENCODING_JIS_X_0201 }, - { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 }, - { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 }, - { "isoir159", RTL_TEXTENCODING_JIS_X_0208 }, - { "x0212", RTL_TEXTENCODING_JIS_X_0212 }, - { "isoir6", RTL_TEXTENCODING_ASCII_US }, - { "xsjis", RTL_TEXTENCODING_SHIFT_JIS }, - { "sjis", RTL_TEXTENCODING_SHIFT_JIS }, - { "ascii", RTL_TEXTENCODING_ASCII_US }, - { "us", RTL_TEXTENCODING_ASCII_US }, - { "gb180302000", RTL_TEXTENCODING_GB_18030 }, - /* This is no actual MIME character set name, it is only in here - for backwards compatibility (before "GB18030" was officially - registered with IANA, this code contained some guesses of what - would become official 
names for GB18030). */ - { "gb18030", RTL_TEXTENCODING_GB_18030 }, - { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS }, - { "tis620", RTL_TEXTENCODING_TIS_620 }, - { "gbk", RTL_TEXTENCODING_GBK }, - { "cp936", RTL_TEXTENCODING_GBK }, - { "ms936", RTL_TEXTENCODING_GBK }, - { "windows936", RTL_TEXTENCODING_GBK }, - { "cp874", RTL_TEXTENCODING_MS_874 }, - /* This is no official MIME character set name, but it might be in - use in Thailand. */ - { "ms874", RTL_TEXTENCODING_MS_874 }, - /* This is no official MIME character set name, but it might be in - use in Thailand. */ - { "windows874", RTL_TEXTENCODING_MS_874 }, - /* This is no official MIME character set name, but it might be in - use in Thailand. */ - { "koi8u", RTL_TEXTENCODING_KOI8_U }, - { "cpis", RTL_TEXTENCODING_IBM_861 }, - { "ksc56011987", RTL_TEXTENCODING_MS_949 }, - { "isoir149", RTL_TEXTENCODING_MS_949 }, - { "ksc56011989", RTL_TEXTENCODING_MS_949 }, - { "ksc5601", RTL_TEXTENCODING_MS_949 }, - { "korean", RTL_TEXTENCODING_MS_949 }, - { "csksc56011987", RTL_TEXTENCODING_MS_949 }, - /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as - this character set identifier seems to be prominently used by MS - to stand for KS C 5601 plus MS-949 extensions */ - { "latin9", RTL_TEXTENCODING_ISO_8859_15 }, - { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD }, - { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD }, - { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL }, - { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL }, - { "ptcp154", RTL_TEXTENCODING_PT154 }, - { "csptcp154", RTL_TEXTENCODING_PT154 }, - { "pt154", RTL_TEXTENCODING_PT154 }, - { "cp154", RTL_TEXTENCODING_PT154 }, - { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI }, - /* This is not an official MIME character set name, but is in use by - various windows APIs. 
*/ - { NULL, RTL_TEXTENCODING_DONTKNOW } - }; - - rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; - sal_Char* pBuf; - const ImplStrCharsetDef* pData = aVIPMimeCharsetTab; - sal_uInt32 nBufLen = strlen( pMimeCharset )+1; - - /* Alloc Buffer and map to lower case and remove non alphanumeric chars */ - pBuf = (char*)rtl_allocateMemory( nBufLen ); - Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf ); - - /* Search for equal in the VIP table */ - while ( pData->mpCharsetStr ) - { - if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 ) - { - eEncoding = pData->meTextEncoding; - break; - } - - pData++; - } - - /* Search for matching in the mime table */ - if ( eEncoding == RTL_TEXTENCODING_DONTKNOW ) - { - pData = aMimeCharsetTab; - while ( pData->mpCharsetStr ) - { - if ( Impl_matchString( pBuf, pData->mpCharsetStr ) ) - { - eEncoding = pData->meTextEncoding; - break; - } - - pData++; - } - } - - rtl_freeMemory( pBuf ); - - return eEncoding; -} - -/* ======================================================================= */ - -sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return pData->mnBestWindowsCharset; - else - return 1; -} - -/* ----------------------------------------------------------------------- */ - -const sal_Char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return (sal_Char const *) pData->mpBestUnixCharset; - else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) - return (sal_Char const *) "iso10646-1"; - else - return 0; -} - -/* ----------------------------------------------------------------------- */ - -char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding - nEncoding) -{ - ImplTextEncodingData const * p = 
Impl_getTextEncodingData(nEncoding); - return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ? - p->mpBestMimeCharset : NULL; -} - -const sal_Char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return (sal_Char const *) pData->mpBestMimeCharset; - else - return 0; -} - -/* The following two functions are based on , , and . - */ - -rtl_TextEncoding SAL_CALL -rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage) -{ - switch (nCodePage) - { - case 437: return RTL_TEXTENCODING_IBM_437; - case 708: return RTL_TEXTENCODING_ISO_8859_6; - case 737: return RTL_TEXTENCODING_IBM_737; - case 775: return RTL_TEXTENCODING_IBM_775; - case 850: return RTL_TEXTENCODING_IBM_850; - case 852: return RTL_TEXTENCODING_IBM_852; - case 855: return RTL_TEXTENCODING_IBM_855; - case 857: return RTL_TEXTENCODING_IBM_857; - case 860: return RTL_TEXTENCODING_IBM_860; - case 861: return RTL_TEXTENCODING_IBM_861; - case 862: return RTL_TEXTENCODING_IBM_862; - case 863: return RTL_TEXTENCODING_IBM_863; - case 864: return RTL_TEXTENCODING_IBM_864; - case 865: return RTL_TEXTENCODING_IBM_865; - case 866: return RTL_TEXTENCODING_IBM_866; - case 869: return RTL_TEXTENCODING_IBM_869; - case 874: return RTL_TEXTENCODING_MS_874; - case 932: return RTL_TEXTENCODING_MS_932; - case 936: return RTL_TEXTENCODING_MS_936; - case 949: return RTL_TEXTENCODING_MS_949; - case 950: return RTL_TEXTENCODING_MS_950; - case 1250: return RTL_TEXTENCODING_MS_1250; - case 1251: return RTL_TEXTENCODING_MS_1251; - case 1252: return RTL_TEXTENCODING_MS_1252; - case 1253: return RTL_TEXTENCODING_MS_1253; - case 1254: return RTL_TEXTENCODING_MS_1254; - case 1255: return RTL_TEXTENCODING_MS_1255; - case 1256: return RTL_TEXTENCODING_MS_1256; - case 1257: return RTL_TEXTENCODING_MS_1257; - case 1258: return RTL_TEXTENCODING_MS_1258; - case 1361: return RTL_TEXTENCODING_MS_1361; - 
case 10000: return RTL_TEXTENCODING_APPLE_ROMAN; - case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE; - case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD; - case 10003: return RTL_TEXTENCODING_APPLE_KOREAN; - case 10004: return RTL_TEXTENCODING_APPLE_ARABIC; - case 10005: return RTL_TEXTENCODING_APPLE_HEBREW; - case 10006: return RTL_TEXTENCODING_APPLE_GREEK; - case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC; - case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP; - case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN; - case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN; - case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO; - case 10079: return RTL_TEXTENCODING_APPLE_ICELAND; - case 10081: return RTL_TEXTENCODING_APPLE_TURKISH; - case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN; - case 20127: return RTL_TEXTENCODING_ASCII_US; - case 20866: return RTL_TEXTENCODING_KOI8_R; - case 21866: return RTL_TEXTENCODING_KOI8_U; - case 28591: return RTL_TEXTENCODING_ISO_8859_1; - case 28592: return RTL_TEXTENCODING_ISO_8859_2; - case 28593: return RTL_TEXTENCODING_ISO_8859_3; - case 28594: return RTL_TEXTENCODING_ISO_8859_4; - case 28595: return RTL_TEXTENCODING_ISO_8859_5; - case 28596: return RTL_TEXTENCODING_ISO_8859_6; - case 28597: return RTL_TEXTENCODING_ISO_8859_7; - case 28598: return RTL_TEXTENCODING_ISO_8859_8; - case 28599: return RTL_TEXTENCODING_ISO_8859_9; - case 28605: return RTL_TEXTENCODING_ISO_8859_15; - case 50220: return RTL_TEXTENCODING_ISO_2022_JP; - case 50225: return RTL_TEXTENCODING_ISO_2022_KR; - case 51932: return RTL_TEXTENCODING_EUC_JP; - case 51936: return RTL_TEXTENCODING_EUC_CN; - case 51949: return RTL_TEXTENCODING_EUC_KR; - case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI; - case 65000: return RTL_TEXTENCODING_UTF7; - case 65001: return RTL_TEXTENCODING_UTF8; - default: return RTL_TEXTENCODING_DONTKNOW; - } -} - -sal_uInt32 SAL_CALL -rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding) -{ - switch (nEncoding) - { - case 
RTL_TEXTENCODING_IBM_437: return 437; - /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */ - case RTL_TEXTENCODING_IBM_737: return 737; - case RTL_TEXTENCODING_IBM_775: return 775; - case RTL_TEXTENCODING_IBM_850: return 850; - case RTL_TEXTENCODING_IBM_852: return 852; - case RTL_TEXTENCODING_IBM_855: return 855; - case RTL_TEXTENCODING_IBM_857: return 857; - case RTL_TEXTENCODING_IBM_860: return 860; - case RTL_TEXTENCODING_IBM_861: return 861; - case RTL_TEXTENCODING_IBM_862: return 862; - case RTL_TEXTENCODING_IBM_863: return 863; - case RTL_TEXTENCODING_IBM_864: return 864; - case RTL_TEXTENCODING_IBM_865: return 865; - case RTL_TEXTENCODING_IBM_866: return 866; - case RTL_TEXTENCODING_IBM_869: return 869; - case RTL_TEXTENCODING_MS_874: return 874; - case RTL_TEXTENCODING_MS_932: return 932; - case RTL_TEXTENCODING_MS_936: return 936; - case RTL_TEXTENCODING_MS_949: return 949; - case RTL_TEXTENCODING_MS_950: return 950; - case RTL_TEXTENCODING_MS_1250: return 1250; - case RTL_TEXTENCODING_MS_1251: return 1251; - case RTL_TEXTENCODING_MS_1252: return 1252; - case RTL_TEXTENCODING_MS_1253: return 1253; - case RTL_TEXTENCODING_MS_1254: return 1254; - case RTL_TEXTENCODING_MS_1255: return 1255; - case RTL_TEXTENCODING_MS_1256: return 1256; - case RTL_TEXTENCODING_MS_1257: return 1257; - case RTL_TEXTENCODING_MS_1258: return 1258; - case RTL_TEXTENCODING_MS_1361: return 1361; - case RTL_TEXTENCODING_APPLE_ROMAN: return 10000; - case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001; - case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002; - case RTL_TEXTENCODING_APPLE_KOREAN: return 10003; - case RTL_TEXTENCODING_APPLE_ARABIC: return 10004; - case RTL_TEXTENCODING_APPLE_HEBREW: return 10005; - case RTL_TEXTENCODING_APPLE_GREEK: return 10006; - case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007; - case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008; - case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010; - case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017; - case 
RTL_TEXTENCODING_APPLE_CENTEURO: return 10029; - case RTL_TEXTENCODING_APPLE_ICELAND: return 10079; - case RTL_TEXTENCODING_APPLE_TURKISH: return 10081; - case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082; - case RTL_TEXTENCODING_ASCII_US: return 20127; - case RTL_TEXTENCODING_KOI8_R: return 20866; - case RTL_TEXTENCODING_KOI8_U: return 21866; - case RTL_TEXTENCODING_ISO_8859_1: return 28591; - case RTL_TEXTENCODING_ISO_8859_2: return 28592; - case RTL_TEXTENCODING_ISO_8859_3: return 28593; - case RTL_TEXTENCODING_ISO_8859_4: return 28594; - case RTL_TEXTENCODING_ISO_8859_5: return 28595; - case RTL_TEXTENCODING_ISO_8859_6: return 28596; - case RTL_TEXTENCODING_ISO_8859_7: return 28597; - case RTL_TEXTENCODING_ISO_8859_8: return 28598; - case RTL_TEXTENCODING_ISO_8859_9: return 28599; - case RTL_TEXTENCODING_ISO_8859_15: return 28605; - case RTL_TEXTENCODING_ISO_2022_JP: return 50220; - case RTL_TEXTENCODING_ISO_2022_KR: return 50225; - case RTL_TEXTENCODING_EUC_JP: return 51932; - case RTL_TEXTENCODING_EUC_CN: return 51936; - case RTL_TEXTENCODING_EUC_KR: return 51949; - case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002; - case RTL_TEXTENCODING_UTF7: return 65000; - case RTL_TEXTENCODING_UTF8: return 65001; - default: return 0; - } -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/tencinfo.cxx b/sal/textenc/tencinfo.cxx new file mode 100644 index 000000000000..d82a3625cf45 --- /dev/null +++ b/sal/textenc/tencinfo.cxx @@ -0,0 +1,994 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. 
+ * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include +#include + +#include "rtl/tencinfo.h" + +#include "gettextencodingdata.hxx" +#include "tenchelp.hxx" + +sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding) +{ + return + nEncoding > RTL_TEXTENCODING_DONTKNOW + && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM + && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // always update this! 
+} + +/* ======================================================================= */ + +static void Impl_toAsciiLower( const char* pName, char* pBuf ) +{ + while ( *pName ) + { + /* A-Z */ + if ( (*pName >= 0x41) && (*pName <= 0x5A) ) + *pBuf = (*pName)+0x20; /* toAsciiLower */ + else + *pBuf = *pName; + + pBuf++; + pName++; + } + + *pBuf = '\0'; +} + +/* ----------------------------------------------------------------------- */ + +static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf ) +{ + while ( *pName ) + { + /* A-Z */ + if ( (*pName >= 0x41) && (*pName <= 0x5A) ) + { + *pBuf = (*pName)+0x20; /* toAsciiLower */ + pBuf++; + } + /* a-z, 0-9 */ + else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) || + ((*pName >= 0x30) && (*pName <= 0x39)) ) + { + *pBuf = *pName; + pBuf++; + } + + pName++; + } + + *pBuf = '\0'; +} + +/* ----------------------------------------------------------------------- */ + +/* pMatchStr must match with all characters in pCompStr */ +static bool Impl_matchString( const char* pCompStr, const char* pMatchStr ) +{ + /* We test only for end in MatchStr, because the last 0 character from */ + /* pCompStr is unequal a character in MatchStr, so the loop terminates */ + while ( *pMatchStr ) + { + if ( *pCompStr != *pMatchStr ) + return false; + + pCompStr++; + pMatchStr++; + } + + return true; +} + +/* ======================================================================= */ + +struct ImplStrCharsetDef +{ + const char* mpCharsetStr; + rtl_TextEncoding meTextEncoding; +}; + +struct ImplStrFirstPartCharsetDef +{ + const char* mpCharsetStr; + const ImplStrCharsetDef* mpSecondPartTab; +}; + +/* ======================================================================= */ + +sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo ) +{ + const ImplTextEncodingData* pData; + + pData = Impl_getTextEncodingData( eTextEncoding ); + if ( !pData ) + { + /* HACK: For not implemented 
encoding, because not all + calls handle the errors */ + if ( pEncInfo->StructSize < 5 ) + return false; + pEncInfo->MinimumCharSize = 1; + + if ( pEncInfo->StructSize < 6 ) + return true; + pEncInfo->MaximumCharSize = 1; + + if ( pEncInfo->StructSize < 7 ) + return true; + pEncInfo->AverageCharSize = 1; + + if ( pEncInfo->StructSize < 12 ) + return true; + pEncInfo->Flags = 0; + + return false; + } + + if ( pEncInfo->StructSize < 5 ) + return false; + pEncInfo->MinimumCharSize = pData->mnMinCharSize; + + if ( pEncInfo->StructSize < 6 ) + return true; + pEncInfo->MaximumCharSize = pData->mnMaxCharSize; + + if ( pEncInfo->StructSize < 7 ) + return true; + pEncInfo->AverageCharSize = pData->mnAveCharSize; + + if ( pEncInfo->StructSize < 12 ) + return true; + pEncInfo->Flags = pData->mnInfoFlags; + + return true; +} + +/* ======================================================================= */ + +rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset ) +{ + rtl_TextEncoding eTextEncoding; + + switch ( nWinCharset ) + { + case 0: eTextEncoding = RTL_TEXTENCODING_MS_1252; break; /* ANSI_CHARSET */ + case 2: eTextEncoding = RTL_TEXTENCODING_SYMBOL; break; /* SYMBOL_CHARSET */ + case 77: eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */ + case 128: eTextEncoding = RTL_TEXTENCODING_MS_932; break; /* SHIFTJIS_CHARSET */ + case 129: eTextEncoding = RTL_TEXTENCODING_MS_949; break; /* HANGEUL_CHARSET */ + case 130: eTextEncoding = RTL_TEXTENCODING_MS_1361; break; /* JOHAB_CHARSET */ + case 134: eTextEncoding = RTL_TEXTENCODING_MS_936; break; /* GB2312_CHARSET */ + case 136: eTextEncoding = RTL_TEXTENCODING_MS_950; break; /* CHINESEBIG5_CHARSET */ + case 161: eTextEncoding = RTL_TEXTENCODING_MS_1253; break; /* GREEK_CHARSET */ + case 162: eTextEncoding = RTL_TEXTENCODING_MS_1254; break; /* TURKISH_CHARSET */ + case 163: eTextEncoding = RTL_TEXTENCODING_MS_1258; break; /* VIETNAMESE_CHARSET !!! 
*/ + case 177: eTextEncoding = RTL_TEXTENCODING_MS_1255; break; /* HEBREW_CHARSET */ + case 178: eTextEncoding = RTL_TEXTENCODING_MS_1256; break; /* ARABIC_CHARSET */ + case 186: eTextEncoding = RTL_TEXTENCODING_MS_1257; break; /* BALTIC_CHARSET */ + case 204: eTextEncoding = RTL_TEXTENCODING_MS_1251; break; /* RUSSIAN_CHARSET */ + case 222: eTextEncoding = RTL_TEXTENCODING_MS_874; break; /* THAI_CHARSET */ + case 238: eTextEncoding = RTL_TEXTENCODING_MS_1250; break; /* EASTEUROPE_CHARSET */ + case 255: eTextEncoding = RTL_TEXTENCODING_IBM_850; break; /* OEM_CHARSET */ + default: eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break; + }; + + return eTextEncoding; +} + +/* ----------------------------------------------------------------------- */ + +rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset ) +{ + /* See , section 14 ("Font Charset + * (Registry and Encoding) Names"). + */ + + /* All Identifiers in the tables are lower case The function search */ + /* for the first matching string in the tables. 
*/ + /* Sort order: unique (first 14, than 1), important */ + + static ImplStrCharsetDef const aUnixCharsetISOTab[] = + { + { "15", RTL_TEXTENCODING_ISO_8859_15 }, + { "14", RTL_TEXTENCODING_ISO_8859_14 }, + { "13", RTL_TEXTENCODING_ISO_8859_13 }, + { "11", RTL_TEXTENCODING_TIS_620 }, + { "10", RTL_TEXTENCODING_ISO_8859_10 }, + { "1", RTL_TEXTENCODING_ISO_8859_1 }, + { "2", RTL_TEXTENCODING_ISO_8859_2 }, + { "3", RTL_TEXTENCODING_ISO_8859_3 }, + { "4", RTL_TEXTENCODING_ISO_8859_4 }, + { "5", RTL_TEXTENCODING_ISO_8859_5 }, + { "6", RTL_TEXTENCODING_ISO_8859_6 }, + { "7", RTL_TEXTENCODING_ISO_8859_7 }, + { "8", RTL_TEXTENCODING_ISO_8859_8 }, + { "9", RTL_TEXTENCODING_ISO_8859_9 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrCharsetDef const aUnixCharsetADOBETab[] = + { + { "fontspecific", RTL_TEXTENCODING_SYMBOL }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrCharsetDef const aUnixCharsetMSTab[] = + { + { "1252", RTL_TEXTENCODING_MS_1252 }, + { "1250", RTL_TEXTENCODING_MS_1250 }, + { "1251", RTL_TEXTENCODING_MS_1251 }, + { "1253", RTL_TEXTENCODING_MS_1253 }, + { "1254", RTL_TEXTENCODING_MS_1254 }, + { "1255", RTL_TEXTENCODING_MS_1255 }, + { "1256", RTL_TEXTENCODING_MS_1256 }, + { "1257", RTL_TEXTENCODING_MS_1257 }, + { "1258", RTL_TEXTENCODING_MS_1258 }, + { "932", RTL_TEXTENCODING_MS_932 }, + { "936", RTL_TEXTENCODING_MS_936 }, + { "949", RTL_TEXTENCODING_MS_949 }, + { "950", RTL_TEXTENCODING_MS_950 }, + { "1361", RTL_TEXTENCODING_MS_1361 }, + { "cp1252", RTL_TEXTENCODING_MS_1252 }, + { "cp1250", RTL_TEXTENCODING_MS_1250 }, + { "cp1251", RTL_TEXTENCODING_MS_1251 }, + { "cp1253", RTL_TEXTENCODING_MS_1253 }, + { "cp1254", RTL_TEXTENCODING_MS_1254 }, + { "cp1255", RTL_TEXTENCODING_MS_1255 }, + { "cp1256", RTL_TEXTENCODING_MS_1256 }, + { "cp1257", RTL_TEXTENCODING_MS_1257 }, + { "cp1258", RTL_TEXTENCODING_MS_1258 }, + { "cp932", RTL_TEXTENCODING_MS_932 }, + { "cp936", RTL_TEXTENCODING_MS_936 }, + { "cp949", RTL_TEXTENCODING_MS_949 }, + { 
"cp950", RTL_TEXTENCODING_MS_950 }, + { "cp1361", RTL_TEXTENCODING_MS_1361 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrCharsetDef const aUnixCharsetIBMTab[] = + { + { "437", RTL_TEXTENCODING_IBM_437 }, + { "850", RTL_TEXTENCODING_IBM_850 }, + { "860", RTL_TEXTENCODING_IBM_860 }, + { "861", RTL_TEXTENCODING_IBM_861 }, + { "863", RTL_TEXTENCODING_IBM_863 }, + { "865", RTL_TEXTENCODING_IBM_865 }, + { "737", RTL_TEXTENCODING_IBM_737 }, + { "775", RTL_TEXTENCODING_IBM_775 }, + { "852", RTL_TEXTENCODING_IBM_852 }, + { "855", RTL_TEXTENCODING_IBM_855 }, + { "857", RTL_TEXTENCODING_IBM_857 }, + { "862", RTL_TEXTENCODING_IBM_862 }, + { "864", RTL_TEXTENCODING_IBM_864 }, + { "866", RTL_TEXTENCODING_IBM_866 }, + { "869", RTL_TEXTENCODING_IBM_869 }, + { "874", RTL_TEXTENCODING_MS_874 }, + { "1004", RTL_TEXTENCODING_MS_1252 }, + { "65400", RTL_TEXTENCODING_SYMBOL }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] = + { + { "r", RTL_TEXTENCODING_KOI8_R }, + { "u", RTL_TEXTENCODING_KOI8_U }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrCharsetDef aUnixCharsetJISX0208Tab[] = + { + { NULL, RTL_TEXTENCODING_JIS_X_0208 } + }; + + static ImplStrCharsetDef aUnixCharsetJISX0201Tab[] = + { + { NULL, RTL_TEXTENCODING_JIS_X_0201 } + }; + + static ImplStrCharsetDef aUnixCharsetJISX0212Tab[] = + { + { NULL, RTL_TEXTENCODING_JIS_X_0212 } + }; + + static ImplStrCharsetDef aUnixCharsetGBTab[] = + { + { NULL, RTL_TEXTENCODING_GB_2312 } + }; + + static ImplStrCharsetDef aUnixCharsetGBKTab[] = + { + { NULL, RTL_TEXTENCODING_GBK } + }; + + static ImplStrCharsetDef aUnixCharsetBIG5Tab[] = + { + { NULL, RTL_TEXTENCODING_BIG5 } + }; + + static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] = + { + { NULL, RTL_TEXTENCODING_EUC_KR } + }; + + static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] = + { + { NULL, RTL_TEXTENCODING_MS_1361 } + }; + + static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] = + 
{ + { NULL, RTL_TEXTENCODING_UNICODE } + }; + + static ImplStrCharsetDef const aUnixCharsetUNICODETab[] = + { +/* Currently every Unicode Encoding is for us Unicode */ +/* { "fontspecific", RTL_TEXTENCODING_UNICODE }, */ + { NULL, RTL_TEXTENCODING_UNICODE } + }; + + static ImplStrCharsetDef const aUnixCharsetSymbolTab[] = + { + { NULL, RTL_TEXTENCODING_SYMBOL } + }; + + /* See : */ + static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] = + { + { "0", RTL_TEXTENCODING_TIS_620 }, + { "2529", RTL_TEXTENCODING_TIS_620 }, + { "2533", RTL_TEXTENCODING_TIS_620 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] = + { + { "1", RTL_TEXTENCODING_TIS_620 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] = + { + { "0", RTL_TEXTENCODING_TIS_620 }, + { "1", RTL_TEXTENCODING_TIS_620 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] = + { + { "iso8859", aUnixCharsetISOTab }, + { "adobe", aUnixCharsetADOBETab }, + { "ansi", aUnixCharsetMSTab }, + { "microsoft", aUnixCharsetMSTab }, + { "ibm", aUnixCharsetIBMTab }, + { "koi8", aUnixCharsetKOI8Tab }, + { "jisx0208", aUnixCharsetJISX0208Tab }, + { "jisx0208.1983", aUnixCharsetJISX0208Tab }, + { "jisx0201", aUnixCharsetJISX0201Tab }, + { "jisx0201.1976", aUnixCharsetJISX0201Tab }, + { "jisx0212", aUnixCharsetJISX0212Tab }, + { "jisx0212.1990", aUnixCharsetJISX0212Tab }, + { "gb2312", aUnixCharsetGBTab }, + { "gbk", aUnixCharsetGBKTab }, + { "big5", aUnixCharsetBIG5Tab }, + { "iso10646", aUnixCharsetISO10646Tab }, +/* { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */ + { "sunolcursor", aUnixCharsetSymbolTab }, + { "sunolglyph", aUnixCharsetSymbolTab }, + { "iso10646", aUnixCharsetUNICODETab }, + { "ksc5601.1987", aUnixCharsetKSC56011987Tab }, + { "ksc5601.1992", aUnixCharsetKSC56011992Tab }, + { "tis620.2529", 
aUnixCharsetTIS6202529Tab }, + { "tis620.2533", aUnixCharsetTIS6202533Tab }, + { "tis620", aUnixCharsetTIS620Tab }, +/* { "sunudcja.1997", }, */ +/* { "sunudcko.1997", }, */ +/* { "sunudczh.1997", }, */ +/* { "sunudczhtw.1997", }, */ + { NULL, NULL } + }; + + rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; + char* pBuf; + char* pTempBuf; + sal_uInt32 nBufLen = strlen( pUnixCharset )+1; + const char* pFirstPart; + const char* pSecondPart; + + /* Alloc Buffer and map to lower case */ + pBuf = new char[nBufLen]; + Impl_toAsciiLower( pUnixCharset, pBuf ); + + /* Search FirstPart */ + pFirstPart = pBuf; + pSecondPart = NULL; + pTempBuf = pBuf; + while ( *pTempBuf ) + { + if ( *pTempBuf == '-' ) + { + *pTempBuf = '\0'; + pSecondPart = pTempBuf+1; + break; + } + + pTempBuf++; + } + + /* Parttrenner gefunden */ + if ( pSecondPart ) + { + /* Search for the part tab */ + const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab; + while ( pFirstPartData->mpCharsetStr ) + { + if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) ) + { + /* Search for the charset in the second part tab */ + const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab; + while ( pData->mpCharsetStr ) + { + if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) ) + { + eEncoding = pData->meTextEncoding; + break; + } + + pData++; + } + + /* use default encoding for first part */ + eEncoding = pData->meTextEncoding; + break; + } + + pFirstPartData++; + } + } + + delete[] pBuf; + + return eEncoding; +} + +/* ----------------------------------------------------------------------- */ + +rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset ) +{ + /* All Identifiers are in lower case and contain only alphanumeric */ + /* characters. The function search for the first equal string in */ + /* the table. In this table are only the most used mime types. 
*/ + /* Sort order: important */ + static ImplStrCharsetDef const aVIPMimeCharsetTab[] = + { + { "usascii", RTL_TEXTENCODING_ASCII_US }, + { "utf8", RTL_TEXTENCODING_UTF8 }, + { "utf7", RTL_TEXTENCODING_UTF7 }, + { "iso88591", RTL_TEXTENCODING_ISO_8859_1 }, + { "iso88592", RTL_TEXTENCODING_ISO_8859_2 }, + { "iso88593", RTL_TEXTENCODING_ISO_8859_3 }, + { "iso88594", RTL_TEXTENCODING_ISO_8859_4 }, + { "iso88595", RTL_TEXTENCODING_ISO_8859_5 }, + { "iso88596", RTL_TEXTENCODING_ISO_8859_6 }, + { "iso88597", RTL_TEXTENCODING_ISO_8859_7 }, + { "iso88598", RTL_TEXTENCODING_ISO_8859_8 }, + { "iso88599", RTL_TEXTENCODING_ISO_8859_9 }, + { "iso885910", RTL_TEXTENCODING_ISO_8859_10 }, + { "iso885913", RTL_TEXTENCODING_ISO_8859_13 }, + { "iso885914", RTL_TEXTENCODING_ISO_8859_14 }, + { "iso885915", RTL_TEXTENCODING_ISO_8859_15 }, + { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP }, + { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP }, + { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN }, + { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN }, + { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR }, + { "eucjp", RTL_TEXTENCODING_EUC_JP }, + { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS }, + { "mskanji", RTL_TEXTENCODING_MS_932 }, + { "gb2312", RTL_TEXTENCODING_GB_2312 }, + { "cngb", RTL_TEXTENCODING_GB_2312 }, + { "big5", RTL_TEXTENCODING_BIG5 }, + { "cnbig5", RTL_TEXTENCODING_BIG5 }, + { "cngb12345", RTL_TEXTENCODING_GBT_12345 }, + { "euckr", RTL_TEXTENCODING_EUC_KR }, + { "koi8r", RTL_TEXTENCODING_KOI8_R }, + { "windows1252", RTL_TEXTENCODING_MS_1252 }, + { "windows1250", RTL_TEXTENCODING_MS_1250 }, + { "windows1251", RTL_TEXTENCODING_MS_1251 }, + { "windows1253", RTL_TEXTENCODING_MS_1253 }, + { "windows1254", RTL_TEXTENCODING_MS_1254 }, + { "windows1255", RTL_TEXTENCODING_MS_1255 }, + { "windows1256", RTL_TEXTENCODING_MS_1256 }, + { "windows1257", RTL_TEXTENCODING_MS_1257 }, + { "windows1258", RTL_TEXTENCODING_MS_1258 }, + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + /* All Identifiers are in lower case 
and contain only alphanumeric */ + /* characters. The function search for the first matching string in */ + /* the table. */ + /* Sort order: unique (first iso885914, than iso88591), important */ + static ImplStrCharsetDef const aMimeCharsetTab[] = + { + { "unicode11utf7", RTL_TEXTENCODING_UTF7 }, + { "caunicode11utf7", RTL_TEXTENCODING_UTF7 }, + { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 }, + { "iso88591win", RTL_TEXTENCODING_MS_1252 }, + { "iso88592win", RTL_TEXTENCODING_MS_1250 }, + { "iso88599win", RTL_TEXTENCODING_MS_1254 }, + { "iso885915", RTL_TEXTENCODING_ISO_8859_15 }, + { "iso885914", RTL_TEXTENCODING_ISO_8859_14 }, + { "iso885913", RTL_TEXTENCODING_ISO_8859_13 }, + { "iso885911", RTL_TEXTENCODING_TIS_620 }, + /* This is no official MIME character set name, but it might be in + use in Thailand. */ + { "iso885910", RTL_TEXTENCODING_ISO_8859_10 }, + { "iso88591", RTL_TEXTENCODING_ISO_8859_1 }, + { "iso88592", RTL_TEXTENCODING_ISO_8859_2 }, + { "iso88593", RTL_TEXTENCODING_ISO_8859_3 }, + { "iso88594", RTL_TEXTENCODING_ISO_8859_4 }, + { "iso88595", RTL_TEXTENCODING_ISO_8859_5 }, + { "iso88596", RTL_TEXTENCODING_ISO_8859_6 }, + { "iso88597", RTL_TEXTENCODING_ISO_8859_7 }, + { "iso88598", RTL_TEXTENCODING_ISO_8859_8 }, + { "iso88599", RTL_TEXTENCODING_ISO_8859_9 }, + { "isoir100", RTL_TEXTENCODING_ISO_8859_1 }, + { "latin1", RTL_TEXTENCODING_ISO_8859_1 }, + { "l1", RTL_TEXTENCODING_ISO_8859_1 }, + { "cp819", RTL_TEXTENCODING_ISO_8859_1 }, + { "ibm819", RTL_TEXTENCODING_ISO_8859_1 }, + { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 }, + { "isoir101", RTL_TEXTENCODING_ISO_8859_2 }, + { "latin2", RTL_TEXTENCODING_ISO_8859_2 }, + { "l2", RTL_TEXTENCODING_ISO_8859_2 }, + { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 }, + { "isoir109", RTL_TEXTENCODING_ISO_8859_3 }, + { "latin3", RTL_TEXTENCODING_ISO_8859_3 }, + { "l3", RTL_TEXTENCODING_ISO_8859_3 }, + { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 }, + { "isoir110", RTL_TEXTENCODING_ISO_8859_4 }, + { "latin4", 
RTL_TEXTENCODING_ISO_8859_4 }, + { "l4", RTL_TEXTENCODING_ISO_8859_4 }, + { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 }, + { "isoir144", RTL_TEXTENCODING_ISO_8859_5 }, + { "cyrillicasian", RTL_TEXTENCODING_PT154 }, + { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 }, + { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 }, + { "isoir127", RTL_TEXTENCODING_ISO_8859_6 }, + { "arabic", RTL_TEXTENCODING_ISO_8859_6 }, + { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 }, + { "ecma114", RTL_TEXTENCODING_ISO_8859_6 }, + { "asmo708", RTL_TEXTENCODING_ISO_8859_6 }, + { "isoir126", RTL_TEXTENCODING_ISO_8859_7 }, + { "greek", RTL_TEXTENCODING_ISO_8859_7 }, + { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 }, + { "elot928", RTL_TEXTENCODING_ISO_8859_7 }, + { "ecma118", RTL_TEXTENCODING_ISO_8859_7 }, + { "isoir138", RTL_TEXTENCODING_ISO_8859_8 }, + { "hebrew", RTL_TEXTENCODING_ISO_8859_8 }, + { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 }, + { "isoir148", RTL_TEXTENCODING_ISO_8859_9 }, + { "latin5", RTL_TEXTENCODING_ISO_8859_9 }, + { "l5", RTL_TEXTENCODING_ISO_8859_9 }, + { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 }, + { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 }, + { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 }, + { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 }, + { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 }, + { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 }, + { "iso10646us", RTL_TEXTENCODING_ASCII_US }, + { "iso646irv", RTL_TEXTENCODING_ASCII_US }, + { "cskoi8r", RTL_TEXTENCODING_KOI8_R }, + { "ibm437", RTL_TEXTENCODING_IBM_437 }, + { "cp437", RTL_TEXTENCODING_IBM_437 }, + { "437", RTL_TEXTENCODING_IBM_437 }, + { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 }, + { "ansix34", RTL_TEXTENCODING_ASCII_US }, + { "ibm367", RTL_TEXTENCODING_ASCII_US }, + { "cp367", RTL_TEXTENCODING_ASCII_US }, + { "csascii", RTL_TEXTENCODING_ASCII_US }, + { "ibm775", RTL_TEXTENCODING_IBM_775 }, + { "cp775", RTL_TEXTENCODING_IBM_775 }, + { "cspc775baltic", 
RTL_TEXTENCODING_IBM_775 }, + { "ibm850", RTL_TEXTENCODING_IBM_850 }, + { "cp850", RTL_TEXTENCODING_IBM_850 }, + { "850", RTL_TEXTENCODING_IBM_850 }, + { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 }, +/* { "ibm851", RTL_TEXTENCODING_IBM_851 }, */ +/* { "cp851", RTL_TEXTENCODING_IBM_851 }, */ +/* { "851", RTL_TEXTENCODING_IBM_851 }, */ +/* { "csibm851", RTL_TEXTENCODING_IBM_851 }, */ + { "ibm852", RTL_TEXTENCODING_IBM_852 }, + { "cp852", RTL_TEXTENCODING_IBM_852 }, + { "852", RTL_TEXTENCODING_IBM_852 }, + { "cspcp852", RTL_TEXTENCODING_IBM_852 }, + { "ibm855", RTL_TEXTENCODING_IBM_855 }, + { "cp855", RTL_TEXTENCODING_IBM_855 }, + { "855", RTL_TEXTENCODING_IBM_855 }, + { "csibm855", RTL_TEXTENCODING_IBM_855 }, + { "ibm857", RTL_TEXTENCODING_IBM_857 }, + { "cp857", RTL_TEXTENCODING_IBM_857 }, + { "857", RTL_TEXTENCODING_IBM_857 }, + { "csibm857", RTL_TEXTENCODING_IBM_857 }, + { "ibm860", RTL_TEXTENCODING_IBM_860 }, + { "cp860", RTL_TEXTENCODING_IBM_860 }, + { "860", RTL_TEXTENCODING_IBM_860 }, + { "csibm860", RTL_TEXTENCODING_IBM_860 }, + { "ibm861", RTL_TEXTENCODING_IBM_861 }, + { "cp861", RTL_TEXTENCODING_IBM_861 }, + { "861", RTL_TEXTENCODING_IBM_861 }, + { "csis", RTL_TEXTENCODING_IBM_861 }, + { "csibm861", RTL_TEXTENCODING_IBM_861 }, + { "ibm862", RTL_TEXTENCODING_IBM_862 }, + { "cp862", RTL_TEXTENCODING_IBM_862 }, + { "862", RTL_TEXTENCODING_IBM_862 }, + { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 }, + { "ibm863", RTL_TEXTENCODING_IBM_863 }, + { "cp863", RTL_TEXTENCODING_IBM_863 }, + { "863", RTL_TEXTENCODING_IBM_863 }, + { "csibm863", RTL_TEXTENCODING_IBM_863 }, + { "ibm864", RTL_TEXTENCODING_IBM_864 }, + { "cp864", RTL_TEXTENCODING_IBM_864 }, + { "864", RTL_TEXTENCODING_IBM_864 }, + { "csibm864", RTL_TEXTENCODING_IBM_864 }, + { "ibm865", RTL_TEXTENCODING_IBM_865 }, + { "cp865", RTL_TEXTENCODING_IBM_865 }, + { "865", RTL_TEXTENCODING_IBM_865 }, + { "csibm865", RTL_TEXTENCODING_IBM_865 }, + { "ibm866", RTL_TEXTENCODING_IBM_866 }, + { "cp866", 
RTL_TEXTENCODING_IBM_866 }, + { "866", RTL_TEXTENCODING_IBM_866 }, + { "csibm866", RTL_TEXTENCODING_IBM_866 }, +/* { "ibm868", RTL_TEXTENCODING_IBM_868 }, */ +/* { "cp868", RTL_TEXTENCODING_IBM_868 }, */ +/* { "cpar", RTL_TEXTENCODING_IBM_868 }, */ +/* { "csibm868", RTL_TEXTENCODING_IBM_868 }, */ + { "ibm869", RTL_TEXTENCODING_IBM_869 }, + { "cp869", RTL_TEXTENCODING_IBM_869 }, + { "869", RTL_TEXTENCODING_IBM_869 }, + { "cpgr", RTL_TEXTENCODING_IBM_869 }, + { "csibm869", RTL_TEXTENCODING_IBM_869 }, + { "ibm869", RTL_TEXTENCODING_IBM_869 }, + { "cp869", RTL_TEXTENCODING_IBM_869 }, + { "869", RTL_TEXTENCODING_IBM_869 }, + { "cpgr", RTL_TEXTENCODING_IBM_869 }, + { "csibm869", RTL_TEXTENCODING_IBM_869 }, + { "mac", RTL_TEXTENCODING_APPLE_ROMAN }, + { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN }, + { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS }, + { "mskanji", RTL_TEXTENCODING_MS_932 }, + { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS }, + { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 }, + { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 }, + { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 }, + { "isoir86", RTL_TEXTENCODING_JIS_X_0208 }, + { "x0208", RTL_TEXTENCODING_JIS_X_0208 }, + { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 }, + { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 }, + { "x0201", RTL_TEXTENCODING_JIS_X_0201 }, + { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 }, + { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 }, + { "isoir159", RTL_TEXTENCODING_JIS_X_0208 }, + { "x0212", RTL_TEXTENCODING_JIS_X_0212 }, + { "isoir6", RTL_TEXTENCODING_ASCII_US }, + { "xsjis", RTL_TEXTENCODING_SHIFT_JIS }, + { "sjis", RTL_TEXTENCODING_SHIFT_JIS }, + { "ascii", RTL_TEXTENCODING_ASCII_US }, + { "us", RTL_TEXTENCODING_ASCII_US }, + { "gb180302000", RTL_TEXTENCODING_GB_18030 }, + /* This is no actual MIME character set name, it is only in here + for backwards compatibility (before "GB18030" was officially + registered with IANA, this code contained some guesses of what + would become official 
names for GB18030). */ + { "gb18030", RTL_TEXTENCODING_GB_18030 }, + { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS }, + { "tis620", RTL_TEXTENCODING_TIS_620 }, + { "gbk", RTL_TEXTENCODING_GBK }, + { "cp936", RTL_TEXTENCODING_GBK }, + { "ms936", RTL_TEXTENCODING_GBK }, + { "windows936", RTL_TEXTENCODING_GBK }, + { "cp874", RTL_TEXTENCODING_MS_874 }, + /* This is no official MIME character set name, but it might be in + use in Thailand. */ + { "ms874", RTL_TEXTENCODING_MS_874 }, + /* This is no official MIME character set name, but it might be in + use in Thailand. */ + { "windows874", RTL_TEXTENCODING_MS_874 }, + /* This is no official MIME character set name, but it might be in + use in Thailand. */ + { "koi8u", RTL_TEXTENCODING_KOI8_U }, + { "cpis", RTL_TEXTENCODING_IBM_861 }, + { "ksc56011987", RTL_TEXTENCODING_MS_949 }, + { "isoir149", RTL_TEXTENCODING_MS_949 }, + { "ksc56011989", RTL_TEXTENCODING_MS_949 }, + { "ksc5601", RTL_TEXTENCODING_MS_949 }, + { "korean", RTL_TEXTENCODING_MS_949 }, + { "csksc56011987", RTL_TEXTENCODING_MS_949 }, + /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as + this character set identifier seems to be prominently used by MS + to stand for KS C 5601 plus MS-949 extensions */ + { "latin9", RTL_TEXTENCODING_ISO_8859_15 }, + { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD }, + { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD }, + { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL }, + { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL }, + { "ptcp154", RTL_TEXTENCODING_PT154 }, + { "csptcp154", RTL_TEXTENCODING_PT154 }, + { "pt154", RTL_TEXTENCODING_PT154 }, + { "cp154", RTL_TEXTENCODING_PT154 }, + { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI }, + /* This is not an official MIME character set name, but is in use by + various windows APIs. 
*/ + { NULL, RTL_TEXTENCODING_DONTKNOW } + }; + + rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; + char* pBuf; + const ImplStrCharsetDef* pData = aVIPMimeCharsetTab; + sal_uInt32 nBufLen = strlen( pMimeCharset )+1; + + /* Alloc Buffer and map to lower case and remove non alphanumeric chars */ + pBuf = new char[nBufLen]; + Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf ); + + /* Search for equal in the VIP table */ + while ( pData->mpCharsetStr ) + { + if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 ) + { + eEncoding = pData->meTextEncoding; + break; + } + + pData++; + } + + /* Search for matching in the mime table */ + if ( eEncoding == RTL_TEXTENCODING_DONTKNOW ) + { + pData = aMimeCharsetTab; + while ( pData->mpCharsetStr ) + { + if ( Impl_matchString( pBuf, pData->mpCharsetStr ) ) + { + eEncoding = pData->meTextEncoding; + break; + } + + pData++; + } + } + + delete[] pBuf; + + return eEncoding; +} + +/* ======================================================================= */ + +sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) +{ + const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); + if ( pData ) + return pData->mnBestWindowsCharset; + else + return 1; +} + +/* ----------------------------------------------------------------------- */ + +const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) +{ + const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); + if ( pData ) + return (char const *) pData->mpBestUnixCharset; + else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) + return (char const *) "iso10646-1"; + else + return 0; +} + +/* ----------------------------------------------------------------------- */ + +char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding + nEncoding) +{ + ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding); + return p && (p->mnInfoFlags & 
RTL_TEXTENCODING_INFO_MIME) != 0 ? + p->mpBestMimeCharset : NULL; +} + +const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) +{ + const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); + if ( pData ) + return (char const *) pData->mpBestMimeCharset; + else + return 0; +} + +/* The following two functions are based on , , and . + */ + +rtl_TextEncoding SAL_CALL +rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage) +{ + switch (nCodePage) + { + case 437: return RTL_TEXTENCODING_IBM_437; + case 708: return RTL_TEXTENCODING_ISO_8859_6; + case 737: return RTL_TEXTENCODING_IBM_737; + case 775: return RTL_TEXTENCODING_IBM_775; + case 850: return RTL_TEXTENCODING_IBM_850; + case 852: return RTL_TEXTENCODING_IBM_852; + case 855: return RTL_TEXTENCODING_IBM_855; + case 857: return RTL_TEXTENCODING_IBM_857; + case 860: return RTL_TEXTENCODING_IBM_860; + case 861: return RTL_TEXTENCODING_IBM_861; + case 862: return RTL_TEXTENCODING_IBM_862; + case 863: return RTL_TEXTENCODING_IBM_863; + case 864: return RTL_TEXTENCODING_IBM_864; + case 865: return RTL_TEXTENCODING_IBM_865; + case 866: return RTL_TEXTENCODING_IBM_866; + case 869: return RTL_TEXTENCODING_IBM_869; + case 874: return RTL_TEXTENCODING_MS_874; + case 932: return RTL_TEXTENCODING_MS_932; + case 936: return RTL_TEXTENCODING_MS_936; + case 949: return RTL_TEXTENCODING_MS_949; + case 950: return RTL_TEXTENCODING_MS_950; + case 1250: return RTL_TEXTENCODING_MS_1250; + case 1251: return RTL_TEXTENCODING_MS_1251; + case 1252: return RTL_TEXTENCODING_MS_1252; + case 1253: return RTL_TEXTENCODING_MS_1253; + case 1254: return RTL_TEXTENCODING_MS_1254; + case 1255: return RTL_TEXTENCODING_MS_1255; + case 1256: return RTL_TEXTENCODING_MS_1256; + case 1257: return RTL_TEXTENCODING_MS_1257; + case 1258: return RTL_TEXTENCODING_MS_1258; + case 1361: return RTL_TEXTENCODING_MS_1361; + case 10000: return RTL_TEXTENCODING_APPLE_ROMAN; + case 10001: return 
RTL_TEXTENCODING_APPLE_JAPANESE; + case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD; + case 10003: return RTL_TEXTENCODING_APPLE_KOREAN; + case 10004: return RTL_TEXTENCODING_APPLE_ARABIC; + case 10005: return RTL_TEXTENCODING_APPLE_HEBREW; + case 10006: return RTL_TEXTENCODING_APPLE_GREEK; + case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC; + case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP; + case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN; + case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN; + case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO; + case 10079: return RTL_TEXTENCODING_APPLE_ICELAND; + case 10081: return RTL_TEXTENCODING_APPLE_TURKISH; + case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN; + case 20127: return RTL_TEXTENCODING_ASCII_US; + case 20866: return RTL_TEXTENCODING_KOI8_R; + case 21866: return RTL_TEXTENCODING_KOI8_U; + case 28591: return RTL_TEXTENCODING_ISO_8859_1; + case 28592: return RTL_TEXTENCODING_ISO_8859_2; + case 28593: return RTL_TEXTENCODING_ISO_8859_3; + case 28594: return RTL_TEXTENCODING_ISO_8859_4; + case 28595: return RTL_TEXTENCODING_ISO_8859_5; + case 28596: return RTL_TEXTENCODING_ISO_8859_6; + case 28597: return RTL_TEXTENCODING_ISO_8859_7; + case 28598: return RTL_TEXTENCODING_ISO_8859_8; + case 28599: return RTL_TEXTENCODING_ISO_8859_9; + case 28605: return RTL_TEXTENCODING_ISO_8859_15; + case 50220: return RTL_TEXTENCODING_ISO_2022_JP; + case 50225: return RTL_TEXTENCODING_ISO_2022_KR; + case 51932: return RTL_TEXTENCODING_EUC_JP; + case 51936: return RTL_TEXTENCODING_EUC_CN; + case 51949: return RTL_TEXTENCODING_EUC_KR; + case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI; + case 65000: return RTL_TEXTENCODING_UTF7; + case 65001: return RTL_TEXTENCODING_UTF8; + default: return RTL_TEXTENCODING_DONTKNOW; + } +} + +sal_uInt32 SAL_CALL +rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding) +{ + switch (nEncoding) + { + case RTL_TEXTENCODING_IBM_437: return 437; + /* case 
RTL_TEXTENCODING_ISO_8859_6: return 708; */ + case RTL_TEXTENCODING_IBM_737: return 737; + case RTL_TEXTENCODING_IBM_775: return 775; + case RTL_TEXTENCODING_IBM_850: return 850; + case RTL_TEXTENCODING_IBM_852: return 852; + case RTL_TEXTENCODING_IBM_855: return 855; + case RTL_TEXTENCODING_IBM_857: return 857; + case RTL_TEXTENCODING_IBM_860: return 860; + case RTL_TEXTENCODING_IBM_861: return 861; + case RTL_TEXTENCODING_IBM_862: return 862; + case RTL_TEXTENCODING_IBM_863: return 863; + case RTL_TEXTENCODING_IBM_864: return 864; + case RTL_TEXTENCODING_IBM_865: return 865; + case RTL_TEXTENCODING_IBM_866: return 866; + case RTL_TEXTENCODING_IBM_869: return 869; + case RTL_TEXTENCODING_MS_874: return 874; + case RTL_TEXTENCODING_MS_932: return 932; + case RTL_TEXTENCODING_MS_936: return 936; + case RTL_TEXTENCODING_MS_949: return 949; + case RTL_TEXTENCODING_MS_950: return 950; + case RTL_TEXTENCODING_MS_1250: return 1250; + case RTL_TEXTENCODING_MS_1251: return 1251; + case RTL_TEXTENCODING_MS_1252: return 1252; + case RTL_TEXTENCODING_MS_1253: return 1253; + case RTL_TEXTENCODING_MS_1254: return 1254; + case RTL_TEXTENCODING_MS_1255: return 1255; + case RTL_TEXTENCODING_MS_1256: return 1256; + case RTL_TEXTENCODING_MS_1257: return 1257; + case RTL_TEXTENCODING_MS_1258: return 1258; + case RTL_TEXTENCODING_MS_1361: return 1361; + case RTL_TEXTENCODING_APPLE_ROMAN: return 10000; + case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001; + case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002; + case RTL_TEXTENCODING_APPLE_KOREAN: return 10003; + case RTL_TEXTENCODING_APPLE_ARABIC: return 10004; + case RTL_TEXTENCODING_APPLE_HEBREW: return 10005; + case RTL_TEXTENCODING_APPLE_GREEK: return 10006; + case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007; + case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008; + case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010; + case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017; + case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029; + case 
RTL_TEXTENCODING_APPLE_ICELAND: return 10079; + case RTL_TEXTENCODING_APPLE_TURKISH: return 10081; + case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082; + case RTL_TEXTENCODING_ASCII_US: return 20127; + case RTL_TEXTENCODING_KOI8_R: return 20866; + case RTL_TEXTENCODING_KOI8_U: return 21866; + case RTL_TEXTENCODING_ISO_8859_1: return 28591; + case RTL_TEXTENCODING_ISO_8859_2: return 28592; + case RTL_TEXTENCODING_ISO_8859_3: return 28593; + case RTL_TEXTENCODING_ISO_8859_4: return 28594; + case RTL_TEXTENCODING_ISO_8859_5: return 28595; + case RTL_TEXTENCODING_ISO_8859_6: return 28596; + case RTL_TEXTENCODING_ISO_8859_7: return 28597; + case RTL_TEXTENCODING_ISO_8859_8: return 28598; + case RTL_TEXTENCODING_ISO_8859_9: return 28599; + case RTL_TEXTENCODING_ISO_8859_15: return 28605; + case RTL_TEXTENCODING_ISO_2022_JP: return 50220; + case RTL_TEXTENCODING_ISO_2022_KR: return 50225; + case RTL_TEXTENCODING_EUC_JP: return 51932; + case RTL_TEXTENCODING_EUC_CN: return 51936; + case RTL_TEXTENCODING_EUC_KR: return 51949; + case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002; + case RTL_TEXTENCODING_UTF7: return 65000; + case RTL_TEXTENCODING_UTF8: return 65001; + default: return 0; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/textcvt.c b/sal/textenc/textcvt.c deleted file mode 100644 index 0030590cf926..000000000000 --- a/sal/textenc/textcvt.c +++ /dev/null @@ -1,267 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. 
- * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "rtl/textcvt.h" -#include "gettextencodingdata.h" -#include "tenchelp.h" - -/* ======================================================================= */ - -static sal_Size ImplDummyToUnicode( const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - sal_Unicode* pEndDestBuf; - const sal_Char* pEndSrcBuf; - - if ( ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR) || - ((nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR) ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | - RTL_TEXTTOUNICODE_INFO_UNDEFINED | - RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; - return 0; - } - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestChars; - pEndSrcBuf = pSrcBuf+nSrcBytes; - while ( pSrcBuf < pEndSrcBuf ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Unicode)(sal_uChar)*pSrcBuf; - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); - return (nDestChars -
(pEndDestBuf-pDestBuf)); -} - -/* ----------------------------------------------------------------------- */ - -static sal_Size ImplUnicodeToDummy( const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - sal_Char* pEndDestBuf; - const sal_Unicode* pEndSrcBuf; - - if ( ((nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR) ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | - RTL_UNICODETOTEXT_INFO_UNDEFINED; - return 0; - } - - *pInfo = 0; - pEndDestBuf = pDestBuf+nDestBytes; - pEndSrcBuf = pSrcBuf+nSrcChars; - while ( pSrcBuf < pEndSrcBuf ) - { - if ( pDestBuf == pEndDestBuf ) - { - *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; - break; - } - - *pDestBuf = (sal_Char)(sal_uChar)(*pSrcBuf & 0x00FF); - pDestBuf++; - pSrcBuf++; - } - - *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); - return (nDestBytes - (pEndDestBuf-pDestBuf)); -} - -/* ======================================================================= */ - -rtl_TextToUnicodeConverter SAL_CALL rtl_createTextToUnicodeConverter( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return (rtl_TextToUnicodeConverter) &pData->maConverter; - else - return 0; -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_destroyTextToUnicodeConverter( rtl_TextToUnicodeConverter hContext ) -{ - (void) hContext; /* unused */ -} - -/* ----------------------------------------------------------------------- */ - -rtl_TextToUnicodeContext SAL_CALL rtl_createTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( !pConverter ) - return 0; - else if ( pConverter->mpCreateTextToUnicodeContext ) - return 
(rtl_TextToUnicodeContext)pConverter->mpCreateTextToUnicodeContext(); - else - return (rtl_TextToUnicodeContext)1; -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_destroyTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter, - rtl_TextToUnicodeContext hContext ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( pConverter && hContext && pConverter->mpDestroyTextToUnicodeContext ) - pConverter->mpDestroyTextToUnicodeContext( (void*)hContext ); -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_resetTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter, - rtl_TextToUnicodeContext hContext ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( pConverter && hContext && pConverter->mpResetTextToUnicodeContext ) - pConverter->mpResetTextToUnicodeContext( (void*)hContext ); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size SAL_CALL rtl_convertTextToUnicode( rtl_TextToUnicodeConverter hConverter, - rtl_TextToUnicodeContext hContext, - const sal_Char* pSrcBuf, sal_Size nSrcBytes, - sal_Unicode* pDestBuf, sal_Size nDestChars, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtBytes ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - - /* Only temporaer, because we don't want die, if we don't have a - converter, because not all converters are implemented yet */ - if ( !pConverter ) - { - return ImplDummyToUnicode( pSrcBuf, nSrcBytes, - pDestBuf, nDestChars, - nFlags, pInfo, pSrcCvtBytes ); - } - - return pConverter->mpConvertTextToUnicodeProc( pConverter->mpConvertData, - (void*)hContext, - pSrcBuf, nSrcBytes, - pDestBuf, nDestChars, - nFlags, pInfo, - pSrcCvtBytes ); -} - -/* ======================================================================= */ - -rtl_UnicodeToTextConverter SAL_CALL 
rtl_createUnicodeToTextConverter( rtl_TextEncoding eTextEncoding ) -{ - const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); - if ( pData ) - return (rtl_TextToUnicodeConverter) &pData->maConverter; - else - return 0; -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_destroyUnicodeToTextConverter( rtl_UnicodeToTextConverter hConverter ) -{ - (void) hConverter; /* unused */ -} - -/* ----------------------------------------------------------------------- */ - -rtl_UnicodeToTextContext SAL_CALL rtl_createUnicodeToTextContext( rtl_UnicodeToTextConverter hConverter ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( !pConverter ) - return 0; - else if ( pConverter->mpCreateUnicodeToTextContext ) - return (rtl_UnicodeToTextContext)pConverter->mpCreateUnicodeToTextContext(); - else - return (rtl_UnicodeToTextContext)1; -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_destroyUnicodeToTextContext( rtl_UnicodeToTextConverter hConverter, - rtl_UnicodeToTextContext hContext ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( pConverter && hContext && pConverter->mpDestroyUnicodeToTextContext ) - pConverter->mpDestroyUnicodeToTextContext( (void*)hContext ); -} - -/* ----------------------------------------------------------------------- */ - -void SAL_CALL rtl_resetUnicodeToTextContext( rtl_UnicodeToTextConverter hConverter, - rtl_UnicodeToTextContext hContext ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - if ( pConverter && hContext && pConverter->mpResetUnicodeToTextContext ) - pConverter->mpResetUnicodeToTextContext( (void*)hContext ); -} - -/* ----------------------------------------------------------------------- */ - -sal_Size SAL_CALL rtl_convertUnicodeToText( rtl_UnicodeToTextConverter hConverter, - rtl_UnicodeToTextContext 
hContext, - const sal_Unicode* pSrcBuf, sal_Size nSrcChars, - sal_Char* pDestBuf, sal_Size nDestBytes, - sal_uInt32 nFlags, sal_uInt32* pInfo, - sal_Size* pSrcCvtChars ) -{ - const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; - - /* Only temporaer, because we don't want die, if we don't have a - converter, because not all converters are implemented yet */ - if ( !pConverter ) - { - return ImplUnicodeToDummy( pSrcBuf, nSrcChars, - pDestBuf, nDestBytes, - nFlags, pInfo, pSrcCvtChars ); - } - - return pConverter->mpConvertUnicodeToTextProc( pConverter->mpConvertData, - (void*)hContext, - pSrcBuf, nSrcChars, - pDestBuf, nDestBytes, - nFlags, pInfo, - pSrcCvtChars ); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/textcvt.cxx b/sal/textenc/textcvt.cxx new file mode 100644 index 000000000000..e6f6470305fd --- /dev/null +++ b/sal/textenc/textcvt.cxx @@ -0,0 +1,266 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include "rtl/textcvt.h" + +#include "gettextencodingdata.hxx" +#include "tenchelp.hxx" + +/* ======================================================================= */ + +static sal_Size ImplDummyToUnicode( const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + sal_Unicode* pEndDestBuf; + const char* pEndSrcBuf; + + if ( ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR) || + ((nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR) ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | + RTL_TEXTTOUNICODE_INFO_UNDEFINED | + RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; + return 0; + } + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestChars; + pEndSrcBuf = pSrcBuf+nSrcBytes; + while ( pSrcBuf < pEndSrcBuf ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = (sal_Unicode)(sal_uChar)*pSrcBuf; + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); + return (nDestChars - (pEndDestBuf-pDestBuf)); +} + +/* ----------------------------------------------------------------------- */ + +static sal_Size ImplUnicodeToDummy( const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + char* pEndDestBuf; + const sal_Unicode* pEndSrcBuf; + + if ( ((nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR) ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | +
RTL_UNICODETOTEXT_INFO_UNDEFINED; + return 0; + } + + *pInfo = 0; + pEndDestBuf = pDestBuf+nDestBytes; + pEndSrcBuf = pSrcBuf+nSrcChars; + while ( pSrcBuf < pEndSrcBuf ) + { + if ( pDestBuf == pEndDestBuf ) + { + *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + + *pDestBuf = (char)(sal_uChar)(*pSrcBuf & 0x00FF); + pDestBuf++; + pSrcBuf++; + } + + *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); + return (nDestBytes - (pEndDestBuf-pDestBuf)); +} + +/* ======================================================================= */ + +rtl_TextToUnicodeConverter SAL_CALL rtl_createTextToUnicodeConverter( rtl_TextEncoding eTextEncoding ) +{ + const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); + if ( pData ) + return (rtl_TextToUnicodeConverter) &pData->maConverter; + else + return 0; +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_destroyTextToUnicodeConverter( rtl_TextToUnicodeConverter ) +{} + +/* ----------------------------------------------------------------------- */ + +rtl_TextToUnicodeContext SAL_CALL rtl_createTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( !pConverter ) + return 0; + else if ( pConverter->mpCreateTextToUnicodeContext ) + return (rtl_TextToUnicodeContext)pConverter->mpCreateTextToUnicodeContext(); + else + return (rtl_TextToUnicodeContext)1; +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_destroyTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter, + rtl_TextToUnicodeContext hContext ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( pConverter && hContext && pConverter->mpDestroyTextToUnicodeContext ) + pConverter->mpDestroyTextToUnicodeContext( hContext ); +} + +/* 
----------------------------------------------------------------------- */ + +void SAL_CALL rtl_resetTextToUnicodeContext( rtl_TextToUnicodeConverter hConverter, + rtl_TextToUnicodeContext hContext ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( pConverter && hContext && pConverter->mpResetTextToUnicodeContext ) + pConverter->mpResetTextToUnicodeContext( hContext ); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size SAL_CALL rtl_convertTextToUnicode( rtl_TextToUnicodeConverter hConverter, + rtl_TextToUnicodeContext hContext, + const char* pSrcBuf, sal_Size nSrcBytes, + sal_Unicode* pDestBuf, sal_Size nDestChars, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtBytes ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + + /* Only temporary, because we don't want to die if we don't have a + converter, because not all converters are implemented yet */ + if ( !pConverter ) + { + return ImplDummyToUnicode( pSrcBuf, nSrcBytes, + pDestBuf, nDestChars, + nFlags, pInfo, pSrcCvtBytes ); + } + + return pConverter->mpConvertTextToUnicodeProc( pConverter->mpConvertData, + hContext, + pSrcBuf, nSrcBytes, + pDestBuf, nDestChars, + nFlags, pInfo, + pSrcCvtBytes ); +} + +/* ======================================================================= */ + +rtl_UnicodeToTextConverter SAL_CALL rtl_createUnicodeToTextConverter( rtl_TextEncoding eTextEncoding ) +{ + const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); + if ( pData ) + return (rtl_TextToUnicodeConverter) &pData->maConverter; + else + return 0; +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_destroyUnicodeToTextConverter( rtl_UnicodeToTextConverter ) +{} + +/* ----------------------------------------------------------------------- */ + +rtl_UnicodeToTextContext SAL_CALL rtl_createUnicodeToTextContext(
rtl_UnicodeToTextConverter hConverter ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( !pConverter ) + return 0; + else if ( pConverter->mpCreateUnicodeToTextContext ) + return (rtl_UnicodeToTextContext)pConverter->mpCreateUnicodeToTextContext(); + else + return (rtl_UnicodeToTextContext)1; +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_destroyUnicodeToTextContext( rtl_UnicodeToTextConverter hConverter, + rtl_UnicodeToTextContext hContext ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( pConverter && hContext && pConverter->mpDestroyUnicodeToTextContext ) + pConverter->mpDestroyUnicodeToTextContext( hContext ); +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_resetUnicodeToTextContext( rtl_UnicodeToTextConverter hConverter, + rtl_UnicodeToTextContext hContext ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + if ( pConverter && hContext && pConverter->mpResetUnicodeToTextContext ) + pConverter->mpResetUnicodeToTextContext( hContext ); +} + +/* ----------------------------------------------------------------------- */ + +sal_Size SAL_CALL rtl_convertUnicodeToText( rtl_UnicodeToTextConverter hConverter, + rtl_UnicodeToTextContext hContext, + const sal_Unicode* pSrcBuf, sal_Size nSrcChars, + char* pDestBuf, sal_Size nDestBytes, + sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars ) +{ + const ImplTextConverter* pConverter = (const ImplTextConverter*)hConverter; + + /* Only temporary, because we don't want to die if we don't have a + converter, because not all converters are implemented yet */ + if ( !pConverter ) + { + return ImplUnicodeToDummy( pSrcBuf, nSrcChars, + pDestBuf, nDestBytes, + nFlags, pInfo, pSrcCvtChars ); + } + + return pConverter->mpConvertUnicodeToTextProc( pConverter->mpConvertData, + hContext, + pSrcBuf,
nSrcChars, + pDestBuf, nDestBytes, + nFlags, pInfo, + pSrcCvtChars ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/textenc.cxx b/sal/textenc/textenc.cxx index 05550a505365..cc226100e58e 100644 --- a/sal/textenc/textenc.cxx +++ b/sal/textenc/textenc.cxx @@ -26,26 +26,28 @@ * ************************************************************************/ +/* TODO! This file should not be called textenc.c, because it is not the + implementation of rtl/textenc.h. Rather, it should be called + gettextencodingdata.c. */ + #include "sal/config.h" #include -/* TODO! This file should not be called textenc.c, because it is not the - implementation of rtl/textenc.h. Rather, it should be called - gettextencodingdata.c. */ -#include "context.h" -#include "gettextencodingdata.h" -#include "tenchelp.h" -#include "rtl/textenc.h" -#include #include "boost/noncopyable.hpp" #include "osl/diagnose.h" #include "osl/module.hxx" #include "rtl/instance.hxx" +#include "rtl/textenc.h" #include "rtl/ustring.h" #include "rtl/ustring.hxx" +#include "sal/macros.h" #include "sal/types.h" +#include "context.hxx" +#include "gettextencodingdata.hxx" +#include "tenchelp.hxx" + #ifndef INCLUDED_STDDEF_H #include #define INCLUDED_STDDEF_H @@ -421,7 +423,7 @@ struct FullTextEncodingDataSingleton: } ImplTextEncodingData const * -Impl_getTextEncodingData(rtl_TextEncoding nEncoding) SAL_THROW_EXTERN_C() +Impl_getTextEncodingData(rtl_TextEncoding nEncoding) { switch(nEncoding) { diff --git a/sal/textenc/unichars.c b/sal/textenc/unichars.c deleted file mode 100644 index 0d4131699732..000000000000 --- a/sal/textenc/unichars.c +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. 
- * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#include "unichars.h" -#include "osl/diagnose.h" -#include "sal/types.h" - -int ImplIsNoncharacter(sal_uInt32 nUtf32) -{ - /* All code points that are noncharacters, as of Unicode 3.1.1. */ - return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) - || (nUtf32 & 0xFFFF) >= 0xFFFE - || nUtf32 > 0x10FFFF; -} - -int ImplIsControlOrFormat(sal_uInt32 nUtf32) -{ - /* All code points of , Version 3.1.1, that have a General Category of Cc - (Other, Control) or Cf (Other, Format). 
- */ - return nUtf32 <= 0x001F - || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) - || nUtf32 == 0x070F /* SYRIAC ABBREVIATION MARK */ - || nUtf32 == 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */ - || nUtf32 == 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */ - || nUtf32 == 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */ - || nUtf32 == 0x180E /* MONGOLIAN VOWEL SEPARATOR */ - || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ - || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ - || nUtf32 == 0x200E /* LEFT-TO-RIGHT MARK */ - || nUtf32 == 0x200F /* RIGHT-TO-LEFT MARK */ - || nUtf32 == 0x202A /* LEFT-TO-RIGHT EMBEDDING */ - || nUtf32 == 0x202B /* RIGHT-TO-LEFT EMBEDDING */ - || nUtf32 == 0x202C /* POP DIRECTIONAL FORMATTING */ - || nUtf32 == 0x202D /* LEFT-TO-RIGHT OVERRIDE */ - || nUtf32 == 0x202E /* RIGHT-TO-LEFT OVERRIDE */ - || nUtf32 == 0x206A /* INHIBIT SYMMETRIC SWAPPING */ - || nUtf32 == 0x206B /* ACTIVATE SYMMETRIC SWAPPING */ - || nUtf32 == 0x206C /* INHIBIT ARABIC FORM SHAPING */ - || nUtf32 == 0x206D /* ACTIVATE ARABIC FORM SHAPING */ - || nUtf32 == 0x206E /* NATIONAL DIGIT SHAPES */ - || nUtf32 == 0x206F /* NOMINAL DIGIT SHAPES */ - || nUtf32 == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */ - || nUtf32 == 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */ - || nUtf32 == 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */ - || nUtf32 == 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */ - || nUtf32 == 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */ - || nUtf32 == 0x1D174 /* MUSICAL SYMBOL END BEAM */ - || nUtf32 == 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */ - || nUtf32 == 0x1D176 /* MUSICAL SYMBOL END TIE */ - || nUtf32 == 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */ - || nUtf32 == 0x1D178 /* MUSICAL SYMBOL END SLUR */ - || nUtf32 == 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */ - || nUtf32 == 0x1D17A /* MUSICAL SYMBOL END PHRASE */ - || nUtf32 == 0xE0001 /* LANGUAGE TAG */ - || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); -} - -int ImplIsHighSurrogate(sal_uInt32 nUtf32) -{ - /* All code points that are 
high-surrogates, as of Unicode 3.1.1. */ - return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; -} - -int ImplIsLowSurrogate(sal_uInt32 nUtf32) -{ - /* All code points that are low-surrogates, as of Unicode 3.1.1. */ - return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; -} - -int ImplIsPrivateUse(sal_uInt32 nUtf32) -{ - /* All code points of , Version 3.1.1, that have a General Category of Co - (Other, Private Use). - */ - return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) - || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) - || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); -} - -int ImplIsZeroWidth(sal_uInt32 nUtf32) -{ - /* All code points of , Version 3.1.1, that have "ZERO WIDTH" in their - Character name. - */ - return nUtf32 == 0x200B /* ZERO WIDTH SPACE */ - || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ - || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ - || nUtf32 == 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */ -} - -sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) -{ - OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); - return ((nUtf32 - 0x10000) >> 10) | 0xD800; -} - -sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) -{ - OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); - return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; -} - -sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) -{ - OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow), - "specification violation"); - return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/unichars.cxx b/sal/textenc/unichars.cxx new file mode 100644 index 000000000000..1291fff8b8c0 --- /dev/null +++ b/sal/textenc/unichars.cxx @@ -0,0 +1,140 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include "sal/config.h" + +#include <cassert> + +#include "sal/types.h" + +#include "unichars.hxx" + +bool ImplIsNoncharacter(sal_uInt32 nUtf32) +{ + // All code points that are noncharacters, as of Unicode 3.1.1: + return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) + || (nUtf32 & 0xFFFF) >= 0xFFFE + || nUtf32 > 0x10FFFF; +} + +bool ImplIsControlOrFormat(sal_uInt32 nUtf32) +{ + // All code points of + // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, + // that have a General Category of Cc (Other, Control) or Cf (Other, + // Format): + return nUtf32 <= 0x001F + || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) + || nUtf32 == 0x070F // SYRIAC ABBREVIATION MARK + || nUtf32 == 0x180B // MONGOLIAN FREE VARIATION SELECTOR ONE + || nUtf32 == 0x180C // MONGOLIAN FREE VARIATION SELECTOR TWO + || nUtf32 == 0x180D // MONGOLIAN FREE VARIATION SELECTOR THREE + || nUtf32 == 0x180E // MONGOLIAN VOWEL SEPARATOR + || nUtf32 == 0x200C // ZERO WIDTH NON-JOINER + || nUtf32 == 0x200D // ZERO WIDTH JOINER + || nUtf32 == 0x200E // LEFT-TO-RIGHT MARK + || nUtf32 == 0x200F // RIGHT-TO-LEFT MARK + || nUtf32 == 0x202A // LEFT-TO-RIGHT
EMBEDDING + || nUtf32 == 0x202B // RIGHT-TO-LEFT EMBEDDING + || nUtf32 == 0x202C // POP DIRECTIONAL FORMATTING + || nUtf32 == 0x202D // LEFT-TO-RIGHT OVERRIDE + || nUtf32 == 0x202E // RIGHT-TO-LEFT OVERRIDE + || nUtf32 == 0x206A // INHIBIT SYMMETRIC SWAPPING + || nUtf32 == 0x206B // ACTIVATE SYMMETRIC SWAPPING + || nUtf32 == 0x206C // INHIBIT ARABIC FORM SHAPING + || nUtf32 == 0x206D // ACTIVATE ARABIC FORM SHAPING + || nUtf32 == 0x206E // NATIONAL DIGIT SHAPES + || nUtf32 == 0x206F // NOMINAL DIGIT SHAPES + || nUtf32 == 0xFEFF // ZERO WIDTH NO-BREAK SPACE + || nUtf32 == 0xFFF9 // INTERLINEAR ANNOTATION ANCHOR + || nUtf32 == 0xFFFA // INTERLINEAR ANNOTATION SEPARATOR + || nUtf32 == 0xFFFB // INTERLINEAR ANNOTATION TERMINATOR + || nUtf32 == 0x1D173 // MUSICAL SYMBOL BEGIN BEAM + || nUtf32 == 0x1D174 // MUSICAL SYMBOL END BEAM + || nUtf32 == 0x1D175 // MUSICAL SYMBOL BEGIN TIE + || nUtf32 == 0x1D176 // MUSICAL SYMBOL END TIE + || nUtf32 == 0x1D177 // MUSICAL SYMBOL BEGIN SLUR + || nUtf32 == 0x1D178 // MUSICAL SYMBOL END SLUR + || nUtf32 == 0x1D179 // MUSICAL SYMBOL BEGIN PHRASE + || nUtf32 == 0x1D17A // MUSICAL SYMBOL END PHRASE + || nUtf32 == 0xE0001 // LANGUAGE TAG + || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); +} + +bool ImplIsHighSurrogate(sal_uInt32 nUtf32) +{ + // All code points that are high-surrogates, as of Unicode 3.1.1. + return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; +} + +bool ImplIsLowSurrogate(sal_uInt32 nUtf32) +{ + // All code points that are low-surrogates, as of Unicode 3.1.1. 
+ return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; +} + +bool ImplIsPrivateUse(sal_uInt32 nUtf32) +{ + // All code points of + // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, + // that have a General Category of Co (Other, Private Use): + return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) + || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) + || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); +} + +bool ImplIsZeroWidth(sal_uInt32 nUtf32) +{ + // All code points of + // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, + // that have "ZERO WIDTH" in their Character name: + return nUtf32 == 0x200B // ZERO WIDTH SPACE + || nUtf32 == 0x200C // ZERO WIDTH NON-JOINER + || nUtf32 == 0x200D // ZERO WIDTH JOINER + || nUtf32 == 0xFEFF; // ZERO WIDTH NO-BREAK SPACE +} + +sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) +{ + assert(nUtf32 >= 0x10000); + return ((nUtf32 - 0x10000) >> 10) | 0xD800; +} + +sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) +{ + assert(nUtf32 >= 0x10000); + return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; +} + +sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) +{ + assert(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow)); + return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/unichars.h b/sal/textenc/unichars.h deleted file mode 100644 index f1e18460dfb0..000000000000 --- a/sal/textenc/unichars.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org.
- * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef INCLUDED_RTL_TEXTENC_UNICHARS_H -#define INCLUDED_RTL_TEXTENC_UNICHARS_H - -#include "sal/types.h" - -#if defined __cplusplus -extern "C" { -#endif /* __cpluscplus */ - -#define RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER 0xFFFD - -int ImplIsNoncharacter(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -int ImplIsControlOrFormat(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -int ImplIsHighSurrogate(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -int ImplIsLowSurrogate(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -int ImplIsPrivateUse(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -int ImplIsZeroWidth(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) SAL_THROW_EXTERN_C(); - -sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) - SAL_THROW_EXTERN_C(); - -#if defined __cplusplus -} -#endif /* __cpluscplus */ - -#endif /* INCLUDED_RTL_TEXTENC_UNICHARS_H */ - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx new file mode 100644 index 000000000000..b068fff8e15e --- /dev/null +++
b/sal/textenc/unichars.hxx @@ -0,0 +1,58 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef INCLUDED_SAL_TEXTENC_UNICHARS_HXX +#define INCLUDED_SAL_TEXTENC_UNICHARS_HXX + +#include "sal/config.h" + +#include "sal/types.h" + +#define RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER 0xFFFD + +bool ImplIsNoncharacter(sal_uInt32 nUtf32); + +bool ImplIsControlOrFormat(sal_uInt32 nUtf32); + +bool ImplIsHighSurrogate(sal_uInt32 nUtf32); + +bool ImplIsLowSurrogate(sal_uInt32 nUtf32); + +bool ImplIsPrivateUse(sal_uInt32 nUtf32); + +bool ImplIsZeroWidth(sal_uInt32 nUtf32); + +sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32); + +sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32); + +sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit