diff options
author | Vladimir Glazounov <vg@openoffice.org> | 2003-04-17 16:54:00 +0000 |
---|---|---|
committer | Vladimir Glazounov <vg@openoffice.org> | 2003-04-17 16:54:00 +0000 |
commit | f790480baa4bfef1e61157ef23fc409a6890c591 (patch) | |
tree | 058ad8db6939ff173072f60460843facd4532d31 | |
parent | 1b1780543a007011ca2051604b9a48699ff29565 (diff) |
INTEGRATION: CWS hhc (1.1.2); FILE ADDED
2003/04/15 06:53:49 khong 1.1.2.7: #105688# make script type checking not start from begining every times
2003/04/10 07:19:58 khong 1.1.2.6: #105688# fix a linux compiling error
2003/04/10 04:06:13 khong 1.1.2.5: #105688# fix a linux compiling error
2003/04/02 19:17:51 khong 1.1.2.4: #105688# add hhc character conversion dictionary
2003/04/02 00:12:08 khong 1.1.2.3: #105688# add utility to generate data file from dictionary for Hangul/Hanja conversion
2003/04/01 17:19:21 khong 1.1.2.2: #105688# update conversion dictionary API according to IDL changes
2003/03/29 02:04:24 khong 1.1.2.1: #105688# new Hangul/Hanja conversion
-rw-r--r-- | i18npool/source/textconversion/textconversion_ko.cxx | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/i18npool/source/textconversion/textconversion_ko.cxx b/i18npool/source/textconversion/textconversion_ko.cxx new file mode 100644 index 000000000000..d358d23ccb88 --- /dev/null +++ b/i18npool/source/textconversion/textconversion_ko.cxx @@ -0,0 +1,372 @@ +/************************************************************************* + * + * $RCSfile: textconversion_ko.cxx,v $ + * + * $Revision: 1.2 $ + * + * last change: $Author: vg $ $Date: 2003-04-17 17:54:00 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +#include <assert.h> +#include <textconversion.hxx> +#include <com/sun/star/i18n/TextConversionType.hpp> +#include <com/sun/star/i18n/TextConversionOption.hpp> +#include <com/sun/star/linguistic2/ConversionDirection.hpp> +#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp> +#include <rtl/ustrbuf.hxx> +#include <x_rtl_ustring.h> +#include <unicode.hxx> + +using namespace com::sun::star::lang; +using namespace com::sun::star::i18n; +using namespace com::sun::star::linguistic2; +using namespace com::sun::star::uno; +using namespace rtl; + +namespace com { namespace sun { namespace star { namespace i18n { + +// defined in hangul2hanja.cxx generated from hangul2hanja.dic by genconv_dict +extern const sal_Unicode* getHangul2HanjaData(); +extern const Hangul_Index* getHangul2HanjaIndex(); +extern const sal_Int16 getHangul2HanjaIndexCount(); +extern const sal_uInt16* getHanja2HangulIndex(); +extern const sal_Unicode* getHanja2HangulData(); + +#define SCRIPT_OTHERS 0 +#define SCRIPT_HANJA 1 +#define SCRIPT_HANGUL 2 + +TextConversion_ko::TextConversion_ko( const Reference < XMultiServiceFactory >& xMSF ) +{ + Reference < XInterface > xI; + + xI = xMSF->createInstance( + OUString::createFromAscii("com.sun.star.i18n.ConversionDictionary_ko")); + + if ( xI.is() ) + xI->queryInterface( getCppuType((const Reference< XConversionDictionary>*)0) ) >>= xCD; + + xI = xMSF->createInstance( + OUString::createFromAscii("com.sun.star.linguist2.ConversionDictionaryList")); + + if ( xI.is() ) + xI->queryInterface( getCppuType((const Reference< XConversionDictionaryList>*)0) ) >>= xCDL; + + maxLeftLength = maxRightLength = 1; + + // get maximum length of word in dictionary + if (xCDL.is()) { + Locale loc(OUString::createFromAscii("ko"), + OUString::createFromAscii("KR"), + OUString()); + maxLeftLength = xCDL->queryMaxCharCount(loc, + ConversionDictionaryType::HANJA_HANGUL, + ConversionDirection_FROM_LEFT); + maxRightLength = xCDL->queryMaxCharCount(loc, + ConversionDictionaryType::HANJA_HANGUL, + ConversionDirection_FROM_RIGHT); + if (xCD.is()) { + sal_Int32 tmp = xCD->getMaxCharCount(ConversionDirection_FROM_LEFT); + if (tmp > maxLeftLength) + maxLeftLength = tmp; + tmp = xCD->getMaxCharCount(ConversionDirection_FROM_RIGHT); + if (tmp > maxRightLength) + maxRightLength = tmp; + } + } else if (xCD.is()) { + maxLeftLength = xCD->getMaxCharCount(ConversionDirection_FROM_LEFT); + maxRightLength = xCD->getMaxCharCount(ConversionDirection_FROM_RIGHT); + } + + implementationName = "com.sun.star.i18n.TextConversion_ko"; +} + +sal_Int16 SAL_CALL checkScriptType(sal_Unicode c) +{ + static ScriptTypeList typeList[] = { + { UnicodeScript_kHangulJamo, SCRIPT_HANGUL }, // 29 + { UnicodeScript_kCJKRadicalsSupplement, SCRIPT_HANJA }, // 57, + { UnicodeScript_kKangxiRadicals, SCRIPT_HANJA }, // 58, + { UnicodeScript_kIdeographicDescriptionCharacters, SCRIPT_HANJA }, // 59, + { UnicodeScript_kCJKSymbolPunctuation, SCRIPT_HANJA }, // 60, + { UnicodeScript_kHiragana, SCRIPT_HANJA }, // 61, + { UnicodeScript_kKatakana, SCRIPT_HANJA }, // 62, + { UnicodeScript_kBopomofo, SCRIPT_HANJA }, // 63, + { UnicodeScript_kHangulCompatibilityJamo, SCRIPT_HANGUL }, // 64, + { UnicodeScript_kKanbun, SCRIPT_HANJA }, // 65, + { UnicodeScript_kBopomofoExtended, SCRIPT_HANJA }, // 66, + { UnicodeScript_kEnclosedCJKLetterMonth, SCRIPT_HANJA }, // 67, + { UnicodeScript_kCJKCompatibility, SCRIPT_HANJA }, // 68, + { UnicodeScript_k_CJKUnifiedIdeographsExtensionA, SCRIPT_HANJA }, // 69, + { UnicodeScript_kCJKUnifiedIdeograph, SCRIPT_HANJA }, // 70, + { UnicodeScript_kYiSyllables, SCRIPT_HANJA }, // 71, + { UnicodeScript_kYiRadicals, SCRIPT_HANJA }, // 72, + { UnicodeScript_kHangulSyllable, SCRIPT_HANGUL }, // 73, + { UnicodeScript_kCJKCompatibilityIdeograph, SCRIPT_HANJA }, // 78, + { UnicodeScript_kCombiningHalfMark, SCRIPT_HANJA }, // 81, + { UnicodeScript_kCJKCompatibilityForm, SCRIPT_HANJA }, // 82, + { UnicodeScript_kSmallFormVariant, SCRIPT_HANJA }, // 83, + { UnicodeScript_kHalfwidthFullwidthForm, SCRIPT_HANJA }, // 86, + + { UnicodeScript_kScriptCount, SCRIPT_OTHERS } // 87, + }; + + return unicode::getUnicodeScriptType(c, typeList, SCRIPT_OTHERS); +} + +Sequence< OUString > SAL_CALL getCharConversions(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toHanja) +{ + sal_Unicode ch; + Sequence< OUString > output; + if (toHanja) { + ch = aText[nStartPos]; + const Hangul_Index *Hangul_ko = getHangul2HanjaIndex(); + sal_Int16 top = getHangul2HanjaIndexCount() - 1; + sal_Int16 bottom = 0; + + while (bottom <= top) { + sal_Int16 current = (top + bottom) / 2; + sal_Unicode current_ch = Hangul_ko[current].code; + if (ch < current_ch) + top = current - 1; + else if (ch > current_ch) + bottom = current + 1; + else { + const sal_Unicode *ptr = getHangul2HanjaData() + Hangul_ko[current].address; + sal_Int16 count = Hangul_ko[current].count; + output.realloc(count); + for (sal_Int16 i = 0; i < count; i++) + output[i] = OUString(ptr + i, 1); + break; + } + } + } else { + rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength ); // defined in x_rtl_ustring.h + sal_Int32 count = 0; + while (count < nLength) { + ch = aText[nStartPos + count]; + sal_Unicode address = getHanja2HangulIndex()[ch>>8]; + if (address != 0xFFFF) + address = getHanja2HangulData()[address + (ch & 0xFF)]; + + if (address != 0xFFFF) + newStr->buffer[count++] = address; + else + break; + } + if (count > 0) { + output.realloc(1); + output[0] = OUString( newStr->buffer, count); + } + } + return output; +} + +static Sequence< OUString >& operator += (Sequence< OUString > &rSeq1, Sequence< OUString > &rSeq2 ) +{ + if (! rSeq1.hasElements() && rSeq2.hasElements()) + rSeq1 = rSeq2; + else if (rSeq2.hasElements()) { + sal_Int32 i, j, k, l; + k = l = rSeq1.getLength(); + rSeq1.realloc(l + rSeq2.getLength()); + + for (i = 0; i < rSeq2.getLength(); i++) { + for (j = 0; j < l; j++) + if (rSeq1[j] == rSeq2[i]) + break; + if (j == l) + rSeq1[k++] = rSeq2[i]; + } + if (rSeq1.getLength() > k) + rSeq1.realloc(k); + } + return rSeq1; +} + +TextConversionResult SAL_CALL +TextConversion_ko::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, + const Locale& aLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions) + throw( RuntimeException, IllegalArgumentException, NoSupportException ) +{ + TextConversionResult result; + Sequence <OUString> candidates; + result.Boundary.startPos = result.Boundary.endPos = 0; + + // do conversion only when there are right conversion type and dictionary services. + if (nConversionType == TextConversionType::TO_HANGUL || + nConversionType == TextConversionType::TO_HANJA) { + sal_Int32 start, end, length = aText.getLength() - nStartPos; + + if (length < 0 || nStartPos < 0) + length = 0; + else if (length > nLength) + length = nLength; + + sal_Int32 maxLength = 1; + sal_Int16 scriptType = SCRIPT_OTHERS; + ConversionDirection eDirection = ConversionDirection_FROM_LEFT; + sal_Bool toHanja = sal_True; + + // search for a max length of convertible text + for (start = 0, end = 0; start < length; start++) { + + if (end <= start) { + scriptType = checkScriptType(aText[nStartPos + start]); + /* wait for BI_DIRECTION being add to idl + if (nConversionOptions & TextConversionOption::BI_DIRECTION) { + if (scriptType == SCRIPT_OTHERS) // skip non-Korean chararacters + continue; + } else + */ + if (nConversionType == TextConversionType::TO_HANJA) { + if (scriptType != SCRIPT_HANGUL) // skip non-Hangul characters + continue; + } else { + if (scriptType != SCRIPT_HANJA) // skip non-Hanja characters + continue; + } + + toHanja = (scriptType == SCRIPT_HANGUL); + eDirection = toHanja ? + ConversionDirection_FROM_LEFT : ConversionDirection_FROM_RIGHT; + + maxLength = toHanja ? maxLeftLength : maxRightLength; + if (maxLength == 0 || (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)) + maxLength = 1; + + end = start + 1; + } + + for (; end < length && end - start < maxLength; end++) + if (checkScriptType(aText[nStartPos + end]) != scriptType) + break; + + for (sal_Int32 len = end - start; len > 0; len--) { + if (xCDL.is()) + result.Candidates = xCDL->queryConversions(aText, start + nStartPos, len, + aLocale, ConversionDictionaryType::HANJA_HANGUL, eDirection, nConversionOptions); // user dictionary + if (xCD.is() && toHanja) { // System dictionary would not do Hanja_to_Hangul conversion. + // Char2char converison below is enough. + candidates = xCD->getConversions(aText, start + nStartPos, len, eDirection, nConversionOptions); // system dictionary + result.Candidates += candidates; + } + if (len == 1) { + if (!toHanja && !result.Candidates.hasElements()) { + // do whole word character 2 character conversion + result.Candidates = getCharConversions(aText, nStartPos + start, length - start, toHanja); + if (result.Candidates.hasElements()) + len = result.Candidates[0].getLength(); + } else { + candidates = getCharConversions(aText, nStartPos + start, 1, toHanja); // char2char conversion + result.Candidates += candidates; + } + } + + // found match + if (result.Candidates.hasElements()) { + result.Boundary.startPos = start + nStartPos;; + result.Boundary.endPos = start + len + nStartPos; + return result; + } + } + } + } else + throw NoSupportException(); // Conversion type is not supported in this service. + return result; +} + +OUString SAL_CALL +TextConversion_ko::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, + const Locale& aLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions) + throw( RuntimeException, IllegalArgumentException, NoSupportException ) +{ + sal_Int32 length = aText.getLength() - nStartPos; + + if (length <= 0 || nStartPos < 0) + return OUString(); + else if (length > nLength) + length = nLength; + + OUStringBuffer aBuf(length + 1); + TextConversionResult result; + const sal_Unicode *str = aText.getStr(); + + for (sal_Int32 start = nStartPos; length + nStartPos > start; start = result.Boundary.endPos) { + + result = getConversions(aText, start, length + nStartPos - start, aLocale, nConversionType, nConversionOptions); + + if (result.Boundary.endPos > 0) { + if (result.Boundary.startPos > start) + aBuf.append(str + start, result.Boundary.startPos - start); // append skip portion + aBuf.append(result.Candidates[0]); // append converted portion + } else { + if (length + nStartPos > start) + aBuf.append(str + start, length + nStartPos - start); // append last portion + break; + } + } + + return aBuf.makeStringAndClear(); +} + +sal_Bool SAL_CALL +TextConversion_ko::interactiveConversion( const Locale& aLocale, sal_Int16 nTextConversionType, sal_Int32 nTextConversionOptions ) + throw( RuntimeException, IllegalArgumentException, NoSupportException ) +{ + return sal_True; +} + +} } } } |