diff options
author | Caolán McNamara <caolanm@redhat.com> | 2011-10-20 22:04:34 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2011-10-21 11:27:38 +0100 |
commit | 41c083a7ade3feb7f5b6516f859db00fa8e2cf72 (patch) | |
tree | b5114e19850b93c6dbd3b7cfb3e089c52f9b555f | |
parent | c57ba94721a152db405716753a15c870401c3a63 (diff) |
expose an API to map from UScriptCode to i18n::ScriptType
-rw-r--r-- | i18npool/source/breakiterator/breakiteratorImpl.cxx | 57 | ||||
-rw-r--r-- | i18nutil/inc/i18nutil/unicode.hxx | 4 | ||||
-rw-r--r-- | i18nutil/source/utility/unicode.cxx | 57 |
3 files changed, 63 insertions, 55 deletions
diff --git a/i18npool/source/breakiterator/breakiteratorImpl.cxx b/i18npool/source/breakiterator/breakiteratorImpl.cxx index e81bfff9452a..fcc8376f9806 100644 --- a/i18npool/source/breakiterator/breakiteratorImpl.cxx +++ b/i18npool/source/breakiterator/breakiteratorImpl.cxx @@ -31,6 +31,7 @@ #include <breakiteratorImpl.hxx> #include <unicode/uchar.h> +#include <i18nutil/unicode.hxx> #include <rtl/ustrbuf.hxx> using namespace ::com::sun::star::uno; @@ -445,64 +446,10 @@ sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/, namespace { - //See unicode/uscript.h - static sal_Int16 scriptTypes[] = - { - ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, - ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, - // 15 - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX, - ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, - // 30 - ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - // 45 - ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, - ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - // 60 - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, - // 75 - ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - // 90 - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, - // 105 - ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, - // 120 - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - // 135 - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, - ScriptType::COMPLEX, - ScriptType::WEAK - }; - -# define scriptTypesCount SAL_N_ELEMENTS(scriptTypes) - sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar) { - sal_Int16 nRet; int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT); - if (script < 0) - nRet = ScriptType::WEAK; - else if (static_cast<size_t>(script) >= SAL_N_ELEMENTS(scriptTypes)) - nRet = ScriptType::COMPLEX; // anything new is going to be pretty wild - else - nRet = scriptTypes[script]; - return nRet; + return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script)); } struct UBlock2Script diff --git a/i18nutil/inc/i18nutil/unicode.hxx b/i18nutil/inc/i18nutil/unicode.hxx index f865552f1709..d5e72b1feb70 100644 --- a/i18nutil/inc/i18nutil/unicode.hxx +++ b/i18nutil/inc/i18nutil/unicode.hxx @@ -30,6 +30,7 @@ #include <com/sun/star/i18n/UnicodeScript.hpp> #include <sal/types.h> +#include <unicode/uscript.h> #include "i18nutildllapi.h" typedef struct _ScriptTypeList { @@ -61,6 +62,9 @@ public: static sal_Bool SAL_CALL isWhiteSpace( const sal_Unicode ch); static sal_Bool SAL_CALL isAlphaDigit( const sal_Unicode ch); static sal_Bool SAL_CALL isPunctuation( const sal_Unicode ch); + + //Map an ISO 15924 script code to Latin/Asian/Complex/Weak + static sal_Int16 SAL_CALL getScriptClassFromUScriptCode(UScriptCode eScript); }; #endif diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index f04faa5846cf..b0a97d6a55d7 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -28,6 +28,7 @@ #include <com/sun/star/i18n/UnicodeType.hpp> #include <com/sun/star/i18n/KCharacterType.hpp> +#include <com/sun/star/i18n/ScriptType.hpp> #include <i18nutil/unicode.hxx> #include "unicode_data.h" @@ -497,5 +498,61 @@ sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch ) } } +sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript) +{ + //See unicode/uscript.h + static sal_Int16 scriptTypes[] = + { + ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, + ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, + // 15 + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX, + ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, + // 30 + ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + // 45 + ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, + ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + // 60 + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, + // 75 + ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + // 90 + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, + // 105 + ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, + // 120 + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + // 135 + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, + ScriptType::COMPLEX, + ScriptType::WEAK + }; + + sal_Int16 nRet; + if (eScript < USCRIPT_COMMON) + nRet = ScriptType::WEAK; + else if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes)) + nRet = ScriptType::COMPLEX; // anything new is going to be pretty wild + else + nRet = scriptTypes[eScript]; + return nRet; +} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |