diff options
-rw-r--r-- | i18npool/CustomTarget_breakiterator.mk | 16 | ||||
-rw-r--r-- | i18npool/Executable_gendict.mk | 8 | ||||
-rw-r--r-- | i18npool/Library_i18npool.mk | 6 | ||||
-rw-r--r-- | i18npool/README | 15 | ||||
-rw-r--r-- | i18npool/source/breakiterator/gendict.cxx | 111 | ||||
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 76 | ||||
-rw-r--r-- | ios/CustomTarget_TiledLibreOffice_app.mk | 3 |
7 files changed, 196 insertions, 39 deletions
diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk index dbe6ab223531..fb7f0d97ae08 100644 --- a/i18npool/CustomTarget_breakiterator.mk +++ b/i18npool/CustomTarget_breakiterator.mk @@ -11,6 +11,20 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/breakiterator)) i18npool_BIDIR := $(call gb_CustomTarget_get_workdir,i18npool/breakiterator) +ifeq ($(OS),IOS) + +$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \ + $(i18npool_BIDIR)/dict_ja.data $(i18npool_BIDIR)/dict_zh.data $(i18npool_BIDIR)/OpenOffice_dat.c + +$(i18npool_BIDIR)/dict_%.data : \ + $(SRCDIR)/i18npool/source/breakiterator/data/%.dic \ + $(call gb_Executable_get_runtime_dependencies,gendict) \ + | $(i18npool_BIDIR)/.dir + $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),DIC,1) + $(call gb_Helper_abbreviate_dirs,\ + $(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.cxx,%,$@)) +else + $(call gb_CustomTarget_get_target,i18npool/breakiterator) : \ $(i18npool_BIDIR)/dict_ja.cxx $(i18npool_BIDIR)/dict_zh.cxx $(i18npool_BIDIR)/OpenOffice_dat.c @@ -22,6 +36,8 @@ $(i18npool_BIDIR)/dict_%.cxx : \ $(call gb_Helper_abbreviate_dirs,\ $(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.cxx,%,$@)) +endif + i18npool_BRKTXTS := \ char_in.brk \ char.brk \ diff --git a/i18npool/Executable_gendict.mk b/i18npool/Executable_gendict.mk index a738f64e0666..69230cf52c07 100644 --- a/i18npool/Executable_gendict.mk +++ b/i18npool/Executable_gendict.mk @@ -14,6 +14,14 @@ $(eval $(call gb_Executable_use_libraries,gendict,\ $(gb_UWINAPI) \ )) +ifeq ($(gb_Side),build) +ifneq ($(shell grep OS=IOS $(BUILDDIR)/config_host.mk),) +$(eval $(call gb_Executable_add_cxxflags,gendict,\ + -DDICT_JA_ZH_IN_DATAFILE \ +)) +endif +endif + $(eval $(call gb_Executable_add_exception_objects,gendict,\ i18npool/source/breakiterator/gendict \ )) diff --git a/i18npool/Library_i18npool.mk b/i18npool/Library_i18npool.mk index 4f9e5999a17a..c095fa8cf3ed 100644 --- a/i18npool/Library_i18npool.mk +++ b/i18npool/Library_i18npool.mk @@ -35,6 +35,12 @@ $(eval $(call gb_Library_use_externals,i18npool,\ icu_headers \ )) +ifeq ($(OS),IOS) +$(eval $(call gb_Library_add_cxxflags,i18npool,\ + -DDICT_JA_ZH_IN_DATAFILE \ +)) +endif + $(eval $(call gb_Library_add_exception_objects,i18npool,\ i18npool/source/breakiterator/breakiterator_cjk \ i18npool/source/breakiterator/breakiterator_ctl \ diff --git a/i18npool/README b/i18npool/README index 86f72d1babe2..9e1a04f3f329 100644 --- a/i18npool/README +++ b/i18npool/README @@ -1,6 +1,19 @@ Internationalisation (i18npool) framework ensures that the suite is adaptable to the requirements of different -native languages, their local settings and customs, etc without source code modification. +native languages, their local settings and customs, etc without source code modification. (Wow, that is such marketing-speak...) Specifically for locale data documentation please see i18npool/source/localedata/data/locale.dtd See also [http://wiki.documentfoundation.org/Category:I18n] + +On iOS we put the largest data generated here, the dict_ja and dict_zh +stuff, into separate files and not into code to keep the size of an +app binary down. Temporary test code: + + static bool beenhere = false; + if (!beenhere) { + beenhere = true; + uno::Reference< uno::XComponentContext > xComponentContext(::cppu::defaultBootstrap_InitialComponentContext()); + uno::Reference< lang::XMultiComponentFactory > xMultiComponentFactoryClient( xComponentContext->getServiceManager() ); + uno::Reference< uno::XInterface > xInterface = + xMultiComponentFactoryClient->createInstanceWithContext( "com.sun.star.i18n.BreakIterator_ja", xComponentContext ); + } diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx index eac6998929a3..c0f1e8ecd577 100644 --- a/i18npool/source/breakiterator/gendict.cxx +++ b/i18npool/source/breakiterator/gendict.cxx @@ -17,7 +17,6 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ - #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -32,6 +31,22 @@ using std::vector; using namespace ::rtl; +// For iOS, where we must strive for a minimal executable size, we +// keep the data produced by this utility not as large const tables in +// source code but instead as separate data files, to be bundled with +// an app, and mmapped in at run time. + +// To test this easier on a desktop OS, just make sure +// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool. + +#ifdef DICT_JA_ZH_IN_DATAFILE +static sal_Int64 dataAreaOffset = 0; +static sal_Int64 lenArrayOffset = 0; +static sal_Int64 index1Offset = 0; +static sal_Int64 index2Offset = 0; +static sal_Int64 existMarkOffset = 0; +#endif + /* Utility gendict: "BreakIterator_CJK provides input string caching and dictionary searching for @@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index) static inline void printIncludes(FILE* source_fp) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp); fputs("#include <sal/types.h>\n\n", source_fp); +#else + (void) source_fp; +#endif } static inline void printFunctions(FILE* source_fp, const char *lang) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp); fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp); fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp); @@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang) fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang); fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang); fputs ("#endif\n", source_fp); +#else + (void) source_fp; + (void) lang; +#endif } static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray) { // generate main dict. data array +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp); +#else + dataAreaOffset = ftell(source_fp); +#endif sal_Char str[1024]; sal_uInt32 lenArrayCurr = 0; sal_Unicode current = 0; @@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa // first character is stored in charArray, so start from second for (i = 1; i < len; i++, lenArrayCurr++) { set_exists(u[i]); +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%04x, ", u[i]); if ((lenArrayCurr & 0x0f) == 0x0f) fputs("\n\t", source_fp); +#else + fwrite(&u[i], sizeof(u[i]), 1, source_fp); +#endif } } lenArray.push_back( lenArrayCurr ); // store last ending pointer charArray[current+1] = lenArray.size(); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp); +#endif } static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t"); fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. +#else + lenArrayOffset = ftell(source_fp); + sal_uInt32 zero(0); + fwrite(&zero, sizeof(zero), 1, source_fp); +#endif for (size_t k = 0; k < lenArray.size(); k++) { if( !(k & 0xf) ) fputs("\n\t", source_fp); +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k])); +#else + fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp ); +#endif } /* FIXME?: what happens if in every range i there is at least one charArray != 0 @@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA => then in index2, the last range will be ignored incorrectly */ static inline void printIndex1(FILE *source_fp, sal_Int16 *set) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t"); +#else + index1Offset = ftell(source_fp); +#endif + sal_Int16 count = 0; for (sal_Int32 i = 0; i < 0x100; i++) { sal_Int32 j = 0; while( j < 0x100 && charArray[(i<<8) + j] == 0) j++; - fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff)); + set[i] = (j < 0x100 ? count++ : 0xff); +#ifndef DICT_JA_ZH_IN_DATAFILE + fprintf(source_fp, "0x%02x, ", set[i]); if ((i & 0x0f) == 0x0f) fputs ("\n\t", source_fp); +#else + fwrite(&set[i], sizeof(set[i]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("};\n", source_fp); +#endif } static inline void printIndex2(FILE *source_fp, sal_Int16 *set) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("static const sal_Int32 index2[] = {\n\t", source_fp); +#else + index2Offset = ftell(source_fp); +#endif sal_Int32 prev = 0; for (sal_Int32 i = 0; i < 0x100; i++) { if (set[i] != 0xff) { @@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set) k++; prev = charArray[(i<<8) + j]; +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0)); if ((j & 0x0f) == 0x0f) fputs ("\n\t", source_fp); +#else + sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0); + fwrite(&n, sizeof(n), 1, source_fp); +#endif } +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("\n\t", source_fp); +#endif } } +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("\n};\n", source_fp); +#endif } /* Generates a bitmask for the existance of sal_Unicode values in dictionary; it packs 8 sal_Bool values in 1 sal_uInt8 */ static inline void printExistsMask(FILE *source_fp) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t"); +#else + existMarkOffset = ftell(source_fp); +#endif for (unsigned int i = 0; i < 0x2000; i++) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%02x, ", exists[i]); if ( (i & 0xf) == 0xf ) fputs("\n\t", source_fp); +#else + fwrite(&exists[i], sizeof(exists[i]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp); +#endif } SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) @@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) sal_Int16 set[0x100]; printIncludes(source_fp); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("extern \"C\" {\n", source_fp); - printDataArea(dictionary_fp, source_fp, lenArray); - printLenArray(source_fp, lenArray); - printIndex1(source_fp, set); - printIndex2(source_fp, set); - printExistsMask(source_fp); - printFunctions(source_fp, argv[3]); +#endif + printDataArea(dictionary_fp, source_fp, lenArray); + printLenArray(source_fp, lenArray); + printIndex1(source_fp, set); + printIndex2(source_fp, set); + printExistsMask(source_fp); + printFunctions(source_fp, argv[3]); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("}\n", source_fp); +#else + // Put pointers to the tables at the end of the file... + fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp); + fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp); + fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp); + fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp); + fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp); +#endif fclose(dictionary_fp); fclose(source_fp); diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index 4cb51f9b5439..390b2cd7dcc4 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -17,28 +17,22 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ +#include <config_folders.h> -// xdictionary.cpp: implementation of the xdictionary class. - - - - +#include <osl/file.h> #include <rtl/ustrbuf.hxx> - +#include <rtl/bootstrap.hxx> #include <com/sun/star/i18n/WordType.hpp> #include <xdictionary.hxx> #include <unicode/uchar.h> #include <string.h> #include <breakiteratorImpl.hxx> - -// Construction/Destruction - - - namespace com { namespace sun { namespace star { namespace i18n { -#ifndef DISABLE_DYNLOADING +#ifdef DICT_JA_ZH_IN_DATAFILE + +#elif !defined DISABLE_DYNLOADING extern "C" { static void SAL_CALL thisModule() {} } @@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) : boundary(), japaneseWordBreak( sal_False ) { - index1 = 0; -#ifndef DISABLE_DYNLOADING + existMark = NULL; + index1 = NULL; + index2 = NULL; + lenArray = NULL; + dataArea = NULL; + +#ifdef DICT_JA_ZH_IN_DATAFILE + + if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 ) + { + OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" ); + rtl::Bootstrap::expandMacros(sUrl); + + if( strcmp( lang, "ja" ) == 0 ) + sUrl += "ja.data"; + else if( strcmp( lang, "zh" ) == 0 ) + sUrl += "zh.data"; + + oslFileHandle aFileHandle; + sal_uInt64 nFileSize; + char *pMapping; + if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None && + osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None && + osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None ) + { + // We have the offsets to the parts of the file at its end, see gendict.cxx + sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize); + + existMark = (sal_uInt8*) (pMapping + pEOF[-1]); + index2 = (sal_Int32*) (pMapping + pEOF[-2]); + index1 = (sal_Int16*) (pMapping + pEOF[-3]); + lenArray = (sal_Int32*) (pMapping + pEOF[-4]); + dataArea = (sal_Unicode*) (pMapping + pEOF[-5]); + } + } + +#elif !defined DISABLE_DYNLOADING + #ifdef SAL_DLLPREFIX OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh) aBuf.appendAscii( SAL_DLLPREFIX ); @@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) : func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData ); dataArea = (sal_Unicode*) (*func)(); } - else - { - existMark = NULL; - index1 = NULL; - index2 = NULL; - lenArray = NULL; - dataArea = NULL; - } #else + if( strcmp( lang, "ja" ) == 0 ) { existMark = getExistMark_ja(); index1 = getIndex1_ja(); @@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) : lenArray = getLenArray_zh(); dataArea = getDataArea_zh(); } - else - { - existMark = NULL; - index1 = NULL; - index2 = NULL; - lenArray = NULL; - dataArea = NULL; - } + #endif for (sal_Int32 i = 0; i < CACHE_MAX; i++) diff --git a/ios/CustomTarget_TiledLibreOffice_app.mk b/ios/CustomTarget_TiledLibreOffice_app.mk index 82a960c11bda..618b4cc980ed 100644 --- a/ios/CustomTarget_TiledLibreOffice_app.mk +++ b/ios/CustomTarget_TiledLibreOffice_app.mk @@ -59,6 +59,9 @@ TiledLibreOffice_setup: mkdir -p $(TiledLibreOffice_resource)/share/config cp -R $(INSTDIR)/share/config/soffice.cfg $(TiledLibreOffice_resource)/share/config + # Japanese and Chinese dict files + cp $(WORKDIR)/CustomTarget/i18npool/breakiterator/dict_*.data $(TiledLibreOffice_resource)/share + # "registry" cp -R $(INSTDIR)/share/registry $(TiledLibreOffice_resource)/share |