diff options
author | Tor Lillqvist <tml@collabora.com> | 2014-04-14 14:31:25 +0300 |
---|---|---|
committer | Tor Lillqvist <tml@collabora.com> | 2014-04-14 17:21:19 +0300 |
commit | 0b6fb1f45a179e94bb39f49dd8f245812d753113 (patch) | |
tree | 5e697160e3c5e1cfa4a11057b2dd72c77f49f83d /i18npool/source | |
parent | c0f2dc1347cfbc121408959531998b217cd1c617 (diff) |
Put the dict_ja and _zh data in files instead of code for iOS
Map the file(s) into memory on demand. The executable file of an app
needs to be as small as possible. Including additional data files in
an app bundle is fine.
Change-Id: Ife9bfe99a2cf0473d459f38f50dfa3304b39e282
Diffstat (limited to 'i18npool/source')
-rw-r--r-- | i18npool/source/breakiterator/gendict.cxx | 111 | ||||
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 76 |
2 files changed, 149 insertions, 38 deletions
diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx index eac6998929a3..c0f1e8ecd577 100644 --- a/i18npool/source/breakiterator/gendict.cxx +++ b/i18npool/source/breakiterator/gendict.cxx @@ -17,7 +17,6 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ - #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -32,6 +31,22 @@ using std::vector; using namespace ::rtl; +// For iOS, where we must strive for a minimal executable size, we +// keep the data produced by this utility not as large const tables in +// source code but instead as separate data files, to be bundled with +// an app, and mmapped in at run time. + +// To test this easier on a desktop OS, just make sure +// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool. + +#ifdef DICT_JA_ZH_IN_DATAFILE +static sal_Int64 dataAreaOffset = 0; +static sal_Int64 lenArrayOffset = 0; +static sal_Int64 index1Offset = 0; +static sal_Int64 index2Offset = 0; +static sal_Int64 existMarkOffset = 0; +#endif + /* Utility gendict: "BreakIterator_CJK provides input string caching and dictionary searching for @@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index) static inline void printIncludes(FILE* source_fp) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp); fputs("#include <sal/types.h>\n\n", source_fp); +#else + (void) source_fp; +#endif } static inline void printFunctions(FILE* source_fp, const char *lang) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp); fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp); fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp); @@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang) fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang); fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang); fputs ("#endif\n", source_fp); +#else + (void) source_fp; + (void) lang; +#endif } static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray) { // generate main dict. data array +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp); +#else + dataAreaOffset = ftell(source_fp); +#endif sal_Char str[1024]; sal_uInt32 lenArrayCurr = 0; sal_Unicode current = 0; @@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa // first character is stored in charArray, so start from second for (i = 1; i < len; i++, lenArrayCurr++) { set_exists(u[i]); +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%04x, ", u[i]); if ((lenArrayCurr & 0x0f) == 0x0f) fputs("\n\t", source_fp); +#else + fwrite(&u[i], sizeof(u[i]), 1, source_fp); +#endif } } lenArray.push_back( lenArrayCurr ); // store last ending pointer charArray[current+1] = lenArray.size(); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp); +#endif } static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t"); fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. +#else + lenArrayOffset = ftell(source_fp); + sal_uInt32 zero(0); + fwrite(&zero, sizeof(zero), 1, source_fp); +#endif for (size_t k = 0; k < lenArray.size(); k++) { if( !(k & 0xf) ) fputs("\n\t", source_fp); +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k])); +#else + fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp ); +#endif } /* FIXME?: what happens if in every range i there is at least one charArray != 0 @@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA => then in index2, the last range will be ignored incorrectly */ static inline void printIndex1(FILE *source_fp, sal_Int16 *set) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t"); +#else + index1Offset = ftell(source_fp); +#endif + sal_Int16 count = 0; for (sal_Int32 i = 0; i < 0x100; i++) { sal_Int32 j = 0; while( j < 0x100 && charArray[(i<<8) + j] == 0) j++; - fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff)); + set[i] = (j < 0x100 ? count++ : 0xff); +#ifndef DICT_JA_ZH_IN_DATAFILE + fprintf(source_fp, "0x%02x, ", set[i]); if ((i & 0x0f) == 0x0f) fputs ("\n\t", source_fp); +#else + fwrite(&set[i], sizeof(set[i]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("};\n", source_fp); +#endif } static inline void printIndex2(FILE *source_fp, sal_Int16 *set) { +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("static const sal_Int32 index2[] = {\n\t", source_fp); +#else + index2Offset = ftell(source_fp); +#endif sal_Int32 prev = 0; for (sal_Int32 i = 0; i < 0x100; i++) { if (set[i] != 0xff) { @@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set) k++; prev = charArray[(i<<8) + j]; +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0)); if ((j & 0x0f) == 0x0f) fputs ("\n\t", source_fp); +#else + sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0); + fwrite(&n, sizeof(n), 1, source_fp); +#endif } +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("\n\t", source_fp); +#endif } } +#ifndef DICT_JA_ZH_IN_DATAFILE fputs ("\n};\n", source_fp); +#endif } /* Generates a bitmask for the existance of sal_Unicode values in dictionary; it packs 8 sal_Bool values in 1 sal_uInt8 */ static inline void printExistsMask(FILE *source_fp) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t"); +#else + existMarkOffset = ftell(source_fp); +#endif for (unsigned int i = 0; i < 0x2000; i++) { +#ifndef DICT_JA_ZH_IN_DATAFILE fprintf(source_fp, "0x%02x, ", exists[i]); if ( (i & 0xf) == 0xf ) fputs("\n\t", source_fp); +#else + fwrite(&exists[i], sizeof(exists[i]), 1, source_fp); +#endif } + +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("\n};\n", source_fp); +#endif } SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) @@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) sal_Int16 set[0x100]; printIncludes(source_fp); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("extern \"C\" {\n", source_fp); - printDataArea(dictionary_fp, source_fp, lenArray); - printLenArray(source_fp, lenArray); - printIndex1(source_fp, set); - printIndex2(source_fp, set); - printExistsMask(source_fp); - printFunctions(source_fp, argv[3]); +#endif + printDataArea(dictionary_fp, source_fp, lenArray); + printLenArray(source_fp, lenArray); + printIndex1(source_fp, set); + printIndex2(source_fp, set); + printExistsMask(source_fp); + printFunctions(source_fp, argv[3]); +#ifndef DICT_JA_ZH_IN_DATAFILE fputs("}\n", source_fp); +#else + // Put pointers to the tables at the end of the file... + fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp); + fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp); + fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp); + fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp); + fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp); +#endif fclose(dictionary_fp); fclose(source_fp); diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index 4cb51f9b5439..390b2cd7dcc4 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -17,28 +17,22 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ +#include <config_folders.h> -// xdictionary.cpp: implementation of the xdictionary class. - - - - +#include <osl/file.h> #include <rtl/ustrbuf.hxx> - +#include <rtl/bootstrap.hxx> #include <com/sun/star/i18n/WordType.hpp> #include <xdictionary.hxx> #include <unicode/uchar.h> #include <string.h> #include <breakiteratorImpl.hxx> - -// Construction/Destruction - - - namespace com { namespace sun { namespace star { namespace i18n { -#ifndef DISABLE_DYNLOADING +#ifdef DICT_JA_ZH_IN_DATAFILE + +#elif !defined DISABLE_DYNLOADING extern "C" { static void SAL_CALL thisModule() {} } @@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) : boundary(), japaneseWordBreak( sal_False ) { - index1 = 0; -#ifndef DISABLE_DYNLOADING + existMark = NULL; + index1 = NULL; + index2 = NULL; + lenArray = NULL; + dataArea = NULL; + +#ifdef DICT_JA_ZH_IN_DATAFILE + + if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 ) + { + OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" ); + rtl::Bootstrap::expandMacros(sUrl); + + if( strcmp( lang, "ja" ) == 0 ) + sUrl += "ja.data"; + else if( strcmp( lang, "zh" ) == 0 ) + sUrl += "zh.data"; + + oslFileHandle aFileHandle; + sal_uInt64 nFileSize; + char *pMapping; + if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None && + osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None && + osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None ) + { + // We have the offsets to the parts of the file at its end, see gendict.cxx + sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize); + + existMark = (sal_uInt8*) (pMapping + pEOF[-1]); + index2 = (sal_Int32*) (pMapping + pEOF[-2]); + index1 = (sal_Int16*) (pMapping + pEOF[-3]); + lenArray = (sal_Int32*) (pMapping + pEOF[-4]); + dataArea = (sal_Unicode*) (pMapping + pEOF[-5]); + } + } + +#elif !defined DISABLE_DYNLOADING + #ifdef SAL_DLLPREFIX OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh) aBuf.appendAscii( SAL_DLLPREFIX ); @@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) : func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData ); dataArea = (sal_Unicode*) (*func)(); } - else - { - existMark = NULL; - index1 = NULL; - index2 = NULL; - lenArray = NULL; - dataArea = NULL; - } #else + if( strcmp( lang, "ja" ) == 0 ) { existMark = getExistMark_ja(); index1 = getIndex1_ja(); @@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) : lenArray = getLenArray_zh(); dataArea = getDataArea_zh(); } - else - { - existMark = NULL; - index1 = NULL; - index2 = NULL; - lenArray = NULL; - dataArea = NULL; - } + #endif for (sal_Int32 i = 0; i < CACHE_MAX; i++) |