summary | refs | log | tree | commit | diff
path: root/i18npool/source
diff options
context:
space:
mode:
author: Tor Lillqvist <tml@collabora.com> 2014-04-14 14:31:25 +0300
committer: Tor Lillqvist <tml@collabora.com> 2014-04-14 17:21:19 +0300
commit: 0b6fb1f45a179e94bb39f49dd8f245812d753113 (patch)
tree: 5e697160e3c5e1cfa4a11057b2dd72c77f49f83d /i18npool/source
parent: c0f2dc1347cfbc121408959531998b217cd1c617 (diff)
Put the dict_ja and _zh data in files instead of code for iOS
Map the file(s) into memory on demand. The executable file of an app needs to be as small as possible. Including additional data files in an app bundle is fine.

Change-Id: Ife9bfe99a2cf0473d459f38f50dfa3304b39e282
Diffstat (limited to 'i18npool/source')
-rw-r--r-- i18npool/source/breakiterator/gendict.cxx 111
-rw-r--r-- i18npool/source/breakiterator/xdictionary.cxx 76
2 files changed, 149 insertions, 38 deletions
diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index eac6998929a3..c0f1e8ecd577 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -17,7 +17,6 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -32,6 +31,22 @@ using std::vector;
using namespace ::rtl;
+// For iOS, where we must strive for a minimal executable size, we
+// keep the data produced by this utility not as large const tables in
+// source code but instead as separate data files, to be bundled with
+// an app, and mmapped in at run time.
+
+// To test this easier on a desktop OS, just make sure
+// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool.
+
+#ifdef DICT_JA_ZH_IN_DATAFILE
+static sal_Int64 dataAreaOffset = 0;
+static sal_Int64 lenArrayOffset = 0;
+static sal_Int64 index1Offset = 0;
+static sal_Int64 index2Offset = 0;
+static sal_Int64 existMarkOffset = 0;
+#endif
+
/* Utility gendict:
"BreakIterator_CJK provides input string caching and dictionary searching for
@@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index)
static inline void printIncludes(FILE* source_fp)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp);
fputs("#include <sal/types.h>\n\n", source_fp);
+#else
+ (void) source_fp;
+#endif
}
static inline void printFunctions(FILE* source_fp, const char *lang)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp);
@@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang)
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang);
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang);
fputs ("#endif\n", source_fp);
+#else
+ (void) source_fp;
+ (void) lang;
+#endif
}
static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
{
// generate main dict. data array
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
+#else
+ dataAreaOffset = ftell(source_fp);
+#endif
sal_Char str[1024];
sal_uInt32 lenArrayCurr = 0;
sal_Unicode current = 0;
@@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
// first character is stored in charArray, so start from second
for (i = 1; i < len; i++, lenArrayCurr++) {
set_exists(u[i]);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%04x, ", u[i]);
if ((lenArrayCurr & 0x0f) == 0x0f)
fputs("\n\t", source_fp);
+#else
+ fwrite(&u[i], sizeof(u[i]), 1, source_fp);
+#endif
}
}
lenArray.push_back( lenArrayCurr ); // store last ending pointer
charArray[current+1] = lenArray.size();
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp);
+#endif
}
static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
+#else
+ lenArrayOffset = ftell(source_fp);
+ sal_uInt32 zero(0);
+ fwrite(&zero, sizeof(zero), 1, source_fp);
+#endif
for (size_t k = 0; k < lenArray.size(); k++)
{
if( !(k & 0xf) )
fputs("\n\t", source_fp);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
+#else
+ fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp );
+#endif
}
/* FIXME?: what happens if in every range i there is at least one charArray != 0
@@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA
=> then in index2, the last range will be ignored incorrectly */
static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
+#else
+ index1Offset = ftell(source_fp);
+#endif
+
sal_Int16 count = 0;
for (sal_Int32 i = 0; i < 0x100; i++) {
sal_Int32 j = 0;
while( j < 0x100 && charArray[(i<<8) + j] == 0)
j++;
- fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
+ set[i] = (j < 0x100 ? count++ : 0xff);
+#ifndef DICT_JA_ZH_IN_DATAFILE
+ fprintf(source_fp, "0x%02x, ", set[i]);
if ((i & 0x0f) == 0x0f)
fputs ("\n\t", source_fp);
+#else
+ fwrite(&set[i], sizeof(set[i]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("};\n", source_fp);
+#endif
}
static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
+#else
+ index2Offset = ftell(source_fp);
+#endif
sal_Int32 prev = 0;
for (sal_Int32 i = 0; i < 0x100; i++) {
if (set[i] != 0xff) {
@@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
k++;
prev = charArray[(i<<8) + j];
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0));
if ((j & 0x0f) == 0x0f)
fputs ("\n\t", source_fp);
+#else
+ sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0);
+ fwrite(&n, sizeof(n), 1, source_fp);
+#endif
}
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n\t", source_fp);
+#endif
}
}
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n};\n", source_fp);
+#endif
}
/* Generates a bitmask for the existance of sal_Unicode values in dictionary;
it packs 8 sal_Bool values in 1 sal_uInt8 */
static inline void printExistsMask(FILE *source_fp)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
+#else
+ existMarkOffset = ftell(source_fp);
+#endif
for (unsigned int i = 0; i < 0x2000; i++)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%02x, ", exists[i]);
if ( (i & 0xf) == 0xf )
fputs("\n\t", source_fp);
+#else
+ fwrite(&exists[i], sizeof(exists[i]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp);
+#endif
}
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
sal_Int16 set[0x100];
printIncludes(source_fp);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("extern \"C\" {\n", source_fp);
- printDataArea(dictionary_fp, source_fp, lenArray);
- printLenArray(source_fp, lenArray);
- printIndex1(source_fp, set);
- printIndex2(source_fp, set);
- printExistsMask(source_fp);
- printFunctions(source_fp, argv[3]);
+#endif
+ printDataArea(dictionary_fp, source_fp, lenArray);
+ printLenArray(source_fp, lenArray);
+ printIndex1(source_fp, set);
+ printIndex2(source_fp, set);
+ printExistsMask(source_fp);
+ printFunctions(source_fp, argv[3]);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("}\n", source_fp);
+#else
+ // Put pointers to the tables at the end of the file...
+ fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp);
+ fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp);
+ fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp);
+ fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp);
+ fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp);
+#endif
fclose(dictionary_fp);
fclose(source_fp);
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index 4cb51f9b5439..390b2cd7dcc4 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -17,28 +17,22 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
+#include <config_folders.h>
-// xdictionary.cpp: implementation of the xdictionary class.
-
-
-
-
+#include <osl/file.h>
#include <rtl/ustrbuf.hxx>
-
+#include <rtl/bootstrap.hxx>
#include <com/sun/star/i18n/WordType.hpp>
#include <xdictionary.hxx>
#include <unicode/uchar.h>
#include <string.h>
#include <breakiteratorImpl.hxx>
-
-// Construction/Destruction
-
-
-
namespace com { namespace sun { namespace star { namespace i18n {
-#ifndef DISABLE_DYNLOADING
+#ifdef DICT_JA_ZH_IN_DATAFILE
+
+#elif !defined DISABLE_DYNLOADING
extern "C" { static void SAL_CALL thisModule() {} }
@@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) :
boundary(),
japaneseWordBreak( sal_False )
{
- index1 = 0;
-#ifndef DISABLE_DYNLOADING
+ existMark = NULL;
+ index1 = NULL;
+ index2 = NULL;
+ lenArray = NULL;
+ dataArea = NULL;
+
+#ifdef DICT_JA_ZH_IN_DATAFILE
+
+ if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 )
+ {
+ OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" );
+ rtl::Bootstrap::expandMacros(sUrl);
+
+ if( strcmp( lang, "ja" ) == 0 )
+ sUrl += "ja.data";
+ else if( strcmp( lang, "zh" ) == 0 )
+ sUrl += "zh.data";
+
+ oslFileHandle aFileHandle;
+ sal_uInt64 nFileSize;
+ char *pMapping;
+ if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None &&
+ osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None &&
+ osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None )
+ {
+ // We have the offsets to the parts of the file at its end, see gendict.cxx
+ sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize);
+
+ existMark = (sal_uInt8*) (pMapping + pEOF[-1]);
+ index2 = (sal_Int32*) (pMapping + pEOF[-2]);
+ index1 = (sal_Int16*) (pMapping + pEOF[-3]);
+ lenArray = (sal_Int32*) (pMapping + pEOF[-4]);
+ dataArea = (sal_Unicode*) (pMapping + pEOF[-5]);
+ }
+ }
+
+#elif !defined DISABLE_DYNLOADING
+
#ifdef SAL_DLLPREFIX
OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh)
aBuf.appendAscii( SAL_DLLPREFIX );
@@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) :
func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData );
dataArea = (sal_Unicode*) (*func)();
}
- else
- {
- existMark = NULL;
- index1 = NULL;
- index2 = NULL;
- lenArray = NULL;
- dataArea = NULL;
- }
#else
+
if( strcmp( lang, "ja" ) == 0 ) {
existMark = getExistMark_ja();
index1 = getIndex1_ja();
@@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) :
lenArray = getLenArray_zh();
dataArea = getDataArea_zh();
}
- else
- {
- existMark = NULL;
- index1 = NULL;
- index2 = NULL;
- lenArray = NULL;
- dataArea = NULL;
- }
+
#endif
for (sal_Int32 i = 0; i < CACHE_MAX; i++)