diff options
author | Michael Meeks <michael.meeks@collabora.com> | 2014-05-09 16:28:18 +0100 |
---|---|---|
committer | Michael Meeks <michael.meeks@collabora.com> | 2014-05-12 10:56:33 +0100 |
commit | 69a74afb07c7c58b201d315dbd254bc50b9c9f03 (patch) | |
tree | 0c81326a377b22ca3f1946d0797907452cf11d15 /i18npool | |
parent | 22259f02b211694525251afcdba12a5d3a43cbae (diff) |
Avoid expensive dlopen thrash for break iterators.
Change-Id: I770c1b3e5164cb486b5a5c2b1259f713914a1bae
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/inc/xdictionary.hxx | 26 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_breakiterator.cxx | 20 | ||||
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 147 |
3 files changed, 117 insertions, 76 deletions
diff --git a/i18npool/inc/xdictionary.hxx b/i18npool/inc/xdictionary.hxx index 13078e88d5c0..2b1ece0d2447 100644 --- a/i18npool/inc/xdictionary.hxx +++ b/i18npool/inc/xdictionary.hxx @@ -40,17 +40,29 @@ struct WordBreakCache { bool equals(const sal_Unicode *str, Boundary& boundary); // checking cached string }; -class xdictionary +struct xdictionarydata { -private: const sal_uInt8 * existMark; const sal_Int16 * index1; - const sal_Int32 * index2; - const sal_Int32 * lenArray; + const sal_Int32 * index2; + const sal_Int32 * lenArray; const sal_Unicode* dataArea; -#ifndef DISABLE_DYNLOADING - oslModule hModule; -#endif + xdictionarydata() : + existMark( NULL ), + index1( NULL ), + index2( NULL ), + lenArray( NULL ), + dataArea( NULL ) + { + } +}; + +class xdictionary +{ +private: + xdictionarydata data; + void initDictionaryData(const sal_Char *lang); + Boundary boundary; bool japaneseWordBreak; diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 6f7b53753dee..36e3d3b808fc 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -68,6 +68,7 @@ public: CPPUNIT_TEST_SUITE_END(); private: uno::Reference<i18n::XBreakIterator> m_xBreak; + void doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak); }; void TestBreakIterator::testLineBreaking() @@ -906,7 +907,7 @@ void TestBreakIterator::testKhmer() } #endif -void TestBreakIterator::testJapanese() +void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak) { lang::Locale aLocale; aLocale.Language = "ja"; @@ -917,7 +918,7 @@ void TestBreakIterator::testJapanese() const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 }; OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE)); - aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale, + aBounds = xBreak->getWordBoundary(aTest, 5, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7); @@ -927,18 +928,29 @@ void TestBreakIterator::testJapanese() const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 }; OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE)); - aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, + aBounds = xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3); - aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale, + aBounds = xBreak->getWordBoundary(aTest, 5, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6); } } +void TestBreakIterator::testJapanese() +{ + doTestJapanese(m_xBreak); + + // fdo#78479 - test second / cached instantiation of xdictionary + uno::Reference< i18n::XBreakIterator > xTmpBreak(m_xSFactory->createInstance( + "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW); + + doTestJapanese(xTmpBreak); +} + void TestBreakIterator::testChinese() { lang::Locale aLocale; diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index 6ded12393adc..e83b5297f328 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -20,6 +20,7 @@ #include <config_folders.h> #include <osl/file.h> +#include <osl/mutex.hxx> #include <rtl/ustrbuf.hxx> #include <rtl/bootstrap.hxx> #include <com/sun/star/i18n/WordType.hpp> @@ -57,22 +58,9 @@ sal_Unicode* getDataArea_zh(); #endif xdictionary::xdictionary(const sal_Char *lang) : - existMark( NULL ), - index1( NULL ), - index2( NULL ), - lenArray( NULL ), - dataArea( NULL ), -#ifndef DISABLE_DYNLOADING - hModule( NULL ), -#endif boundary(), japaneseWordBreak( false ) { - existMark = NULL; - index1 = NULL; - index2 = NULL; - lenArray = NULL; - dataArea = NULL; #ifdef DICT_JA_ZH_IN_DATAFILE @@ -96,53 +84,33 @@ xdictionary::xdictionary(const sal_Char *lang) : // We have the offsets to the parts of the file at its end, see gendict.cxx sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize); - existMark = (sal_uInt8*) (pMapping + pEOF[-1]); - index2 = (sal_Int32*) (pMapping + pEOF[-2]); - index1 = (sal_Int16*) (pMapping + pEOF[-3]); - lenArray = (sal_Int32*) (pMapping + pEOF[-4]); - dataArea = (sal_Unicode*) (pMapping + pEOF[-5]); + data.existMark = (sal_uInt8*) (pMapping + pEOF[-1]); + data.index2 = (sal_Int32*) (pMapping + pEOF[-2]); + data.index1 = (sal_Int16*) (pMapping + pEOF[-3]); + data.lenArray = (sal_Int32*) (pMapping + pEOF[-4]); + data.dataArea = (sal_Unicode*) (pMapping + pEOF[-5]); } } #elif !defined DISABLE_DYNLOADING -#ifdef SAL_DLLPREFIX - OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh) - aBuf.appendAscii( SAL_DLLPREFIX ); -#else - OUStringBuffer aBuf( strlen(lang) + 7 + 4 ); // mostly "*.dll" (with * == dict_zh) -#endif - aBuf.appendAscii( "dict_" ).appendAscii( lang ).appendAscii( SAL_DLLEXTENSION ); - hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT ); - if( hModule ) { - sal_IntPtr (*func)(); - func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getExistMark").pData ); - existMark = (sal_uInt8*) (*func)(); - func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getIndex1").pData ); - index1 = (sal_Int16*) (*func)(); - func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getIndex2").pData ); - index2 = (sal_Int32*) (*func)(); - func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getLenArray").pData ); - lenArray = (sal_Int32*) (*func)(); - func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData ); - dataArea = (sal_Unicode*) (*func)(); - } + initDictionaryData( lang ); #else if( strcmp( lang, "ja" ) == 0 ) { - existMark = getExistMark_ja(); - index1 = getIndex1_ja(); - index2 = getIndex2_ja(); - lenArray = getLenArray_ja(); - dataArea = getDataArea_ja(); + data.existMark = getExistMark_ja(); + data.index1 = getIndex1_ja(); + data.index2 = getIndex2_ja(); + data.lenArray = getLenArray_ja(); + data.dataArea = getDataArea_ja(); } else if( strcmp( lang, "zh" ) == 0 ) { - existMark = getExistMark_zh(); - index1 = getIndex1_zh(); - index2 = getIndex2_zh(); - lenArray = getLenArray_zh(); - dataArea = getDataArea_zh(); + data.existMark = getExistMark_zh(); + data.index1 = getIndex1_zh(); + data.index2 = getIndex2_zh(); + data.lenArray = getLenArray_zh(); + data.dataArea = getDataArea_zh(); } #endif @@ -155,15 +123,65 @@ xdictionary::xdictionary(const sal_Char *lang) : xdictionary::~xdictionary() { -#ifndef DISABLE_DYNLOADING - osl_unloadModule(hModule); -#endif - for (sal_Int32 i = 0; i < CACHE_MAX; i++) { - if (cache[i].size > 0) { - delete [] cache[i].contents; - delete [] cache[i].wordboundary; - } + for (sal_Int32 i = 0; i < CACHE_MAX; i++) { + if (cache[i].size > 0) { + delete [] cache[i].contents; + delete [] cache[i].wordboundary; + } + } +} + +namespace { + struct datacache { + oslModule mhModule; + OString maLang; + xdictionarydata maData; + }; +} + +void xdictionary::initDictionaryData(const sal_Char *pLang) +{ + // Global cache, never released for performance + static std::vector< datacache > aLoadedCache; + + osl::MutexGuard aGuard( osl::Mutex::getGlobalMutex() ); + for( size_t i = 0; i < aLoadedCache.size(); ++i ) + { + if( !strcmp( pLang, aLoadedCache[ i ].maLang.getStr() ) ) + { + data = aLoadedCache[ i ].maData; + return; } + } + + // otherwise add to the cache, positive or negative. + datacache aEntry; + aEntry.maLang = OString( pLang, strlen( pLang ) ); + +#ifdef SAL_DLLPREFIX + OUStringBuffer aBuf( strlen( pLang ) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh) + aBuf.appendAscii( SAL_DLLPREFIX ); +#else + OUStringBuffer aBuf( strlen( pLang ) + 7 + 4 ); // mostly "*.dll" (with * == dict_zh) +#endif + aBuf.appendAscii( "dict_" ).appendAscii( pLang ).appendAscii( SAL_DLLEXTENSION ); + aEntry.mhModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT ); + if( aEntry.mhModule ) { + sal_IntPtr (*func)(); + func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getExistMark").pData ); + aEntry.maData.existMark = (sal_uInt8*) (*func)(); + func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getIndex1").pData ); + aEntry.maData.index1 = (sal_Int16*) (*func)(); + func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getIndex2").pData ); + aEntry.maData.index2 = (sal_Int32*) (*func)(); + func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getLenArray").pData ); + aEntry.maData.lenArray = (sal_Int32*) (*func)(); + func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getDataArea").pData ); + aEntry.maData.dataArea = (sal_Unicode*) (*func)(); + } + + data = aEntry.maData; + aLoadedCache.push_back( aEntry ); } void xdictionary::setJapaneseWordBreak() @@ -173,8 +191,8 @@ void xdictionary::setJapaneseWordBreak() bool xdictionary::exists(const sal_uInt32 c) { - // 0x1FFF is the hardcoded limit in gendict for existMarks - bool exist = (existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False; + // 0x1FFF is the hardcoded limit in gendict for data.existMarks + bool exist = (data.existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((data.existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False; if (!exist && japaneseWordBreak) return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN; else @@ -183,24 +201,23 @@ bool xdictionary::exists(const sal_uInt32 c) sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen) { + if ( !data.index1 ) return 0; - if ( !index1 ) return 0; - - sal_Int16 idx = index1[str[0] >> 8]; + sal_Int16 idx = data.index1[str[0] >> 8]; if (idx == 0xFF) return 0; idx = (idx<<8) | (str[0]&0xff); - sal_uInt32 begin = index2[idx], end = index2[idx+1]; + sal_uInt32 begin = data.index2[idx], end = data.index2[idx+1]; if (begin == 0) return 0; str++; sLen--; // first character is not stored in the dictionary for (sal_uInt32 i = end; i > begin; i--) { - sal_Int32 len = lenArray[i] - lenArray[i - 1]; + sal_Int32 len = data.lenArray[i] - data.lenArray[i - 1]; if (sLen >= len) { - const sal_Unicode *dstr = dataArea + lenArray[i-1]; + const sal_Unicode *dstr = data.dataArea + data.lenArray[i-1]; sal_Int32 pos = 0; while (pos < len && dstr[pos] == str[pos]) { pos++; } |