summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
author: Michael Meeks <michael.meeks@suse.com> 2012-12-20 23:04:15 +0000
committer: Michael Meeks <michael.meeks@suse.com> 2012-12-21 11:40:02 +0000
commit: 9c6006b961f690728f4035c10f8b9fe9fdb6f332 (patch)
tree: 374536fe3c6b15d0bd0f6478d43bc4b6dc07fbe8 /i18npool
parent: 8808f482a2d28919e4d3eddfccfe6e4daf697601 (diff)
fdo#58590 - cleanup and accelerate break-iterators.
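Doing a word count by switching, word by word, between two different kinds of word iterator was insanely slow, because every change of word type discarded and rebuilt the ICU break iterator. This change keeps a separate ICU break iterator for each type of word breaking, so it is only rebuilt when the locale changes.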
Diffstat (limited to 'i18npool')
-rw-r--r-- i18npool/inc/breakiterator_unicode.hxx 7
-rw-r--r-- i18npool/source/breakiterator/breakiterator_unicode.cxx 80
2 files changed, 41 insertions, 46 deletions
diff --git a/i18npool/inc/breakiterator_unicode.hxx b/i18npool/inc/breakiterator_unicode.hxx
index 26046ea4ed6d..fe226d4c5b41 100644
--- a/i18npool/inc/breakiterator_unicode.hxx
+++ b/i18npool/inc/breakiterator_unicode.hxx
@@ -80,6 +80,7 @@ protected:
rtl::OUString aICUText;
UText *ut;
icu::BreakIterator *aBreakIterator;
+ com::sun::star::lang::Locale maLocale;
BI_Data()
: ut(NULL)
@@ -91,10 +92,10 @@ protected:
utext_close(ut);
}
- } character, word, sentence, line, *icuBI;
+ } character, sentence, line, *icuBI;
+ BI_Data words[4]; // 4 is css::i18n::WordType enumeration size
- com::sun::star::lang::Locale aLocale;
- sal_Int16 aBreakType, aWordType;
+ sal_Int16 aBreakType;
void SAL_CALL loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char* name, const rtl::OUString& rText) throw(com::sun::star::uno::RuntimeException);
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 242cfa6e54ab..77ca8319e841 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -44,29 +44,17 @@ BreakIterator_Unicode::BreakIterator_Unicode() :
cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ), // implementation name
wordRule( "word" ),
lineRule( "line" ),
- result(),
- character(),
- word(),
- sentence(),
- line(),
- icuBI( NULL ),
- aLocale(),
- aBreakType(),
- aWordType()
+ icuBI( NULL )
{
}
-
BreakIterator_Unicode::~BreakIterator_Unicode()
{
- if (icuBI && icuBI->aBreakIterator) {
- delete icuBI->aBreakIterator;
- icuBI->aBreakIterator=NULL;
- }
- if (character.aBreakIterator) delete character.aBreakIterator;
- if (word.aBreakIterator) delete word.aBreakIterator;
- if (sentence.aBreakIterator) delete sentence.aBreakIterator;
- if (line.aBreakIterator) delete line.aBreakIterator;
+ delete character.aBreakIterator;
+ delete sentence.aBreakIterator;
+ delete line.aBreakIterator;
+ for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
+ delete words[i].aBreakIterator;
}
/*
@@ -86,26 +74,34 @@ class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
// loading ICU breakiterator on demand.
void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
- sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
+ sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
{
sal_Bool newBreak = sal_False;
UErrorCode status = U_ZERO_ERROR;
sal_Int16 breakType = 0;
switch (rBreakType) {
case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
- case LOAD_WORD_BREAKITERATOR: icuBI=&word;
- switch (rWordType) {
- case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
- case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
- case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
+ case LOAD_WORD_BREAKITERATOR:
+ assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
+ icuBI=&words[nWordType];
+ switch (nWordType) {
+ case WordType::ANY_WORD: break; // odd but previous behavior
+ case WordType::ANYWORD_IGNOREWHITESPACES:
+ breakType = 0; rule = wordRule = "edit_word"; break;
+ case WordType::DICTIONARY_WORD:
+ breakType = 1; rule = wordRule = "dict_word"; break;
+ default:
+ case WordType::WORD_COUNT:
+ breakType = 2; rule = wordRule = "count_word"; break;
}
break;
case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
}
- if (!icuBI->aBreakIterator || rWordType != aWordType ||
- rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
- rLocale.Variant != aLocale.Variant) {
+ if (!icuBI->aBreakIterator ||
+ rLocale.Language != icuBI->maLocale.Language ||
+ rLocale.Country != icuBI->maLocale.Country ||
+ rLocale.Variant != icuBI->maLocale.Variant) {
if (icuBI->aBreakIterator) {
delete icuBI->aBreakIterator;
icuBI->aBreakIterator=NULL;
@@ -180,9 +176,7 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
}
}
if (icuBI->aBreakIterator) {
- aLocale=rLocale;
- aWordType=rWordType;
- aBreakType=rBreakType;
+ icuBI->maLocale=rLocale;
newBreak=sal_True;
} else {
throw ERROR;
@@ -252,16 +246,16 @@ Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int
{
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
- result.startPos = word.aBreakIterator->following(nStartPos);
+ result.startPos = icuBI->aBreakIterator->following(nStartPos);
if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
result.endPos = result.startPos;
else {
if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
rWordType == WordType::DICTIONARY_WORD ) &&
u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
- result.startPos = word.aBreakIterator->following(result.startPos);
+ result.startPos = icuBI->aBreakIterator->following(result.startPos);
- result.endPos = word.aBreakIterator->following(result.startPos);
+ result.endPos = icuBI->aBreakIterator->following(result.startPos);
if(result.endPos == BreakIterator::DONE)
result.endPos = result.startPos;
}
@@ -274,16 +268,16 @@ Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_
{
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
- result.startPos = word.aBreakIterator->preceding(nStartPos);
+ result.startPos = icuBI->aBreakIterator->preceding(nStartPos);
if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
result.endPos = result.startPos;
else {
if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
rWordType == WordType::DICTIONARY_WORD) &&
u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
- result.startPos = word.aBreakIterator->preceding(result.startPos);
+ result.startPos = icuBI->aBreakIterator->preceding(result.startPos);
- result.endPos = word.aBreakIterator->following(result.startPos);
+ result.endPos = icuBI->aBreakIterator->following(result.startPos);
if(result.endPos == BreakIterator::DONE)
result.endPos = result.startPos;
}
@@ -297,22 +291,22 @@ Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text,
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
sal_Int32 len = Text.getLength();
- if(word.aBreakIterator->isBoundary(nPos)) {
+ if(icuBI->aBreakIterator->isBoundary(nPos)) {
result.startPos = result.endPos = nPos;
if((bDirection || nPos == 0) && nPos < len) //forward
- result.endPos = word.aBreakIterator->following(nPos);
+ result.endPos = icuBI->aBreakIterator->following(nPos);
else
- result.startPos = word.aBreakIterator->preceding(nPos);
+ result.startPos = icuBI->aBreakIterator->preceding(nPos);
} else {
if(nPos <= 0) {
result.startPos = 0;
- result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
+ result.endPos = len ? icuBI->aBreakIterator->following((sal_Int32)0) : 0;
} else if(nPos >= len) {
- result.startPos = word.aBreakIterator->preceding(len);
+ result.startPos = icuBI->aBreakIterator->preceding(len);
result.endPos = len;
} else {
- result.startPos = word.aBreakIterator->preceding(nPos);
- result.endPos = word.aBreakIterator->following(nPos);
+ result.startPos = icuBI->aBreakIterator->preceding(nPos);
+ result.endPos = icuBI->aBreakIterator->following(nPos);
}
}
if (result.startPos == BreakIterator::DONE)