summary | refs | log | tree | commit | diff
path: root/lingucomponent/source
diff options
context:
space:
mode:
author: Oliver Bolte <obo@openoffice.org> 2009-03-04 09:51:42 +0000
committer: Oliver Bolte <obo@openoffice.org> 2009-03-04 09:51:42 +0000
commit: 76af6630bf438a418981dd4843749b8b9458dd66 (patch)
tree: 62ffd7f02f35ffb956a95afc1bc96384a7d13734 /lingucomponent/source
parent: 03ff7f6dadd9e0dea3c9647cef41601f6975b257 (diff)
CWS-TOOLING: integrate CWS hunspell4thesaurus
2009-02-02 16:45:01 +0100 hjs r267278 : #i98415# - kick touch here - go for the root cause elsewhere
2009-02-02 12:09:15 +0100 hjs r267257 : #i98415# - fix parameters of touch
2009-01-23 23:13:00 +0100 mba r266855 : fixed warning
2009-01-23 18:59:55 +0100 mba r266848 : #i98415#: touch copied files
2009-01-21 09:58:05 +0100 nemeth r266633 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@266428 (milestone: DEV300:m39)
2009-01-21 09:04:48 +0100 nemeth r266629 : Issue: #19563 Submitted by: nemeth Reviewed by: nemeth Patch: handle bad dictionary items for back compatibility (affix separator without affix flags)
2009-01-20 20:04:10 +0100 nemeth r266622 : Issue: #19563 Submitted by: nemeth Reviewed by: nemeth Add two small fixes for SF.net Hunspell Bug ID 2487684 2519814
2008-12-10 00:21:41 +0100 nemeth r265141 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@264807 (milestone: DEV300:m37)
2008-12-09 16:12:56 +0100 nemeth r265113 : #i19563#: fixed stemming, and #i90028#: fixed and improved hyphenation
2008-11-26 23:09:05 +0100 nemeth r264438 : #i90028#: CWS hunspell4thesaurus: Windows fixes of the Hunspell patch.
2008-11-26 22:51:03 +0100 nemeth r264436 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@264325 (milestone: DEV300:m36)
2008-11-22 09:02:20 +0100 nemeth r264182 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@263288 (milestone: DEV300:m35)
2008-11-22 08:23:10 +0100 nemeth r264181 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@263288 (milestone: DEV300:m35)
2008-11-21 22:15:21 +0100 nemeth r264176 : #i90028#: migrate CWS hunspell4thesaurus to SVN.
Diffstat (limited to 'lingucomponent/source')
-rw-r--r-- lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx | 9
-rw-r--r-- lingucomponent/source/spellcheck/hunspell/phonet.cxx | 297
-rw-r--r-- lingucomponent/source/spellcheck/hunspell/phonet.hxx | 50
-rw-r--r-- lingucomponent/source/spellcheck/spell/sspellimp.cxx | 4
-rw-r--r-- lingucomponent/source/thesaurus/libnth/nthesimp.cxx | 140
-rw-r--r-- lingucomponent/source/thesaurus/libnth/nthesimp.hxx | 18
-rw-r--r-- lingucomponent/source/thesaurus/mythes/data_layout.txt | 1
7 files changed, 155 insertions, 364 deletions
diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
index f7ad506cd34f..b24bc6ee80f5 100644
--- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
+++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
@@ -86,6 +86,11 @@ using namespace linguistic;
#define CAPTYPE_ALLCAP 3
#define CAPTYPE_MIXED 4
+// Max(a,b): yields the larger of a and b. Both arguments are parenthesized so
+// compound expressions expand with the intended precedence; each argument is still evaluated twice.
+//#define Min(a,b) ((a) < (b) ? (a) : (b))
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
///////////////////////////////////////////////////////////////////////////
@@ -395,7 +400,9 @@ Hyphenator::hyphenate( const ::rtl::OUString& aWord,
while((n >=0) && (lcword[n] == '.')) n--;
n++;
if (n > 0) {
- if (hnj_hyphen_hyphenate2(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut))
+ if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut,
+ minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
+ Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2)))))
{
//whoops something did not work
delete[] hyphens;
diff --git a/lingucomponent/source/spellcheck/hunspell/phonet.cxx b/lingucomponent/source/spellcheck/hunspell/phonet.cxx
index ee14606d7541..e69de29bb2d1 100644
--- a/lingucomponent/source/spellcheck/hunspell/phonet.cxx
+++ b/lingucomponent/source/spellcheck/hunspell/phonet.cxx
@@ -1,297 +0,0 @@
-/* phonetic.c - generic replacement aglogithms for phonetic transformation
- Copyright (C) 2000 Bjoern Jacke
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License version 2.1 as published by the Free Software Foundation;
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; If not, see
- <http://www.gnu.org/licenses/>.
-
- Changelog:
-
- 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
- Initial Release insprired by the article about phonetic
- transformations out of c't 25/1999
-
- 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
- Released under MPL/GPL/LGPL tri-license for Hunspell
-
- 2007-08-23 Laszlo Nemeth <nemeth at OOo>
- Porting from Aspell to Hunspell using C-like structs
-*/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-using namespace std;
-#else
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#endif
-
-#include "csutil.hxx"
-#include "phonet.hxx"
-
-void init_phonet_hash(phonetable & parms)
- {
- int i, k;
-
- for (i = 0; i < parms.hash_size; i++) {
- parms.hash[i] = -1;
- }
-
- for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
- /** set hash value **/
- k = (unsigned char) parms.rules[i][0];
-
- if (parms.hash[k] < 0) {
- parms.hash[k] = i;
- }
- }
- }
-
- // like strcpy but safe if the strings overlap
- // but only if dest < src
- static inline void strmove(char * dest, char * src) {
- while (*src)
- *dest++ = *src++;
- *dest = '\0';
- }
-
-/* phonetic transcription algorithm */
-/* see: http://aspell.net/man-html/Phonetic-Code.html */
-/* convert string to uppercase before this call */
-int phonet (const char * inword, char * target,
- int len,
- phonetable & parms)
- {
- /** Do phonetic transformation. **/
- /** "len" = length of "inword" incl. '\0'. **/
-
- /** result: >= 0: length of "target" **/
- /** otherwise: error **/
-
- int i,j,k=0,n,p,z;
- int k0,n0,p0=-333,z0;
- char c, c0;
- const char * s;
- typedef unsigned char uchar;
- char word[MAXPHONETUTF8LEN + 1];
- if (len == -1) len = strlen(inword);
- if (len > MAXPHONETUTF8LEN) return 0;
- strcpy(word, inword);
-
- /** check word **/
- i = j = z = 0;
- while ((c = word[i]) != '\0') {
- n = parms.hash[(uchar) c];
- z0 = 0;
-
- if (n >= 0) {
- /** check all rules for the same letter **/
- while (parms.rules[n][0] == c) {
-
- /** check whole string **/
- k = 1; /** number of found letters **/
- p = 5; /** default priority **/
- s = parms.rules[n];
- s++; /** important for (see below) "*(s-1)" **/
-
- while (*s != '\0' && word[i+k] == *s
- && !isdigit (*s) && strchr ("(-<^$", *s) == NULL) {
- k++;
- s++;
- }
- if (*s == '(') {
- /** check letters in "(..)" **/
- if (isalpha(word[i+k]) // ...could be implied?
- && strchr(s+1, word[i+k]) != NULL) {
- k++;
- while (*s != ')')
- s++;
- s++;
- }
- }
- p0 = (int) *s;
- k0 = k;
- while (*s == '-' && k > 1) {
- k--;
- s++;
- }
- if (*s == '<')
- s++;
- if (isdigit (*s)) {
- /** determine priority **/
- p = *s - '0';
- s++;
- }
- if (*s == '^' && *(s+1) == '^')
- s++;
-
- if (*s == '\0'
- || (*s == '^'
- && (i == 0 || ! isalpha(word[i-1]))
- && (*(s+1) != '$'
- || (! isalpha(word[i+k0]) )))
- || (*s == '$' && i > 0
- && isalpha(word[i-1])
- && (! isalpha(word[i+k0]) )))
- {
- /** search for followup rules, if: **/
- /** parms.followup and k > 1 and NO '-' in searchstring **/
- c0 = word[i+k-1];
- n0 = parms.hash[(uchar) c0];
-
-// if (parms.followup && k > 1 && n0 >= 0
- if (k > 1 && n0 >= 0
- && p0 != (int) '-' && word[i+k] != '\0') {
- /** test follow-up rule for "word[i+k]" **/
- while (parms.rules[n0][0] == c0) {
-
- /** check whole string **/
- k0 = k;
- p0 = 5;
- s = parms.rules[n0];
- s++;
- while (*s != '\0' && word[i+k0] == *s
- && ! isdigit(*s) && strchr("(-<^$",*s) == NULL) {
- k0++;
- s++;
- }
- if (*s == '(') {
- /** check letters **/
- if (isalpha(word[i+k0])
- && strchr (s+1, word[i+k0]) != NULL) {
- k0++;
- while (*s != ')' && *s != '\0')
- s++;
- if (*s == ')')
- s++;
- }
- }
- while (*s == '-') {
- /** "k0" gets NOT reduced **/
- /** because "if (k0 == k)" **/
- s++;
- }
- if (*s == '<')
- s++;
- if (isdigit (*s)) {
- p0 = *s - '0';
- s++;
- }
-
- if (*s == '\0'
- /** *s == '^' cuts **/
- || (*s == '$' && ! isalpha(word[i+k0])))
- {
- if (k0 == k) {
- /** this is just a piece of the string **/
- n0 += 2;
- continue;
- }
-
- if (p0 < p) {
- /** priority too low **/
- n0 += 2;
- continue;
- }
- /** rule fits; stop search **/
- break;
- }
- n0 += 2;
- } /** End of "while (parms.rules[n0][0] == c0)" **/
-
- if (p0 >= p && parms.rules[n0][0] == c0) {
- n += 2;
- continue;
- }
- } /** end of follow-up stuff **/
-
- /** replace string **/
- s = parms.rules[n+1];
- p0 = (parms.rules[n][0] != '\0'
- && strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
- if (p0 == 1 && z == 0) {
- /** rule with '<' is used **/
- if (j > 0 && *s != '\0'
- && (target[j-1] == c || target[j-1] == *s)) {
- j--;
- }
- z0 = 1;
- z = 1;
- k0 = 0;
- while (*s != '\0' && word[i+k0] != '\0') {
- word[i+k0] = *s;
- k0++;
- s++;
- }
- if (k > k0)
- strmove (&word[0]+i+k0, &word[0]+i+k);
-
- /** new "actual letter" **/
- c = word[i];
- }
- else { /** no '<' rule used **/
- i += k - 1;
- z = 0;
- while (*s != '\0'
- && *(s+1) != '\0' && j < len) {
- if (j == 0 || target[j-1] != *s) {
- target[j] = *s;
- j++;
- }
- s++;
- }
- /** new "actual letter" **/
- c = *s;
- if (parms.rules[n][0] != '\0'
- && strstr (parms.rules[n]+1, "^^") != NULL) {
- if (c != '\0') {
- target[j] = c;
- j++;
- }
- strmove (&word[0], &word[0]+i+1);
- i = 0;
- z0 = 1;
- }
- }
- break;
- } /** end of follow-up stuff **/
- n += 2;
- } /** end of while (parms.rules[n][0] == c) **/
- } /** end of if (n >= 0) **/
- if (z0 == 0) {
-// if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'
-// && (!parms.collapse_result || j == 0 || target[j-1] != c)){
- if (k && !p0 && j < len && c != '\0'
- && (1 || j == 0 || target[j-1] != c)){
- /** condense only double letters **/
- target[j] = c;
- ///printf("\n setting \n");
- j++;
- }
-
- i++;
- z = 0;
- k=0;
- }
- } /** end of while ((c = word[i]) != '\0') **/
-
- target[j] = '\0';
- return (j);
-
- } /** end of function "phonet" **/
diff --git a/lingucomponent/source/spellcheck/hunspell/phonet.hxx b/lingucomponent/source/spellcheck/hunspell/phonet.hxx
index 4e9e0d647080..e69de29bb2d1 100644
--- a/lingucomponent/source/spellcheck/hunspell/phonet.hxx
+++ b/lingucomponent/source/spellcheck/hunspell/phonet.hxx
@@ -1,50 +0,0 @@
-/* phonetic.c - generic replacement aglogithms for phonetic transformation
- Copyright (C) 2000 Bjoern Jacke
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License version 2.1 as published by the Free Software Foundation;
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; If not, see
- <http://www.gnu.org/licenses/>.
-
- Changelog:
-
- 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
- Initial Release insprired by the article about phonetic
- transformations out of c't 25/1999
-
- 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
- Released under MPL/GPL/LGPL tri-license for Hunspell
-
- 2007-08-23 Laszlo Nemeth <nemeth at OOo>
- Porting from Aspell to Hunspell using C-like structs
-*/
-
-#ifndef __PHONETHXX__
-#define __PHONETHXX__
-
-#define MAXPHONETLEN 256
-#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
-
-struct phonetable {
- char utf8;
- cs_info * lang;
- int num;
- char * * rules;
- static const int hash_size = 256;
- int hash[hash_size];
-};
-
-void init_phonet_hash(phonetable & parms);
-
-int phonet (const char * inword, char * target,
- int len, phonetable & phone);
-
-#endif
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
index 02de8841b3ef..ed1ddf4ca77b 100644
--- a/lingucomponent/source/spellcheck/spell/sspellimp.cxx
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
@@ -72,6 +72,8 @@ using namespace com::sun::star::uno;
using namespace com::sun::star::linguistic2;
using namespace linguistic;
+// XML-header of SPELLML queries: the literal prefix that marks a "word" as a SPELLML request rather than text to spell-check
+#define SPELLML_HEADER "<?xml?>" // a word matching this at offset 0 skips the ignore-error post-processing of the spell failure
///////////////////////////////////////////////////////////////////////////
@@ -387,7 +389,7 @@ sal_Bool SAL_CALL
rHelper.SetTmpPropVals( rProperties );
INT16 nFailure = GetSpellFailure( rWord, rLocale );
- if (nFailure != -1)
+ if (nFailure != -1 && !rWord.match(A2OU(SPELLML_HEADER), 0))
{
INT16 nLang = LocaleToLanguage( rLocale );
// postprocess result for errors that should be ignored
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
index c712a29c9fdf..1ecba30bcb8c 100644
--- a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
@@ -7,7 +7,7 @@
* OpenOffice.org - a multi-platform office productivity suite
*
* $RCSfile: nthesimp.cxx,v $
- * $Revision: 1.18 $
+ * $Revision: 1.15.6.4 $
*
* This file is part of OpenOffice.org.
*
@@ -62,7 +62,8 @@
#define CAPTYPE_ALLCAP 3
#define CAPTYPE_MIXED 4
-
+// XML-header to query SPELLML support: passed as the word to XSpellChecker1::isValid() before the first stem query is sent
+#define SPELLML_SUPPORT "<?xml?>" // NOTE(review): same literal as the hard-coded query prefixes in queryMeanings() - keep them in sync
using namespace utl;
using namespace osl;
@@ -78,6 +79,19 @@ using namespace linguistic;
///////////////////////////////////////////////////////////////////////////
+// Fetch the linguistic service manager through the process service factory.
+// Yields an empty reference when no process service factory is available.
+static uno::Reference< XLinguServiceManager > GetLngSvcMgr_Impl()
+{
+    uno::Reference< XMultiServiceFactory > xFactory = getProcessServiceFactory();
+    if (!xFactory.is())
+        return uno::Reference< XLinguServiceManager >();
+
+    const OUString aServiceName( RTL_CONSTASCII_USTRINGPARAM(
+        "com.sun.star.linguistic2.LinguServiceManager" ) );
+    return uno::Reference< XLinguServiceManager >(
+        xFactory->createInstance( aServiceName ), UNO_QUERY );
+}
+
Thesaurus::Thesaurus() :
aEvtListeners ( GetLinguMutex() )
{
@@ -291,7 +305,7 @@ sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
- Thesaurus::queryMeanings( const OUString& rTerm, const Locale& rLocale,
+ Thesaurus::queryMeanings( const OUString& qTerm, const Locale& rLocale,
const PropertyValues& rProperties)
throw(IllegalArgumentException, RuntimeException)
{
@@ -299,6 +313,14 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
+ uno::Reference< XLinguServiceManager > xLngSvcMgr( GetLngSvcMgr_Impl() );
+ uno::Reference< XSpellChecker1 > xSpell;
+
+ OUString rTerm(qTerm);
+ OUString pTerm(qTerm);
+ sal_uInt16 ct = CAPTYPE_UNKNOWN;
+ sal_Int32 stem = 0;
+ sal_Int32 stem2 = 0;
INT16 nLanguage = LocaleToLanguage( rLocale );
@@ -312,6 +334,8 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
return noMeanings;
#endif
+ if (prevTerm == qTerm && prevLocale == nLanguage) return prevMeanings;
+
mentry * pmean = NULL;
sal_Int32 nmean = 0;
@@ -322,7 +346,6 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
rtl_TextEncoding aEnc = 0;
CharClass * pCC = NULL;
-
// find the first thesaurus that matches the locale
for (int i =0; i < numthes; i++) {
if (rLocale == aTLocs[i])
@@ -374,10 +397,9 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
}
}
- if (pTH) {
+ while (pTH) {
// convert word to all lower case for searching
- sal_uInt16 ct = CAPTYPE_UNKNOWN;
- ct = capitalType(rTerm, pCC);
+ if (!stem) ct = capitalType(rTerm, pCC);
OUString nTerm(makeLowerCase(rTerm, pCC));
OString aTmp( OU2ENC(nTerm, aEnc) );
nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
@@ -385,13 +407,58 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
if (nmean) aMeanings.realloc( nmean );
mentry * pe = pmean;
+ OUString codeTerm = qTerm;
+ Reference< XSpellAlternatives > xTmpRes2;
+
+ if (stem) {
+ xTmpRes2 = xSpell->spell( A2OU("<?xml?><query type='analyze'><word>") +
+ pTerm + A2OU("</word></query>"), nLanguage, rProperties );
+ if (xTmpRes2.is()) {
+ Sequence<OUString>seq = xTmpRes2->getAlternatives();
+ if (seq.getLength() > 0) {
+ codeTerm = seq[0];
+ stem2 = 1;
+ }
+#if 0
+ OString o = OUStringToOString(codeTerm, rtl_getTextEncodingFromUnixCharset("UTF-8"));
+ fprintf(stderr, "CODETERM: %s\n", o.pData->buffer);
+#endif
+ }
+ }
+
for (int j = 0; j < nmean; j++) {
int count = pe->count;
if (count) {
Sequence< OUString > aStr( count );
OUString *pStr = aStr.getArray();
+
for (int i=0; i < count; i++) {
OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),aEnc );
+ sal_Int32 catpos = sTerm.indexOf('(');
+ sal_Int32 catpos2 = 0;
+ OUString catst;
+ OUString catst2;
+ if (catpos > 2) {
+ // remove category name for affixation and casing
+ catst = A2OU(" ") + sTerm.copy(catpos);
+ sTerm = sTerm.copy(0, catpos);
+ sTerm = sTerm.trim();
+ }
+ // generate synonyms with affixes
+ if (stem && stem2) {
+ Reference< XSpellAlternatives > xTmpRes;
+ xTmpRes = xSpell->spell( A2OU("<?xml?><query type='generate'><word>") +
+ sTerm + A2OU("</word>") + codeTerm + A2OU("</query>"), nLanguage, rProperties );
+ if (xTmpRes.is()) {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+ for (int k = 0; k < seq.getLength(); k++) {
+ OString o = OUStringToOString(seq[k], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+ }
+ if (seq.getLength() > 0) sTerm = seq[0];
+ }
+ }
+ if (catpos2) sTerm = catst2 + sTerm;
+
sal_uInt16 ct1 = capitalType(sTerm, pCC);
if (CAPTYPE_MIXED == ct1)
ct = ct1;
@@ -413,7 +480,7 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
break;
}
}
- OUString aAlt( cTerm );
+ OUString aAlt( cTerm + catst);
pStr[i] = aAlt;
}
#if 0
@@ -429,10 +496,60 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
pe++;
}
pTH->CleanUpAfterLookup(&pmean,nmean);
- }
+
if (nmean) {
+ prevTerm = qTerm;
+ prevMeanings = aMeanings;
+ prevLocale = nLanguage;
return aMeanings;
}
+
+ if (stem || !xLngSvcMgr.is()) return noMeanings;
+ stem = 1;
+
+ xSpell = uno::Reference< XSpellChecker1 >( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
+ if (!xSpell.is() || !xSpell->isValid( A2OU(SPELLML_SUPPORT), nLanguage, rProperties )) {
+ return noMeanings;
+ }
+ Reference< XSpellAlternatives > xTmpRes;
+ xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
+ rTerm + A2OU("</word></query>"), nLanguage, rProperties );
+ if (xTmpRes.is()) {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+#if 0
+ for (int i = 0; i < seq.getLength(); i++) {
+ OString o = OUStringToOString(seq[i], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+ fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
+ }
+#endif
+ if (seq.getLength() > 0) {
+ rTerm = seq[0]; // XXX Use only the first stem
+ continue;
+ }
+ }
+
+ // stem the last word of the synonym (for categories after affixation)
+ rTerm = rTerm.trim();
+ sal_Int32 pos = rTerm.lastIndexOf(' ');
+ if (!pos) return noMeanings;
+ xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
+ rTerm.copy(pos + 1) + A2OU("</word></query>"), nLanguage, rProperties );
+ if (xTmpRes.is()) {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+ if (seq.getLength() > 0) {
+ pTerm = rTerm.copy(pos + 1);
+ rTerm = rTerm.copy(0, pos + 1) + seq[0];
+#if 0
+ for (int i = 0; i < seq.getLength(); i++) {
+ OString o = OUStringToOString(seq[i], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+ fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
+ }
+#endif
+ continue;
+ }
+ }
+ break;
+ }
return noMeanings;
}
@@ -478,9 +595,8 @@ void SAL_CALL
xPropHelper = pPropHelper;
pPropHelper->AddAsPropListener(); //! after a reference is established
}
- else {
+ else
DBG_ERROR( "wrong number of arguments in sequence" );
- }
}
}
@@ -492,7 +608,7 @@ sal_uInt16 SAL_CALL Thesaurus::capitalType(const OUString& aTerm, CharClass * pC
if ((pCC) && (tlen)) {
String aStr(aTerm);
sal_Int32 nc = 0;
- for (xub_StrLen tindex = 0; tindex < tlen; tindex++) {
+ for (USHORT tindex = 0; tindex < tlen; tindex++) {
if (pCC->getCharacterType(aStr,tindex) &
::com::sun::star::i18n::KCharacterType::UPPER) nc++;
}
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
index bf168ec161dc..f75cc29b9755 100644
--- a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
@@ -7,7 +7,7 @@
* OpenOffice.org - a multi-platform office productivity suite
*
* $RCSfile: nthesimp.hxx,v $
- * $Revision: 1.6 $
+ * $Revision: 1.6.16.1 $
*
* This file is part of OpenOffice.org.
*
@@ -46,6 +46,8 @@
#include <com/sun/star/linguistic2/XMeaning.hpp>
#include <com/sun/star/linguistic2/XThesaurus.hpp>
+#include <com/sun/star/linguistic2/XLinguServiceManager.hpp>
+#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
#include <tools/table.hxx>
@@ -96,8 +98,10 @@ class Thesaurus :
OUString * aTNames;
sal_Int32 numthes;
-
-
+ // cache for the Thesaurus dialog: the last non-empty queryMeanings() result plus the term and language it was computed for
+ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > prevMeanings;
+ OUString prevTerm; // query term, as passed by the caller, that produced prevMeanings
+ INT16 prevLocale; // language id that produced prevMeanings; NOTE(review): no initialization is visible in this patch, yet queryMeanings() compares it before the first store - confirm the constructor sets it
// disallow copy-constructor and assignment-operator for now
Thesaurus(const Thesaurus &);
@@ -177,6 +181,14 @@ private:
OUString SAL_CALL makeUpperCase(const OUString&, CharClass *);
OUString SAL_CALL makeInitCap(const OUString&, CharClass *);
+/* Disabled static members for the service manager and spell checker references:
+ static ::com::sun::star::uno::Reference<
+ ::com::sun::star::linguistic2::XLinguServiceManager > xLngSvcMgr;
+ static ::com::sun::star::uno::Reference<
+ ::com::sun::star::linguistic2::XSpellChecker1 > xSpell;
+ NOTE(review): queryMeanings() in nthesimp.cxx holds both references as locals
+ and obtains the manager from the file-local GetLngSvcMgr_Impl(); no definition
+ of Thesaurus::GetLngSvcMgr() is visible in this patch - confirm that the
+ declaration that follows is still needed.
+*/
+ static ::com::sun::star::uno::Reference<
+ ::com::sun::star::linguistic2::XLinguServiceManager > GetLngSvcMgr();
+
};
inline OUString Thesaurus::getImplementationName_Static() throw()
diff --git a/lingucomponent/source/thesaurus/mythes/data_layout.txt b/lingucomponent/source/thesaurus/mythes/data_layout.txt
index 12d77a74f208..ef4bc255d96a 100644
--- a/lingucomponent/source/thesaurus/mythes/data_layout.txt
+++ b/lingucomponent/source/thesaurus/mythes/data_layout.txt
@@ -18,6 +18,7 @@ to and from this encoding if necessary.
Strings currently recognized by OpenOffice.org are:
+ UTF-8
ISO8859-1
ISO8859-2
ISO8859-3