diff options
author | obo <obo@openoffice.org> | 2010-06-22 12:09:13 +0200 |
---|---|---|
committer | obo <obo@openoffice.org> | 2010-06-22 12:09:13 +0200 |
commit | 6c5baefb90ef14fa08327d4b1ecbcf3e54c3e84d (patch) | |
tree | 290910a7fb143460ac6f50309a651254f713872d /lingucomponent | |
parent | cb6e6415ba6c4df918e5d88f47a2509e2ff391ee (diff) | |
parent | af15b9a545676a6d83f19a23c6712c23fd024305 (diff) |
CWS-TOOLING: integrate CWS mythes12
Diffstat (limited to 'lingucomponent')
-rw-r--r-- | lingucomponent/prj/build.lst | 5 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/Makefile | 39 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/README | 60 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/checkme.lst | 4 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/data_layout.txt | 131 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/example.cxx | 128 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/license.readme | 34 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/makefile.mk | 59 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/mythes.cxx | 403 | ||||
-rw-r--r-- | lingucomponent/source/thesaurus/mythes/mythes.hxx | 76 |
10 files changed, 2 insertions, 937 deletions
diff --git a/lingucomponent/prj/build.lst b/lingucomponent/prj/build.lst index 138e5858f70e..045aaee82999 100644 --- a/lingucomponent/prj/build.lst +++ b/lingucomponent/prj/build.lst @@ -1,9 +1,8 @@ -lc lingucomponent : linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell NULL +lc lingucomponent : linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell MYTHES:mythes NULL lc lingucomponent usr1 - all lc_mkout NULL lc lingucomponent\inc nmake - all lc_inc NULL lc lingucomponent\source\lingutil nmake - all lc_util lc_inc NULL -lc lingucomponent\source\thesaurus\mythes nmake - all lc_mythes lc_util lc_inc NULL -lc lingucomponent\source\thesaurus\libnth nmake - all lc_libnth lc_mythes lc_util lc_inc NULL +lc lingucomponent\source\thesaurus\libnth nmake - all lc_libnth lc_util lc_inc NULL lc lingucomponent\source\spellcheck\spell nmake - all lc_libspell lc_util lc_inc NULL lc lingucomponent\source\hyphenator\altlinuxhyph\hyphen nmake - all lc_libhyphen lc_util lc_inc NULL lc lingucomponent\source\languageguessing nmake - all lc_languageguessing lc_util lc_inc NULL diff --git a/lingucomponent/source/thesaurus/mythes/Makefile b/lingucomponent/source/thesaurus/mythes/Makefile deleted file mode 100644 index b1d811296714..000000000000 --- a/lingucomponent/source/thesaurus/mythes/Makefile +++ /dev/null @@ -1,39 +0,0 @@ - -CXX=g++ - -CXXFLAGS= -O2 -Wall -ansi -pedantic -I. - -LDFLAGS=-L. -lmythes - -LIBS=libmythes.a - -AR=ar rc -RANLIB=ranlib - -OBJS = mythes.o - -all: example - -libmythes.a: $(OBJS) - $(AR) $@ $(OBJS) - -@ ($(RANLIB) $@ || true) >/dev/null 2>&1 - -example: example.o $(LIBS) - $(CXX) $(CXXFLAGS) -o $@ example.o $(LDFLAGS) - -%.o: %.cxx - $(CXX) $(CXXFLAGS) -c $< - -clean: - rm -f *.o *~ example libthes.a - -distclean: clean - -depend: - makedepend -- $(CXXFLAGS) -- *.[ch]xx - -# DO NOT DELETE THIS LINE -- make depend depends on it. - -mythes.o: mythes.hxx -example.o: mythes.hxx - diff --git a/lingucomponent/source/thesaurus/mythes/README b/lingucomponent/source/thesaurus/mythes/README deleted file mode 100644 index 421f16a712fe..000000000000 --- a/lingucomponent/source/thesaurus/mythes/README +++ /dev/null @@ -1,60 +0,0 @@ -MyThes is a simple thesaurus that uses a structured -text data file and an index file with binary search -to lookup words and phrases and return information -on part of speech, meanings, and synonyms - -MyThes was written to provide a thesaurus for the -OpenOffice.org project - -The Main features of MyThes are: - -1. written in C++ to make it easier to interface with - Pspell, OpenOffice, AbiWord, etc - -2. it is stateless, uses no static variables and - should be completely reentrant with no ifdefs - -3. it compiles with -ansi and -pedantic and -Wall - with no warnings so it should be quite portable - -4. it uses a perl program to read the structured - text file and create the index needed for bianry - searching (see dictionaries/en_US/th_gen_idx.pl) - -5. it is very simple with *lots* of comments. - The main "smarts" are in the structure of the - text file that makes up the thesaurus data - -6. It comes with a ready-to-go structured thesaurus - data file for en_US extracted from the WordNet-2.0 data. - (see dictioanries/en_US/th_en_US_new.dat) - - Please see WordNet_license.txt and WordNet_readme.txt - for more information on the very useful project! - (found in dictionaries/en_US/) - -7. The source code has a BSD license (and no advertising clause) - - -MyThes has the world's simplest Makefile and no -configure support. It does come with a simple example -program that looks up some words and returns meanings -and synonyms. - -To build it simply do the following: - -unzip mythes.zip -cd mythes -make - -To run the example program: -./example th_en_US_new.idx th_en_US_new.dat checkme.lst - -Please play around with it and let me know -what you think. - -Thanks, - -Kevin Hendricks -kevin.hendricks@sympatico.ca - diff --git a/lingucomponent/source/thesaurus/mythes/checkme.lst b/lingucomponent/source/thesaurus/mythes/checkme.lst deleted file mode 100644 index 120d343a9e0f..000000000000 --- a/lingucomponent/source/thesaurus/mythes/checkme.lst +++ /dev/null @@ -1,4 +0,0 @@ -simple -complex -junk -jhjhjh diff --git a/lingucomponent/source/thesaurus/mythes/data_layout.txt b/lingucomponent/source/thesaurus/mythes/data_layout.txt deleted file mode 100644 index ef4bc255d96a..000000000000 --- a/lingucomponent/source/thesaurus/mythes/data_layout.txt +++ /dev/null @@ -1,131 +0,0 @@ -Description of the Structure of the Data needed by MyThes --------------------------------------------------------- - -MyThes is very simple. Almost all of the "smarts" are really -in the thesaurus data file itself. - -The format for this file is at follows: - -- no binary data - -- line ending is a newline '\n' and not carriage return/linefeeds - -- Line 1 is a character string that describes the encoding -used for the file. It is up to the calling program to convert -to and from this encoding if necessary. - - ISO8859-1 is used by the th_en_US_new.dat file. - - Strings currently recognized by OpenOffice.org are: - - UTF-8 - ISO8859-1 - ISO8859-2 - ISO8859-3 - ISO8859-4 - ISO8859-5 - ISO8859-6 - ISO8859-7 - ISO8859-8 - ISO8859-9 - ISO8859-10 - KOI8-R - CP-1251 - ISO8859-14 - ISCII-DEVANAGARI - - -- All of the remaning lines of the file follow this structure - -entry|num_mean -pos|syn1_mean|syn2|... -. -. -. -pos|mean_syn1|syn2|... - - -where: - - entry - all lowercase version of the word or phrase being described - num_mean - number of meanings for this entry - - There is one meaning per line and each meaning is comprised of - - pos - part of speech or other meaning specific description - syn1_mean - synonym 1 also used to describe the meaning itself - syn2 - synonym 2 for that meaning etc. - - -To make this even more clearer, here is actual data for the -entry "simple". - -simple|9 -(adj)|simple |elemental|ultimate|oversimplified|simplistic|simplex|simplified|unanalyzable| -undecomposable|uncomplicated|unsophisticated|easy|plain|unsubdivided -(adj)|elementary|uncomplicated|unproblematic|easy -(adj)|bare|mere|plain -(adj)|childlike|wide-eyed|dewy-eyed|naive |naif -(adj)|dim-witted|half-witted|simple-minded|retarded -(adj)|simple |unsubdivided|unlobed|smooth -(adj)|plain -(noun)|herb|herbaceous plant -(noun)|simpleton|person|individual|someone|somebody|mortal|human|soul - - -It says that "simple" has 9 different meanings and each -meaning will have its part of speech and at least 1 synonym -with other if presetn following on the same line. - - - -Once you ahve created your own structured text file you can use -the perl program "th_gen_idx.pl" which can be found in this -directory to create an index file that is used to seek into -your data file by the MyThes code. - -The correct way to run the perl program is as follows: - -cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx - - - -Then if you head the resulting index file you should see the -following: - -ISO8859-1 -142689 -'hood|10 -'s gravenhage|88 -'tween|173 -'tween decks|196 -.22|231 -.22 caliber|319 -.22 calibre|365 -.38 caliber|411 -.38 calibre|457 -.45 caliber|503 -.45 calibre|549 -0|595 -1|666 -1 chronicles|6283 -1 esdras|6336 - - -Line 1 is the same encoding string taken from the -structured thesaurus data file. - -Line 2 is a count of the total number of entries -in your thesaurus. - -All of the remaining lines are of the form - -entry|byte_offset_into_data_file_where_entry_is_found - - -That's all there is too it. - - -Kevin -kevin.hendricks@sympatico.ca - diff --git a/lingucomponent/source/thesaurus/mythes/example.cxx b/lingucomponent/source/thesaurus/mythes/example.cxx deleted file mode 100644 index 31c85989cf26..000000000000 --- a/lingucomponent/source/thesaurus/mythes/example.cxx +++ /dev/null @@ -1,128 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" -#include <cstring> -#include <cstdlib> -#include <cstdio> - -#include "mythes.hxx" - -extern char * mystrdup(const char * s); - -using namespace std; - -int -main(int argc, char** argv) -{ - - char * af; - char * df; - char * wtc; - FILE* wtclst; - - /* first parse the command line options */ - /* arg1 - index file, arg2 thesaurus data file, arg3 - file of words to check */ - - if (argv[1]) { - af = mystrdup(argv[1]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n"); - exit(1); - } - if (argv[2]) { - df = mystrdup(argv[2]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n"); - exit(1); - } - if (argv[3]) { - wtc = mystrdup(argv[3]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n"); - exit(1); - } - - - /* open the words to check list */ - wtclst = fopen(wtc,"r"); - if (!wtclst) { - fprintf(stderr,"Error - could not open file of words to check\n"); - exit(1); - } - - // open a new thesaurus object - MyThes * pMT= new MyThes(af,df); - - // get the encoding used for the thesaurus data - char * encoding = pMT->get_th_encoding(); - fprintf(stdout,"Thesaurus uses encoding %s\n\n",encoding); - - int k; - char buf[101]; - mentry * pmean; - - while(fgets(buf,100,wtclst)) { - k = strlen(buf); - *(buf + k - 1) = '\0'; - int len = strlen(buf); - int count = pMT->Lookup(buf,len,&pmean); - // don't change value of pmean - // or count since needed for CleanUpAfterLookup routine - mentry* pm = pmean; - if (count) { - fprintf(stdout,"%s has %d meanings\n",buf,count); - for (int i=0; i < count; i++) { - fprintf(stdout," meaning %d: %s\n",i,pm->defn); - for (int j=0; j < pm->count; j++) { - fprintf(stdout," %s\n",pm->psyns[j]); - } - fprintf(stdout,"\n"); - pm++; - } - fprintf(stdout,"\n\n"); - // now clean up all allocated memory - pMT->CleanUpAfterLookup(&pmean,count); - } else { - fprintf(stdout,"\"%s\" is not in thesaurus!\n",buf); - } - } - - delete pMT; - fclose(wtclst); - free(wtc); - free(df); - free(af); - - return 0; -} - diff --git a/lingucomponent/source/thesaurus/mythes/license.readme b/lingucomponent/source/thesaurus/mythes/license.readme deleted file mode 100644 index b6bf70a0c7fe..000000000000 --- a/lingucomponent/source/thesaurus/mythes/license.readme +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ diff --git a/lingucomponent/source/thesaurus/mythes/makefile.mk b/lingucomponent/source/thesaurus/mythes/makefile.mk deleted file mode 100644 index ac45219b97a0..000000000000 --- a/lingucomponent/source/thesaurus/mythes/makefile.mk +++ /dev/null @@ -1,59 +0,0 @@ -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2010 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -PRJ = ..$/..$/.. - -PRJNAME = lingucomponent -TARGET = mythes -LIBTARGET=NO - -#----- Settings --------------------------------------------------------- - -.INCLUDE : settings.mk - -# --- Files -------------------------------------------------------- - -.IF "$(SYSTEM_MYTHES)" == "YES" -@all: - @echo "Using system mythes..." -.ENDIF - -all_target: ALLTAR - - - -SLOFILES= \ - $(SLO)$/mythes.obj - -LIB1TARGET= $(SLB)$/lib$(TARGET).lib -LIB1ARCHIV= $(LB)/lib$(TARGET).a -LIB1OBJFILES= $(SLOFILES) - -# --- Targets ------------------------------------------------------ - -.INCLUDE : target.mk - diff --git a/lingucomponent/source/thesaurus/mythes/mythes.cxx b/lingucomponent/source/thesaurus/mythes/mythes.cxx deleted file mode 100644 index ebb224d92140..000000000000 --- a/lingucomponent/source/thesaurus/mythes/mythes.cxx +++ /dev/null @@ -1,403 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" -#include "license.readme" -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <errno.h> - -#include "mythes.hxx" - - - -MyThes::MyThes(const char* idxpath, const char * datpath) -{ - nw = 0; - encoding = NULL; - list = NULL; - offst = NULL; - - if (thInitialize(idxpath, datpath) != 1) { - fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath); - fflush(stderr); - thCleanup(); - // did not initialize properly - throw exception? - } -} - - -MyThes::~MyThes() -{ - thCleanup(); -} - - -int MyThes::thInitialize(const char* idxpath, const char* datpath) -{ - - // open the index file - FILE * pifile = fopen(idxpath,"r"); - if (!pifile) { - return 0; - } - - // parse in encoding and index size */ - char * wrd; - wrd = (char *)calloc(1, MAX_WD_LEN); - if (!wrd) { - fprintf(stderr,"Error - bad memory allocation\n"); - fflush(stderr); - fclose(pifile); - return 0; - } - int len = readLine(pifile,wrd,MAX_WD_LEN); - encoding = mystrdup(wrd); - len = readLine(pifile,wrd,MAX_WD_LEN); - int idxsz = atoi(wrd); - - - // now allocate list, offst for the given size - list = (char**) calloc(idxsz,sizeof(char*)); - offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int)); - - if ( (!(list)) || (!(offst)) ) { - fprintf(stderr,"Error - bad memory allocation\n"); - fflush(stderr); - fclose(pifile); - return 0; - } - - // now parse the remaining lines of the index - len = readLine(pifile,wrd,MAX_WD_LEN); - while (len > 0) - { - int np = mystr_indexOfChar(wrd,'|'); - if (nw < idxsz) { - if (np >= 0) { - *(wrd+np) = '\0'; - list[nw] = (char *)calloc(1,(np+1)); - if (!list[nw]) { - fprintf(stderr,"Error - bad memory allocation\n"); - fflush(stderr); - fclose(pifile); - return 0; - } - memcpy((list[nw]),wrd,np); - offst[nw] = atoi(wrd+np+1); - nw++; - } - } - len = readLine(pifile,wrd,MAX_WD_LEN); - } - - free((void *)wrd); - fclose(pifile); - - /* next open the data file */ - pdfile = fopen(datpath,"r"); - if (!pdfile) { - return 0; - } - - return 1; -} - - -void MyThes::thCleanup() -{ - /* first close the data file */ - if (pdfile) { - fclose(pdfile); - pdfile=NULL; - } - - if (list) - { - /* now free up all the allocated strings on the list */ - for (int i=0; i < nw; i++) - { - if (list[i]) { - free(list[i]); - list[i] = 0; - } - } - free((void*)list); - } - - if (encoding) free((void*)encoding); - if (offst) free((void*)offst); - - encoding = NULL; - list = NULL; - offst = NULL; - nw = 0; -} - - - -// lookup text in index and count of meanings and a list of meaning entries -// with each entry having a synonym count and pointer to an -// array of char * (i.e the synonyms) -// -// note: calling routine should call CleanUpAfterLookup with the original -// meaning point and count to properly deallocate memory - -int MyThes::Lookup(const char * pText, int len, mentry** pme) -{ - - *pme = NULL; - - // handle the case of missing file or file related errors - if (! pdfile) return 0; - - long offset = 0; - - /* copy search word and make sure null terminated */ - char * wrd = (char *) calloc(1,(len+1)); - memcpy(wrd,pText,len); - - /* find it in the list */ - int idx = nw > 0 ? binsearch(wrd,list,nw) : -1; - free(wrd); - if (idx < 0) return 0; - - // now seek to the offset - offset = (long) offst[idx]; - int rc = fseek(pdfile,offset,SEEK_SET); - if (rc) { - return 0; - } - - // grab the count of the number of meanings - // and allocate a list of meaning entries - char * buf = NULL; - buf = (char *) malloc( MAX_LN_LEN ); - if (!buf) return 0; - readLine(pdfile, buf, (MAX_LN_LEN-1)); - int np = mystr_indexOfChar(buf,'|'); - if (np < 0) { - free(buf); - return 0; - } - int nmeanings = atoi(buf+np+1); - *pme = (mentry*) malloc( nmeanings * sizeof(mentry) ); - if (!(*pme)) { - free(buf); - return 0; - } - - // now read in each meaning and parse it to get defn, count and synonym lists - mentry* pm = *(pme); - char dfn[MAX_WD_LEN]; - - for (int j = 0; j < nmeanings; j++) { - readLine(pdfile, buf, (MAX_LN_LEN-1)); - - pm->count = 0; - pm->psyns = NULL; - pm->defn = NULL; - - // store away the part of speech for later use - char * p = buf; - char * pos = NULL; - np = mystr_indexOfChar(p,'|'); - if (np >= 0) { - *(buf+np) = '\0'; - pos = mystrdup(p); - p = p + np + 1; - } else { - pos = mystrdup(""); - } - - // count the number of fields in the remaining line - int nf = 1; - char * d = p; - np = mystr_indexOfChar(d,'|'); - while ( np >= 0 ) { - nf++; - d = d + np + 1; - np = mystr_indexOfChar(d,'|'); - } - pm->count = nf; - pm->psyns = (char **) malloc(nf*sizeof(char*)); - - // fill in the synonym list - d = p; - for (int jj = 0; jj < nf; jj++) - { - np = mystr_indexOfChar(d,'|'); - if (np > 0) - { - *(d+np) = '\0'; - pm->psyns[jj] = mystrdup(d); - d = d + np + 1; - } - else - { - pm->psyns[jj] = mystrdup(d); - } - } - - // add pos to first synonym to create the definition - int k = strlen(pos); - int m = strlen(pm->psyns[0]); - if ((k+m) < (MAX_WD_LEN - 1)) { - strncpy(dfn,pos,k); - *(dfn+k) = ' '; - strncpy((dfn+k+1),(pm->psyns[0]),m+1); - pm->defn = mystrdup(dfn); - } else { - pm->defn = mystrdup(pm->psyns[0]); - } - free(pos); - pm++; - - } - free(buf); - - return nmeanings; -} - - - -void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings) -{ - - if (nmeanings == 0) return; - if ((*pme) == NULL) return; - - mentry * pm = *pme; - - for (int i = 0; i < nmeanings; i++) { - int count = pm->count; - for (int j = 0; j < count; j++) { - if (pm->psyns[j]) free(pm->psyns[j]); - pm->psyns[j] = NULL; - } - if (pm->psyns) free(pm->psyns); - pm->psyns = NULL; - if (pm->defn) free(pm->defn); - pm->defn = NULL; - pm->count = 0; - pm++; - } - pm = *pme; - free(pm); - *pme = NULL; - return; -} - - -// read a line of text from a text file stripping -// off the line terminator and replacing it with -// a null string terminator. -// returns: -1 on error or the number of characters in -// in the returning string - -// A maximum of nc characters will be returned - -int MyThes::readLine(FILE * pf, char * buf, int nc) -{ - - if (fgets(buf,nc,pf)) { - mychomp(buf); - return strlen(buf); - } - return -1; -} - - - -// performs a binary search on null terminated character -// strings -// -// returns: -1 on not found -// index of wrd in the list[] - -int MyThes::binsearch(char * sw, char* _list[], int nlst) -{ - int lp, up, mp, j, indx; - lp = 0; - up = nlst-1; - indx = -1; - if (strcmp(sw,_list[lp]) < 0) return -1; - if (strcmp(sw,_list[up]) > 0) return -1; - while (indx < 0 ) { - mp = (int)((lp+up) >> 1); - j = strcmp(sw,_list[mp]); - if ( j > 0) { - lp = mp + 1; - } else if (j < 0 ) { - up = mp - 1; - } else { - indx = mp; - } - if (lp > up) return -1; - } - return indx; -} - -char * MyThes::get_th_encoding() -{ - if (encoding) return encoding; - return NULL; -} - - -// string duplication routine -char * MyThes::mystrdup(const char * p) -{ - int sl = strlen(p) + 1; - char * d = (char *)malloc(sl); - if (d) { - memcpy(d,p,sl); - return d; - } - return NULL; -} - -// remove cross-platform text line end characters -void MyThes::mychomp(char * s) -{ - int k = strlen(s); - if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; - if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; -} - - -// return index of char in string -int MyThes::mystr_indexOfChar(const char * d, int c) -{ - char * p = strchr((char *)d,c); - if (p) return (int)(p-d); - return -1; -} - diff --git a/lingucomponent/source/thesaurus/mythes/mythes.hxx b/lingucomponent/source/thesaurus/mythes/mythes.hxx deleted file mode 100644 index 539e6723c42d..000000000000 --- a/lingucomponent/source/thesaurus/mythes/mythes.hxx +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _MYTHES_HXX_ -#define _MYTHES_HXX_ - -// some maximum sizes for buffers -#define MAX_WD_LEN 200 -#define MAX_LN_LEN 16384 - - -// a meaning with definition, count of synonyms and synonym list -struct mentry { - char* defn; - int count; - char** psyns; -}; - - -class MyThes -{ - - int nw; /* number of entries in thesaurus */ - char** list; /* stores word list */ - unsigned int* offst; /* stores offset list */ - char * encoding; /* stores text encoding; */ - - FILE *pdfile; - - // disallow copy-constructor and assignment-operator for now - MyThes(); - MyThes(const MyThes &); - MyThes & operator = (const MyThes &); - -public: - MyThes(const char* idxpath, const char* datpath); - ~MyThes(); - - // lookup text in index and return number of meanings - // each meaning entry has a defintion, synonym count and pointer - // when complete return the *original* meaning entry and count via - // CleanUpAfterLookup to properly handle memory deallocation - - int Lookup(const char * pText, int len, mentry** pme); - - void CleanUpAfterLookup(mentry** pme, int nmean); - - char* get_th_encoding(); - -private: - // Open index and dat files and load list array - int thInitialize (const char* indxpath, const char* datpath); - - // internal close and cleanup dat and idx files - void thCleanup (); - - // read a text line (\n terminated) stripping off line terminator - int readLine(FILE * pf, char * buf, int nc); - - // binary search on null terminated character strings - int binsearch(char * wrd, char* list[], int nlst); - - // string duplication routine - char * mystrdup(const char * p); - - // remove cross-platform text line end characters - void mychomp(char * s); - - // return index of char in string - int mystr_indexOfChar(const char * d, int c); - -}; - -#endif - - - - - |