summary | refs | log | tree | commit | diff
path: root/lingucomponent
diff options
context:
space:
mode:
author obo <obo@openoffice.org> 2010-06-22 12:09:13 +0200
committer obo <obo@openoffice.org> 2010-06-22 12:09:13 +0200
commit 6c5baefb90ef14fa08327d4b1ecbcf3e54c3e84d (patch)
tree 290910a7fb143460ac6f50309a651254f713872d /lingucomponent
parent cb6e6415ba6c4df918e5d88f47a2509e2ff391ee (diff)
parent af15b9a545676a6d83f19a23c6712c23fd024305 (diff)
CWS-TOOLING: integrate CWS mythes12
Diffstat (limited to 'lingucomponent')
-rw-r--r-- lingucomponent/prj/build.lst 5
-rw-r--r-- lingucomponent/source/thesaurus/mythes/Makefile 39
-rw-r--r-- lingucomponent/source/thesaurus/mythes/README 60
-rw-r--r-- lingucomponent/source/thesaurus/mythes/checkme.lst 4
-rw-r--r-- lingucomponent/source/thesaurus/mythes/data_layout.txt 131
-rw-r--r-- lingucomponent/source/thesaurus/mythes/example.cxx 128
-rw-r--r-- lingucomponent/source/thesaurus/mythes/license.readme 34
-rw-r--r-- lingucomponent/source/thesaurus/mythes/makefile.mk 59
-rw-r--r-- lingucomponent/source/thesaurus/mythes/mythes.cxx 403
-rw-r--r-- lingucomponent/source/thesaurus/mythes/mythes.hxx 76
10 files changed, 2 insertions, 937 deletions
diff --git a/lingucomponent/prj/build.lst b/lingucomponent/prj/build.lst
index 138e5858f70e..045aaee82999 100644
--- a/lingucomponent/prj/build.lst
+++ b/lingucomponent/prj/build.lst
@@ -1,9 +1,8 @@
-lc lingucomponent : linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell NULL
+lc lingucomponent : linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell MYTHES:mythes NULL
lc lingucomponent usr1 - all lc_mkout NULL
lc lingucomponent\inc nmake - all lc_inc NULL
lc lingucomponent\source\lingutil nmake - all lc_util lc_inc NULL
-lc lingucomponent\source\thesaurus\mythes nmake - all lc_mythes lc_util lc_inc NULL
-lc lingucomponent\source\thesaurus\libnth nmake - all lc_libnth lc_mythes lc_util lc_inc NULL
+lc lingucomponent\source\thesaurus\libnth nmake - all lc_libnth lc_util lc_inc NULL
lc lingucomponent\source\spellcheck\spell nmake - all lc_libspell lc_util lc_inc NULL
lc lingucomponent\source\hyphenator\altlinuxhyph\hyphen nmake - all lc_libhyphen lc_util lc_inc NULL
lc lingucomponent\source\languageguessing nmake - all lc_languageguessing lc_util lc_inc NULL
diff --git a/lingucomponent/source/thesaurus/mythes/Makefile b/lingucomponent/source/thesaurus/mythes/Makefile
deleted file mode 100644
index b1d811296714..000000000000
--- a/lingucomponent/source/thesaurus/mythes/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-
-CXX=g++
-
-CXXFLAGS= -O2 -Wall -ansi -pedantic -I.
-
-LDFLAGS=-L. -lmythes
-
-LIBS=libmythes.a
-
-AR=ar rc
-RANLIB=ranlib
-
-OBJS = mythes.o
-
-all: example
-
-libmythes.a: $(OBJS)
- $(AR) $@ $(OBJS)
- -@ ($(RANLIB) $@ || true) >/dev/null 2>&1
-
-example: example.o $(LIBS)
- $(CXX) $(CXXFLAGS) -o $@ example.o $(LDFLAGS)
-
-%.o: %.cxx
- $(CXX) $(CXXFLAGS) -c $<
-
-clean:
- rm -f *.o *~ example libthes.a
-
-distclean: clean
-
-depend:
- makedepend -- $(CXXFLAGS) -- *.[ch]xx
-
-# DO NOT DELETE THIS LINE -- make depend depends on it.
-
-mythes.o: mythes.hxx
-example.o: mythes.hxx
-
diff --git a/lingucomponent/source/thesaurus/mythes/README b/lingucomponent/source/thesaurus/mythes/README
deleted file mode 100644
index 421f16a712fe..000000000000
--- a/lingucomponent/source/thesaurus/mythes/README
+++ /dev/null
@@ -1,60 +0,0 @@
-MyThes is a simple thesaurus that uses a structured
-text data file and an index file with binary search
-to lookup words and phrases and return information
-on part of speech, meanings, and synonyms
-
-MyThes was written to provide a thesaurus for the
-OpenOffice.org project
-
-The Main features of MyThes are:
-
-1. written in C++ to make it easier to interface with
- Pspell, OpenOffice, AbiWord, etc
-
-2. it is stateless, uses no static variables and
- should be completely reentrant with no ifdefs
-
-3. it compiles with -ansi and -pedantic and -Wall
- with no warnings so it should be quite portable
-
-4. it uses a perl program to read the structured
- text file and create the index needed for bianry
- searching (see dictionaries/en_US/th_gen_idx.pl)
-
-5. it is very simple with *lots* of comments.
- The main "smarts" are in the structure of the
- text file that makes up the thesaurus data
-
-6. It comes with a ready-to-go structured thesaurus
- data file for en_US extracted from the WordNet-2.0 data.
- (see dictioanries/en_US/th_en_US_new.dat)
-
- Please see WordNet_license.txt and WordNet_readme.txt
- for more information on the very useful project!
- (found in dictionaries/en_US/)
-
-7. The source code has a BSD license (and no advertising clause)
-
-
-MyThes has the world's simplest Makefile and no
-configure support. It does come with a simple example
-program that looks up some words and returns meanings
-and synonyms.
-
-To build it simply do the following:
-
-unzip mythes.zip
-cd mythes
-make
-
-To run the example program:
-./example th_en_US_new.idx th_en_US_new.dat checkme.lst
-
-Please play around with it and let me know
-what you think.
-
-Thanks,
-
-Kevin Hendricks
-kevin.hendricks@sympatico.ca
-
diff --git a/lingucomponent/source/thesaurus/mythes/checkme.lst b/lingucomponent/source/thesaurus/mythes/checkme.lst
deleted file mode 100644
index 120d343a9e0f..000000000000
--- a/lingucomponent/source/thesaurus/mythes/checkme.lst
+++ /dev/null
@@ -1,4 +0,0 @@
-simple
-complex
-junk
-jhjhjh
diff --git a/lingucomponent/source/thesaurus/mythes/data_layout.txt b/lingucomponent/source/thesaurus/mythes/data_layout.txt
deleted file mode 100644
index ef4bc255d96a..000000000000
--- a/lingucomponent/source/thesaurus/mythes/data_layout.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-Description of the Structure of the Data needed by MyThes
---------------------------------------------------------
-
-MyThes is very simple. Almost all of the "smarts" are really
-in the thesaurus data file itself.
-
-The format for this file is at follows:
-
-- no binary data
-
-- line ending is a newline '\n' and not carriage return/linefeeds
-
-- Line 1 is a character string that describes the encoding
-used for the file. It is up to the calling program to convert
-to and from this encoding if necessary.
-
- ISO8859-1 is used by the th_en_US_new.dat file.
-
- Strings currently recognized by OpenOffice.org are:
-
- UTF-8
- ISO8859-1
- ISO8859-2
- ISO8859-3
- ISO8859-4
- ISO8859-5
- ISO8859-6
- ISO8859-7
- ISO8859-8
- ISO8859-9
- ISO8859-10
- KOI8-R
- CP-1251
- ISO8859-14
- ISCII-DEVANAGARI
-
-
-- All of the remaning lines of the file follow this structure
-
-entry|num_mean
-pos|syn1_mean|syn2|...
-.
-.
-.
-pos|mean_syn1|syn2|...
-
-
-where:
-
- entry - all lowercase version of the word or phrase being described
- num_mean - number of meanings for this entry
-
- There is one meaning per line and each meaning is comprised of
-
- pos - part of speech or other meaning specific description
- syn1_mean - synonym 1 also used to describe the meaning itself
- syn2 - synonym 2 for that meaning etc.
-
-
-To make this even more clearer, here is actual data for the
-entry "simple".
-
-simple|9
-(adj)|simple |elemental|ultimate|oversimplified|simplistic|simplex|simplified|unanalyzable|
-undecomposable|uncomplicated|unsophisticated|easy|plain|unsubdivided
-(adj)|elementary|uncomplicated|unproblematic|easy
-(adj)|bare|mere|plain
-(adj)|childlike|wide-eyed|dewy-eyed|naive |naif
-(adj)|dim-witted|half-witted|simple-minded|retarded
-(adj)|simple |unsubdivided|unlobed|smooth
-(adj)|plain
-(noun)|herb|herbaceous plant
-(noun)|simpleton|person|individual|someone|somebody|mortal|human|soul
-
-
-It says that "simple" has 9 different meanings and each
-meaning will have its part of speech and at least 1 synonym
-with other if presetn following on the same line.
-
-
-
-Once you ahve created your own structured text file you can use
-the perl program "th_gen_idx.pl" which can be found in this
-directory to create an index file that is used to seek into
-your data file by the MyThes code.
-
-The correct way to run the perl program is as follows:
-
-cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
-
-
-
-Then if you head the resulting index file you should see the
-following:
-
-ISO8859-1
-142689
-'hood|10
-'s gravenhage|88
-'tween|173
-'tween decks|196
-.22|231
-.22 caliber|319
-.22 calibre|365
-.38 caliber|411
-.38 calibre|457
-.45 caliber|503
-.45 calibre|549
-0|595
-1|666
-1 chronicles|6283
-1 esdras|6336
-
-
-Line 1 is the same encoding string taken from the
-structured thesaurus data file.
-
-Line 2 is a count of the total number of entries
-in your thesaurus.
-
-All of the remaining lines are of the form
-
-entry|byte_offset_into_data_file_where_entry_is_found
-
-
-That's all there is too it.
-
-
-Kevin
-kevin.hendricks@sympatico.ca
-
diff --git a/lingucomponent/source/thesaurus/mythes/example.cxx b/lingucomponent/source/thesaurus/mythes/example.cxx
deleted file mode 100644
index 31c85989cf26..000000000000
--- a/lingucomponent/source/thesaurus/mythes/example.cxx
+++ /dev/null
@@ -1,128 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#include <cstring>
-#include <cstdlib>
-#include <cstdio>
-
-#include "mythes.hxx"
-
-extern char * mystrdup(const char * s);
-
-using namespace std;
-
-int
-main(int argc, char** argv)
-{
-
- char * af;
- char * df;
- char * wtc;
- FILE* wtclst;
-
- /* first parse the command line options */
- /* arg1 - index file, arg2 thesaurus data file, arg3 - file of words to check */
-
- if (argv[1]) {
- af = mystrdup(argv[1]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n");
- exit(1);
- }
- if (argv[2]) {
- df = mystrdup(argv[2]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n");
- exit(1);
- }
- if (argv[3]) {
- wtc = mystrdup(argv[3]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n");
- exit(1);
- }
-
-
- /* open the words to check list */
- wtclst = fopen(wtc,"r");
- if (!wtclst) {
- fprintf(stderr,"Error - could not open file of words to check\n");
- exit(1);
- }
-
- // open a new thesaurus object
- MyThes * pMT= new MyThes(af,df);
-
- // get the encoding used for the thesaurus data
- char * encoding = pMT->get_th_encoding();
- fprintf(stdout,"Thesaurus uses encoding %s\n\n",encoding);
-
- int k;
- char buf[101];
- mentry * pmean;
-
- while(fgets(buf,100,wtclst)) {
- k = strlen(buf);
- *(buf + k - 1) = '\0';
- int len = strlen(buf);
- int count = pMT->Lookup(buf,len,&pmean);
- // don't change value of pmean
- // or count since needed for CleanUpAfterLookup routine
- mentry* pm = pmean;
- if (count) {
- fprintf(stdout,"%s has %d meanings\n",buf,count);
- for (int i=0; i < count; i++) {
- fprintf(stdout," meaning %d: %s\n",i,pm->defn);
- for (int j=0; j < pm->count; j++) {
- fprintf(stdout," %s\n",pm->psyns[j]);
- }
- fprintf(stdout,"\n");
- pm++;
- }
- fprintf(stdout,"\n\n");
- // now clean up all allocated memory
- pMT->CleanUpAfterLookup(&pmean,count);
- } else {
- fprintf(stdout,"\"%s\" is not in thesaurus!\n",buf);
- }
- }
-
- delete pMT;
- fclose(wtclst);
- free(wtc);
- free(df);
- free(af);
-
- return 0;
-}
-
diff --git a/lingucomponent/source/thesaurus/mythes/license.readme b/lingucomponent/source/thesaurus/mythes/license.readme
deleted file mode 100644
index b6bf70a0c7fe..000000000000
--- a/lingucomponent/source/thesaurus/mythes/license.readme
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
- * And Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * 3. All modifications to the source code must be clearly marked as
- * such. Binary redistributions based on modified source code
- * must be clearly marked as modified versions in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
diff --git a/lingucomponent/source/thesaurus/mythes/makefile.mk b/lingucomponent/source/thesaurus/mythes/makefile.mk
deleted file mode 100644
index ac45219b97a0..000000000000
--- a/lingucomponent/source/thesaurus/mythes/makefile.mk
+++ /dev/null
@@ -1,59 +0,0 @@
-#*************************************************************************
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# Copyright 2000, 2010 Oracle and/or its affiliates.
-#
-# OpenOffice.org - a multi-platform office productivity suite
-#
-# This file is part of OpenOffice.org.
-#
-# OpenOffice.org is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License version 3
-# only, as published by the Free Software Foundation.
-#
-# OpenOffice.org is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License version 3 for more details
-# (a copy is included in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU Lesser General Public License
-# version 3 along with OpenOffice.org. If not, see
-# <http://www.openoffice.org/license.html>
-# for a copy of the LGPLv3 License.
-#
-#*************************************************************************
-
-PRJ = ..$/..$/..
-
-PRJNAME = lingucomponent
-TARGET = mythes
-LIBTARGET=NO
-
-#----- Settings ---------------------------------------------------------
-
-.INCLUDE : settings.mk
-
-# --- Files --------------------------------------------------------
-
-.IF "$(SYSTEM_MYTHES)" == "YES"
-@all:
- @echo "Using system mythes..."
-.ENDIF
-
-all_target: ALLTAR
-
-
-
-SLOFILES= \
- $(SLO)$/mythes.obj
-
-LIB1TARGET= $(SLB)$/lib$(TARGET).lib
-LIB1ARCHIV= $(LB)/lib$(TARGET).a
-LIB1OBJFILES= $(SLOFILES)
-
-# --- Targets ------------------------------------------------------
-
-.INCLUDE : target.mk
-
diff --git a/lingucomponent/source/thesaurus/mythes/mythes.cxx b/lingucomponent/source/thesaurus/mythes/mythes.cxx
deleted file mode 100644
index ebb224d92140..000000000000
--- a/lingucomponent/source/thesaurus/mythes/mythes.cxx
+++ /dev/null
@@ -1,403 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#include "license.readme"
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#include "mythes.hxx"
-
-
-
-MyThes::MyThes(const char* idxpath, const char * datpath)
-{
- nw = 0;
- encoding = NULL;
- list = NULL;
- offst = NULL;
-
- if (thInitialize(idxpath, datpath) != 1) {
- fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
- fflush(stderr);
- thCleanup();
- // did not initialize properly - throw exception?
- }
-}
-
-
-MyThes::~MyThes()
-{
- thCleanup();
-}
-
-
-int MyThes::thInitialize(const char* idxpath, const char* datpath)
-{
-
- // open the index file
- FILE * pifile = fopen(idxpath,"r");
- if (!pifile) {
- return 0;
- }
-
- // parse in encoding and index size */
- char * wrd;
- wrd = (char *)calloc(1, MAX_WD_LEN);
- if (!wrd) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
- int len = readLine(pifile,wrd,MAX_WD_LEN);
- encoding = mystrdup(wrd);
- len = readLine(pifile,wrd,MAX_WD_LEN);
- int idxsz = atoi(wrd);
-
-
- // now allocate list, offst for the given size
- list = (char**) calloc(idxsz,sizeof(char*));
- offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
-
- if ( (!(list)) || (!(offst)) ) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
-
- // now parse the remaining lines of the index
- len = readLine(pifile,wrd,MAX_WD_LEN);
- while (len > 0)
- {
- int np = mystr_indexOfChar(wrd,'|');
- if (nw < idxsz) {
- if (np >= 0) {
- *(wrd+np) = '\0';
- list[nw] = (char *)calloc(1,(np+1));
- if (!list[nw]) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
- memcpy((list[nw]),wrd,np);
- offst[nw] = atoi(wrd+np+1);
- nw++;
- }
- }
- len = readLine(pifile,wrd,MAX_WD_LEN);
- }
-
- free((void *)wrd);
- fclose(pifile);
-
- /* next open the data file */
- pdfile = fopen(datpath,"r");
- if (!pdfile) {
- return 0;
- }
-
- return 1;
-}
-
-
-void MyThes::thCleanup()
-{
- /* first close the data file */
- if (pdfile) {
- fclose(pdfile);
- pdfile=NULL;
- }
-
- if (list)
- {
- /* now free up all the allocated strings on the list */
- for (int i=0; i < nw; i++)
- {
- if (list[i]) {
- free(list[i]);
- list[i] = 0;
- }
- }
- free((void*)list);
- }
-
- if (encoding) free((void*)encoding);
- if (offst) free((void*)offst);
-
- encoding = NULL;
- list = NULL;
- offst = NULL;
- nw = 0;
-}
-
-
-
-// lookup text in index and count of meanings and a list of meaning entries
-// with each entry having a synonym count and pointer to an
-// array of char * (i.e the synonyms)
-//
-// note: calling routine should call CleanUpAfterLookup with the original
-// meaning point and count to properly deallocate memory
-
-int MyThes::Lookup(const char * pText, int len, mentry** pme)
-{
-
- *pme = NULL;
-
- // handle the case of missing file or file related errors
- if (! pdfile) return 0;
-
- long offset = 0;
-
- /* copy search word and make sure null terminated */
- char * wrd = (char *) calloc(1,(len+1));
- memcpy(wrd,pText,len);
-
- /* find it in the list */
- int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
- free(wrd);
- if (idx < 0) return 0;
-
- // now seek to the offset
- offset = (long) offst[idx];
- int rc = fseek(pdfile,offset,SEEK_SET);
- if (rc) {
- return 0;
- }
-
- // grab the count of the number of meanings
- // and allocate a list of meaning entries
- char * buf = NULL;
- buf = (char *) malloc( MAX_LN_LEN );
- if (!buf) return 0;
- readLine(pdfile, buf, (MAX_LN_LEN-1));
- int np = mystr_indexOfChar(buf,'|');
- if (np < 0) {
- free(buf);
- return 0;
- }
- int nmeanings = atoi(buf+np+1);
- *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
- if (!(*pme)) {
- free(buf);
- return 0;
- }
-
- // now read in each meaning and parse it to get defn, count and synonym lists
- mentry* pm = *(pme);
- char dfn[MAX_WD_LEN];
-
- for (int j = 0; j < nmeanings; j++) {
- readLine(pdfile, buf, (MAX_LN_LEN-1));
-
- pm->count = 0;
- pm->psyns = NULL;
- pm->defn = NULL;
-
- // store away the part of speech for later use
- char * p = buf;
- char * pos = NULL;
- np = mystr_indexOfChar(p,'|');
- if (np >= 0) {
- *(buf+np) = '\0';
- pos = mystrdup(p);
- p = p + np + 1;
- } else {
- pos = mystrdup("");
- }
-
- // count the number of fields in the remaining line
- int nf = 1;
- char * d = p;
- np = mystr_indexOfChar(d,'|');
- while ( np >= 0 ) {
- nf++;
- d = d + np + 1;
- np = mystr_indexOfChar(d,'|');
- }
- pm->count = nf;
- pm->psyns = (char **) malloc(nf*sizeof(char*));
-
- // fill in the synonym list
- d = p;
- for (int jj = 0; jj < nf; jj++)
- {
- np = mystr_indexOfChar(d,'|');
- if (np > 0)
- {
- *(d+np) = '\0';
- pm->psyns[jj] = mystrdup(d);
- d = d + np + 1;
- }
- else
- {
- pm->psyns[jj] = mystrdup(d);
- }
- }
-
- // add pos to first synonym to create the definition
- int k = strlen(pos);
- int m = strlen(pm->psyns[0]);
- if ((k+m) < (MAX_WD_LEN - 1)) {
- strncpy(dfn,pos,k);
- *(dfn+k) = ' ';
- strncpy((dfn+k+1),(pm->psyns[0]),m+1);
- pm->defn = mystrdup(dfn);
- } else {
- pm->defn = mystrdup(pm->psyns[0]);
- }
- free(pos);
- pm++;
-
- }
- free(buf);
-
- return nmeanings;
-}
-
-
-
-void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
-{
-
- if (nmeanings == 0) return;
- if ((*pme) == NULL) return;
-
- mentry * pm = *pme;
-
- for (int i = 0; i < nmeanings; i++) {
- int count = pm->count;
- for (int j = 0; j < count; j++) {
- if (pm->psyns[j]) free(pm->psyns[j]);
- pm->psyns[j] = NULL;
- }
- if (pm->psyns) free(pm->psyns);
- pm->psyns = NULL;
- if (pm->defn) free(pm->defn);
- pm->defn = NULL;
- pm->count = 0;
- pm++;
- }
- pm = *pme;
- free(pm);
- *pme = NULL;
- return;
-}
-
-
-// read a line of text from a text file stripping
-// off the line terminator and replacing it with
-// a null string terminator.
-// returns: -1 on error or the number of characters in
-// in the returning string
-
-// A maximum of nc characters will be returned
-
-int MyThes::readLine(FILE * pf, char * buf, int nc)
-{
-
- if (fgets(buf,nc,pf)) {
- mychomp(buf);
- return strlen(buf);
- }
- return -1;
-}
-
-
-
-// performs a binary search on null terminated character
-// strings
-//
-// returns: -1 on not found
-// index of wrd in the list[]
-
-int MyThes::binsearch(char * sw, char* _list[], int nlst)
-{
- int lp, up, mp, j, indx;
- lp = 0;
- up = nlst-1;
- indx = -1;
- if (strcmp(sw,_list[lp]) < 0) return -1;
- if (strcmp(sw,_list[up]) > 0) return -1;
- while (indx < 0 ) {
- mp = (int)((lp+up) >> 1);
- j = strcmp(sw,_list[mp]);
- if ( j > 0) {
- lp = mp + 1;
- } else if (j < 0 ) {
- up = mp - 1;
- } else {
- indx = mp;
- }
- if (lp > up) return -1;
- }
- return indx;
-}
-
-char * MyThes::get_th_encoding()
-{
- if (encoding) return encoding;
- return NULL;
-}
-
-
-// string duplication routine
-char * MyThes::mystrdup(const char * p)
-{
- int sl = strlen(p) + 1;
- char * d = (char *)malloc(sl);
- if (d) {
- memcpy(d,p,sl);
- return d;
- }
- return NULL;
-}
-
-// remove cross-platform text line end characters
-void MyThes::mychomp(char * s)
-{
- int k = strlen(s);
- if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
- if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
-}
-
-
-// return index of char in string
-int MyThes::mystr_indexOfChar(const char * d, int c)
-{
- char * p = strchr((char *)d,c);
- if (p) return (int)(p-d);
- return -1;
-}
-
diff --git a/lingucomponent/source/thesaurus/mythes/mythes.hxx b/lingucomponent/source/thesaurus/mythes/mythes.hxx
deleted file mode 100644
index 539e6723c42d..000000000000
--- a/lingucomponent/source/thesaurus/mythes/mythes.hxx
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _MYTHES_HXX_
-#define _MYTHES_HXX_
-
-// some maximum sizes for buffers
-#define MAX_WD_LEN 200
-#define MAX_LN_LEN 16384
-
-
-// a meaning with definition, count of synonyms and synonym list
-struct mentry {
- char* defn;
- int count;
- char** psyns;
-};
-
-
-class MyThes
-{
-
- int nw; /* number of entries in thesaurus */
- char** list; /* stores word list */
- unsigned int* offst; /* stores offset list */
- char * encoding; /* stores text encoding; */
-
- FILE *pdfile;
-
- // disallow copy-constructor and assignment-operator for now
- MyThes();
- MyThes(const MyThes &);
- MyThes & operator = (const MyThes &);
-
-public:
- MyThes(const char* idxpath, const char* datpath);
- ~MyThes();
-
- // lookup text in index and return number of meanings
- // each meaning entry has a defintion, synonym count and pointer
- // when complete return the *original* meaning entry and count via
- // CleanUpAfterLookup to properly handle memory deallocation
-
- int Lookup(const char * pText, int len, mentry** pme);
-
- void CleanUpAfterLookup(mentry** pme, int nmean);
-
- char* get_th_encoding();
-
-private:
- // Open index and dat files and load list array
- int thInitialize (const char* indxpath, const char* datpath);
-
- // internal close and cleanup dat and idx files
- void thCleanup ();
-
- // read a text line (\n terminated) stripping off line terminator
- int readLine(FILE * pf, char * buf, int nc);
-
- // binary search on null terminated character strings
- int binsearch(char * wrd, char* list[], int nlst);
-
- // string duplication routine
- char * mystrdup(const char * p);
-
- // remove cross-platform text line end characters
- void mychomp(char * s);
-
- // return index of char in string
- int mystr_indexOfChar(const char * d, int c);
-
-};
-
-#endif
-
-
-
-
-