summaryrefslogtreecommitdiff
path: root/libtextcat/libtextcat-2.2.patch
diff options
context:
space:
mode:
authorThomas Lange <tl@openoffice.org>2007-01-12 11:40:54 +0000
committerThomas Lange <tl@openoffice.org>2007-01-12 11:40:54 +0000
commit23147b5b1f280e1c7758c4ce27b99dc92135b354 (patch)
treecdee4b730e97cad5db3fd941f5513dc826530fd8 /libtextcat/libtextcat-2.2.patch
parent2bb6503c63165d28d1f9a0224b675565b6acaa96 (diff)
#i73173# integrate Google SoC language-guessing
Diffstat (limited to 'libtextcat/libtextcat-2.2.patch')
-rw-r--r--libtextcat/libtextcat-2.2.patch2137
1 files changed, 2137 insertions, 0 deletions
diff --git a/libtextcat/libtextcat-2.2.patch b/libtextcat/libtextcat-2.2.patch
new file mode 100644
index 000000000000..81babb0eb0aa
--- /dev/null
+++ b/libtextcat/libtextcat-2.2.patch
@@ -0,0 +1,2137 @@
+*** misc/libtextcat-2.2/src/common.c 2003-05-22 13:32:43.000000000 +0200
+--- misc/build/libtextcat-2.2/src/common.c 2007-01-11 13:19:40.000000000 +0100
+***************
+*** 3,25 ****
+ *
+ * Copyright (c) 2003, WiseGuys Internet B.V.
+ * All rights reserved.
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+--- 3,25 ----
+ *
+ * Copyright (c) 2003, WiseGuys Internet B.V.
+ * All rights reserved.
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+***************
+*** 114,124 ****
+ wgmem_error( "Error while strduping %u bytes.\n", strlen(s) );
+ }
+
+! return( result );
+ }
+
+! extern void* wg_realloc( void *ptr, size_t size )
+! {
+ void *result;
+
+ if (!size) {
+--- 114,124 ----
+ wgmem_error( "Error while strduping %u bytes.\n", strlen(s) );
+ }
+
+! return( result );
+ }
+
+! extern void* wg_realloc( void *ptr, size_t size )
+! {
+ void *result;
+
+ if (!size) {
+***************
+*** 131,137 ****
+ wgmem_error( "Error while reallocing %u bytes.\n", size );
+ }
+
+! return( result );
+ }
+
+ extern void wg_free( void *mem )
+--- 131,137 ----
+ wgmem_error( "Error while reallocing %u bytes.\n", size );
+ }
+
+! return( result );
+ }
+
+ extern void wg_free( void *mem )
+***************
+*** 148,159 ****
+ if ( fgets(line, size, fp) == NULL ) {
+ return NULL;
+ }
+!
+ /** kill term null **/
+ if ( (p = strpbrk( line, "\n\r" )) ) {
+ *p = '\0';
+! }
+!
+ return line;
+ }
+
+--- 148,159 ----
+ if ( fgets(line, size, fp) == NULL ) {
+ return NULL;
+ }
+!
+ /** kill term null **/
+ if ( (p = strpbrk( line, "\n\r" )) ) {
+ *p = '\0';
+! }
+!
+ return line;
+ }
+
+***************
+*** 164,202 ****
+ *
+ * ARGUMENTS:
+ * - result:
+! *
+ * After the split, this array contains pointers to the start of each
+ * detected segment. Must be preallocated and at least as large as
+ * maxsegments. The pointers point into the dest buffer.
+! *
+! * - dest:
+! *
+ * String into which result points as an index. Must be preallocated, and
+ * at least as big as src. You can use src as dest, but in that case src
+ * is overwritten!
+! *
+! * - src:
+! *
+ * The string to split. Sequences of whitespace are treated as separators, unless
+ * escaped. There are two ways to escape: by using single quotes (anything
+ * between single quotes is treated as one segment), or by using a backslash
+ * to escape the next character. The backslash escape works inside quotation
+ * as well.
+! *
+ * Example:
+! *
+ * "It\'s very\ easy 'to use WiseGuys\' wg_split()' function" is split into:
+! *
+ * "It's"
+ * "very easy"
+ * "to use WiseGuys' wg_split()"
+ * "function"
+! *
+! * - maxsegments:
+! *
+ * The maximum number of segments. If the splitter runs out of segments,
+ * the remainder of the string is stored in the last segment.
+! *
+ * RETURN VALUE:
+ * The number of segments found.
+ */
+--- 164,202 ----
+ *
+ * ARGUMENTS:
+ * - result:
+! *
+ * After the split, this array contains pointers to the start of each
+ * detected segment. Must be preallocated and at least as large as
+ * maxsegments. The pointers point into the dest buffer.
+! *
+! * - dest:
+! *
+ * String into which result points as an index. Must be preallocated, and
+ * at least as big as src. You can use src as dest, but in that case src
+ * is overwritten!
+! *
+! * - src:
+! *
+ * The string to split. Sequences of whitespace are treated as separators, unless
+ * escaped. There are two ways to escape: by using single quotes (anything
+ * between single quotes is treated as one segment), or by using a backslash
+ * to escape the next character. The backslash escape works inside quotation
+ * as well.
+! *
+ * Example:
+! *
+ * "It\'s very\ easy 'to use WiseGuys\' wg_split()' function" is split into:
+! *
+ * "It's"
+ * "very easy"
+ * "to use WiseGuys' wg_split()"
+ * "function"
+! *
+! * - maxsegments:
+! *
+ * The maximum number of segments. If the splitter runs out of segments,
+ * the remainder of the string is stored in the last segment.
+! *
+ * RETURN VALUE:
+ * The number of segments found.
+ */
+***************
+*** 223,229 ****
+ }
+ state = 1;
+
+! case 1:
+ /*** Start segment ***/
+ result[cnt] = w;
+ cnt++;
+--- 223,229 ----
+ }
+ state = 1;
+
+! case 1:
+ /*** Start segment ***/
+ result[cnt] = w;
+ cnt++;
+***************
+*** 237,243 ****
+ p++;
+ state = 0;
+ break;
+! }
+ else if ( *p == '\'' ) {
+ /*** Start quotation ***/
+ p++;
+--- 237,243 ----
+ p++;
+ state = 0;
+ break;
+! }
+ else if ( *p == '\'' ) {
+ /*** Start quotation ***/
+ p++;
+***************
+*** 292,308 ****
+ }
+
+
+ extern void wg_timerstart(wgtimer_t *t)
+ {
+- #ifdef HAVE_GETTIMEOFDAY
+ gettimeofday( &(t->start), NULL );
+- #endif
+ }
+
+
+ extern uint4 wg_timerstop(wgtimer_t *t)
+ {
+- #ifdef HAVE_GETTIMEOFDAY
+ uint4 result;
+ gettimeofday( &(t->stop), NULL );
+ result = (t->stop.tv_sec - t->start.tv_sec) * 1000000 +
+--- 292,308 ----
+ }
+
+
++ #ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern void wg_timerstart(wgtimer_t *t)
+ {
+ gettimeofday( &(t->start), NULL );
+ }
++ #endif /* TL : no struct timeval under Win32 */
+
+
++ #ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern uint4 wg_timerstop(wgtimer_t *t)
+ {
+ uint4 result;
+ gettimeofday( &(t->stop), NULL );
+ result = (t->stop.tv_sec - t->start.tv_sec) * 1000000 +
+***************
+*** 312,336 ****
+ t->start.tv_usec = t->stop.tv_usec;
+
+ return result;
+- #else
+- return 0;
+- #endif
+ }
+
+
+ /**
+ * wg_strgmov -- a guarded strcpy() variation
+! *
+ * copies src to dest (including terminating zero), and returns
+ * pointer to position of terminating zero in dest. The function is
+ * guaranteed not to write past destlimit. If the copy couldn't be
+! * finished, the function returns NULL after restoring the first
+! * character in dest for your convenience (since this is usually a zero).
+ */
+ char *wg_strgmov( char *dest, const char *src, const char *destlimit )
+ {
+ char tmp, *w;
+!
+ if ( !dest || dest >= destlimit ) {
+ return NULL;
+ }
+--- 312,334 ----
+ t->start.tv_usec = t->stop.tv_usec;
+
+ return result;
+ }
++ #endif /* TL : no struct timeval under Win32 */
+
+
+ /**
+ * wg_strgmov -- a guarded strcpy() variation
+! *
+ * copies src to dest (including terminating zero), and returns
+ * pointer to position of terminating zero in dest. The function is
+ * guaranteed not to write past destlimit. If the copy couldn't be
+! * finished, the function returns NULL after restoring the first
+! * character in dest for your convenience (since this is usually a zero).
+ */
+ char *wg_strgmov( char *dest, const char *src, const char *destlimit )
+ {
+ char tmp, *w;
+!
+ if ( !dest || dest >= destlimit ) {
+ return NULL;
+ }
+***************
+*** 355,361 ****
+ }
+
+ /*
+! * wg_trim() -- remove whitespace surrounding a string.
+ *
+ * Example: " bla bla bla " becomes "bla bla bla" after trimming.
+ *
+--- 353,359 ----
+ }
+
+ /*
+! * wg_trim() -- remove whitespace surrounding a string.
+ *
+ * Example: " bla bla bla " becomes "bla bla bla" after trimming.
+ *
+***************
+*** 373,379 ****
+ char *lastnonspace = &dest[-1];
+ const char *p = src;
+ char *w = dest;
+!
+ while ( isspace((int)*p) ) {
+ p++;
+ }
+--- 371,377 ----
+ char *lastnonspace = &dest[-1];
+ const char *p = src;
+ char *w = dest;
+!
+ while ( isspace((int)*p) ) {
+ p++;
+ }
+*** misc/libtextcat-2.2/src/common.h 2003-05-22 15:02:29.000000000 +0200
+--- misc/build/libtextcat-2.2/src/common.h 2007-01-11 13:19:40.000000000 +0100
+***************
+*** 1,28 ****
+ #ifndef _COMMON_H_
+ #define _COMMON_H_
+ /**
+! * common.h -- a mixed bag of helper functions
+ *
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+--- 1,28 ----
+ #ifndef _COMMON_H_
+ #define _COMMON_H_
+ /**
+! * common.h -- a mixed bag of helper functions
+ *
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+***************
+*** 86,95 ****
+--- 86,97 ----
+ typedef char boole;
+ #endif
+
++ #ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ typedef struct wgtimer_s {
+ struct timeval start;
+ struct timeval stop;
+ } wgtimer_t;
++ #endif /* TL : no struct timeval under Win32 */
+
+
+ extern void *wg_malloc( size_t size );
+***************
+*** 101,113 ****
+
+ extern char *wg_getline( char *line, int size, FILE *fp );
+
+ extern void wg_timerstart(wgtimer_t *t);
+ extern uint4 wg_timerstop(wgtimer_t *t);
+
+ extern unsigned int wg_split( char **result, char *dest, char *src, int maxsegments );
+ extern char *wg_strgmov( char *dest, const char *src, const char *destlimit );
+ extern char *wg_trim( char *dest, const char *src );
+
+!
+ #endif
+
+--- 103,117 ----
+
+ extern char *wg_getline( char *line, int size, FILE *fp );
+
++ #ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern void wg_timerstart(wgtimer_t *t);
+ extern uint4 wg_timerstop(wgtimer_t *t);
++ #endif /* TL : no struct timeval under Win32 */
+
+ extern unsigned int wg_split( char **result, char *dest, char *src, int maxsegments );
+ extern char *wg_strgmov( char *dest, const char *src, const char *destlimit );
+ extern char *wg_trim( char *dest, const char *src );
+
+!
+ #endif
+
+*** misc/libtextcat-2.2/src/constants.h 2003-05-22 13:32:43.000000000 +0200
+--- misc/build/libtextcat-2.2/src/constants.h 2007-01-11 13:19:40.000000000 +0100
+***************
+*** 39,44 ****
+--- 39,46 ----
+ */
+ #include <limits.h>
+
++ #define _UTF8_
++
+ #define DESCRIPTION "out of place"
+
+ /* Reported matches are those fingerprints with a score less than best
+***************
+*** 59,72 ****
+ /* Maximum number of n-grams in a fingerprint */
+ #define MAXNGRAMS 400
+
+! /* Maximum size of an n-gram? */
+! #define MAXNGRAMSIZE 5
+
+ /* Which characters are not acceptable in n-grams? */
+ #define INVALID(c) (isspace((int)c) || isdigit((int)c))
+
+ /* Minimum size (in characters) for accepting a document */
+! #define MINDOCSIZE 25
+
+ /* Maximum penalty for missing an n-gram in fingerprint */
+ #define MAXOUTOFPLACE 400
+--- 61,81 ----
+ /* Maximum number of n-grams in a fingerprint */
+ #define MAXNGRAMS 400
+
+! /* Maximum number of characters (symbols) in an n-gram */
+! #define MAXNGRAMSYMBOL 5
+!
+! /* Maximum size of the string representing an n-gram (must be at least the number of symbols) */
+! #ifdef _UTF8_
+! #define MAXNGRAMSIZE 20
+! #else
+! #define MAXNGRAMSIZE MAXNGRAMSYMBOL
+! #endif
+
+ /* Which characters are not acceptable in n-grams? */
+ #define INVALID(c) (isspace((int)c) || isdigit((int)c))
+
+ /* Minimum size (in characters) for accepting a document */
+! #define MINDOCSIZE 6
+
+ /* Maximum penalty for missing an n-gram in fingerprint */
+ #define MAXOUTOFPLACE 400
+***************
+*** 76,79 ****
+--- 85,91 ----
+
+ #define MAXSCORE INT_MAX
+
+ /* where the fingerprint files are stored */
++ #define DEFAULT_FINGERPRINTS_PATH ""
++
+ #endif
+*** misc/libtextcat-2.2/src/fingerprint.c 2003-05-22 13:32:43.000000000 +0200
+--- misc/build/libtextcat-2.2/src/fingerprint.c 2007-01-12 12:51:59.000000000 +0100
+***************
+*** 6,28 ****
+ * All rights reserved.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+--- 6,28 ----
+ * All rights reserved.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+***************
+*** 51,57 ****
+ * The reason why we go through the trouble of doing a partial
+ * (heap)sort is that a full quicksort behaves horribly on the data:
+ * most n-grams have a very low count, resulting in a data set in
+! * nearly-sorted order. This causes quicksort to behave very badly.
+ * Heapsort, on the other hand, behaves handsomely: worst case is
+ * Mlog(N) for M n-grams filtered through a N-sized heap.
+ *
+--- 51,57 ----
+ * The reason why we go through the trouble of doing a partial
+ * (heap)sort is that a full quicksort behaves horribly on the data:
+ * most n-grams have a very low count, resulting in a data set in
+! * nearly-sorted order. This causes quicksort to behave very badly.
+ * Heapsort, on the other hand, behaves handsomely: worst case is
+ * Mlog(N) for M n-grams filtered through a N-sized heap.
+ *
+***************
+*** 63,68 ****
+--- 63,72 ----
+ * - put table/heap datastructure in a separate file.
+ */
+
++ #ifndef _UTF8_
++ #define _UTF8_
++ #endif
++
+ #include "config.h"
+ #include <stdio.h>
+ #ifdef HAVE_STDLIB_H
+***************
+*** 80,89 ****
+--- 84,95 ----
+ #include "wg_mempool.h"
+ #include "constants.h"
+
++ #include "utf8misc.h"
+
+ #define TABLESIZE (1<<TABLEPOW)
+ #define TABLEMASK ((TABLESIZE)-1)
+
++
+ typedef struct {
+
+ sint2 rank;
+***************
+*** 96,102 ****
+ const char *name;
+ ngram_t *fprint;
+ uint4 size;
+!
+ } fp_t;
+
+ typedef struct entry_s {
+--- 102,108 ----
+ const char *name;
+ ngram_t *fprint;
+ uint4 size;
+!
+ } fp_t;
+
+ typedef struct entry_s {
+***************
+*** 105,117 ****
+ struct entry_s *next;
+ } entry_t;
+
+! typedef struct table_s {
+ void *pool;
+ entry_t **table;
+ entry_t *heap;
+
+ struct table_s *next;
+!
+ uint4 heapsize;
+ uint4 size;
+ } table_t;
+--- 111,123 ----
+ struct entry_s *next;
+ } entry_t;
+
+! typedef struct table_s {
+ void *pool;
+ entry_t **table;
+ entry_t *heap;
+
+ struct table_s *next;
+!
+ uint4 heapsize;
+ uint4 size;
+ } table_t;
+***************
+*** 122,128 ****
+ * fast and furious little hash function
+ *
+ * (Note that we could use some kind of rolling checksum, and update it
+! * during n-gram construction)
+ */
+ static uint4 simplehash( const char *p, int len )
+ {
+--- 128,134 ----
+ * fast and furious little hash function
+ *
+ * (Note that we could use some kind of rolling checksum, and update it
+! * during n-gram construction)
+ */
+ static uint4 simplehash( const char *p, int len )
+ {
+***************
+*** 134,162 ****
+ }
+
+
+- /* checks if n-gram lex is a prefix of key and of length len */
+- inline int issame( char *lex, char *key, int len )
+- {
+- int i;
+- for (i=0; i<len; i++) {
+- if ( key[i] != lex[i] ) {
+- return 0;
+- }
+- }
+- if ( lex[i] != 0 ) {
+- return 0;
+- }
+- return 1;
+- }
+-
+
+ /* increases frequency of ngram(p,len) */
+! static inline int increasefreq( table_t *t, char *p, int len )
+! {
+! uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+!
+! while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ /*** Found it! ***/
+ entry->cnt++;
+--- 140,153 ----
+ }
+
+
+
+ /* increases frequency of ngram(p,len) */
+! static int increasefreq( table_t *t, char *p, int len )
+! {
+! uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+!
+! while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ /*** Found it! ***/
+ entry->cnt++;
+***************
+*** 168,174 ****
+ }
+
+ /*** Not found, so create ***/
+! entry = wgmempool_alloc( t->pool, sizeof(entry_t) );
+ strcpy( entry->str, p );
+ entry->cnt = 1;
+
+--- 159,165 ----
+ }
+
+ /*** Not found, so create ***/
+! entry = (entry_t*)(wgmempool_alloc( t->pool, sizeof(entry_t) ));
+ strcpy( entry->str, p );
+ entry->cnt = 1;
+
+***************
+*** 181,192 ****
+ #if 0
+
+ /* looks up ngram(p,len) */
+! static entry_t *findfreq( table_t *t, char *p, int len )
+! {
+! uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+!
+! while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ return entry;
+ }
+--- 172,183 ----
+ #if 0
+
+ /* looks up ngram(p,len) */
+! static entry_t *findfreq( table_t *t, char *p, int len )
+! {
+! uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+!
+! while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ return entry;
+ }
+***************
+*** 219,225 ****
+ #define GREATER(x,y) ((x).cnt > (y).cnt)
+ #define LESS(x,y) ((x).cnt < (y).cnt)
+
+! inline static void siftup( table_t *t, unsigned int child )
+ {
+ entry_t *heap = t->heap;
+ unsigned int parent = (child-1) >> 1;
+--- 210,216 ----
+ #define GREATER(x,y) ((x).cnt > (y).cnt)
+ #define LESS(x,y) ((x).cnt < (y).cnt)
+
+! static void siftup( table_t *t, unsigned int child )
+ {
+ entry_t *heap = t->heap;
+ unsigned int parent = (child-1) >> 1;
+***************
+*** 241,247 ****
+ }
+
+
+! inline static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
+ {
+ entry_t *heap = t->heap;
+ unsigned int child = parent*2 + 1;
+--- 232,238 ----
+ }
+
+
+! static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
+ {
+ entry_t *heap = t->heap;
+ unsigned int child = parent*2 + 1;
+***************
+*** 273,279 ****
+ if (t->size < t->heapsize) {
+ memcpy( &(heap[t->size]), item, sizeof(entry_t));
+ siftup( t, t->size );
+! t->size++;
+ return 0;
+ }
+
+--- 264,270 ----
+ if (t->size < t->heapsize) {
+ memcpy( &(heap[t->size]), item, sizeof(entry_t));
+ siftup( t, t->size );
+! t->size++;
+ return 0;
+ }
+
+***************
+*** 316,333 ****
+
+ /*** Fill result heap ***/
+ for (i=0; i<TABLESIZE; i++) {
+! entry_t *p = t->table[i];
+ while (p) {
+ heapinsert(t, p);
+ p = p->next;
+ }
+! }
+ return 1;
+ }
+
+
+ static table_t *inittable(uint4 maxngrams)
+! {
+ table_t *result = (table_t *)wg_zalloc( sizeof(table_t) );
+ result->table = (entry_t **)wg_zalloc( sizeof(entry_t*) * TABLESIZE );
+ result->pool = wgmempool_Init( 10000, 10 );
+--- 307,324 ----
+
+ /*** Fill result heap ***/
+ for (i=0; i<TABLESIZE; i++) {
+! entry_t *p = t->table[i];
+ while (p) {
+ heapinsert(t, p);
+ p = p->next;
+ }
+! }
+ return 1;
+ }
+
+
+ static table_t *inittable(uint4 maxngrams)
+! {
+ table_t *result = (table_t *)wg_zalloc( sizeof(table_t) );
+ result->table = (entry_t **)wg_zalloc( sizeof(entry_t*) * TABLESIZE );
+ result->pool = wgmempool_Init( 10000, 10 );
+***************
+*** 347,360 ****
+ wgmempool_Done(t->pool);
+ wg_free(t->table);
+ wg_free(t->heap);
+! wg_free(t);
+ }
+
+
+ extern void *fp_Init(const char *name)
+ {
+ fp_t *h = (fp_t *)wg_zalloc( sizeof(fp_t) );
+!
+ if ( name ) {
+ h->name = wg_strdup(name);
+ }
+--- 338,351 ----
+ wgmempool_Done(t->pool);
+ wg_free(t->table);
+ wg_free(t->heap);
+! wg_free(t);
+ }
+
+
+ extern void *fp_Init(const char *name)
+ {
+ fp_t *h = (fp_t *)wg_zalloc( sizeof(fp_t) );
+!
+ if ( name ) {
+ h->name = wg_strdup(name);
+ }
+***************
+*** 458,478 ****
+ return dest;
+ }
+
+!
+ static void createngramtable( table_t *t, const char *buf )
+ {
+ char n[MAXNGRAMSIZE+1];
+ const char *p = buf;
+ int i;
+
+ /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
+! for (;;p++) {
+
+! const char *q = p;
+ char *m = n;
+
+ /*** First char may be an underscore ***/
+! *m++ = *q++;
+ *m = '\0';
+
+ increasefreq( t, n, 1 );
+--- 449,475 ----
+ return dest;
+ }
+
+! /**
+! * This function extracts all n-grams from the passed buffer and puts them into the table "t".
+! * [modified] by Jocelyn Merand to accept UTF-8 multi-character symbols, for use in OpenOffice
+! */
+ static void createngramtable( table_t *t, const char *buf )
+ {
+ char n[MAXNGRAMSIZE+1];
+ const char *p = buf;
+ int i;
++ int pointer = 0;
+
+ /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
+! while(1) {
+
+! const char *q = &p[pointer]; /* [modified] previously p was incremented by the loop header (for(;;p++)); now the offset "pointer" is advanced instead, so q must be recomputed from it on each iteration */
+ char *m = n;
+
+ /*** First char may be an underscore ***/
+! int decay = charcopy(q, m); /* [modified] previously *m++ = *q++ */
+! q = &(p[pointer+decay]); /* [modified] the old copying method did not handle multi-character symbols */
+! m += decay; /*[modified]*/
+ *m = '\0';
+
+ increasefreq( t, n, 1 );
+***************
+*** 482,500 ****
+ }
+
+ /*** Let the compiler unroll this ***/
+! for ( i=2; i<=MAXNGRAMSIZE; i++) {
+
+! *m++ = *q;
+ *m = '\0';
+
+ increasefreq( t, n, i );
+
+ if ( *q == '_' ) break;
+! q++;
+ if ( *q == '\0' ) {
+ return;
+ }
+ }
+ }
+ return;
+ }
+--- 479,500 ----
+ }
+
+ /*** Let the compiler unroll this ***/
+! for ( i=2; i<=MAXNGRAMSYMBOL; i++) {
+
+! decay = charcopy(q, m); /*[modified] like above*/
+! m += decay;
+ *m = '\0';
+
+ increasefreq( t, n, i );
+
+ if ( *q == '_' ) break;
+! q += decay;
+ if ( *q == '\0' ) {
+ return;
+ }
+ }
++
++ pointer = nextcharstart(p,pointer); /* [modified] p[pointer] must point to the start of the next symbol; with UTF-8 that is not necessarily the next char */
+ }
+ return;
+ }
+***************
+*** 514,520 ****
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+!
+ return mystrcmp( x->str, y->str );
+ }
+
+--- 514,520 ----
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+!
+ return mystrcmp( x->str, y->str );
+ }
+
+***************
+*** 522,533 ****
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+!
+ return x->rank - y->rank;
+ }
+
+ /**
+! * Create a fingerprint:
+ * - record the frequency of each unique n-gram in a hash table
+ * - take the most frequent n-grams
+ * - sort them alphabetically, recording their relative rank
+--- 522,533 ----
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+!
+ return x->rank - y->rank;
+ }
+
+ /**
+! * Create a fingerprint:
+ * - record the frequency of each unique n-gram in a hash table
+ * - take the most frequent n-grams
+ * - sort them alphabetically, recording their relative rank
+***************
+*** 544,563 ****
+ }
+
+ /*** Throw out all invalid chars ***/
+! tmp = prepbuffer( buffer, bufsize );
+ if ( tmp == NULL ) {
+ return 0;
+ }
+-
+ h = (fp_t*)handle;
+ t = inittable(maxngrams);
+
+ /*** Create a hash table containing n-gram counts ***/
+ createngramtable(t, tmp);
+!
+ /*** Take the top N n-grams and add them to the profile ***/
+! table2heap(t);
+! maxngrams = WGMIN( maxngrams, t->size );
+
+ h->fprint = (ngram_t *)wg_malloc( sizeof(ngram_t) * maxngrams );
+ h->size = maxngrams;
+--- 544,564 ----
+ }
+
+ /*** Throw out all invalid chars ***/
+! tmp = prepbuffer( buffer, bufsize );
+! /*printf("Cleaned buffer : %s\n",tmp);*/
+ if ( tmp == NULL ) {
+ return 0;
+ }
+ h = (fp_t*)handle;
+ t = inittable(maxngrams);
++ /*printf("Table initialized\n");*/
+
+ /*** Create a hash table containing n-gram counts ***/
+ createngramtable(t, tmp);
+! /*printf("Table created\n");*/
+ /*** Take the top N n-grams and add them to the profile ***/
+! table2heap(t);
+! maxngrams = WGMIN( maxngrams, t->size );
+
+ h->fprint = (ngram_t *)wg_malloc( sizeof(ngram_t) * maxngrams );
+ h->size = maxngrams;
+***************
+*** 568,574 ****
+ entry_t tmp2;
+
+ heapextract(t, &tmp2);
+!
+ /*** the string and its rank is all we need ***/
+ strcpy( h->fprint[i].str, tmp2.str );
+ h->fprint[i].rank = i;
+--- 569,575 ----
+ entry_t tmp2;
+
+ heapextract(t, &tmp2);
+!
+ /*** the string and its rank is all we need ***/
+ strcpy( h->fprint[i].str, tmp2.str );
+ h->fprint[i].rank = i;
+***************
+*** 578,584 ****
+ wg_free(tmp);
+
+ /*** Sort n-grams alphabetically, for easy comparison ***/
+! qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+ return 1;
+ }
+
+--- 579,585 ----
+ wg_free(tmp);
+
+ /*** Sort n-grams alphabetically, for easy comparison ***/
+! qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+ return 1;
+ }
+
+***************
+*** 608,614 ****
+ #endif
+ return 0;
+ }
+!
+ h->fprint = (ngram_t *)wg_malloc(maxngrams * sizeof(ngram_t));
+
+ while (cnt < maxngrams && wg_getline(line,1024,fp)) {
+--- 609,615 ----
+ #endif
+ return 0;
+ }
+!
+ h->fprint = (ngram_t *)wg_malloc(maxngrams * sizeof(ngram_t));
+
+ while (cnt < maxngrams && wg_getline(line,1024,fp)) {
+***************
+*** 635,641 ****
+ h->size = cnt;
+
+ /*** Sort n-grams, for easy comparison later on ***/
+! qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+
+ fclose(fp);
+
+--- 636,642 ----
+ h->size = cnt;
+
+ /*** Sort n-grams, for easy comparison later on ***/
+! qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+
+ fclose(fp);
+
+***************
+*** 648,661 ****
+ {
+ uint4 i;
+ fp_t *h = (fp_t *)handle;
+! ngram_t *tmp = wg_malloc( sizeof(ngram_t) * h->size );
+!
+ /*** Make a temporary and sort it on rank ***/
+ memcpy( tmp, h->fprint, h->size * sizeof(ngram_t) );
+! qsort( tmp, h->size, sizeof(ngram_t), ngramcmp_rank );
+
+ for (i=0; i<h->size; i++) {
+! fprintf( fp, "%s\n", tmp[i].str );
+ }
+ wg_free( tmp );
+ }
+--- 649,663 ----
+ {
+ uint4 i;
+ fp_t *h = (fp_t *)handle;
+! ngram_t *tmp = (ngram_t*)wg_malloc( sizeof(ngram_t) * h->size );
+!
+ /*** Make a temporary and sort it on rank ***/
+ memcpy( tmp, h->fprint, h->size * sizeof(ngram_t) );
+! qsort( tmp, h->size, sizeof(ngram_t), ngramcmp_rank );
+
+ for (i=0; i<h->size; i++) {
+! /*fprintf( fp, "%s\t%i\n", tmp[i].str, tmp[i].rank );*/
+! fprintf( fp, "%s\n", tmp[i].str);
+ }
+ wg_free( tmp );
+ }
+***************
+*** 669,675 ****
+ uint4 i = 0;
+ uint4 j = 0;
+ sint4 sum = 0;
+!
+ /*** Compare the profiles in mergesort fashion ***/
+ while ( i < c->size && j < u->size ) {
+
+--- 671,677 ----
+ uint4 i = 0;
+ uint4 j = 0;
+ sint4 sum = 0;
+!
+ /*** Compare the profiles in mergesort fashion ***/
+ while ( i < c->size && j < u->size ) {
+
+***************
+*** 705,711 ****
+ }
+
+ return sum;
+!
+ }
+
+
+--- 707,713 ----
+ }
+
+ return sum;
+!
+ }
+
+
+*** misc/libtextcat-2.2/src/fingerprint.h 2003-05-19 14:16:31.000000000 +0200
+--- misc/build/libtextcat-2.2/src/fingerprint.h 2007-01-11 13:19:40.000000000 +0100
+***************
+*** 41,47 ****
+--- 41,53 ----
+ extern int fp_Read( void *handle, const char *fname, int maxngrams );
+ extern sint4 fp_Compare( void *cat, void *unknown, int cutoff );
+ extern void fp_Show( void *handle );
++ #ifdef __cplusplus
++ extern "C" {
++ #endif
+ extern const char *fp_Name( void *handle );
++ #ifdef __cplusplus
++ }
++ #endif
+ extern void fp_Print( void *handle, FILE *fp );
+
+ #endif
+*** misc/libtextcat-2.2/src/Makefile.in 2003-05-22 13:39:52.000000000 +0200
+--- misc/build/libtextcat-2.2/src/Makefile.in 2007-01-12 12:48:19.181803000 +0100
+***************
+*** 124,143 ****
+ target_vendor = @target_vendor@
+ AUTOMAKE_OPTIONS = 1.4 foreign
+
+! WARNS = -W -Wall -Wshadow -Wpointer-arith
+! IFLAGS =
+! FLAGS = -g -O3 -funroll-loops -D_THREAD_SAFE -D_GNU_SOURCE
+ VERBOSE = -DVERBOSE
+ AM_CFLAGS = $(IFLAGS) $(VERBOSE) $(WARNS) $(FLAGS)
+ AM_LDFLAGS = -g
+
+ noinst_HEADERS = \
+! common.h constants.h fingerprint.h textcat.h wg_mempool.h
+
+
+ lib_LTLIBRARIES = libtextcat.la
+ libtextcat_la_SOURCES = \
+! common.c fingerprint.c textcat.c wg_mempool.c
+
+
+ bin_PROGRAMS = createfp
+--- 124,143 ----
+ target_vendor = @target_vendor@
+ AUTOMAKE_OPTIONS = 1.4 foreign
+
+! #WARNS = -W -Wall -Wshadow -Wpointer-arith
+! IFLAGS =
+! #FLAGS = -g -O3 -funroll-loops -D_THREAD_SAFE -D_GNU_SOURCE
+ VERBOSE = -DVERBOSE
+ AM_CFLAGS = $(IFLAGS) $(VERBOSE) $(WARNS) $(FLAGS)
+ AM_LDFLAGS = -g
+
+ noinst_HEADERS = \
+! common.h constants.h fingerprint.h textcat.h wg_mempool.h utf8misc.h
+
+
+ lib_LTLIBRARIES = libtextcat.la
+ libtextcat_la_SOURCES = \
+! common.c fingerprint.c textcat.c wg_mempool.c utf8misc.c
+
+
+ bin_PROGRAMS = createfp
+***************
+*** 156,162 ****
+ libtextcat_la_LDFLAGS =
+ libtextcat_la_LIBADD =
+ am_libtextcat_la_OBJECTS = common.lo fingerprint.lo textcat.lo \
+! wg_mempool.lo
+ libtextcat_la_OBJECTS = $(am_libtextcat_la_OBJECTS)
+ bin_PROGRAMS = createfp$(EXEEXT)
+ noinst_PROGRAMS = testtextcat$(EXEEXT)
+--- 156,162 ----
+ libtextcat_la_LDFLAGS =
+ libtextcat_la_LIBADD =
+ am_libtextcat_la_OBJECTS = common.lo fingerprint.lo textcat.lo \
+! wg_mempool.lo utf8misc.lo
+ libtextcat_la_OBJECTS = $(am_libtextcat_la_OBJECTS)
+ bin_PROGRAMS = createfp$(EXEEXT)
+ noinst_PROGRAMS = testtextcat$(EXEEXT)
+***************
+*** 177,183 ****
+ @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/common.Plo ./$(DEPDIR)/createfp.Po \
+ @AMDEP_TRUE@ ./$(DEPDIR)/fingerprint.Plo \
+ @AMDEP_TRUE@ ./$(DEPDIR)/testtextcat.Po ./$(DEPDIR)/textcat.Plo \
+! @AMDEP_TRUE@ ./$(DEPDIR)/wg_mempool.Plo
+ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+--- 177,184 ----
+ @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/common.Plo ./$(DEPDIR)/createfp.Po \
+ @AMDEP_TRUE@ ./$(DEPDIR)/fingerprint.Plo \
+ @AMDEP_TRUE@ ./$(DEPDIR)/testtextcat.Po ./$(DEPDIR)/textcat.Plo \
+! @AMDEP_TRUE@ ./$(DEPDIR)/wg_mempool.Plo \
+! @AMDEP_TRUE@ ./$(DEPDIR)/utf8misc.Plo
+ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+***************
+*** 213,219 ****
+ @rm -f stamp-h1
+ cd $(top_builddir) && $(SHELL) ./config.status src/config.h
+
+! $(srcdir)/config.h.in: $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOHEADER)
+ touch $(srcdir)/config.h.in
+
+--- 214,220 ----
+ @rm -f stamp-h1
+ cd $(top_builddir) && $(SHELL) ./config.status src/config.h
+
+! $(srcdir)/config.h.in: $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOHEADER)
+ touch $(srcdir)/config.h.in
+
+***************
+*** 247,253 ****
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+! libtextcat.la: $(libtextcat_la_OBJECTS) $(libtextcat_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libtextcat_la_LDFLAGS) $(libtextcat_la_OBJECTS) $(libtextcat_la_LIBADD) $(LIBS)
+ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+ install-binPROGRAMS: $(bin_PROGRAMS)
+--- 248,254 ----
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+! libtextcat.la: $(libtextcat_la_OBJECTS) $(libtextcat_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libtextcat_la_LDFLAGS) $(libtextcat_la_OBJECTS) $(libtextcat_la_LIBADD) $(LIBS)
+ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+ install-binPROGRAMS: $(bin_PROGRAMS)
+***************
+*** 285,294 ****
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+! createfp$(EXEEXT): $(createfp_OBJECTS) $(createfp_DEPENDENCIES)
+ @rm -f createfp$(EXEEXT)
+ $(LINK) $(createfp_LDFLAGS) $(createfp_OBJECTS) $(createfp_LDADD) $(LIBS)
+! testtextcat$(EXEEXT): $(testtextcat_OBJECTS) $(testtextcat_DEPENDENCIES)
+ @rm -f testtextcat$(EXEEXT)
+ $(LINK) $(testtextcat_LDFLAGS) $(testtextcat_OBJECTS) $(testtextcat_LDADD) $(LIBS)
+
+--- 286,295 ----
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+! createfp$(EXEEXT): $(createfp_OBJECTS) $(createfp_DEPENDENCIES)
+ @rm -f createfp$(EXEEXT)
+ $(LINK) $(createfp_LDFLAGS) $(createfp_OBJECTS) $(createfp_LDADD) $(LIBS)
+! testtextcat$(EXEEXT): $(testtextcat_OBJECTS) $(testtextcat_DEPENDENCIES)
+ @rm -f testtextcat$(EXEEXT)
+ $(LINK) $(testtextcat_LDFLAGS) $(testtextcat_OBJECTS) $(testtextcat_LDADD) $(LIBS)
+
+***************
+*** 304,309 ****
+--- 305,311 ----
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testtextcat.Po@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textcat.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wg_mempool.Plo@am__quote@
++ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf8misc.Plo@am__quote@
+
+ distclean-depend:
+ -rm -rf ./$(DEPDIR)
+*** misc/libtextcat-2.2/src/makefile.mk 2007-01-12 12:55:41.709348000 +0100
+--- misc/build/libtextcat-2.2/src/makefile.mk 2007-01-12 12:48:19.214530000 +0100
+***************
+*** 1 ****
+! dummy
+--- 1,91 ----
+! #*************************************************************************
+! #
+! # $RCSfile: libtextcat-2.2.patch,v $
+! #
+! # $Revision: 1.1 $
+! #
+! # last change: $Author: tl $ $Date: 2007-01-12 12:34:52 $
+! #
+! #* The Contents of this file are made available subject to
+! #* the terms of GNU Lesser General Public License Version 2.1.
+! #*
+! #*
+! #* GNU Lesser General Public License Version 2.1
+! #* =============================================
+! #* Copyright 2005 by Sun Microsystems, Inc.
+! #* 901 San Antonio Road, Palo Alto, CA 94303, USA
+! #*
+! #* This library is free software; you can redistribute it and/or
+! #* modify it under the terms of the GNU Lesser General Public
+! #* License version 2.1, as published by the Free Software Foundation.
+! #*
+! #* This library is distributed in the hope that it will be useful,
+! #* but WITHOUT ANY WARRANTY; without even the implied warranty of
+! #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! #* Lesser General Public License for more details.
+! #*
+! #* You should have received a copy of the GNU Lesser General Public
+! #* License along with this library; if not, write to the Free Software
+! #* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+! #* MA 02111-1307 USA
+! #*
+! #*************************************************************************
+!
+! PRJ = ..$/..$/..$/..$/..
+!
+! PRJNAME = libtextcat
+! TARGET = libtextcat
+! CFLAGSCALL=gsd
+!
+! USE_DEFFILE=TRUE
+! EXTERNAL_WARNINGS_NOT_ERRORS := TRUE
+!
+! .INCLUDE : settings.mk
+!
+! # --- Files --------------------------------------------------------
+!
+! # !! not to be compiled because these belong to stand-alone programs: !!
+! # $(SLO)$/createfp.obj\
+! # $(SLO)$/testtextcat.obj
+!
+! SLOFILES= \
+! $(SLO)$/common.obj\
+! $(SLO)$/fingerprint.obj\
+! $(SLO)$/textcat.obj\
+! $(SLO)$/wg_mempool.obj\
+! $(SLO)$/utf8misc.obj
+!
+! #SHL1TARGET= $(TARGET)$(UPD)$(DLLPOSTFIX)
+! SHL1TARGET= $(TARGET)
+!
+! SHL1STDLIBS=
+!
+! # build DLL
+! SHL1LIBS= $(SLB)$/$(TARGET).lib
+! SHL1IMPLIB= i$(TARGET)
+! SHL1DEPN= $(SHL1LIBS)
+! SHL1DEF= $(MISC)$/$(SHL1TARGET).def
+!
+! # build DEF file
+! DEF1NAME= $(SHL1TARGET)
+! DEF1LIBNAME=$(TARGET)
+! DEF1DEPN=$(MISC)$/$(SHL1TARGET).flt
+!
+! # --- Targets ------------------------------------------------------
+!
+! .INCLUDE : target.mk
+!
+! # copy hand supplied configuration file for Win32 builds to the file
+! # which is included in the source code
+! $(SLOFILES) : config.h
+! config.h :
+! $(GNUCOPY) $(OUT)$/misc$/build$/libtextcat-2.2$/src$/win32_config.h $(OUT)$/misc$/build$/libtextcat-2.2$/src$/config.h
+!
+!
+! $(MISC)$/$(SHL1TARGET).flt: makefile.mk
+! @echo ------------------------------
+! @echo Making: $@
+! @echo Imp>$@
+! @echo __CT>>$@
+! @echo _real>>$@
+! @echo unnamed>>$@
+*** misc/libtextcat-2.2/src/textcat.c 2003-05-22 13:32:43.000000000 +0200
+--- misc/build/libtextcat-2.2/src/textcat.c 2007-01-12 12:52:41.000000000 +0100
+***************
+*** 4,26 ****
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+--- 4,26 ----
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+! *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+! *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+! *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+! *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+! *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+***************
+*** 74,79 ****
+--- 74,80 ----
+ typedef struct {
+
+ void **fprint;
++ char *fprint_disable;
+ uint4 size;
+ uint4 maxsize;
+
+***************
+*** 112,122 ****
+ fp_Done( h->fprint[i] );
+ }
+ wg_free( h->fprint );
+ wg_free( h );
+
+ }
+
+! extern void *textcat_Init( const char *conffile )
+ {
+ textcat_t *h;
+ char line[1024];
+--- 113,133 ----
+ fp_Done( h->fprint[i] );
+ }
+ wg_free( h->fprint );
++ wg_free( h->fprint_disable );
+ wg_free( h );
+
+ }
+
+! /** Keeps the original one-argument signature: forwards conffile to special_textcat_Init() with DEFAULT_FINGERPRINTS_PATH as the prefix. */
+! extern void *textcat_Init( const char *conffile ){
+! return special_textcat_Init( conffile, DEFAULT_FINGERPRINTS_PATH );
+! }
+!
+! /**
+! * Originally this function had only one parameter (conffile); it has been modified for use in OOo.
+! * Basically, prefix is the directory path where fingerprints are stored; it is prepended as-is to each fingerprint file name.
+! */
+! extern void *special_textcat_Init( const char *conffile, const char *prefix )
+ {
+ textcat_t *h;
+ char line[1024];
+***************
+*** 134,144 ****
+ h->size = 0;
+ h->maxsize = 16;
+ h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );
+
+ while ( wg_getline( line, 1024, fp ) ) {
+ char *p;
+ char *segment[4];
+! int res;
+
+ /*** Skip comments ***/
+ #ifdef HAVE_STRCHR
+--- 145,157 ----
+ h->size = 0;
+ h->maxsize = 16;
+ h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );
++ h->fprint_disable = (char *)wg_malloc( sizeof(char*) * h->maxsize ); /*added to store the state of languages*/
+
+ while ( wg_getline( line, 1024, fp ) ) {
+ char *p;
+ char *segment[4];
+! char finger_print_file_name[512];
+! int res;
+
+ /*** Skip comments ***/
+ #ifdef HAVE_STRCHR
+***************
+*** 156,172 ****
+ /*** Ensure enough space ***/
+ if ( h->size == h->maxsize ) {
+ h->maxsize *= 2;
+! h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
+ }
+
+ /*** Load data ***/
+ if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
+ goto ERROR;
+ }
+! if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) {
+ textcat_Done(h);
+ goto ERROR;
+! }
+ h->size++;
+ }
+
+--- 169,191 ----
+ /*** Ensure enough space ***/
+ if ( h->size == h->maxsize ) {
+ h->maxsize *= 2;
+! h->fprint = (void **)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
+! h->fprint_disable = (char *)wg_realloc( h->fprint_disable, sizeof(char*) * h->maxsize );
+ }
+
+ /*** Load data ***/
+ if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
+ goto ERROR;
+ }
+! finger_print_file_name[0] = '\0';
+! strcat(finger_print_file_name, prefix);
+! strcat(finger_print_file_name, segment[0]);
+!
+! if ( fp_Read( h->fprint[h->size], finger_print_file_name, 400 ) == 0 ) {
+ textcat_Done(h);
+ goto ERROR;
+! }
+! h->fprint_disable[h->size] = 0xF0; /*0xF0 is the code for enabled languages, 0x0F is for disabled*/
+ h->size++;
+ }
+
+***************
+*** 203,213 ****
+ result = _TEXTCAT_RESULT_SHORT;
+ goto READY;
+ }
+!
+ /*** Calculate the score for each category. ***/
+ for (i=0; i<h->size; i++) {
+! int score = fp_Compare( h->fprint[i], unknown, threshold );
+! candidates[i].score = score;
+ candidates[i].name = fp_Name( h->fprint[i] );
+ if ( score < minscore ) {
+ minscore = score;
+--- 222,239 ----
+ result = _TEXTCAT_RESULT_SHORT;
+ goto READY;
+ }
+!
+ /*** Calculate the score for each category. ***/
+ for (i=0; i<h->size; i++) {
+! int score;
+! if(h->fprint_disable[i] & 0x0F){ /*if this language is disabled*/
+! score = MAXSCORE;
+! }
+! else{
+! score = fp_Compare( h->fprint[i], unknown, threshold );
+! /*printf("Score for %s : %i\n", fp_Name(h->fprint[i]), score);*/
+! }
+! candidates[i].score = score;
+ candidates[i].name = fp_Name( h->fprint[i] );
+ if ( score < minscore ) {
+ minscore = score;
+***************
+*** 218,224 ****
+ /*** Find the best performers ***/
+ for (i=0; i<h->size; i++) {
+ if ( candidates[i].score < threshold ) {
+-
+ if ( ++cnt == MAXCANDIDATES+1 ) {
+ break;
+ }
+--- 244,249 ----
+***************
+*** 235,241 ****
+ else {
+ char *p = result;
+ char *plimit = result+MAXOUTPUTSIZE;
+!
+ qsort( candidates, cnt, sizeof(candidate_t), cmpcandidates );
+
+ *p = '\0';
+--- 260,266 ----
+ else {
+ char *p = result;
+ char *plimit = result+MAXOUTPUTSIZE;
+!
+ qsort( candidates, cnt, sizeof(candidate_t), cmpcandidates );
+
+ *p = '\0';
+***************
+*** 247,253 ****
+ }
+ READY:
+ fp_Done(unknown);
+! #ifdef SHOULD_FREE
+ free(candidates);
+ #undef SHOULD_FREE
+ #endif
+--- 272,278 ----
+ }
+ READY:
+ fp_Done(unknown);
+! #ifdef SHOULD_FREE
+ free(candidates);
+ #undef SHOULD_FREE
+ #endif
+*** misc/libtextcat-2.2/src/textcat.h 2003-05-19 14:16:31.000000000 +0200
+--- misc/build/libtextcat-2.2/src/textcat.h 2007-01-11 13:19:41.000000000 +0100
+***************
+*** 40,45 ****
+--- 40,48 ----
+ #define _TEXTCAT_RESULT_UNKOWN "UNKNOWN"
+ #define _TEXTCAT_RESULT_SHORT "SHORT"
+
++ #ifdef __cplusplus
++ extern "C" {
++ #endif
+
+ /**
+ * textcat_Init() - Initialize the text classifier. The textfile
+***************
+*** 51,60 ****
+--- 54,72 ----
+ * Returns: handle on success, NULL on error. (At the moment, the
+ * only way errors can occur, is when the library cannot read the
+ * conffile, or one of the fingerprint files listed in it.)
++ *
+ * Replaces the older function (and has exactly the same behaviour);
+ * see special_textcat_Init() below.
+ */
+ extern void *textcat_Init( const char *conffile );
+
+ /**
+ * Originally this function had only one parameter (conffile); it has been modified since OOo must be able to load an alternative DB.
+ * Basically, prefix is the directory path where fingerprints are stored.
++ */
++ extern void *special_textcat_Init( const char *conffile, const char *prefix );
++
++ /**
+ * textcat_Done() - Free up resources for handle
+ */
+ extern void textcat_Done( void *handle );
+***************
+*** 77,80 ****
+--- 89,96 ----
+ * textcat_Version() - Returns a string describing the version of this classifier.
+ */
+ extern char *textcat_Version();
++
++ #ifdef __cplusplus
++ }
++ #endif
+ #endif
+*** misc/libtextcat-2.2/src/utf8misc.c 2007-01-12 12:55:41.584585000 +0100
+--- misc/build/libtextcat-2.2/src/utf8misc.c 2007-01-12 12:54:50.000000000 +0100
+***************
+*** 1 ****
+! dummy
+--- 1,132 ----
+! /***************************************************************************
+! * Copyright (C) 2006 by Jocelyn Merand *
+! * joc.mer@gmail.com *
+! * *
+! * THE BSD LICENSE
+! *
+! * Redistribution and use in source and binary forms, with or without
+! * modification, are permitted provided that the following conditions
+! * are met:
+! *
+! * - Redistributions of source code must retain the above copyright
+! * notice, this list of conditions and the following disclaimer.
+! *
+! * - Redistributions in binary form must reproduce the above copyright
+! * notice, this list of conditions and the following disclaimer in the
+! * documentation and/or other materials provided with the
+! * distribution.
+! *
+! * - Neither the name of the WiseGuys Internet B.V. nor the names of
+! * its contributors may be used to endorse or promote products derived
+! * from this software without specific prior written permission.
+! *
+! * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+! * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+! * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+! * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+! * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+! ***************************************************************************/
+!
+! #ifndef _UTF8_MISC_H_
+! #include "utf8misc.h"
+! #endif
+!
+! /* Returns the byte index just after the character that starts at str[position]; it does not step past a '\0' it encounters. */
+! int nextcharstart(const char *str, int position){
+! int pointer = position;
+!
+! if(str[pointer] & ESCAPE_MASK){ /*true when any ESCAPE_MASK bit is set in the current byte (the top bit if _UTF8_ is defined)*/
+!
+! /*then str[pointer] is treated as an escape character*/
+!
+! char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*keep only the bits selected by WEIGHT_MASK (the most significant part), shifted left once, to count the following bytes*/
+!
+! while(escape_char & ESCAPE_MASK && str[pointer]){/*each step shifts the byte one bit left and advances one byte; it ends when the tested bit is 0 or the string ends*/
+! escape_char = escape_char <<1;
+! ++pointer;
+! }
+! }
+! if(str[pointer]){ /*finally, if we are not on the '\0' character, we step over the current byte*/
+! ++pointer;
+! }
+! return pointer;
+! }
+!
+! /* Copies the bytes of the single character at the start of str into dest and returns how many bytes were copied; no '\0' is written to dest. */
+! int charcopy(const char *str, char *dest){
+!
+! int pointer = 0;
+! if(str[pointer] & ESCAPE_MASK){ /*true when any ESCAPE_MASK bit is set in the first byte (the top bit if _UTF8_ is defined)*/
+!
+! /*then str[pointer] is treated as an escape character*/
+!
+! char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*keep only the bits selected by WEIGHT_MASK (the most significant part), shifted left once, to count the following bytes*/
+!
+! while(escape_char & ESCAPE_MASK && str[pointer]){ /*each step copies one byte and shifts one bit left; it ends when the tested bit is 0 or the string ends*/
+! dest[pointer] = str[pointer];
+! escape_char = escape_char <<1;
+! ++pointer;
+! }
+! }
+! if(str[pointer]){ /*copy the last (or only) byte of the character unless we are on the '\0' terminator*/
+! dest[pointer] = str[pointer];
+! ++pointer;
+! }
+!
+! return pointer;
+! }
+!
+! /* Returns 1 when the first len characters of key equal lex byte for byte and lex ends right after them; returns 0 otherwise. */
+! int issame( char *lex, char *key, int len )
+! {
+! /*printf("[%s] prefix of [%s] with length %i", lex, key, len);*/
+! int char_counter = 0;
+! int pointer = 0;
+! while(char_counter < len) {
+! /* NOTE(review): nothing here stops if key and lex both reach '\0' before len characters are consumed - presumably callers guarantee the length; confirm */
+! if(key[pointer] & ESCAPE_MASK){ /*true when any ESCAPE_MASK bit is set in the current byte (the top bit if _UTF8_ is defined)*/
+!
+! /*then key[pointer] is treated as an escape character*/
+!
+! char escape_char = ((key[pointer] & WEIGHT_MASK) << 1); /*keep only the bits selected by WEIGHT_MASK (the most significant part), shifted left once, to count the following bytes*/
+!
+! while(escape_char & ESCAPE_MASK && key[pointer] == lex[pointer] ){ /*advance over matching bytes; a mismatch leaves pointer on the differing byte for the check below*/
+! escape_char = escape_char <<1;
+! ++pointer;
+! }
+! }
+! ++char_counter; /*and we are on a new utf8 character*/
+! if ( key[pointer] != lex[pointer] ) {
+! return 0;
+! /*printf(" NO\n", lex, key, len);*/
+! }
+! ++pointer;
+! }
+! if ( lex[pointer] != '\0' ) { /*lex must end exactly here, otherwise it is longer than the compared prefix*/
+! return 0;
+! /*printf(" NO\n");*/
+! }
+!
+! /*printf(" YES\n");*/
+!
+! return 1;
+! }
+!
+! /* Counts the characters in str by calling nextcharstart() repeatedly until the '\0' terminator is reached. */
+! extern int utfstrlen(const char* str){
+! int char_counter = 0;
+! int pointer = 0;
+! while(str[pointer]) {
+! pointer = nextcharstart(str, pointer);
+!
+! ++char_counter; /*one more character has been stepped over*/
+! }
+! return char_counter;
+! }
+!
+*** misc/libtextcat-2.2/src/utf8misc.h 2007-01-12 12:55:41.547021000 +0100
+--- misc/build/libtextcat-2.2/src/utf8misc.h 2007-01-11 13:19:41.000000000 +0100
+***************
+*** 1 ****
+! dummy
+--- 1,88 ----
+! /***************************************************************************
+! * Copyright (C) 2006 by Jocelyn Merand *
+! * joc.mer@gmail.com *
+! * *
+! * THE BSD LICENSE
+! *
+! * Redistribution and use in source and binary forms, with or without
+! * modification, are permitted provided that the following conditions
+! * are met:
+! *
+! * - Redistributions of source code must retain the above copyright
+! * notice, this list of conditions and the following disclaimer.
+! *
+! * - Redistributions in binary form must reproduce the above copyright
+! * notice, this list of conditions and the following disclaimer in the
+! * documentation and/or other materials provided with the
+! * distribution.
+! *
+! * - Neither the name of the WiseGuys Internet B.V. nor the names of
+! * its contributors may be used to endorse or promote products derived
+! * from this software without specific prior written permission.
+! *
+! * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+! * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+! * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+! * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+! * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+! ***************************************************************************/
+!
+! #ifndef _UTF8_MISC_H_
+! #define _UTF8_MISC_H_
+!
+! /**
+! * These macros are used by the character-processing functions.
+! * They were added to handle UTF-8 symbols, particularly escape chars.
+! */
+! #ifdef _UTF8_
+! #define ESCAPE_MASK 0x80
+! #define WEIGHT_MASK 0xF0
+! #else
+! #define ESCAPE_MASK 0xFF
+! #define WEIGHT_MASK 0x00
+! #endif
+!
+!
+! /*
+! * Jumps to the start of the next character.
+! * Of course, it is only useful when the encoding is UTF-8.
+! * This function was added by Jocelyn Merand to use libtextcat in OOo.
+! */
+! int nextcharstart(const char *str, int position);
+!
+!
+! /* Copies the character at str to dest.
+! * Of course, it is only useful when the encoding is UTF-8 and the symbol is encoded with more than 1 char.
+! * Returns the number of chars jumped.
+! * This function was added by Jocelyn Merand to use libtextcat in OOo.
+! */
+! int charcopy(const char *str, char *dest);
+!
+!
+! /* Checks whether the n-gram lex is a prefix of key and has length len.
+! * If _UTF8_ is defined, it uses escape characters and len is not really the length of lex;
+! * in this case, len is the number of UTF-8 chars: strlen("€") == 3 but len == 1.
+! */
+! int issame( char *lex, char *key, int len );
+!
+!
+! /* Counts the number of characters.
+! * If _UTF8_ is defined, it uses escape characters and the result is not really the length of str;
+! * in this case, the result is the number of UTF-8 chars: strlen("€") == 3 but utfstrlen("€") == 1.
+! */
+! #ifdef __cplusplus
+! extern "C" {
+! #endif
+! extern int utfstrlen(const char* str);
+! #ifdef __cplusplus
+! }
+! #endif
+!
+! #endif
+!
+*** misc/libtextcat-2.2/src/win32_config.h 2007-01-12 12:55:41.643465000 +0100
+--- misc/build/libtextcat-2.2/src/win32_config.h 2007-01-11 13:19:41.000000000 +0100
+***************
+*** 1 ****
+! dummy
+--- 1,136 ----
+! /* src/config.h. Generated by configure. */
+! /* src/config.h.in. Generated from configure.ac by autoheader. */
+!
+! /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+! systems. This function is required for `alloca.c' support on those systems.
+! */
+! /* #undef CRAY_STACKSEG_END */
+!
+! /* Define to 1 if using `alloca.c'. */
+! /* #undef C_ALLOCA */
+!
+! /* Define to 1 if you have `alloca', as a function or macro. */
+! /* #undef HAVE_ALLOCA */
+!
+! /* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+! */
+! /* #undef HAVE_ALLOCA_H */
+!
+! /* Define to 1 if you have the <dlfcn.h> header file. */
+! #define HAVE_DLFCN_H 1
+!
+! /* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
+! /* #undef HAVE_DOPRNT */
+!
+! /* Define to 1 if you have the `gettimeofday' function. */
+! /* #undef HAVE_GETTIMEOFDAY */
+!
+! /* Define to 1 if you have the <inttypes.h> header file. */
+! /* #undef HAVE_INTTYPES_H */
+!
+! /* Define to 1 if you have the <limits.h> header file. */
+! #define HAVE_LIMITS_H 1
+!
+! /* Define to 1 if your system has a GNU libc compatible `malloc' function, and
+! to 0 otherwise. */
+! #define HAVE_MALLOC 1
+!
+! /* Define to 1 if you have the <memory.h> header file. */
+! #define HAVE_MEMORY_H 1
+!
+! /* Define to 1 if you have the `memset' function. */
+! #define HAVE_MEMSET 1
+!
+! /* Define to 1 if your system has a GNU libc compatible `realloc' function,
+! and to 0 otherwise. */
+! #define HAVE_REALLOC 1
+!
+! /* Define to 1 if you have the <stdint.h> header file. */
+! /* #undef HAVE_STDINT_H */
+!
+! /* Define to 1 if you have the <stdlib.h> header file. */
+! #define HAVE_STDLIB_H 1
+!
+! /* Define to 1 if you have the `strchr' function. */
+! #define HAVE_STRCHR 1
+!
+! /* Define to 1 if you have the `strdup' function. */
+! #define HAVE_STRDUP 1
+!
+! /* Define to 1 if you have the <strings.h> header file. */
+! /* #undef HAVE_STRINGS_H */
+!
+! /* Define to 1 if you have the <string.h> header file. */
+! #define HAVE_STRING_H 1
+!
+! /* Define to 1 if you have the `strpbrk' function. */
+! #define HAVE_STRPBRK 1
+!
+! /* Define to 1 if you have the <sys/stat.h> header file. */
+! #define HAVE_SYS_STAT_H 1
+!
+! /* Define to 1 if you have the <sys/time.h> header file. */
+! /* #undef HAVE_SYS_TIME_H */
+!
+! /* Define to 1 if you have the <sys/types.h> header file. */
+! #define HAVE_SYS_TYPES_H 1
+!
+! /* Define to 1 if you have the <unistd.h> header file. */
+! #define HAVE_UNISTD_H 1
+!
+! /* Define to 1 if you have the `vprintf' function. */
+! #define HAVE_VPRINTF 1
+!
+! /* Name of package */
+! #define PACKAGE "libtextcat"
+!
+! /* Define to the address where bug reports for this package should be sent. */
+! #define PACKAGE_BUGREPORT ""
+!
+! /* Define to the full name of this package. */
+! #define PACKAGE_NAME "libtextcat"
+!
+! /* Define to the full name and version of this package. */
+! #define PACKAGE_STRING "libtextcat 2.2"
+!
+! /* Define to the one symbol short name of this package. */
+! #define PACKAGE_TARNAME "libtextcat"
+!
+! /* Define to the version of this package. */
+! #define PACKAGE_VERSION "2.2"
+!
+! /* If using the C implementation of alloca, define if you know the
+! direction of stack growth for your system; otherwise it will be
+! automatically deduced at run-time.
+! STACK_DIRECTION > 0 => grows toward higher addresses
+! STACK_DIRECTION < 0 => grows toward lower addresses
+! STACK_DIRECTION = 0 => direction of growth unknown */
+! /* #undef STACK_DIRECTION */
+!
+! /* Define to 1 if you have the ANSI C header files. */
+! #define STDC_HEADERS 1
+!
+! /* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+! #define TIME_WITH_SYS_TIME 1
+!
+! /* Define to 1 if your <sys/time.h> declares `struct tm'. */
+! /* #undef TM_IN_SYS_TIME */
+!
+! /* Version number of package */
+! #define VERSION "2.2"
+!
+! /* Define to empty if `const' does not conform to ANSI C. */
+! /* #undef const */
+!
+! /* Define as `__inline' if that's what the C compiler calls it, or to nothing
+! if it is not supported. */
+! /* #undef inline */
+!
+! /* Define to rpl_malloc if the replacement function should be used. */
+! /* #undef malloc */
+!
+! /* Define to rpl_realloc if the replacement function should be used. */
+! /* #undef realloc */
+!
+! /* Define to `unsigned' if <sys/types.h> does not define. */
+! /* #undef size_t */