summaryrefslogtreecommitdiff
path: root/lingucomponent/source/languageguessing
diff options
context:
space:
mode:
authorKurt Zenker <kz@openoffice.org>2007-06-19 15:02:00 +0000
committerKurt Zenker <kz@openoffice.org>2007-06-19 15:02:00 +0000
commit4afc9ff37a66662f4bc21f9e6490508f1e61cd47 (patch)
tree0ad652d4580aa4a1bb620b91fc4b01129abfe761 /lingucomponent/source/languageguessing
parentcab1870e677f33a2501f15916110da46d3336d24 (diff)
INTEGRATION: CWS languageguessing (1.1.2); FILE ADDED
2007/01/12 11:07:26 tl 1.1.2.1: #i73173# integrate Google SoC language-guessing
Diffstat (limited to 'lingucomponent/source/languageguessing')
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.hxx138
1 files changed, 138 insertions, 0 deletions
diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx
new file mode 100644
index 000000000000..65ad0c07e2d4
--- /dev/null
+++ b/lingucomponent/source/languageguessing/simpleguesser.hxx
@@ -0,0 +1,138 @@
+/***************************************************************************
+ * Copyright (C) 2006 by Jocelyn Merand *
+ * joc.mer@gmail.com *
+ * *
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: simpleguesser.hxx,v $
+ *
+ * $Revision: 1.2 $
+ *
+ * last change: $Author: kz $ $Date: 2007-06-19 16:02:00 $
+ *
+ * The Contents of this file are made available subject to
+ * the terms of GNU Lesser General Public License Version 2.1.
+ *
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2005 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ ************************************************************************/
+#ifndef SIMPLEGUESSER_H
+#define SIMPLEGUESSER_H
+
+#include <string.h>
+#include <string>
+#include <cstdlib>
+#include <vector>
+#include <guess.hxx>
+
+#define MAX_STRING_LENGTH_TO_ANALYSE 100 /* presumably the maximum number of characters of input text that gets analysed; not used anywhere in this header - TODO confirm in the implementation */
+
+using namespace std; // NOTE(review): a header-scope using-directive becomes visible in every file that includes this header; the unqualified vector/string in the class below rely on it
+
+/**
+ * Language guesser working on a database of language fingerprints.
+ *
+ * The class lets callers guess the language(s) of a text, list the
+ * languages known to the fingerprint database, and enable or disable
+ * individual languages. Only declarations are visible in this header;
+ * the behaviour described below is taken from the declarations and the
+ * original author's notes and should be confirmed against the
+ * implementation file.
+ *
+@author Jocelyn Merand
+*/
+class SimpleGuesser{
+public:
+ /**
+ * Default constructor.
+ * According to the original author's note, initialises the object with
+ * the configuration file "./conf.txt".
+ */
+ SimpleGuesser();
+
+ /** Initialises the object from the given configuration file.
+ * @param const char* confFile the string representing the config file
+ * @param const char* prefix the path where fingerprint files are stored
+ */
+ SimpleGuesser(const char* confFile, const char* prefix);
+
+ /** Assignment operator (this is NOT a comparison, despite what earlier
+ * documentation of this member said).
+ * Presumably copies the state of sg into this guesser; the definition is
+ * not part of this header - TODO confirm.
+ * NOTE(review): the operator returns void, so assignments cannot be
+ * chained, and it takes a non-const reference, so a const guesser or a
+ * temporary cannot be used as the right-hand side.
+ * @param SimpleGuesser& sg the guesser to assign from
+ */
+ void operator=(SimpleGuesser& sg);
+
+ /**
+ * Destroys the object.
+ */
+ ~SimpleGuesser();
+
+ /**
+ * Analyses a text and returns the most probable languages of the text.
+ * @param char* text is the text to analyse (passed as a non-const
+ * pointer; whether the text is modified is not visible here)
+ * @return the list of guesses
+ */
+ vector<Guess> GuessLanguage(char* text);
+
+ /**
+ * Analyses a text and returns the most probable language of the text.
+ * @param char* text is the text to analyse (passed as a non-const
+ * pointer; whether the text is modified is not visible here)
+ * @return the guess (containing the language)
+ */
+ Guess GuessPrimaryLanguage(char* text);
+
+ /**
+ * Lists all available languages, i.e. the ones that may show up in
+ * guesses.
+ * @return the list of languages
+ */
+ vector<Guess> GetAvailableLanguages();
+
+ /**
+ * Lists all managed languages, whether they may show up in guesses or
+ * not.
+ * @return the list of languages
+ */
+ vector<Guess> GetAllManagedLanguages();
+
+ /**
+ * Lists all unavailable languages (disabled for any reason).
+ * @return the list of languages
+ */
+ vector<Guess> GetUnavailableLanguages();
+
+ /**
+ * Marks a language as enabled.
+ * @param string lang the language to enable (built like language-COUNTRY-encoding);
+ * passed by value
+ */
+ void EnableLanguage(string lang);
+
+ /**
+ * Marks a language as disabled.
+ * @param string lang the language to disable (built like language-COUNTRY-encoding);
+ * passed by value
+ */
+ void DisableLanguage(string lang);
+
+ /**
+ * Loads a new database of fingerprints.
+ * @param const char* thePathOfConfFile the path of the configuration file
+ * @param const char* prefix is the path where the directory which contains the fingerprint files is stored
+ */
+ void SetDBPath(const char* thePathOfConfFile, const char* prefix);
+
+protected:
+
+ // Opaque handle to where the typical fingerprints (n-gram tables) are stored; the concrete type and who frees it are not visible in this header - TODO confirm in the implementation
+ void* h;
+
+ // Selects languages from the fingerprints DB; the mask indicates whether enabled languages, disabled languages, or both are wanted (mask values are defined outside this header)
+ vector<Guess> GetManagedLanguages(const char mask);
+
+ // Like GetManagedLanguages, but enables or disables the language lang, depending on the mask
+ void XableLanguage(string lang, char mask);
+};
+
+#endif