diff options
author | Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> | 2012-02-19 13:49:08 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-02-23 10:31:17 +0000 |
commit | 70a7cd0923795ee5c8210b476e2897d12988ad95 (patch) | |
tree | e385d49216d229ce6428148e7ec6bdfdda2216df /l10ntools | |
parent | 796818d7b5a63c30d1ec837172deb73e300bfc38 (diff) |
Add C++ HelpSearch and call from XMLHelp. Fix string conversion bug.
Diffstat (limited to 'l10ntools')
-rw-r--r-- | l10ntools/inc/l10ntools/HelpSearch.hxx | 36 | ||||
-rw-r--r-- | l10ntools/prj/d.lst | 1 | ||||
-rw-r--r-- | l10ntools/source/help/HelpIndexer.cxx | 17 | ||||
-rw-r--r-- | l10ntools/source/help/HelpSearch.cxx | 40 | ||||
-rw-r--r-- | l10ntools/source/help/LuceneHelper.cxx | 33 | ||||
-rw-r--r-- | l10ntools/source/help/LuceneHelper.hxx | 13 | ||||
-rw-r--r-- | l10ntools/source/help/makefile.mk | 9 |
7 files changed, 131 insertions, 18 deletions
diff --git a/l10ntools/inc/l10ntools/HelpSearch.hxx b/l10ntools/inc/l10ntools/HelpSearch.hxx new file mode 100644 index 000000000000..4885b5698222 --- /dev/null +++ b/l10ntools/inc/l10ntools/HelpSearch.hxx @@ -0,0 +1,36 @@ +#ifndef HELPSEARCH_HXX +#define HELPSEARCH_HXX + +#include <l10ntools/dllapi.h> + +#include <CLucene/StdHeader.h> +#include <CLucene.h> + +#include <rtl/ustring.hxx> +#include <vector> + +class L10N_DLLPUBLIC HelpSearch { + private: + rtl::OUString d_lang; + rtl::OUString d_indexDir; + + public: + + /** + * @param lang Help files language. + * @param indexDir The directory where the index files are stored. + */ + HelpSearch(rtl::OUString const &lang, rtl::OUString const &indexDir); + + /** + * Query the index for a certain query string. + * @param queryStr The query. + * @param captionOnly Set to true to search in the caption, not the content. + * @param rDocuments Vector to write the paths of the found documents. + * @param rScores Vector to write the scores to. + */ + bool query(rtl::OUString const &queryStr, bool captionOnly, + std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores); +}; + +#endif diff --git a/l10ntools/prj/d.lst b/l10ntools/prj/d.lst index 44cf5f001e14..e9329dc93855 100644 --- a/l10ntools/prj/d.lst +++ b/l10ntools/prj/d.lst @@ -48,6 +48,7 @@ mkdir: %_DEST%\bin\help\com\sun\star\help ..\inc\l10ntools\directory.hxx %_DEST%\inc\l10ntools\directory.hxx ..\inc\l10ntools\file.hxx %_DEST%\inc\l10ntools\file.hxx ..\inc\l10ntools\HelpIndexer.hxx %_DEST%\inc\l10ntools\HelpIndexer.hxx +..\inc\l10ntools\HelpSearch.hxx %_DEST%\inc\l10ntools\HelpSearch.hxx ..\source\filter\merge\FCFGMerge.cfg %_DEST%\inc\l10ntools\FCFGMerge.cfg ..\%__SRC%\lib\transex.lib %_DEST%\lib\transex.lib diff --git a/l10ntools/source/help/HelpIndexer.cxx b/l10ntools/source/help/HelpIndexer.cxx index b54814a41895..793348b2b2fa 100644 --- a/l10ntools/source/help/HelpIndexer.cxx +++ b/l10ntools/source/help/HelpIndexer.cxx @@ -1,4 +1,5 @@ #include <l10ntools/HelpIndexer.hxx> +#include "LuceneHelper.hxx" #define TODO @@ -100,22 +101,6 @@ bool HelpIndexer::scanForFiles(rtl::OUString const & path) { return true; } -std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr) -{ - //UTF-16 - if (sizeof(wchar_t) == sizeof(sal_Unicode)) - return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength()); - - //UTF-32 - std::vector<TCHAR> aRet; - for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength();) - { - const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex); - aRet.push_back(nCode); - } - return aRet; -} - bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) { // Add the help path as an indexed, untokenized field. rtl::OUString path = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) + d_module + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + fileName; diff --git a/l10ntools/source/help/HelpSearch.cxx b/l10ntools/source/help/HelpSearch.cxx new file mode 100644 index 000000000000..f50c44eb7cbd --- /dev/null +++ b/l10ntools/source/help/HelpSearch.cxx @@ -0,0 +1,40 @@ +#include <l10ntools/HelpSearch.hxx> +#include "LuceneHelper.hxx" + +#include <iostream> + +HelpSearch::HelpSearch(rtl::OUString const &lang, rtl::OUString const &indexDir) : +d_lang(lang), d_indexDir(indexDir) {} + +bool HelpSearch::query(rtl::OUString const &queryStr, bool captionOnly, + std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores) { + rtl::OString pathStr; + d_indexDir.convertToString(&pathStr, RTL_TEXTENCODING_ASCII_US, 0); + lucene::index::IndexReader *reader = lucene::index::IndexReader::open(pathStr.getStr()); + lucene::search::IndexSearcher searcher(reader); + + TCHAR captionField[] = L"caption"; + TCHAR contentField[] = L"content"; + TCHAR *field = captionOnly ? captionField : contentField; + + bool isWildcard = queryStr[queryStr.getLength() - 1] == L'*'; + std::vector<TCHAR> aQueryStr(OUStringToTCHARVec(queryStr)); + lucene::search::Query *aQuery = (isWildcard ? + (lucene::search::Query*)new lucene::search::WildcardQuery(new lucene::index::Term(field, &aQueryStr[0])) : + (lucene::search::Query*)new lucene::search::TermQuery(new lucene::index::Term(field, &aQueryStr[0]))); + // FIXME: who is responsible for the Term*? + + lucene::search::Hits *hits = searcher.search(aQuery); + for (unsigned i = 0; i < hits->length(); ++i) { + lucene::document::Document &doc = hits->doc(i); // Document* belongs to Hits. + wchar_t const *path = doc.get(L"path"); + rDocuments.push_back(TCHARArrayToOUString(path != 0 ? path : L"")); + rScores.push_back(hits->score(i)); + } + + delete hits; + delete aQuery; + + reader->close(); + return true; +} diff --git a/l10ntools/source/help/LuceneHelper.cxx b/l10ntools/source/help/LuceneHelper.cxx new file mode 100644 index 000000000000..a88542f93009 --- /dev/null +++ b/l10ntools/source/help/LuceneHelper.cxx @@ -0,0 +1,33 @@ +#include "LuceneHelper.hxx" + +std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr) +{ + //UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength() + 1); + + //UTF-32 + std::vector<TCHAR> aRet; + for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength() + 1; ) + { + const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex); + aRet.push_back(nCode); + } + return aRet; +} + +inline unsigned tstrlen(TCHAR const *str) { + unsigned i; + for (i = 0; str[i] != 0; ++i) {} + return i; +} + +rtl::OUString TCHARArrayToOUString(TCHAR const *str) +{ + // UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return rtl::OUString((sal_Unicode*) str); + + // UTF-32 + return rtl::OUString((char*) str, tstrlen(str), RTL_TEXTENCODING_UCS4); +} diff --git a/l10ntools/source/help/LuceneHelper.hxx b/l10ntools/source/help/LuceneHelper.hxx new file mode 100644 index 000000000000..7591b8ca0760 --- /dev/null +++ b/l10ntools/source/help/LuceneHelper.hxx @@ -0,0 +1,13 @@ +#ifndef LUCENEHELPER_HXX +#define LUCENEHELPER_HXX + +#include <CLucene/StdHeader.h> +#include <CLucene.h> + +#include <rtl/ustring.hxx> +#include <vector> + +std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr); +rtl::OUString TCHARArrayToOUString(TCHAR const *str); + +#endif diff --git a/l10ntools/source/help/makefile.mk b/l10ntools/source/help/makefile.mk index 2ae32329d546..a466e2c9fc24 100644 --- a/l10ntools/source/help/makefile.mk +++ b/l10ntools/source/help/makefile.mk @@ -56,12 +56,16 @@ OBJFILES=\ $(OBJ)$/HelpLinker.obj \ $(OBJ)$/HelpCompiler.obj \ $(OBJ)$/HelpIndexer.obj \ - $(OBJ)$/HelpIndexer_main.obj + $(OBJ)$/HelpIndexer_main.obj \ + $(OBJ)$/HelpSearch.obj \ + $(OBJ)$/LuceneHelper.obj SLOFILES=\ $(SLO)$/HelpLinker.obj \ $(SLO)$/HelpCompiler.obj \ - $(SLO)$/HelpIndexer.obj + $(SLO)$/LuceneHelper.obj \ + $(SLO)$/HelpIndexer.obj \ + $(SLO)$/HelpSearch.obj .IF "$(OS)" == "MACOSX" && "$(CPU)" == "P" && "$(COM)" == "GCC" # There appears to be a GCC 4.0.1 optimization error causing _file:good() to @@ -85,6 +89,7 @@ APP1STDLIBS+=$(SALLIB) $(BERKELEYLIB) $(XSLTLIB) $(EXPATASCII3RDLIB) APP2TARGET=HelpIndexer APP2OBJS=\ + $(OBJ)$/LuceneHelper.obj \ $(OBJ)$/HelpIndexer.obj \ $(OBJ)$/HelpIndexer_main.obj APP2RPATH = NONE |