1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
#include <l10ntools/HelpIndexer.hxx>
#include "LuceneHelper.hxx"
#include <CLucene/analysis/LanguageBasedAnalyzer.h>
#include <rtl/string.hxx>
#include <osl/file.hxx>
#include <algorithm>
using namespace lucene::document;
/**
 * Creates an indexer for one help module in one language.
 *
 * The constructor only copies its arguments into the corresponding members;
 * the error message and the set of discovered files start out empty.
 *
 * @param lang        language tag (the part before the first '-' selects the analyzer)
 * @param module      module name, used as part of each document's "path" field
 * @param captionDir  directory holding the caption files
 * @param contentDir  directory holding the content files
 * @param indexDir    directory passed to the Lucene index writer
 */
HelpIndexer::HelpIndexer(
    rtl::OUString const &lang,
    rtl::OUString const &module,
    rtl::OUString const &captionDir,
    rtl::OUString const &contentDir,
    rtl::OUString const &indexDir)
    : d_lang(lang)
    , d_module(module)
    , d_captionDir(captionDir)
    , d_contentDir(contentDir)
    , d_indexDir(indexDir)
    , d_error()
    , d_files()
{
}
/**
 * Builds the Lucene index for every help file found by scanForFiles().
 *
 * Steps: collect the file names, choose an analyzer from the language,
 * create a fresh index in d_indexDir, add one document per file and finally
 * optimize the index.
 *
 * @return true on success; false if scanForFiles() or helpDocument()
 *         reports a failure.
 */
bool HelpIndexer::indexDocuments() {
    if (!scanForFiles()) {
        return false;
    }

    // Only the part of the language tag before the first '-' matters here.
    rtl::OUString sLang = d_lang.getToken(0, '-');
    bool bUseCJK =
        sLang.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("ja")) ||
        sLang.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("ko")) ||
        sLang.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("zh"));

    // Construct the analyzer appropriate for the given language
    lucene::analysis::Analyzer *analyzer;
    if (bUseCJK) {
        analyzer = new lucene::analysis::LanguageBasedAnalyzer(L"cjk");
    } else {
        analyzer = new lucene::analysis::standard::StandardAnalyzer();
    }

    rtl::OString indexDirStr;
    d_indexDir.convertToString(&indexDirStr, RTL_TEXTENCODING_ASCII_US, 0);

    bool bSuccess = true;
    {
        // The writer only borrows the analyzer pointer, so it lives in this
        // inner scope: it is destroyed (and the index closed) before the
        // analyzer is deleted below, never after.
        lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer, true);

        // Index the identified help files
        Document doc;
        for (std::set<rtl::OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
            doc.clear();
            if (!helpDocument(*i, &doc)) {
                bSuccess = false;
                break;
            }
            writer.addDocument(&doc);
        }

        // Optimize the index
        if (bSuccess) {
            writer.optimize();
        }
    }

    delete analyzer;
    return bSuccess;
}
/**
 * Gives access to the stored error text.
 *
 * @return reference to d_error; it is empty unless a step of the indexer
 *         has recorded a message.
 */
rtl::OUString const & HelpIndexer::getErrorMessage() {
    return this->d_error;
}
/**
 * Collects the help file names from both source directories.
 *
 * The content directory is scanned first; the caption directory is scanned
 * only if the first scan reported success.
 *
 * @return true only if both scans reported success.
 */
bool HelpIndexer::scanForFiles() {
    return scanForFiles(d_contentDir) && scanForFiles(d_captionDir);
}
/**
 * Adds the name of every regular file directly inside @p path to d_files.
 *
 * @param path  directory to scan (passed unchanged to osl::Directory)
 * @return true if the directory could be opened and was scanned; false if
 *         it could not be opened, in which case d_error holds a message.
 */
bool HelpIndexer::scanForFiles(rtl::OUString const & path) {
    osl::Directory dir(path);
    if (osl::FileBase::E_None != dir.open()) {
        d_error = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Error reading directory ")) + path;
        // Report the failure so callers stop and can show d_error.
        return false;
    }

    osl::DirectoryItem item;
    osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
    while (dir.getNextItem(item) == osl::FileBase::E_None) {
        // The status object must be filled for each item before its type
        // and name can be read; skip entries whose status is unavailable.
        if (item.getFileStatus(fileStatus) != osl::FileBase::E_None) {
            continue;
        }
        if (fileStatus.getFileType() == osl::FileStatus::Regular) {
            d_files.insert(fileStatus.getFileName());
        }
    }
    return true;
}
/**
 * Fills @p doc with the three fields that describe one help file.
 *
 *  - "path":    "#HLP#" + module + "/" + fileName, stored and indexed untokenized
 *  - "caption": reader over <captionDir>/<fileName>, tokenized, not stored
 *  - "content": reader over <contentDir>/<fileName>, tokenized, not stored
 *
 * @param fileName  name of the help file, relative to both directories
 * @param doc       document that receives the fields
 * @return always true
 */
bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) {
    const rtl::OUString separator(RTL_CONSTASCII_USTRINGPARAM("/"));

    // Add the help path as an indexed, untokenized field.
    const rtl::OUString helpPath =
        rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) + d_module + separator + fileName;
    std::vector<TCHAR> helpPathChars(OUStringToTCHARVec(helpPath));
    Field *pathField =
        new Field(_T("path"), &helpPathChars[0], Field::STORE_YES | Field::INDEX_UNTOKENIZED);
    doc->add(*pathField);

    // Add the caption as a field.
    // FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
    const rtl::OUString captionFile = d_captionDir + separator + fileName;
    lucene::util::Reader *captionReader = helpFileReader(captionFile);
    Field *captionField =
        new Field(_T("caption"), captionReader, Field::STORE_NO | Field::INDEX_TOKENIZED);
    doc->add(*captionField);

    // Add the content as a field.
    // FIXME: same open question about who owns the reader as for the caption.
    const rtl::OUString contentFile = d_contentDir + separator + fileName;
    lucene::util::Reader *contentReader = helpFileReader(contentFile);
    Field *contentField =
        new Field(_T("content"), contentReader, Field::STORE_NO | Field::INDEX_TOKENIZED);
    doc->add(*contentField);

    return true;
}
/**
 * Creates a Lucene reader for the help file at @p path.
 *
 * The file is first opened through osl::File purely as a readability probe.
 * If that fails, an empty StringReader is returned so the caller still gets
 * a usable (empty) field.
 *
 * @param path  location of the file to read
 * @return newly allocated reader; the caller receives the raw pointer.
 */
lucene::util::Reader *HelpIndexer::helpFileReader(rtl::OUString const & path) {
    osl::File probe(path);
    if (probe.open(osl_File_OpenFlag_Read) != osl::FileBase::E_None) {
        // Unreadable or missing file: index it as empty text.
        return new lucene::util::StringReader(L"");
    }
    probe.close();

    rtl::OString narrowPath;
    path.convertToString(&narrowPath, RTL_TEXTENCODING_ASCII_US, 0); // FIXME: path encoding?
    return new lucene::util::FileReader(narrowPath.getStr(), "UTF-8");
}
|