diff options
author | Andras Timar <atimar@suse.com> | 2012-09-16 16:37:30 +0200 |
---|---|---|
committer | Andras Timar <atimar@suse.com> | 2012-09-16 20:41:26 +0200 |
commit | eaa81cc2f02e9479cc76dba88a291d285046114d (patch) | |
tree | 83e0af2a2c983c55982873e3dafe8cffe1c8a38c /helpcompiler | |
parent | eb5b28c3448af4764592baf432cbeba9d91c3c4a (diff) |
move help compiler/linker/indexer to a new module
we may want to use syntax highlighter class from svtools later,
which is not available for l10ntools
Change-Id: I5a06b77cb6935e3ef68015fb608aa26ac7c53fac
Diffstat (limited to 'helpcompiler')
22 files changed, 2967 insertions, 0 deletions
diff --git a/helpcompiler/Executable_helpindexer.mk b/helpcompiler/Executable_helpindexer.mk new file mode 100644 index 000000000000..f7249b9a66d0 --- /dev/null +++ b/helpcompiler/Executable_helpindexer.mk @@ -0,0 +1,50 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# Major Contributor(s): +# Copyright (C) 2012 David Ostrovsky <d.ostrovsky@gmx.de> (initial developer) +# +# All Rights Reserved. +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. + +$(eval $(call gb_Executable_Executable,HelpIndexer)) + +$(eval $(call gb_Executable_use_package,HelpIndexer,\ + helpcompiler_inc \ +)) + +$(eval $(call gb_Executable_use_libraries,HelpIndexer,\ + sal \ + helplinker \ +)) + +$(eval $(call gb_Executable_use_externals,HelpIndexer,\ + expat_utf8 \ + libxslt \ + libxml2 \ + berkeleydb \ + clucene \ +)) + +$(eval $(call gb_Executable_add_exception_objects,HelpIndexer,\ + helpcompiler/source/HelpIndexer_main \ +)) + +# vim:set shiftwidth=4 softtabstop=4 expandtab: diff --git a/helpcompiler/Executable_helplinker.mk b/helpcompiler/Executable_helplinker.mk new file mode 100644 index 000000000000..4682943c7d4b --- /dev/null +++ b/helpcompiler/Executable_helplinker.mk @@ -0,0 +1,50 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# Major Contributor(s): +# Copyright (C) 2012 David Ostrovsky <d.ostrovsky@gmx.de> (initial developer) +# +# All Rights Reserved. +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. + +$(eval $(call gb_Executable_Executable,HelpLinker)) + +$(eval $(call gb_Executable_use_package,HelpLinker,\ + helpcompiler_inc \ +)) + +$(eval $(call gb_Executable_use_libraries,HelpLinker,\ + sal \ + helplinker \ +)) + +$(eval $(call gb_Executable_use_externals,HelpLinker,\ + expat_utf8 \ + libxslt \ + libxml2 \ + berkeleydb \ + clucene \ +)) + +$(eval $(call gb_Executable_add_exception_objects,HelpLinker,\ + helpcompiler/source/HelpLinker_main \ +)) + +# vim:set shiftwidth=4 softtabstop=4 expandtab: diff --git a/helpcompiler/Library_helplinker.mk b/helpcompiler/Library_helplinker.mk new file mode 100644 index 000000000000..8395dd5a1467 --- /dev/null +++ b/helpcompiler/Library_helplinker.mk @@ -0,0 +1,68 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# Major Contributor(s): +# Copyright (C) 2012 David Ostrovsky <d.ostrovsky@gmx.de> (initial developer) +# +# All Rights Reserved. +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. + +$(eval $(call gb_Library_Library,helplinker)) + +$(eval $(call gb_Library_use_package,helplinker,\ + helpcompiler_inc\ +)) + +$(eval $(call gb_Library_add_defs,helplinker,\ + -DL10N_DLLIMPLEMENTATION \ + -DHELPLINKER_DLLIMPLEMENTATION \ +)) + +$(eval $(call gb_Library_use_libraries,helplinker,\ + sal \ +)) + +$(eval $(call gb_Library_use_externals,helplinker,\ + berkeleydb \ + expat_utf8 \ + libxslt \ + libxml2 \ + clucene \ +)) + +$(eval $(call gb_Library_add_exception_objects,helplinker,\ + helpcompiler/source/HelpCompiler \ + helpcompiler/source/LuceneHelper \ + helpcompiler/source/HelpIndexer \ + helpcompiler/source/HelpSearch \ +)) + +ifeq ($(strip $(OS)$(CPU)$(COM)),MACOSXPGCC) +$(eval $(call gb_Library_add_cxxobjects,helplinker,\ + helpcompiler/source/HelpLinker \ + , $(gb_COMPILERNOOPTFLAGS) $(gb_LinkTarget_EXCEPTIONFLAGS) \ +)) +else +$(eval $(call gb_Library_add_exception_objects,helplinker,\ + helpcompiler/source/HelpLinker \ +)) +endif + +# vim: set noet sw=4 ts=4: diff --git a/helpcompiler/Makefile b/helpcompiler/Makefile new file mode 100644 index 000000000000..ccb1c85a04da --- /dev/null +++ b/helpcompiler/Makefile @@ -0,0 +1,7 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/helpcompiler/Module_helpcompiler.mk b/helpcompiler/Module_helpcompiler.mk new file mode 100644 index 000000000000..f0470395fe84 --- /dev/null +++ b/helpcompiler/Module_helpcompiler.mk @@ -0,0 +1,19 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Module_Module,helpcompiler)) + +$(eval $(call gb_Module_add_targets,helpcompiler,\ + Executable_helpindexer \ + Executable_helplinker \ + Library_helplinker \ + Package_inc \ +)) + +# vim:set shiftwidth=4 softtabstop=4 expandtab: diff --git a/helpcompiler/Package_inc.mk b/helpcompiler/Package_inc.mk new file mode 100644 index 000000000000..b0717b247a53 --- /dev/null +++ b/helpcompiler/Package_inc.mk @@ -0,0 +1,19 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Package_Package,helpcompiler_inc,$(SRCDIR)/helpcompiler)) + +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/dllapi.h,inc/dllapi.h)) +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/compilehelp.hxx,inc/compilehelp.hxx)) +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpCompiler.hxx,inc/HelpCompiler.hxx)) +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpIndexer.hxx,inc/HelpIndexer.hxx)) +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpLinker.hxx,inc/HelpLinker.hxx)) +$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpSearch.hxx,inc/HelpSearch.hxx)) + +# vim: set noet sw=4 ts=4: diff --git a/helpcompiler/inc/HelpCompiler.hxx b/helpcompiler/inc/HelpCompiler.hxx new file mode 100644 index 000000000000..825a55c0330f --- /dev/null +++ b/helpcompiler/inc/HelpCompiler.hxx @@ -0,0 +1,275 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef HELPCOMPILER_HXX +#define HELPCOMPILER_HXX + +#include <string> +#include <boost/unordered_map.hpp> +#include <vector> +#include <list> +#include <fstream> +#include <sstream> +#include <algorithm> +#include <ctype.h> +#ifdef SYSTEM_DB_HEADER +#include SYSTEM_DB_HEADER +#else +#include <berkeleydb/db.h> +#endif + +#include <boost/shared_ptr.hpp> + +#include <libxml/xmlmemory.h> +#include <libxml/debugXML.h> +#include <libxml/HTMLtree.h> +#include <libxml/xmlIO.h> +#include <libxml/xinclude.h> +#include <libxml/catalog.h> + +#include <rtl/ustring.hxx> +#include <osl/thread.h> +#include <osl/process.h> +#include <osl/file.hxx> + +#include <helpcompiler/compilehelp.hxx> + +#if OSL_DEBUG_LEVEL > 2 + #include <iostream> + #define HCDBG(foo) do { if (1) foo; } while(0) +#else + #define HCDBG(foo) do { } while(0) +#endif + +namespace fs +{ + rtl_TextEncoding getThreadTextEncoding( void ); + + enum convert { native }; + class path + { + public: + ::rtl::OUString data; + public: + path() {} + path(const path &rOther) : data(rOther.data) {} + path(const std::string &in, convert) + { + rtl::OUString sWorkingDir; + osl_getProcessWorkingDir(&sWorkingDir.pData); + + rtl::OString tmp(in.c_str()); + rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); + osl::File::getFileURLFromSystemPath(ustrSystemPath, data); + osl::File::getAbsoluteFileURL(sWorkingDir, data, data); + } + path(const std::string &FileURL) + { + rtl::OString tmp(FileURL.c_str()); + data = rtl::OStringToOUString(tmp, getThreadTextEncoding()); + } + std::string native_file_string() const + { + ::rtl::OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(data, ustrSystemPath); + rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding())); + HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl); + return std::string(tmp.getStr()); + } +#ifdef WNT + wchar_t const * native_file_string_w() const + { + ::rtl::OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(data, ustrSystemPath); + return (wchar_t const *) ustrSystemPath.getStr(); + } +#endif + std::string native_directory_string() const { return native_file_string(); } + std::string toUTF8() const + { + rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8)); + return std::string(tmp.getStr()); + } + bool empty() const { return data.isEmpty(); } + path operator/(const std::string &in) const + { + path ret(*this); + HCDBG(std::cerr << "orig was " << + rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); + rtl::OString tmp(in.c_str()); + rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); + ret.data += rtl::OUString(sal_Unicode('/')); + ret.data += ustrSystemPath; + HCDBG(std::cerr << "final is " << + rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); + return ret; + } + void append(const char *in) + { + rtl::OString tmp(in); + rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); + data = data + ustrSystemPath; + } + void append(const std::string &in) { append(in.c_str()); } + }; + + void create_directory(const fs::path indexDirName); + void copy(const fs::path &src, const fs::path &dest); +} + +struct joaat_hash +{ + size_t operator()(const std::string &str) const + { + size_t hash = 0; + const char *key = str.data(); + for (size_t i = 0; i < str.size(); i++) + { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; + } +}; + +#define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\ + +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) ) + +#define pref_hash joaat_hash + +typedef boost::unordered_map<std::string, std::string, pref_hash> Stringtable; +typedef std::list<std::string> LinkedList; +typedef std::vector<std::string> HashSet; + +typedef boost::unordered_map<std::string, LinkedList, pref_hash> Hashtable; + +class StreamTable +{ +public: + std::string document_id; + std::string document_path; + std::string document_module; + std::string document_title; + + HashSet *appl_hidlist; + Hashtable *appl_keywords; + Stringtable *appl_helptexts; + xmlDocPtr appl_doc; + + HashSet *default_hidlist; + Hashtable *default_keywords; + Stringtable *default_helptexts; + xmlDocPtr default_doc; + + StreamTable() : + appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL), + default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL) + {} + void dropdefault() + { + delete default_hidlist; + delete default_keywords; + delete default_helptexts; + if (default_doc) xmlFreeDoc(default_doc); + } + void dropappl() + { + delete appl_hidlist; + delete appl_keywords; + delete appl_helptexts; + if (appl_doc) xmlFreeDoc(appl_doc); + } + ~StreamTable() + { + dropappl(); + dropdefault(); + } +}; + +struct HelpProcessingException +{ + HelpProcessingErrorClass m_eErrorClass; + std::string m_aErrorMsg; + std::string m_aXMLParsingFile; + int m_nXMLParsingLine; + + HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg ) + : m_eErrorClass( eErrorClass ) + , m_aErrorMsg( aErrorMsg ) + , m_nXMLParsingLine( 0 ) + {} + HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine ) + : m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR ) + , m_aErrorMsg( aErrorMsg ) + , m_aXMLParsingFile( aXMLParsingFile ) + , m_nXMLParsingLine( nXMLParsingLine ) + {} +}; + +class HelpCompiler +{ +public: + HelpCompiler(StreamTable &streamTable, + const fs::path &in_inputFile, + const fs::path &in_src, + const fs::path &in_resEmbStylesheet, + const std::string &in_module, + const std::string &in_lang, + bool in_bExtensionMode); + bool compile( void ) throw (HelpProcessingException); + void addEntryToJarFile(const std::string &prefix, + const std::string &entryName, const std::string &bytesToAdd); + void addEntryToJarFile(const std::string &prefix, + const std::string &entryName, const HashSet &bytesToAdd); + void addEntryToJarFile(const std::string &prefix, + const std::string &entryName, const Stringtable &bytesToAdd); + void addEntryToJarFile(const std::string &prefix, + const std::string &entryName, const Hashtable &bytesToAdd); +private: + xmlDocPtr getSourceDocument(const fs::path &filePath); + xmlNodePtr clone(xmlNodePtr node, const std::string& appl); + StreamTable &streamTable; + const fs::path inputFile, src; + const std::string module, lang; + const fs::path resEmbStylesheet; + bool bExtensionMode; + std::string gui; +}; + +inline char tocharlower(char c) +{ + return static_cast<char>(tolower(c)); +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/HelpIndexer.hxx b/helpcompiler/inc/HelpIndexer.hxx new file mode 100644 index 000000000000..0a2fb9421719 --- /dev/null +++ b/helpcompiler/inc/HelpIndexer.hxx @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#ifndef HELPINDEXER_HXX +#define HELPINDEXER_HXX + +#include <helpcompiler/dllapi.h> + +#include <rtl/ustring.hxx> +#include <set> + +// I assume that TCHAR is defined as wchar_t throughout + +namespace lucene +{ +namespace document +{ +class Document; +} +namespace util +{ +class Reader; +} +} + +class L10N_DLLPUBLIC HelpIndexer { + private: + rtl::OUString d_lang; + rtl::OUString d_module; + rtl::OUString d_captionDir; + rtl::OUString d_contentDir; + rtl::OUString d_indexDir; + rtl::OUString d_error; + std::set<rtl::OUString> d_files; + + public: + + /** + * @param lang Help files language. + * @param module The module of the helpfiles. + * @param srcDir The help directory to index + * @param outDir The directory to write the "module".idxl directory to + */ + HelpIndexer(rtl::OUString const &lang, rtl::OUString const &module, + rtl::OUString const &srcDir, rtl::OUString const &outDir); + + /** + * Run the indexer. + * @return true if index successfully generated. + */ + bool indexDocuments(); + + /** + * Get the error string (empty if no error occurred). + */ + rtl::OUString const & getErrorMessage(); + + private: + + /** + * Scan the caption & contents directories for help files. + */ + bool scanForFiles(); + + /** + * Scan for files in the given directory. + */ + bool scanForFiles(rtl::OUString const &path); + + /** + * Fill the Document with information on the given help file. + */ + bool helpDocument(rtl::OUString const & fileName, lucene::document::Document *doc); + + /** + * Create a reader for the given file, and create an "empty" reader in case the file doesn't exist. + */ + lucene::util::Reader *helpFileReader(rtl::OUString const & path); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/HelpLinker.hxx b/helpcompiler/inc/HelpLinker.hxx new file mode 100644 index 000000000000..f6b8ddb86040 --- /dev/null +++ b/helpcompiler/inc/HelpLinker.hxx @@ -0,0 +1,104 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef HELPLINKER_HXX +#define HELPLINKER_HXX + +#include <helpcompiler/dllapi.h> +#include <libxslt/transform.h> + +#ifdef AIX +# undef _THREAD_SAFE +#endif + +#define DBHELP_ONLY + +class L10N_DLLPUBLIC IndexerPreProcessor +{ +private: + std::string m_aModuleName; + fs::path m_fsIndexBaseDir; + fs::path m_fsCaptionFilesDirName; + fs::path m_fsContentFilesDirName; + + xsltStylesheetPtr m_xsltStylesheetPtrCaption; + xsltStylesheetPtr m_xsltStylesheetPtrContent; + +public: + IndexerPreProcessor( const std::string& aModuleName, const fs::path& fsIndexBaseDir, + const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet ); + ~IndexerPreProcessor(); + + void processDocument( xmlDocPtr doc, const std::string& EncodedDocPath ); +}; + +class L10N_DLLPUBLIC HelpLinker +{ +public: + void main(std::vector<std::string> &args, + std::string* pExtensionPath = NULL, + std::string* pDestination = NULL, + const rtl::OUString* pOfficeHelpPath = NULL ) + + throw( HelpProcessingException ); + + HelpLinker() + : m_pIndexerPreProcessor(NULL) + {} + ~HelpLinker() + { delete m_pIndexerPreProcessor; } + +private: + Stringtable additionalFiles; + HashSet helpFiles; + fs::path sourceRoot; + fs::path embeddStylesheet; + fs::path idxCaptionStylesheet; + fs::path idxContentStylesheet; + fs::path zipdir; + fs::path outputFile; + std::string extsource; + std::string extdestination; + std::string module; + std::string lang; + std::string extensionPath; + std::string extensionDestination; + bool bExtensionMode; + fs::path indexDirName; + fs::path indexDirParentName; + IndexerPreProcessor* m_pIndexerPreProcessor; + void initIndexerPreProcessor(); + void link() throw( HelpProcessingException ); + void addBookmark( DB* dbBase, FILE* pFile_DBHelp, std::string thishid, + const std::string& fileB, const std::string& anchorB, + const std::string& jarfileB, const std::string& titleB ); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/HelpSearch.hxx b/helpcompiler/inc/HelpSearch.hxx new file mode 100644 index 000000000000..e232b5ad2ff8 --- /dev/null +++ b/helpcompiler/inc/HelpSearch.hxx @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#ifndef HELPSEARCH_HXX +#define HELPSEARCH_HXX + +#include <helpcompiler/dllapi.h> + +#include <rtl/ustring.hxx> +#include <vector> + +class L10N_DLLPUBLIC HelpSearch{ + private: + rtl::OUString d_lang; + rtl::OString d_indexDir; + + public: + + /** + * @param lang Help files language. + * @param indexDir The directory where the index files are stored. + */ + HelpSearch(rtl::OUString const &lang, rtl::OUString const &indexDir); + + /** + * Query the index for a certain query string. + * @param queryStr The query. + * @param captionOnly Set to true to search in the caption, not the content. + * @param rDocuments Vector to write the paths of the found documents. + * @param rScores Vector to write the scores to. + */ + bool query(rtl::OUString const &queryStr, bool captionOnly, + std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/compilehelp.hxx b/helpcompiler/inc/compilehelp.hxx new file mode 100644 index 000000000000..cbac6e6c87b9 --- /dev/null +++ b/helpcompiler/inc/compilehelp.hxx @@ -0,0 +1,71 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef COMPILE_HXX +#define COMPILE_HXX + +#include "sal/types.h" + +#if defined(HELPLINKER_DLLIMPLEMENTATION) +#define HELPLINKER_DLLPUBLIC SAL_DLLPUBLIC_EXPORT +#else +#define HELPLINKER_DLLPUBLIC SAL_DLLPUBLIC_IMPORT +#endif +#define HELPLINKER_DLLPRIVATE SAL_DLLPRIVATE + +#include <rtl/ustring.hxx> + +enum HelpProcessingErrorClass +{ + HELPPROCESSING_NO_ERROR, + HELPPROCESSING_GENERAL_ERROR, // Missing files, options etc. + HELPPROCESSING_INTERNAL_ERROR, // Unexpected problems + HELPPROCESSING_XMLPARSING_ERROR // Errors thrown by libxml +}; + +struct HelpProcessingErrorInfo +{ + HelpProcessingErrorClass m_eErrorClass; + rtl::OUString m_aErrorMsg; + rtl::OUString m_aXMLParsingFile; + sal_Int32 m_nXMLParsingLine; + + HelpProcessingErrorInfo( void ) + : m_eErrorClass( HELPPROCESSING_NO_ERROR ) + , m_nXMLParsingLine( -1 ) + {} + + HelpProcessingErrorInfo& operator=( const struct HelpProcessingException& e ); +}; + + +// Returns true in case of success, false in case of error +HELPLINKER_DLLPUBLIC bool compileExtensionHelp +( + const rtl::OUString& aOfficeHelpPath, + const rtl::OUString& aExtensionName, + const rtl::OUString& aExtensionLanguageRoot, + sal_Int32 nXhpFileCount, const rtl::OUString* pXhpFiles, + const rtl::OUString& aDestination, + HelpProcessingErrorInfo& o_rHelpProcessingErrorInfo +); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/dllapi.h b/helpcompiler/inc/dllapi.h new file mode 100644 index 000000000000..184a590944ed --- /dev/null +++ b/helpcompiler/inc/dllapi.h @@ -0,0 +1,49 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef _L10N_DLLAPI_H +#define _L10N_DLLAPI_H + +#include "sal/config.h" +#include "sal/types.h" + +#if defined L10N_DLLIMPLEMENTATION +#define L10N_DLLPUBLIC SAL_DLLPUBLIC_EXPORT +#else +#define L10N_DLLPUBLIC SAL_DLLPUBLIC_IMPORT +#endif + +#if defined UNX && ! defined MACOS +#define L10N_PLUGIN_PUBLIC L10N_DLLPUBLIC +#else +#define L10N_PLUGIN_PUBLIC SAL_DLLPRIVATE +#endif + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/prj/build.lst b/helpcompiler/prj/build.lst new file mode 100644 index 000000000000..2f74f6096c21 --- /dev/null +++ b/helpcompiler/prj/build.lst @@ -0,0 +1,3 @@ +tr helpcompiler : BERKELEYDB:berkeleydb EXPAT:expat LIBXSLT:libxslt CLUCENE:clucene sal NULL +tr helpcompiler usr1 - all tr_mkout NULL +tr helpcompiler\prj nmake - all tr_prj NULL diff --git a/helpcompiler/prj/d.lst b/helpcompiler/prj/d.lst new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/helpcompiler/prj/d.lst diff --git a/helpcompiler/source/HelpCompiler.cxx b/helpcompiler/source/HelpCompiler.cxx new file mode 100644 index 000000000000..07989e4b953b --- /dev/null +++ b/helpcompiler/source/HelpCompiler.cxx @@ -0,0 +1,461 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +#include <helpcompiler/HelpCompiler.hxx> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <libxslt/xslt.h> +#include <libxslt/xsltInternals.h> +#include <libxslt/transform.h> +#include <libxslt/xsltutils.h> +#include <osl/thread.hxx> + +static void impl_sleep( sal_uInt32 nSec ) +{ + TimeValue aTime; + aTime.Seconds = nSec; + aTime.Nanosec = 0; + + osl::Thread::wait( aTime ); +} + +HelpCompiler::HelpCompiler(StreamTable &in_streamTable, const fs::path &in_inputFile, + const fs::path &in_src, const fs::path &in_resEmbStylesheet, + const std::string &in_module, const std::string &in_lang, bool in_bExtensionMode) + : streamTable(in_streamTable), inputFile(in_inputFile), + src(in_src), module(in_module), lang(in_lang), resEmbStylesheet(in_resEmbStylesheet), + bExtensionMode( in_bExtensionMode ) +{ + xmlKeepBlanksDefaultValue = 0; + char* guitmp = getenv("GUI"); + if (guitmp) + { + gui = (strcmp(guitmp, "UNX") ? gui : "UNIX"); + gui = (strcmp(guitmp, "MAC") ? gui : "MAC"); + gui = (strcmp(guitmp, "WNT") ? gui : "WIN"); + } +} + +xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath) +{ + static const char *params[4 + 1]; + static xsltStylesheetPtr cur = NULL; + + xmlDocPtr res; + if( bExtensionMode ) + { + res = xmlParseFile(filePath.native_file_string().c_str()); + if( !res ){ + impl_sleep( 3 ); + res = xmlParseFile(filePath.native_file_string().c_str()); + } + } + else + { + if (!cur) + { + static std::string fsroot('\'' + src.toUTF8() + '\''); + static std::string esclang('\'' + lang + '\''); + + xmlSubstituteEntitiesDefault(1); + xmlLoadExtDtdDefaultValue = 1; + cur = xsltParseStylesheetFile((const xmlChar *)resEmbStylesheet.native_file_string().c_str()); + + int nbparams = 0; + params[nbparams++] = "Language"; + params[nbparams++] = esclang.c_str(); + params[nbparams++] = "fsroot"; + params[nbparams++] = fsroot.c_str(); + params[nbparams] = NULL; + } + xmlDocPtr doc = xmlParseFile(filePath.native_file_string().c_str()); + if( !doc ) + { + impl_sleep( 3 ); + doc = xmlParseFile(filePath.native_file_string().c_str()); + } + + //???res = xmlParseFile(filePath.native_file_string().c_str()); + + res = xsltApplyStylesheet(cur, doc, params); + xmlFreeDoc(doc); + } + return res; +} + +// returns a node representing the whole stuff compiled for the current +// application. +xmlNodePtr HelpCompiler::clone(xmlNodePtr node, const std::string& appl) +{ + xmlNodePtr root = xmlCopyNode(node, 2); + if (node->xmlChildrenNode) + { + xmlNodePtr list = node->xmlChildrenNode; + while (list) + { + if (strcmp((const char*)list->name, "switchinline") == 0 || strcmp((const char*)list->name, "switch") == 0) + { + std::string tmp=""; + if (strcmp((const char*)xmlGetProp(list, (xmlChar*)"select"), "sys")) + { + tmp = gui; + } + if (strcmp((const char*)xmlGetProp(list, (xmlChar*)"select"), "appl")) + { + tmp = appl; + } + if (tmp.compare("") != 0) + { + bool isCase=false; + xmlNodePtr caseList=list->xmlChildrenNode; + while (caseList) + { + xmlChar *select = xmlGetProp(caseList, (xmlChar*)"select"); + if (select) + { + if (!strcmp((const char*)select, tmp.c_str()) && !isCase) + { + isCase=true; + xmlNodePtr clp = caseList->xmlChildrenNode; + while (clp) + { + xmlAddChild(root, clone(clp, appl)); + clp = clp->next; + } + } + xmlFree(select); + } + else + { + if ((strcmp((const char*)caseList->name, "defaultinline") != 0) && (strcmp((const char*)caseList->name, "default") != 0)) + { + xmlAddChild(root, clone(caseList, appl)); + } + else + { + if (!isCase) + { + xmlNodePtr clp = caseList->xmlChildrenNode; + while (clp) + { + xmlAddChild(root, clone(clp, appl)); + clp = clp->next; + } + } + } + } + caseList = caseList->next; + } + } + } + else + { + xmlAddChild(root, clone(list, appl)); + } + list = list->next; + } + } + return root; +} + +class myparser +{ +public: + std::string documentId; + std::string fileName; + std::string title; + HashSet *hidlist; + Hashtable *keywords; + Stringtable *helptexts; +private: + HashSet extendedHelpText; +public: + myparser(const std::string &indocumentId, const std::string &infileName, + const std::string &intitle) : documentId(indocumentId), fileName(infileName), + title(intitle) + { + hidlist = new HashSet; + keywords = new Hashtable; + helptexts = new Stringtable; + } + void traverse( xmlNodePtr parentNode ); +private: + std::string module; + std::string dump(xmlNodePtr node); +}; + +std::string myparser::dump(xmlNodePtr node) +{ + std::string app; + if (node->xmlChildrenNode) + { + xmlNodePtr list = node->xmlChildrenNode; + while (list) + { + app += dump(list); + list = list->next; + } + } + if (xmlNodeIsText(node)) + { + xmlChar *pContent = xmlNodeGetContent(node); + app += std::string((const char*)pContent); + xmlFree(pContent); + } + return app; +} + +void trim(std::string& str) +{ + std::string::size_type pos = str.find_last_not_of(' '); + if(pos != std::string::npos) + { + str.erase(pos + 1); + pos = str.find_first_not_of(' '); + if(pos != std::string::npos) + str.erase(0, pos); + } + else + str.erase(str.begin(), str.end()); +} + +void myparser::traverse( xmlNodePtr parentNode ) +{ + // traverse all nodes that belong to the parent + xmlNodePtr test ; + for (test = parentNode->xmlChildrenNode; test; test = test->next) + { + if (fileName.empty() && !strcmp((const char*)test->name, "filename")) + { + xmlNodePtr node = test->xmlChildrenNode; + if (xmlNodeIsText(node)) + { + xmlChar *pContent = xmlNodeGetContent(node); + fileName = std::string((const char*)pContent); + xmlFree(pContent); + } + } + else if (title.empty() && !strcmp((const char*)test->name, "title")) + { + title = dump(test); + if (title.empty()) + title = "<notitle>"; + } + else if (!strcmp((const char*)test->name, "bookmark")) + { + xmlChar *branchxml = xmlGetProp(test, (const xmlChar*)"branch"); + xmlChar *idxml = xmlGetProp(test, (const xmlChar*)"id"); + std::string branch((const char*)branchxml); + std::string anchor((const char*)idxml); + xmlFree (branchxml); + xmlFree (idxml); + + std::string hid; + + if (branch.find("hid") == 0) + { + size_t index = branch.find('/'); + if (index != std::string::npos) + { + hid = branch.substr(1 + index); + // one shall serve as a documentId + if (documentId.empty()) + documentId = hid; + extendedHelpText.push_back(hid); + std::string foo = anchor.empty() ? hid : hid + "#" + anchor; + HCDBG(std::cerr << "hid pushback" << foo << std::endl); + hidlist->push_back( anchor.empty() ? hid : hid + "#" + anchor); + } + else + continue; + } + else if (branch.compare("index") == 0) + { + LinkedList ll; + + for (xmlNodePtr nd = test->xmlChildrenNode; nd; nd = nd->next) + { + if (strcmp((const char*)nd->name, "bookmark_value")) + continue; + + std::string embedded; + xmlChar *embeddedxml = xmlGetProp(nd, (const xmlChar*)"embedded"); + if (embeddedxml) + { + embedded = std::string((const char*)embeddedxml); + xmlFree (embeddedxml); + std::transform (embedded.begin(), embedded.end(), + embedded.begin(), tocharlower); + } + + bool isEmbedded = !embedded.empty() && embedded.compare("true") == 0; + if (isEmbedded) + continue; + + std::string keyword = dump(nd); + size_t keywordSem = keyword.find(';'); + if (keywordSem != std::string::npos) + { + std::string tmppre = + keyword.substr(0,keywordSem); + trim(tmppre); + std::string tmppos = + keyword.substr(1+keywordSem); + trim(tmppos); + keyword = tmppre + ";" + tmppos; + } + ll.push_back(keyword); + } + if (!ll.empty()) + (*keywords)[anchor] = ll; + } + else if (branch.compare("contents") == 0) + { + // currently not used + } + } + else if (!strcmp((const char*)test->name, "ahelp")) + { + std::string text = dump(test); + trim(text); + std::string name; + + HashSet::const_iterator aEnd = extendedHelpText.end(); + for (HashSet::const_iterator iter = extendedHelpText.begin(); iter != aEnd; ++iter) + { + name = *iter; + (*helptexts)[name] = text; + } + extendedHelpText.clear(); + } + // traverse children + traverse(test); + } +} + +bool HelpCompiler::compile( void ) throw( HelpProcessingException ) +{ + // we now have the jaroutputstream, which will contain the document. + // now determine the document as a dom tree in variable docResolved + + xmlDocPtr docResolvedOrg = getSourceDocument(inputFile); + + // now add path to the document + // resolve the dom + + if (!docResolvedOrg) + { + impl_sleep( 3 ); + docResolvedOrg = getSourceDocument(inputFile); + if( !docResolvedOrg ) + { + std::stringstream aStrStream; + aStrStream << "ERROR: file not existing: " << inputFile.native_file_string().c_str() << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + } + + std::string documentId; + std::string fileName; + std::string title; + // returns a clone of the document with switch-cases resolved + std::string appl = module.substr(1); + for (unsigned int i = 0; i < appl.length(); ++i) + { + appl[i]=toupper(appl[i]); + } + xmlNodePtr docResolved = clone(xmlDocGetRootElement(docResolvedOrg), appl); + myparser aparser(documentId, fileName, title); + aparser.traverse(docResolved); + documentId = aparser.documentId; + fileName = aparser.fileName; + title = aparser.title; + + HCDBG(std::cerr << documentId << " : " << fileName << " : " << title << std::endl); + + xmlDocPtr docResolvedDoc = xmlCopyDoc(docResolvedOrg, false); + xmlDocSetRootElement(docResolvedDoc, docResolved); + + streamTable.dropappl(); + streamTable.appl_doc = docResolvedDoc; + streamTable.appl_hidlist = aparser.hidlist; + streamTable.appl_helptexts = aparser.helptexts; + streamTable.appl_keywords = aparser.keywords; + + streamTable.document_id = documentId; + streamTable.document_path = fileName; + streamTable.document_title = title; + std::string actMod = module; + + if ( !bExtensionMode && !fileName.empty()) + { + if (fileName.find("/text/") == 0) + { + int len = strlen("/text/"); + actMod = fileName.substr(len); + actMod = actMod.substr(0, actMod.find('/')); + } + } + streamTable.document_module = actMod; + xmlFreeDoc(docResolvedOrg); + return true; +} + +namespace fs +{ + rtl_TextEncoding getThreadTextEncoding( void ) + { + static bool bNeedsInit = true; + static rtl_TextEncoding nThreadTextEncoding; + if( bNeedsInit ) + { + bNeedsInit = false; + nThreadTextEncoding = osl_getThreadTextEncoding(); + } + return nThreadTextEncoding; + } + + void create_directory(const fs::path indexDirName) + { + HCDBG( + std::cerr << "creating " << + rtl::OUStringToOString(indexDirName.data, RTL_TEXTENCODING_UTF8).getStr() + << std::endl + ); + osl::Directory::createPath(indexDirName.data); + } + + void copy(const fs::path &src, const fs::path &dest) + { + osl::File::copy(src.data, dest.data); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx new file mode 100644 index 000000000000..05db910f7e17 --- /dev/null +++ b/helpcompiler/source/HelpIndexer.cxx @@ -0,0 +1,172 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#include <helpcompiler/HelpIndexer.hxx> + +#include <rtl/string.hxx> +#include <rtl/uri.hxx> +#include <rtl/ustrbuf.hxx> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <boost/scoped_ptr.hpp> +#include <algorithm> + +#include "LuceneHelper.hxx" + +using namespace lucene::document; + +HelpIndexer::HelpIndexer(rtl::OUString const &lang, rtl::OUString const &module, + rtl::OUString const &srcDir, rtl::OUString const &outDir) + : d_lang(lang), d_module(module) +{ + d_indexDir = rtl::OUStringBuffer(outDir).append('/'). + append(module).appendAscii(RTL_CONSTASCII_STRINGPARAM(".idxl")).toString(); + d_captionDir = srcDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/caption")); + d_contentDir = srcDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/content")); +} + +bool HelpIndexer::indexDocuments() +{ + if (!scanForFiles()) + return false; + + try + { + rtl::OUString sLang = d_lang.getToken(0, '-'); + bool bUseCJK = sLang == "ja" || sLang == "ko" || sLang == "zh"; + + // Construct the analyzer appropriate for the given language + boost::scoped_ptr<lucene::analysis::Analyzer> analyzer; + if (bUseCJK) + analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk")); + else + analyzer.reset(new lucene::analysis::standard::StandardAnalyzer()); + + rtl::OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath); + + rtl::OString indexDirStr = rtl::OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); + lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer.get(), true); + //Double limit of tokens allowed, otherwise we'll get a too-many-tokens + //exception for ja help. Could alternative ignore the exception and get + //truncated results as per java-Lucene apparently + writer.setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2); + + // Index the identified help files + Document doc; + for (std::set<rtl::OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) { + helpDocument(*i, &doc); + writer.addDocument(&doc); + doc.clear(); + } + writer.optimize(); + + // Optimize the index + writer.optimize(); + } + catch (CLuceneError &e) + { + d_error = rtl::OUString::createFromAscii(e.what()); + return false; + } + + return true; +} + +rtl::OUString const & HelpIndexer::getErrorMessage() { + return d_error; +} + +bool HelpIndexer::scanForFiles() { + if (!scanForFiles(d_contentDir)) { + return false; + } + if (!scanForFiles(d_captionDir)) { + return false; + } + return true; +} + +bool HelpIndexer::scanForFiles(rtl::OUString const & path) { + + osl::Directory dir(path); + if (osl::FileBase::E_None != dir.open()) { + d_error = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Error reading directory ")) + path; + return true; + } + + osl::DirectoryItem item; + osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type); + while (dir.getNextItem(item) == osl::FileBase::E_None) { + item.getFileStatus(fileStatus); + if (fileStatus.getFileType() == osl::FileStatus::Regular) { + d_files.insert(fileStatus.getFileName()); + } + } + + return true; +} + +bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) { + // Add the help path as an indexed, untokenized field. + + rtl::OUString path = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) + + d_module + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + fileName; + std::vector<TCHAR> aPath(OUStringToTCHARVec(path)); + doc->add(*_CLNEW Field(_T("path"), &aPath[0], Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + + rtl::OUString sEscapedFileName = + rtl::Uri::encode(fileName, + rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8); + + // Add the caption as a field. + rtl::OUString captionPath = d_captionDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + sEscapedFileName; + doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED)); + + // Add the content as a field. + rtl::OUString contentPath = d_contentDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + sEscapedFileName; + doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED)); + + return true; +} + +lucene::util::Reader *HelpIndexer::helpFileReader(rtl::OUString const & path) { + osl::File file(path); + if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) { + file.close(); + rtl::OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(path, ustrSystemPath); + rtl::OString pathStr = rtl::OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); + return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8"); + } else { + return _CLNEW lucene::util::StringReader(L""); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpIndexer_main.cxx b/helpcompiler/source/HelpIndexer_main.cxx new file mode 100644 index 000000000000..3fe04021b8d9 --- /dev/null +++ b/helpcompiler/source/HelpIndexer_main.cxx @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#include <helpcompiler/HelpIndexer.hxx> +#include <osl/file.hxx> +#include <osl/process.h> +#include <osl/thread.h> +#include <string> +#include <iostream> + +#include "LuceneHelper.hxx" + +int main(int argc, char **argv) { + const std::string pLang("-lang"); + const std::string pModule("-mod"); + const std::string pDir("-dir"); + + std::string lang; + std::string module; + std::string dir; + + bool error = false; + for (int i = 1; i < argc; ++i) { + if (pLang.compare(argv[i]) == 0) { + if (i + 1 < argc) { + lang = argv[++i]; + } else { + error = true; + } + } else if (pModule.compare(argv[i]) == 0) { + if (i + 1 < argc) { + module = argv[++i]; + } else { + error = true; + } + } else if (pDir.compare(argv[i]) == 0) { + if (i + 1 < argc) { + dir = argv[++i]; + } else { + error = true; + } + } else { + error = true; + } + } + + if (error) { + std::cerr << "Error parsing command-line arguments" << std::endl; + } + + if (error || lang.empty() || module.empty() || dir.empty()) { + std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -dir Dir" << std::endl; + return 1; + } + + std::string captionDir(dir + SAL_PATHDELIMITER + "caption"); + std::string contentDir(dir + SAL_PATHDELIMITER + "content"); + std::string indexDir(dir + SAL_PATHDELIMITER + module + ".idxl"); + + rtl::OUString sDir; + + osl::File::getFileURLFromSystemPath( + rtl::OUString(dir.c_str(), dir.size(), osl_getThreadTextEncoding()), + sDir); + + rtl::OUString cwd; + osl_getProcessWorkingDir(&cwd.pData); + + osl::File::getAbsoluteFileURL(cwd, sDir, sDir); + + HelpIndexer indexer( + rtl::OUString(lang.c_str(), lang.size(), osl_getThreadTextEncoding()), + rtl::OUString(module.c_str(), module.size(), osl_getThreadTextEncoding()), + sDir, sDir); + + if (!indexer.indexDocuments()) { + std::cerr << rtl::OUStringToOString(indexer.getErrorMessage(), osl_getThreadTextEncoding()).getStr() << std::endl; + return 2; + } + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpLinker.cxx b/helpcompiler/source/HelpLinker.cxx new file mode 100644 index 000000000000..242627c0476d --- /dev/null +++ b/helpcompiler/source/HelpLinker.cxx @@ -0,0 +1,1087 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include <helpcompiler/HelpCompiler.hxx> +#include <helpcompiler/HelpLinker.hxx> + +#include <map> + +#include <string.h> +#include <limits.h> + +#include <libxslt/xslt.h> +#include <libxslt/xsltutils.h> +#include <libxslt/functions.h> +#include <libxslt/extensions.h> + +#include <sal/main.h> +#include <sal/types.h> +#include <osl/time.h> +#include <rtl/bootstrap.hxx> + +#include <expat.h> + +IndexerPreProcessor::IndexerPreProcessor + ( const std::string& aModuleName, const fs::path& fsIndexBaseDir, + const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet ) + : m_aModuleName( aModuleName ) + , m_fsIndexBaseDir( fsIndexBaseDir ) +{ + m_fsCaptionFilesDirName = fsIndexBaseDir / "caption"; + fs::create_directory( m_fsCaptionFilesDirName ); + + m_fsContentFilesDirName = fsIndexBaseDir / "content"; + fs::create_directory( m_fsContentFilesDirName ); + + m_xsltStylesheetPtrCaption = xsltParseStylesheetFile + ((const xmlChar *)idxCaptionStylesheet.native_file_string().c_str()); + m_xsltStylesheetPtrContent = xsltParseStylesheetFile + ((const xmlChar *)idxContentStylesheet.native_file_string().c_str()); +} + +IndexerPreProcessor::~IndexerPreProcessor() +{ + if( m_xsltStylesheetPtrCaption ) + xsltFreeStylesheet( m_xsltStylesheetPtrCaption ); + if( m_xsltStylesheetPtrContent ) + xsltFreeStylesheet( m_xsltStylesheetPtrContent ); +} + +std::string getEncodedPath( const std::string& Path ) +{ + rtl::OString aOStr_Path( Path.c_str() ); + rtl::OUString aOUStr_Path( rtl::OStringToOUString + ( aOStr_Path, fs::getThreadTextEncoding() ) ); + rtl::OUString aPathURL; + osl::File::getFileURLFromSystemPath( aOUStr_Path, aPathURL ); + rtl::OString aOStr_PathURL( rtl::OUStringToOString + ( aPathURL, fs::getThreadTextEncoding() ) ); + std::string aStdStr_PathURL( aOStr_PathURL.getStr() ); + return aStdStr_PathURL; +} + +void IndexerPreProcessor::processDocument + ( xmlDocPtr doc, const std::string &EncodedDocPath ) +{ + std::string aStdStr_EncodedDocPathURL = getEncodedPath( EncodedDocPath ); + + if( m_xsltStylesheetPtrCaption ) + { + xmlDocPtr resCaption = xsltApplyStylesheet( m_xsltStylesheetPtrCaption, doc, NULL ); + xmlNodePtr pResNodeCaption = resCaption->xmlChildrenNode; + if( pResNodeCaption ) + { + fs::path fsCaptionPureTextFile_docURL = m_fsCaptionFilesDirName / aStdStr_EncodedDocPathURL; +#ifdef WNT //We need _wfopen to support long file paths on Windows XP + FILE* pFile_docURL = _wfopen( + fsCaptionPureTextFile_docURL.native_file_string_w(), L"w" ); +#else + FILE* pFile_docURL = fopen( + fsCaptionPureTextFile_docURL.native_file_string().c_str(), "w" ); +#endif + if( pFile_docURL ) + { + fprintf( pFile_docURL, "%s\n", pResNodeCaption->content ); + fclose( pFile_docURL ); + } + } + xmlFreeDoc(resCaption); + } + + if( m_xsltStylesheetPtrContent ) + { + xmlDocPtr resContent = xsltApplyStylesheet( m_xsltStylesheetPtrContent, doc, NULL ); + xmlNodePtr pResNodeContent = resContent->xmlChildrenNode; + if( pResNodeContent ) + { + fs::path fsContentPureTextFile_docURL = m_fsContentFilesDirName / aStdStr_EncodedDocPathURL; +#ifdef WNT //We need _wfopen to support long file paths on Windows XP + FILE* pFile_docURL = _wfopen( + fsContentPureTextFile_docURL.native_file_string_w(), L"w" ); +#else + FILE* pFile_docURL = fopen( + fsContentPureTextFile_docURL.native_file_string().c_str(), "w" ); +#endif + if( pFile_docURL ) + { + fprintf( pFile_docURL, "%s\n", pResNodeContent->content ); + fclose( pFile_docURL ); + } + } + xmlFreeDoc(resContent); + } +} + +struct Data +{ + std::vector<std::string> _idList; + typedef std::vector<std::string>::const_iterator cIter; + + void append(const std::string &id) + { + _idList.push_back(id); + } + + std::string getString() const + { + std::string ret; + cIter aEnd = _idList.end(); + for (cIter aIter = _idList.begin(); aIter != aEnd; ++aIter) + ret += *aIter + ";"; + return ret; + } +}; + +void writeKeyValue_DBHelp( FILE* pFile, const std::string& aKeyStr, const std::string& aValueStr ) +{ + if( pFile == NULL ) + return; + char cLF = 10; + unsigned int nKeyLen = aKeyStr.length(); + unsigned int nValueLen = aValueStr.length(); + fprintf( pFile, "%x ", nKeyLen ); + if( nKeyLen > 0 ) + { + if (fwrite( aKeyStr.c_str(), 1, nKeyLen, pFile ) != nKeyLen) + fprintf(stderr, "fwrite to db failed\n"); + } + if (fprintf( pFile, " %x ", nValueLen ) < 0) + fprintf(stderr, "fwrite to db failed\n"); + if( nValueLen > 0 ) + { + if (fwrite( aValueStr.c_str(), 1, nValueLen, pFile ) != nValueLen) + fprintf(stderr, "fwrite to db failed\n"); + } + if (fprintf( pFile, "%c", cLF ) < 0) + fprintf(stderr, "fwrite to db failed\n"); +} + +class HelpKeyword +{ +private: + typedef boost::unordered_map<std::string, Data, pref_hash> DataHashtable; + DataHashtable _hash; + +public: + void insert(const std::string &key, const std::string &id) + { + Data &data = _hash[key]; + data.append(id); + } + + void dump(DB* table) + { + DataHashtable::const_iterator aEnd = _hash.end(); + for (DataHashtable::const_iterator aIter = _hash.begin(); aIter != aEnd; ++aIter) + { + const std::string &keystr = aIter->first; + DBT key; + memset(&key, 0, sizeof(key)); + key.data = const_cast<char*>(keystr.c_str()); + key.size = keystr.length(); + + const Data &data = aIter->second; + std::string str = data.getString(); + DBT value; + memset(&value, 0, sizeof(value)); + value.data = const_cast<char*>(str.c_str()); + value.size = str.length(); + + table->put(table, NULL, &key, &value, 0); + } + } + + void dump_DBHelp( const fs::path& rFileName ) + { +#ifdef WNT //We need _wfopen to support long file paths on Windows XP + FILE* pFile = _wfopen( rFileName.native_file_string_w(), L"wb" ); +#else + FILE* pFile = fopen( rFileName.native_file_string().c_str(), "wb" ); +#endif + if( pFile == NULL ) + return; + + DataHashtable::const_iterator aEnd = _hash.end(); + for (DataHashtable::const_iterator aIter = _hash.begin(); aIter != aEnd; ++aIter) + writeKeyValue_DBHelp( pFile, aIter->first, aIter->second.getString() ); + + fclose( pFile ); + } +}; + +namespace URLEncoder +{ + static std::string encode(const std::string &rIn) + { + const char *good = "!$&'()*+,-.=@_"; + static const char hex[17] = "0123456789ABCDEF"; + + std::string result; + for (size_t i=0; i < rIn.length(); ++i) + { + unsigned char c = rIn[i]; + if (isalnum (c) || strchr (good, c)) + result += c; + else { + result += '%'; + result += hex[c >> 4]; + result += hex[c & 0xf]; + } + } + return result; + } +} + +void HelpLinker::addBookmark( DB* dbBase, FILE* pFile_DBHelp, std::string thishid, + const std::string& fileB, const std::string& anchorB, + const std::string& jarfileB, const std::string& titleB) +{ + HCDBG(std::cerr << "HelpLinker::addBookmark " << thishid << " " << + fileB << " " << anchorB << " " << jarfileB << " " << titleB << std::endl); + + thishid = URLEncoder::encode(thishid); + + DBT key; + memset(&key, 0, sizeof(key)); + key.data = const_cast<char*>(thishid.c_str()); + key.size = thishid.length(); + + int fileLen = fileB.length(); + if (!anchorB.empty()) + fileLen += (1 + anchorB.length()); + int dataLen = 1 + fileLen + 1 + jarfileB.length() + 1 + titleB.length(); + + std::vector<unsigned char> dataB(dataLen); + size_t i = 0; + dataB[i++] = static_cast<unsigned char>(fileLen); + for (size_t j = 0; j < fileB.length(); ++j) + dataB[i++] = static_cast<unsigned char>(fileB[j]); + if (!anchorB.empty()) + { + dataB[i++] = '#'; + for (size_t j = 0; j < anchorB.length(); ++j) + dataB[i++] = anchorB[j]; + } + dataB[i++] = static_cast<unsigned char>(jarfileB.length()); + for (size_t j = 0; j < jarfileB.length(); ++j) + dataB[i++] = jarfileB[j]; + + dataB[i++] = static_cast<unsigned char>(titleB.length()); + for (size_t j = 0; j < titleB.length(); ++j) + dataB[i++] = titleB[j]; + + DBT data; + memset(&data, 0, sizeof(data)); + data.data = &dataB[0]; + data.size = dataB.size(); + + if( dbBase != NULL ) + dbBase->put(dbBase, NULL, &key, &data, 0); + + if( pFile_DBHelp != NULL ) + { + std::string aValueStr( dataB.begin(), dataB.end() ); + writeKeyValue_DBHelp( pFile_DBHelp, thishid, aValueStr ); + } +} + +void HelpLinker::initIndexerPreProcessor() +{ + if( m_pIndexerPreProcessor ) + delete m_pIndexerPreProcessor; + std::string mod = module; + std::transform (mod.begin(), mod.end(), mod.begin(), tocharlower); + m_pIndexerPreProcessor = new IndexerPreProcessor( mod, indexDirParentName, + idxCaptionStylesheet, idxContentStylesheet ); +} + +/** +* +*/ +void HelpLinker::link() throw( HelpProcessingException ) +{ + bool bIndexForExtension = true; + + if( bExtensionMode ) + { + indexDirParentName = extensionDestination; + } + else + { + indexDirParentName = zipdir; + fs::create_directory(indexDirParentName); + } + +#ifdef CMC_DEBUG + std::cerr << "will not delete tmpdir of " << indexDirParentName.native_file_string().c_str() << std::endl; +#endif + + std::string mod = module; + std::transform (mod.begin(), mod.end(), mod.begin(), tocharlower); + + // do the work here + // continue with introduction of the overall process thing into the + // here all hzip files will be worked on + std::string appl = mod; + if (appl[0] == 's') + appl = appl.substr(1); + + bool bUse_ = true; +#ifdef DBHELP_ONLY + if( !bExtensionMode ) + bUse_ = false; +#endif + + DB* helpText(0); +#ifndef DBHELP_ONLY + fs::path helpTextFileName(indexDirParentName / (mod + ".ht")); + db_create(&helpText,0,0); + helpText->open(helpText, NULL, helpTextFileName.native_file_string().c_str(), NULL, DB_BTREE, + DB_CREATE | DB_TRUNCATE, 0644); +#endif + + fs::path helpTextFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".ht_" : ".ht"))); +#ifdef WNT + //We need _wfopen to support long file paths on Windows XP + FILE* pFileHelpText_DBHelp = _wfopen + ( helpTextFileName_DBHelp.native_file_string_w(), L"wb" ); +#else + + FILE* pFileHelpText_DBHelp = fopen + ( helpTextFileName_DBHelp.native_file_string().c_str(), "wb" ); +#endif + DB* dbBase(0); +#ifndef DBHELP_ONLY + fs::path dbBaseFileName(indexDirParentName / (mod + ".db")); + db_create(&dbBase,0,0); + dbBase->open(dbBase, NULL, dbBaseFileName.native_file_string().c_str(), NULL, DB_BTREE, + DB_CREATE | DB_TRUNCATE, 0644); +#endif + + fs::path dbBaseFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".db_" : ".db"))); +#ifdef WNT + //We need _wfopen to support long file paths on Windows XP + FILE* pFileDbBase_DBHelp = _wfopen + ( dbBaseFileName_DBHelp.native_file_string_w(), L"wb" ); +#else + FILE* pFileDbBase_DBHelp = fopen + ( dbBaseFileName_DBHelp.native_file_string().c_str(), "wb" ); +#endif + +#ifndef DBHELP_ONLY + DB* keyWord(0); + fs::path keyWordFileName(indexDirParentName / (mod + ".key")); + db_create(&keyWord,0,0); + keyWord->open(keyWord, NULL, keyWordFileName.native_file_string().c_str(), NULL, DB_BTREE, + DB_CREATE | DB_TRUNCATE, 0644); +#endif + + fs::path keyWordFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".key_" : ".key"))); + + HelpKeyword helpKeyword; + + // catch HelpProcessingException to avoid locking data bases + try + { + + // lastly, initialize the indexBuilder + if ( (!bExtensionMode || bIndexForExtension) && !helpFiles.empty()) + initIndexerPreProcessor(); + + // here we start our loop over the hzip files. + HashSet::iterator end = helpFiles.end(); + for (HashSet::iterator iter = helpFiles.begin(); iter != end; ++iter) + { + // process one file + // streamTable contains the streams in the hzip file + StreamTable streamTable; + const std::string &xhpFileName = *iter; + + if (!bExtensionMode && xhpFileName.rfind(".xhp") != xhpFileName.length()-4) + { + // only work on .xhp - files + SAL_WARN("helpcompiler", + "ERROR: input list entry '" + << xhpFileName + << "' has the wrong extension (only files with extension .xhp " + << "are accepted)"); + + continue; + } + + fs::path langsourceRoot(sourceRoot); + fs::path xhpFile; + + if( bExtensionMode ) + { + // langsourceRoot == sourceRoot for extensions + std::string xhpFileNameComplete( extensionPath ); + xhpFileNameComplete.append( '/' + xhpFileName ); + xhpFile = fs::path( xhpFileNameComplete ); + } + else + { + langsourceRoot.append('/' + lang + '/'); + xhpFile = fs::path(xhpFileName, fs::native); + } + + HelpCompiler hc( streamTable, xhpFile, langsourceRoot, + embeddStylesheet, module, lang, bExtensionMode ); + + HCDBG(std::cerr << "before compile of " << xhpFileName << std::endl); + bool success = hc.compile(); + HCDBG(std::cerr << "after compile of " << xhpFileName << std::endl); + + if (!success && !bExtensionMode) + { + std::stringstream aStrStream; + aStrStream << + "\nERROR: compiling help particle '" + << xhpFileName + << "' for language '" + << lang + << "' failed!"; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + std::string documentPath = streamTable.document_path; + if (documentPath.find("/") == 0) + documentPath = documentPath.substr(1); + + std::string documentJarfile = streamTable.document_module + ".jar"; + + std::string documentTitle = streamTable.document_title; + if (documentTitle.empty()) + documentTitle = "<notitle>"; + + const std::string& fileB = documentPath; + const std::string& jarfileB = documentJarfile; + std::string& titleB = documentTitle; + + // add once this as its own id. + addBookmark(dbBase, pFileDbBase_DBHelp, documentPath, fileB, std::string(), jarfileB, titleB); + + // first the database *.db + // ByteArrayInputStream bais = null; + // ObjectInputStream ois = null; + + const HashSet *hidlist = streamTable.appl_hidlist; + if (!hidlist) + hidlist = streamTable.default_hidlist; + if (hidlist && !hidlist->empty()) + { + // now iterate over all elements of the hidlist + HashSet::const_iterator aEnd = hidlist->end(); + for (HashSet::const_iterator hidListIter = hidlist->begin(); + hidListIter != aEnd; ++hidListIter) + { + std::string thishid = *hidListIter; + + std::string anchorB; + size_t index = thishid.rfind('#'); + if (index != std::string::npos) + { + anchorB = thishid.substr(1 + index); + thishid = thishid.substr(0, index); + } + addBookmark(dbBase, pFileDbBase_DBHelp, thishid, fileB, anchorB, jarfileB, titleB); + } + } + + // now the keywords + const Hashtable *anchorToLL = streamTable.appl_keywords; + if (!anchorToLL) + anchorToLL = streamTable.default_keywords; + if (anchorToLL && !anchorToLL->empty()) + { + std::string fakedHid = URLEncoder::encode(documentPath); + Hashtable::const_iterator aEnd = anchorToLL->end(); + for (Hashtable::const_iterator enumer = anchorToLL->begin(); + enumer != aEnd; ++enumer) + { + const std::string &anchor = enumer->first; + addBookmark(dbBase, pFileDbBase_DBHelp, documentPath, fileB, + anchor, jarfileB, titleB); + std::string totalId = fakedHid + "#" + anchor; + // std::cerr << hzipFileName << std::endl; + const LinkedList& ll = enumer->second; + LinkedList::const_iterator aOtherEnd = ll.end(); + for (LinkedList::const_iterator llIter = ll.begin(); + llIter != aOtherEnd; ++llIter) + { + helpKeyword.insert(*llIter, totalId); + } + } + + } + + // and last the helptexts + const Stringtable *helpTextHash = streamTable.appl_helptexts; + if (!helpTextHash) + helpTextHash = streamTable.default_helptexts; + if (helpTextHash && !helpTextHash->empty()) + { + Stringtable::const_iterator aEnd = helpTextHash->end(); + for (Stringtable::const_iterator helpTextIter = helpTextHash->begin(); + helpTextIter != aEnd; ++helpTextIter) + { + std::string helpTextId = helpTextIter->first; + const std::string& helpTextText = helpTextIter->second; + + helpTextId = URLEncoder::encode(helpTextId); + + DBT keyDbt; + memset(&keyDbt, 0, sizeof(keyDbt)); + keyDbt.data = const_cast<char*>(helpTextId.c_str()); + keyDbt.size = helpTextId.length(); + + DBT textDbt; + memset(&textDbt, 0, sizeof(textDbt)); + textDbt.data = const_cast<char*>(helpTextText.c_str()); + textDbt.size = helpTextText.length(); + + if( helpText != NULL ) + helpText->put(helpText, NULL, &keyDbt, &textDbt, 0); + + if( pFileHelpText_DBHelp != NULL ) + writeKeyValue_DBHelp( pFileHelpText_DBHelp, helpTextId, helpTextText ); + } + } + + //IndexerPreProcessor + if( !bExtensionMode || bIndexForExtension ) + { + // now the indexing + xmlDocPtr document = streamTable.appl_doc; + if (!document) + document = streamTable.default_doc; + if (document) + { + std::string temp = module; + std::transform (temp.begin(), temp.end(), temp.begin(), tocharlower); + m_pIndexerPreProcessor->processDocument(document, URLEncoder::encode(documentPath) ); + } + } + + } // while loop over hzip files ending + + } // try + catch( const HelpProcessingException& ) + { + // catch HelpProcessingException to avoid locking data bases +#ifndef DBHELP_ONLY + helpText->close(helpText, 0); + dbBase->close(dbBase, 0); + keyWord->close(keyWord, 0); +#endif + if( pFileHelpText_DBHelp != NULL ) + fclose( pFileHelpText_DBHelp ); + if( pFileDbBase_DBHelp != NULL ) + fclose( pFileDbBase_DBHelp ); + throw; + } + +#ifndef DBHELP_ONLY + helpText->close(helpText, 0); + dbBase->close(dbBase, 0); + helpKeyword.dump(keyWord); + keyWord->close(keyWord, 0); +#endif + if( pFileHelpText_DBHelp != NULL ) + fclose( pFileHelpText_DBHelp ); + if( pFileDbBase_DBHelp != NULL ) + fclose( pFileDbBase_DBHelp ); + + helpKeyword.dump_DBHelp( keyWordFileName_DBHelp); + + if( !bExtensionMode ) + { + // New index + Stringtable::iterator aEnd = additionalFiles.end(); + for (Stringtable::iterator enumer = additionalFiles.begin(); enumer != aEnd; + ++enumer) + { + const std::string &additionalFileName = enumer->second; + const std::string &additionalFileKey = enumer->first; + + fs::path fsAdditionalFileName( additionalFileName, fs::native ); + HCDBG({ + std::string aNativeStr = fsAdditionalFileName.native_file_string(); + const char* pStr = aNativeStr.c_str(); + std::cerr << pStr << std::endl; + }); + + fs::path fsTargetName( indexDirParentName / additionalFileKey ); + + fs::copy( fsAdditionalFileName, fsTargetName ); + } + } +} + + +void HelpLinker::main( std::vector<std::string> &args, + std::string* pExtensionPath, std::string* pDestination, + const rtl::OUString* pOfficeHelpPath ) + throw( HelpProcessingException ) +{ + bExtensionMode = false; + helpFiles.clear(); + + if ((!args.empty()) && args[0][0] == '@') + { + std::vector<std::string> stringList; + std::ifstream fileReader(args[0].substr(1).c_str()); + + while (fileReader) + { + std::string token; + fileReader >> token; + if (!token.empty()) + stringList.push_back(token); + } + fileReader.close(); + + args = stringList; + } + + size_t i = 0; + bool bSrcOption = false; + while (i < args.size()) + { + if (args[i].compare("-extlangsrc") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "extension source missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + extsource = args[i]; + } + else if (args[i].compare("-extlangdest") == 0) + { + //If this argument is not provided then the location provided in -extsource will + //also be the destination + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "extension destination missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + extdestination = args[i]; + } + else if (args[i].compare("-src") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "sourceroot missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + bSrcOption = true; + sourceRoot = fs::path(args[i], fs::native); + } + else if (args[i].compare("-sty") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "embeddingStylesheet missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + embeddStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-zipdir") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxtemp missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + zipdir = fs::path(args[i], fs::native); + } + else if (args[i].compare("-idxcaption") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxcaption stylesheet missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + idxCaptionStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-idxcontent") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxcontent stylesheet missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + idxContentStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-o") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "outputfilename missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + outputFile = fs::path(args[i], fs::native); + } + else if (args[i].compare("-mod") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "module name missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + module = args[i]; + } + else if (args[i].compare("-lang") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "language name missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + lang = args[i]; + } + else if (args[i].compare("-hid") == 0) + { + ++i; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, "obsolete -hid argument used" ); + } + else if (args[i].compare("-add") == 0) + { + std::string addFile, addFileUnderPath; + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "pathname missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + addFileUnderPath = args[i]; + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "pathname missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + addFile = args[i]; + if (!addFileUnderPath.empty() && !addFile.empty()) + additionalFiles[addFileUnderPath] = addFile; + } + else + helpFiles.push_back(args[i]); + ++i; + } + + //We can be called from the helplinker executable or the extension manager + //In the latter case extsource is not used. + if( (pExtensionPath && pExtensionPath->length() > 0 && pOfficeHelpPath) + || !extsource.empty()) + { + bExtensionMode = true; + if (!extsource.empty()) + { + //called from helplinker.exe, pExtensionPath and pOfficeHelpPath + //should be NULL + sourceRoot = fs::path(extsource, fs::native); + extensionPath = sourceRoot.toUTF8(); + + if (extdestination.empty()) + { + std::stringstream aStrStream; + aStrStream << "-extlangdest is missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + else + { + //Convert from system path to file URL!!! + fs::path p(extdestination, fs::native); + extensionDestination = p.toUTF8(); + } + } + else + { //called from extension manager + extensionPath = *pExtensionPath; + sourceRoot = fs::path(extensionPath); + extensionDestination = *pDestination; + } + //check if -src option was used. This option must not be used + //when extension help is compiled. + if (bSrcOption) + { + std::stringstream aStrStream; + aStrStream << "-src must not be used together with -extsource missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + } + + if (!bExtensionMode && zipdir.empty()) + { + std::stringstream aStrStream; + aStrStream << "no index dir given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + + if ( (!bExtensionMode && idxCaptionStylesheet.empty()) + || (!extsource.empty() && idxCaptionStylesheet.empty()) ) + { + //No extension mode and extension mode using commandline + //!extsource.empty indicates extension mode using commandline + // -idxcaption paramter is required + std::stringstream aStrStream; + aStrStream << "no index caption stylesheet given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + else if ( bExtensionMode && extsource.empty()) + { + //This part is used when compileExtensionHelp is called from the extensions manager. + //If extension help is compiled using helplinker in the build process + rtl::OUString aIdxCaptionPathFileURL( *pOfficeHelpPath ); + aIdxCaptionPathFileURL += rtl::OUString("/idxcaption.xsl"); + + rtl::OString aOStr_IdxCaptionPathFileURL( rtl::OUStringToOString + ( aIdxCaptionPathFileURL, fs::getThreadTextEncoding() ) ); + std::string aStdStr_IdxCaptionPathFileURL( aOStr_IdxCaptionPathFileURL.getStr() ); + + idxCaptionStylesheet = fs::path( aStdStr_IdxCaptionPathFileURL ); + } + + if ( (!bExtensionMode && idxContentStylesheet.empty()) + || (!extsource.empty() && idxContentStylesheet.empty()) ) + { + //No extension mode and extension mode using commandline + //!extsource.empty indicates extension mode using commandline + // -idxcontent paramter is required + std::stringstream aStrStream; + aStrStream << "no index content stylesheet given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + else if ( bExtensionMode && extsource.empty()) + { + //If extension help is compiled using helplinker in the build process + //then -idxcontent must be supplied + //This part is used when compileExtensionHelp is called from the extensions manager. + rtl::OUString aIdxContentPathFileURL( *pOfficeHelpPath ); + aIdxContentPathFileURL += rtl::OUString("/idxcontent.xsl"); + + rtl::OString aOStr_IdxContentPathFileURL( rtl::OUStringToOString + ( aIdxContentPathFileURL, fs::getThreadTextEncoding() ) ); + std::string aStdStr_IdxContentPathFileURL( aOStr_IdxContentPathFileURL.getStr() ); + + idxContentStylesheet = fs::path( aStdStr_IdxContentPathFileURL ); + } + if (!bExtensionMode && embeddStylesheet.empty()) + { + std::stringstream aStrStream; + aStrStream << "no embedding resolving file given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + if (sourceRoot.empty()) + { + std::stringstream aStrStream; + aStrStream << "no sourceroot given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + if (!bExtensionMode && outputFile.empty()) + { + std::stringstream aStrStream; + aStrStream << "no output file given" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + if (module.empty()) + { + std::stringstream aStrStream; + aStrStream << "module missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + if (!bExtensionMode && lang.empty()) + { + std::stringstream aStrStream; + aStrStream << "language missing" << std::endl; + throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() ); + } + link(); +} + +// Variable to set an exception in "C" StructuredXMLErrorFunction +static const HelpProcessingException* GpXMLParsingException = NULL; + +extern "C" void StructuredXMLErrorFunction(void *userData, xmlErrorPtr error) +{ + (void)userData; + (void)error; + + std::string aErrorMsg = error->message; + std::string aXMLParsingFile; + if( error->file != NULL ) + aXMLParsingFile = error->file; + int nXMLParsingLine = error->line; + HelpProcessingException* pException = new HelpProcessingException( aErrorMsg, aXMLParsingFile, nXMLParsingLine ); + GpXMLParsingException = pException; + + // Reset error handler + xmlSetStructuredErrorFunc( NULL, NULL ); +} + +HelpProcessingErrorInfo& HelpProcessingErrorInfo::operator=( const struct HelpProcessingException& e ) +{ + m_eErrorClass = e.m_eErrorClass; + rtl::OString tmpErrorMsg( e.m_aErrorMsg.c_str() ); + m_aErrorMsg = rtl::OStringToOUString( tmpErrorMsg, fs::getThreadTextEncoding() ); + rtl::OString tmpXMLParsingFile( e.m_aXMLParsingFile.c_str() ); + m_aXMLParsingFile = rtl::OStringToOUString( tmpXMLParsingFile, fs::getThreadTextEncoding() ); + m_nXMLParsingLine = e.m_nXMLParsingLine; + return *this; +} + + +// Returns true in case of success, false in case of error +HELPLINKER_DLLPUBLIC bool compileExtensionHelp +( + const rtl::OUString& aOfficeHelpPath, + const rtl::OUString& aExtensionName, + const rtl::OUString& aExtensionLanguageRoot, + sal_Int32 nXhpFileCount, const rtl::OUString* pXhpFiles, + const rtl::OUString& aDestination, + HelpProcessingErrorInfo& o_rHelpProcessingErrorInfo +) +{ + bool bSuccess = true; + + std::vector<std::string> args; + args.reserve(nXhpFileCount + 2); + args.push_back(std::string("-mod")); + rtl::OString aOExtensionName = rtl::OUStringToOString( aExtensionName, fs::getThreadTextEncoding() ); + args.push_back(std::string(aOExtensionName.getStr())); + + for( sal_Int32 iXhp = 0 ; iXhp < nXhpFileCount ; ++iXhp ) + { + rtl::OUString aXhpFile = pXhpFiles[iXhp]; + + rtl::OString aOXhpFile = rtl::OUStringToOString( aXhpFile, fs::getThreadTextEncoding() ); + args.push_back(std::string(aOXhpFile.getStr())); + } + + rtl::OString aOExtensionLanguageRoot = rtl::OUStringToOString( aExtensionLanguageRoot, fs::getThreadTextEncoding() ); + const char* pExtensionPath = aOExtensionLanguageRoot.getStr(); + std::string aStdStrExtensionPath = pExtensionPath; + rtl::OString aODestination = rtl::OUStringToOString(aDestination, fs::getThreadTextEncoding()); + const char* pDestination = aODestination.getStr(); + std::string aStdStrDestination = pDestination; + + // Set error handler + xmlSetStructuredErrorFunc( NULL, (xmlStructuredErrorFunc)StructuredXMLErrorFunction ); + try + { + HelpLinker* pHelpLinker = new HelpLinker(); + pHelpLinker->main( args, &aStdStrExtensionPath, &aStdStrDestination, &aOfficeHelpPath ); + delete pHelpLinker; + } + catch( const HelpProcessingException& e ) + { + if( GpXMLParsingException != NULL ) + { + o_rHelpProcessingErrorInfo = *GpXMLParsingException; + delete GpXMLParsingException; + GpXMLParsingException = NULL; + } + else + { + o_rHelpProcessingErrorInfo = e; + } + bSuccess = false; + } + // Reset error handler + xmlSetStructuredErrorFunc( NULL, NULL ); + + // i83624: Tree files + ::rtl::OUString aTreeFileURL = aExtensionLanguageRoot; + aTreeFileURL += rtl::OUString("/help.tree"); + osl::DirectoryItem aTreeFileItem; + osl::FileBase::RC rcGet = osl::DirectoryItem::get( aTreeFileURL, aTreeFileItem ); + osl::FileStatus aFileStatus( osl_FileStatus_Mask_FileSize ); + if( rcGet == osl::FileBase::E_None && + aTreeFileItem.getFileStatus( aFileStatus ) == osl::FileBase::E_None && + aFileStatus.isValid( osl_FileStatus_Mask_FileSize ) ) + { + sal_uInt64 ret, len = aFileStatus.getFileSize(); + char* s = new char[ int(len) ]; // the buffer to hold the installed files + osl::File aFile( aTreeFileURL ); + aFile.open( osl_File_OpenFlag_Read ); + aFile.read( s, len, ret ); + aFile.close(); + + XML_Parser parser = XML_ParserCreate( 0 ); + XML_Status parsed = XML_Parse( parser, s, int( len ), true ); + + if (XML_STATUS_ERROR == parsed) + { + XML_Error nError = XML_GetErrorCode( parser ); + o_rHelpProcessingErrorInfo.m_eErrorClass = HELPPROCESSING_XMLPARSING_ERROR; + o_rHelpProcessingErrorInfo.m_aErrorMsg = rtl::OUString::createFromAscii( XML_ErrorString( nError ) );; + o_rHelpProcessingErrorInfo.m_aXMLParsingFile = aTreeFileURL; + // CRAHSES!!! o_rHelpProcessingErrorInfo.m_nXMLParsingLine = XML_GetCurrentLineNumber( parser ); + bSuccess = false; + } + + XML_ParserFree( parser ); + delete[] s; + } + + return bSuccess; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpLinker_main.cxx b/helpcompiler/source/HelpLinker_main.cxx new file mode 100644 index 000000000000..1c5ca7e3372f --- /dev/null +++ b/helpcompiler/source/HelpLinker_main.cxx @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#include <helpcompiler/HelpCompiler.hxx> +#include <helpcompiler/HelpLinker.hxx> +#include <iostream> +#include <sal/main.h> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + std::vector<std::string> args; + for (int i = 1; i < argc; ++i) + args.push_back(std::string(argv[i])); + try + { + HelpLinker* pHelpLinker = new HelpLinker(); + pHelpLinker->main( args ); + delete pHelpLinker; + } + catch( const HelpProcessingException& e ) + { + std::cerr << e.m_aErrorMsg; + exit(1); + } + return 0; +} + diff --git a/helpcompiler/source/HelpSearch.cxx b/helpcompiler/source/HelpSearch.cxx new file mode 100644 index 000000000000..40022c22b505 --- /dev/null +++ b/helpcompiler/source/HelpSearch.cxx @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#include <helpcompiler/HelpSearch.hxx> +#include <osl/file.hxx> +#include <osl/thread.hxx> + +#include "LuceneHelper.hxx" + +HelpSearch::HelpSearch(rtl::OUString const &lang, rtl::OUString const &indexDir) + : d_lang(lang) +{ + rtl::OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(indexDir, ustrSystemPath); + d_indexDir = rtl::OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); +} + +bool HelpSearch::query(rtl::OUString const &queryStr, bool captionOnly, + std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores) { + + lucene::index::IndexReader *reader = lucene::index::IndexReader::open(d_indexDir.getStr()); + lucene::search::IndexSearcher searcher(reader); + + TCHAR captionField[] = L"caption"; + TCHAR contentField[] = L"content"; + TCHAR *field = captionOnly ? captionField : contentField; + + bool isWildcard = queryStr[queryStr.getLength() - 1] == L'*'; + std::vector<TCHAR> aQueryStr(OUStringToTCHARVec(queryStr)); + lucene::search::Query *pQuery; + if (isWildcard) + pQuery = _CLNEW lucene::search::WildcardQuery(_CLNEW lucene::index::Term(field, &aQueryStr[0])); + else + pQuery = _CLNEW lucene::search::TermQuery(_CLNEW lucene::index::Term(field, &aQueryStr[0])); + + lucene::search::Hits *hits = searcher.search(pQuery); + for (unsigned i = 0; i < hits->length(); ++i) { + lucene::document::Document &doc = hits->doc(i); // Document* belongs to Hits. + wchar_t const *path = doc.get(L"path"); + rDocuments.push_back(TCHARArrayToOUString(path != 0 ? path : L"")); + rScores.push_back(hits->score(i)); + } + + _CLDELETE(hits); + _CLDELETE(pQuery); + + reader->close(); + _CLDELETE(reader); + + return true; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/LuceneHelper.cxx b/helpcompiler/source/LuceneHelper.cxx new file mode 100644 index 000000000000..bee9090cc2b7 --- /dev/null +++ b/helpcompiler/source/LuceneHelper.cxx @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#include "LuceneHelper.hxx" + +std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr) +{ + //UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength() + 1); + + //UTF-32 + std::vector<TCHAR> aRet; + for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength(); ) + { + const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex); + aRet.push_back(nCode); + } + aRet.push_back(0); + return aRet; +} + +rtl::OUString TCHARArrayToOUString(TCHAR const *str) +{ + // UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return rtl::OUString((const sal_Unicode*)(str)); + + // UTF-32 + return rtl::OUString((const sal_uInt32*)str, wcslen(str)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/LuceneHelper.hxx b/helpcompiler/source/LuceneHelper.hxx new file mode 100644 index 000000000000..a0248f836a1f --- /dev/null +++ b/helpcompiler/source/LuceneHelper.hxx @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> + * (initial developer) + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#ifndef LUCENEHELPER_HXX +#define LUCENEHELPER_HXX + +#if defined _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4068 4263 4264 4266) +#endif + +#if defined(__GNUC__) && defined(HAVE_GCC_VISIBILITY_FEATURE) +# pragma GCC visibility push (default) +#endif + +#include <CLucene.h> +#include <CLucene/analysis/LanguageBasedAnalyzer.h> + +#if defined(__GNUC__) && defined(HAVE_GCC_VISIBILITY_FEATURE) +# pragma GCC visibility pop +#endif + +#if defined _MSC_VER +#pragma warning(pop) +#endif + +#include <rtl/ustring.hxx> +#include <vector> + +std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr); +rtl::OUString TCHARArrayToOUString(TCHAR const *str); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |