diff options
Diffstat (limited to 'xmlhelp/source/cxxhelp/qe/XmlIndex.cxx')
-rw-r--r-- | xmlhelp/source/cxxhelp/qe/XmlIndex.cxx | 457 |
1 files changed, 0 insertions, 457 deletions
diff --git a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx deleted file mode 100644 index 707a09f1a42e..000000000000 --- a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx +++ /dev/null @@ -1,457 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: XmlIndex.cxx,v $ - * $Revision: 1.19 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include <osl/diagnose.h> -#include <qe/XmlIndex.hxx> -#include <qe/DocGenerator.hxx> -#include <util/ConceptList.hxx> -#ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_ -#include <util/RandomAccessStream.hxx> -#endif -#include <util/Decompressor.hxx> -#include <qe/Query.hxx> - -using namespace xmlsearch; -using namespace xmlsearch::excep; -using namespace xmlsearch::qe; - - -// extern sal_Int32 getInteger_( const sal_Int8* ); - - -XmlIndex::XmlIndex( const rtl::OUString& indexDir ) - throw( IOException ) - : currentBatchOffset_( 0 ), - maxDocNumberInCache_( -1 ), - indexAccessor_( indexDir ), - dict_( indexAccessor_ ), - contextTables_( 0 ), - allListsL_( 0 ), - allLists_( 0 ), - positionsL_( 0 ), - positions_( 0 ), - contextsDataL_( 0 ), - contextsData_( 0 ), - concepts_( 0 ), - documents_( 0 ) -{ - // reading DOCS - try - { - allListsL_ = indexAccessor_.readByteArray( allLists_, - rtl::OUString::createFromAscii("DOCS") ); // reading DOCS - } - catch( IOException ) - { - OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open DOCS/docs" ); - throw; - } - - // reading CONTEXTS - try - { - contextsDataL_ = indexAccessor_.readByteArray( contextsData_, - rtl::OUString::createFromAscii("CONTEXTS") ); // reading CONTEXTS - } - catch( IOException ) - { - OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open CONTEXTS/contexts" ); - delete[] allLists_; - throw; - } - - // reading POSITIONS - { - positionsFile_ = indexAccessor_.getStream( rtl::OUString::createFromAscii( "POSITIONS" ), - rtl::OUString::createFromAscii( "r" ) ); - - OSL_ENSURE( positionsFile_ != 0, "XmlIndex::XmlIndex -> cannot open POSITIONS/positions" ); - - if( positionsFile_ ) - { - //!!! temporary: better than fixed large value, worse than 'intelligent' size mgt - allInCache_ = true; - if( allInCache_ ) // yes, intended - { - reset(); - positions_ = new sal_Int8[ positionsL_ = positionsFile_->length() ]; - positionsFile_->readBytes( positions_,positionsL_ ); - } - } - else - { - delete[] allLists_; - delete[] contextsData_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no POSITIONS/positions") ); - } - } - - - // reading DOCS.TAB - { - util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "DOCS.TAB" ), - rtl::OUString::createFromAscii( "r" ) ); - - if( in ) - { - sal_Int8 a[4]; - a[0] = a[1] = a[2] = 0; - in->readBytes( &a[3],1 ); - sal_Int32 k1 = ::getInteger_( a ); - util::StreamDecompressor sddocs( in ); - sddocs.ascDecode( k1,concepts_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k2 = ::getInteger_( a ); - offsets_.push_back( 0 ); - util::StreamDecompressor sdoffsets( in ); - sdoffsets.ascDecode( k2,offsets_ ); - delete in; - } - else - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no DOCS.TAB/docs.tab") ); - } - } - - // reading OFFSETS - { - util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "OFFSETS" ), - rtl::OUString::createFromAscii( "r" ) ); - if( in ) - { - sal_Int8 a[4]; - a[0] = a[1] = a[2] = 0; - in->readBytes( &a[3],1 ); - sal_Int32 k1 = ::getInteger_( a ); - util::StreamDecompressor sddocs( in ); - sddocs.decode( k1,documents_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k2 = ::getInteger_( a ); - util::StreamDecompressor sdoffsets( in ); - sdoffsets.ascDecode( k2,microIndexOffsets_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k3 = ::getInteger_( a ); - util::StreamDecompressor sdtitles( in ); - sdtitles.decode( k3,titles_ ); - - in->readBytes( &a[3],1 ); - sal_Int32 k4 = ::getInteger_( a ); - // contextsOffsets_ = new IntegerArray(_documents.cardinality() + 1); - util::StreamDecompressor co(in); - // _contextsOffsets.add(0); // first, trivial offset - co.ascDecode( k4,contextsOffsets_ ); - delete in; - } - else - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no OFFSETS/offsets") ); - } - } - - // reading linknames - { - util::RandomAccessStream* in = - indexAccessor_.getStream( rtl::OUString::createFromAscii( "LINKNAMES" ), - rtl::OUString::createFromAscii( "r" ) ); - if( ! in ) - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( - rtl::OUString::createFromAscii( "BtreeDict::BtreeDict -> no LINKNAMES/linknames" ) ); - } - - sal_Int32 len = in->length(); - char* bff = new char[ 1 + len ], *bff1 = new char[ 1 + len ]; - bff[ len ] = 0; - in->readBytes( reinterpret_cast<sal_Int8*>( bff ),len ); - delete in; - - // Now the buffer must be densified. - int i,len1 = 0; - for( i = 0; i < len; ++i ) - { - if( bff[i] ) - bff1[ len1++ ] = bff[i]; - } - bff1[len1] = 0; - delete[] bff; - rtl::OString aStr( bff1 ); // build a string from the densified buffer; - delete[] bff1; - -// // Now determine the order -// #define NAMECOUNT 16 -// #define UNREACHABLEPLACE 100000; -// /** -// * The available names cannot be determined from LINKNAMES at current, -// * because LINKNAMES is a serialized Java-object -// * Always update LINKNAMES if index.xsl or default.xsl are modified. -// */ -// rtl::OString LN[NAMECOUNT]; -// LN[0] = "text:span"; -// LN[1] = "help:help-text"; -// LN[2] = "help:to-be-embedded"; -// LN[3] = "headingheading"; -// LN[4] = "office:body"; -// LN[5] = "text:p"; -// LN[6] = "office:document"; -// LN[7] = "help:link"; -// LN[8] = "help:key-word"; -// LN[9] = "table:table"; -// LN[10] = "table:table-header-row"; -// LN[11] = "table:table-row"; -// LN[12] = "table:table-cell"; -// LN[13] = "text:unordered-list"; -// LN[14] = "text:ordered-list"; -// LN[15] = "text:list-item"; - // Now determine the order - -#define NAMECOUNT 16 -#define UNREACHABLEPLACE 100000; - /** - * The available names cannot be determined from LINKNAMES at current, - * because LINKNAMES is a serialized Java-object - * Always update LINKNAMES if index.xsl or default.xsl are modified. - */ - - rtl::OString LN[NAMECOUNT]; - LN[0] = "helpdocument"; - LN[1] = "body"; - LN[2] = "title"; - LN[3] = "table"; - LN[4] = "tablecell"; - LN[5] = "tablerow"; - LN[6] = "list"; - LN[7] = "listitem"; - LN[8] = "item"; - LN[9] = "emph"; - LN[10] = "paragraph"; - LN[11] = "section"; - LN[12] = "bookmark"; - LN[13] = "bookmark_value"; - LN[14] = "ahelp"; - LN[15] = "link"; - - // Determine index in file - int idx[NAMECOUNT]; - /*int*/ linkNamesL_ = NAMECOUNT; - for( i = 0; i < NAMECOUNT; ++i ) - if( ( idx[i] = aStr.indexOf( LN[i] ) ) == -1 ) { - idx[i] = UNREACHABLEPLACE; - --linkNamesL_; - } - - linkNames_ = new rtl::OUString[linkNamesL_]; - for( i = 0; i < linkNamesL_; ++i ) { - // TODO what happens to first if we never hit Place? - int first = 0; - int Place = UNREACHABLEPLACE; // This is the defintely last place - for( int j = 0; j < NAMECOUNT; ++j ) - { - if( idx[j] < Place ) - Place = idx[first = j]; - } - idx[first] = UNREACHABLEPLACE; - linkNames_[i] = rtl::OUString( LN[first].getStr(),LN[first].getLength(),RTL_TEXTENCODING_UTF8 ); - } - -#undef NAMECOUNT -#undef UNREACHABLEPLACE - } // end linknames - - - { - contextTables_ = new ContextTables(contextsOffsets_, - contextsDataL_,contextsData_, - linkNamesL_,linkNames_ ); - } -} - - -XmlIndex::~XmlIndex() -{ - delete[] allLists_; - delete[] contextsData_; - delete[] linkNames_; - delete[] positions_; - delete positionsFile_; - delete contextTables_; -} - - - -void XmlIndex::reset() -{ - maxDocNumberInCache_ = allInCache_ ? static_cast<sal_Int32>(microIndexOffsets_.size()) - 1 : -1; -} - - -sal_Int32 binarySearch( const std::vector<sal_Int32>& arr,sal_Int32 value ) -{ - sal_Int32 i = 0, j = arr.size(), k; - while (i <= j) - if (arr[k = (i + j)/2] < value) - i = k + 1; - else if (value < arr[k]) - j = k - 1; - else - return k; - return -1; -} - - -NonnegativeIntegerGenerator* XmlIndex::getDocumentIterator( sal_Int32 concept ) -{ - sal_Int32 index = binarySearch( concepts_,concept ); - - if( index >= 0 ) - return new util::ConceptList( allLists_,allListsL_,offsets_[index] ); - else - return 0; -} - - -bool XmlIndex::occursInText( sal_Int32 concept ) -{ - return binarySearch( concepts_,concept) >= 0; -} - - -sal_Int8* XmlIndex::getPositions( sal_Int32& len,sal_Int32 docNo ) throw( excep::XmlSearchException ) -{ - contextTables_->setMicroindex( docNo ); - if( docNo > maxDocNumberInCache_ ) - readMicroindexes( docNo ); - - len = positionsL_; - return positions_; -} - - -rtl::OUString XmlIndex::documentName( sal_Int32 docNumber ) throw( excep::XmlSearchException ) -{ - if( docNumber < 0 || documents_.size() <= sal_uInt32( docNumber ) ) - { - rtl::OUString message = rtl::OUString::createFromAscii( "XmlIndex::documentName -> " ); - throw excep::XmlSearchException( message ); - } - - return dict_.fetch( documents_[ docNumber ] ); -} - - - - -void XmlIndex::readMicroindexes( sal_Int32 docNo ) throw( xmlsearch::excep::IOException ) -{ - currentBatchOffset_ = microIndexOffsets_[docNo]; - sal_Int32 offsetLimit = currentBatchOffset_ + positionsL_; - sal_Int32 upTo = 0, nextDoc = docNo; - sal_Int32 lastOffset = 0; - - do - { - if( ++nextDoc == sal_Int32( microIndexOffsets_.size() ) ) - lastOffset = sal_Int32( positionsFile_->length() ); - else if( microIndexOffsets_[ nextDoc ] > offsetLimit ) - lastOffset = microIndexOffsets_[ nextDoc ]; - } - while( lastOffset == 0 ); - - if( lastOffset > offsetLimit ) - { - upTo = microIndexOffsets_[ nextDoc - 1 ]; - maxDocNumberInCache_ = nextDoc - 2; - } - else - { - upTo = lastOffset; - maxDocNumberInCache_ = nextDoc - 1; - } - - if( maxDocNumberInCache_ < docNo ) - { // cache too small - // for current microindex - // System.out.println("expanding cache to " + _positionsCacheSize); - delete[] positions_; - positions_ = new sal_Int8[ positionsL_ = lastOffset - currentBatchOffset_ ]; - readMicroindexes( docNo ); - return; - } - - positionsFile_->seek( currentBatchOffset_ ); - positionsFile_->readBytes( positions_,upTo - currentBatchOffset_ ); -} - - -QueryHitData* XmlIndex::hitToData( QueryHit* hit ) -{ - sal_Int32 termsL,matchesL; - sal_Int32 *matches = hit->getMatches( matchesL ); - rtl::OUString *terms = new rtl::OUString[ termsL = matchesL >>/*>*/ 1 ]; - for( sal_Int32 i = 0; i < termsL; ++i ) - { - sal_Int32 aInt = ( i << 1 ); - if( 0 <= aInt && aInt < matchesL ) - { - sal_Int32 match = matches[ aInt ]; - if( match > 0 ) - try - { - terms[i] = fetch( match ); - } - catch( const excep::XmlSearchException& ) - { - } - } - } - - sal_Int32 document = hit->getDocument(); - QueryHitData *res = new QueryHitData( hit->getPenalty(), - documentName( document ), - termsL,terms ); - contextTables_->setMicroindex( document ); - contextTables_->resetContextSearch(); - return res; -} - - |