diff options
author | Andreas Bille <abi@openoffice.org> | 2001-05-08 11:02:45 +0000 |
---|---|---|
committer | Andreas Bille <abi@openoffice.org> | 2001-05-08 11:02:45 +0000 |
commit | b62e52a1289f4c4f14f517958fdc09151c4ac20c (patch) | |
tree | c81347382ce60c5fc9307439a0fd7b0a766ff2f8 /xmlhelp/source/cxxhelp/qe/XmlIndex.cxx | |
parent | 45436f7242a4890c1849077544e2405f4b67945d (diff) |
Initial revision
XmlSearch query engine C++ version
Diffstat (limited to 'xmlhelp/source/cxxhelp/qe/XmlIndex.cxx')
-rw-r--r-- | xmlhelp/source/cxxhelp/qe/XmlIndex.cxx | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx new file mode 100644 index 000000000000..f96497db15c3 --- /dev/null +++ b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx @@ -0,0 +1,327 @@ +/************************************************************************* + * + * $RCSfile: XmlIndex.cxx,v $ + * + * $Revision: 1.1 $ + * + * last change: $Author: abi $ $Date: 2001-05-08 12:02:45 $ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ +#ifndef _XMLSEARCH_QE_XMLINDEX_HXX_ +#include <qe/XmlIndex.hxx> +#endif +#ifndef _XMLSEARCH_QE_DOCGENERATOR_HXX_ +#include <qe/DocGenerator.hxx> +#endif +#ifndef _XMLSEARCH_UTIL_CONCEPTLIST_HXX_ +#include <util/ConceptList.hxx> +#endif +#ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_ +#include <util/RandomAccessStream.hxx> +#endif +#ifndef _XMLSEARCH_UTIL_DECOMPRESSOR_HXX_ +#include <util/Decompressor.hxx> +#endif + + +using namespace xmlsearch; +using namespace xmlsearch::qe; + + +// extern sal_Int32 getInteger_( const sal_Int8* ); + + +XmlIndex::XmlIndex( const rtl::OUString& indexDir ) + : indexAccessor_( indexDir ), + dict_( 0 ), + documents_( 0 ), + concepts_( 0 ), + allLists_( 0 ), + allListsL_( 0 ), + positionsL_( 0 ), + positions_( 0 ), + contextsDataL_( 0 ), + contextsData_( 0 ), + contextTables_( 0 ) +{ + // reading DOCS + { + allListsL_ = indexAccessor_.readByteArray( allLists_, + rtl::OUString::createFromAscii("DOCS") ); // reading DOCS + } + + // reading CONTEXTS + { + contextsDataL_ = indexAccessor_.readByteArray( contextsData_, + rtl::OUString::createFromAscii("CONTEXTS") ); // reading CONTEXTS + } + + // reading POSITIONS + { + positionsFile_ = indexAccessor_.getStream( rtl::OUString::createFromAscii( "POSITIONS" ), + rtl::OUString::createFromAscii( "r" ) ); + + //!!! temporary: better than fixed large value, worse than 'intelligent' size mgt + if( allInCache_ = true ) // yes, intended + { + reset(); + positions_ = new sal_Int8[ positionsL_ = positionsFile_->length() ]; + positionsFile_->readBytes( positions_,positionsL_ ); + } + } + + + // reading DOCS.TAB + { + util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "DOCS.TAB" ), + rtl::OUString::createFromAscii( "r" ) ); + sal_Int8 a[4]; + a[0] = a[1] = a[2] = 0; + in->readBytes( &a[3],1 ); + sal_Int32 k1 = ::getInteger_( a ); + util::StreamDecompressor sddocs( in ); + sddocs.ascDecode( k1,concepts_ ); + in->readBytes( &a[3],1 ); + sal_Int32 k2 = ::getInteger_( a ); + offsets_.push_back( 0 ); + util::StreamDecompressor sdoffsets( in ); + sdoffsets.ascDecode( k2,offsets_ ); + delete in; + +// int a; +// for( a = 0; a < offsets_.size(); ++a ) +// cout << "concepts_[" << a << "] = " << concepts_[a] << endl; +// for( a = 0; a < offsets_.size(); ++a ) +// cout << "offsets_[" << a << "] = " << offsets_[a] << endl; + } + + // reading OFFSETS + { + util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "OFFSETS" ), + rtl::OUString::createFromAscii( "r" ) ); + sal_Int8 a[4]; + a[0] = a[1] = a[2] = 0; + in->readBytes( &a[3],1 ); + sal_Int32 k1 = ::getInteger_( a ); + util::StreamDecompressor sddocs( in ); + sddocs.decode( k1,documents_ ); + in->readBytes( &a[3],1 ); + sal_Int32 k2 = ::getInteger_( a ); + util::StreamDecompressor sdoffsets( in ); + sdoffsets.ascDecode( k2,microIndexOffsets_ ); + in->readBytes( &a[3],1 ); + sal_Int32 k3 = ::getInteger_( a ); + util::StreamDecompressor sdtitles( in ); + sdtitles.decode( k3,titles_ ); + + in->readBytes( &a[3],1 ); + sal_Int32 k4 = ::getInteger_( a ); + // contextsOffsets_ = new IntegerArray(_documents.cardinality() + 1); + util::StreamDecompressor co(in); + // _contextsOffsets.add(0); // first, trivial offset + co.ascDecode( k4,contextsOffsets_ ); + + delete in; + } + + // Hard coding linknames ( object serialization is hard to undo ) + { + linkNames_ = new rtl::OUString[ linkNamesL_ = 8 ]; + linkNames_[0] = rtl::OUString::createFromAscii( "help:link" ); + linkNames_[1] = rtl::OUString::createFromAscii( "help:help-text" ); + linkNames_[2] = rtl::OUString::createFromAscii( "text:p" ); + linkNames_[3] = rtl::OUString::createFromAscii( "text:span" ); + linkNames_[4] = rtl::OUString::createFromAscii( "headingheading" ); + linkNames_[5] = rtl::OUString::createFromAscii( "office:body" ); + linkNames_[6] = rtl::OUString::createFromAscii( "help:to-be-embedded" ); + linkNames_[7] = rtl::OUString::createFromAscii( "office:document" ); + } + + + { + contextTables_ = new ContextTables( contextsOffsets_, + contextsDataL_,contextsData_, + linkNamesL_,linkNames_ ); + } +} + + +XmlIndex::~XmlIndex() +{ + delete[] allLists_; + delete[] contextsData_; + delete[] linkNames_; + delete[] positions_; + delete positionsFile_; + delete contextTables_; +} + + + +void XmlIndex::reset() +{ + maxDocNumberInCache_ = ( allInCache_ ? ( microIndexOffsets_.size() - 1 ) : sal_Int32( -1 ) ); +} + + +sal_Int32 binarySearch( const std::vector<sal_Int32>& arr,sal_Int32 value ) +{ + sal_Int32 i = 0, j = arr.size(), k; + while (i <= j) + if (arr[k = (i + j)/2] < value) + i = k + 1; + else if (value < arr[k]) + j = k - 1; + else + return k; + return -1; +} + + +NonnegativeIntegerGenerator* XmlIndex::getDocumentIterator( sal_Int32 concept ) +{ +// #ifdef ABIDEBUG +// cout << concept << endl; +// #endif + + sal_Int32 index = binarySearch( concepts_,concept ); + +#ifdef ABIDEBUG +// cout << index << " " << allListsL_ << " " << allLists_ << endl; + +// for( int i = 0; i < allListsL_; ++i ) +// cout << "_allList[" << i << "] = " << sal_Int32( allLists_[i] ) << endl; + +// for( int i = 0; i < offsets_.size(); ++i ) +// cout << "offsets[" << i << "] = " << offsets_[i] << endl; +#endif + + if( index >= 0 ) + return new util::ConceptList( allLists_,allListsL_,offsets_[index] ); + else + return 0; +} + + +bool XmlIndex::occursInText( sal_Int32 concept ) +{ + return binarySearch( concepts_,concept) >= 0; +} + + +sal_Int8* XmlIndex::getPositions( sal_Int32& len,sal_Int32 docNo ) throw( excep::XmlSearchException ) +{ + contextTables_->setMicroindex( docNo ); + if( docNo > maxDocNumberInCache_ ) + readMicroindexes( docNo ); + + len = positionsL_; + return positions_; +} + + +rtl::OUString XmlIndex::documentName( sal_Int32 docNumber ) throw( excep::XmlSearchException ) +{ + if( docNumber < 0 || documents_.size() <= sal_uInt32( docNumber ) ) + { + rtl::OUString message = rtl::OUString::createFromAscii( "XmlIndex::documentName -> " ); + throw excep::XmlSearchException( message ); + } + + return dict_.fetch( documents_[ docNumber ] ); +} + + + + +void XmlIndex::readMicroindexes( sal_Int32 docNo ) throw( xmlsearch::excep::IOException ) +{ + currentBatchOffset_ = microIndexOffsets_[docNo]; + sal_Int32 offsetLimit = currentBatchOffset_ + positionsL_; + sal_Int32 upTo = 0, nextDoc = docNo; + sal_Int32 lastOffset = 0; + + do + { + if( ++nextDoc == sal_Int32( microIndexOffsets_.size() ) ) + lastOffset = sal_Int32( positionsFile_->length() ); + else if( microIndexOffsets_[ nextDoc ] > offsetLimit ) + lastOffset = microIndexOffsets_[ nextDoc ]; + } + while( lastOffset == 0 ); + + if( lastOffset > offsetLimit ) + { + upTo = microIndexOffsets_[ nextDoc - 1 ]; + maxDocNumberInCache_ = nextDoc - 2; + } + else + { + upTo = lastOffset; + maxDocNumberInCache_ = nextDoc - 1; + } + + if( maxDocNumberInCache_ < docNo ) + { // cache too small + // for current microindex + // System.out.println("expanding cache to " + _positionsCacheSize); + delete[] positions_; + positions_ = new sal_Int8[ positionsL_ = lastOffset - currentBatchOffset_ ]; + readMicroindexes( docNo ); + return; + } + + positionsFile_->seek( currentBatchOffset_ ); + positionsFile_->readBytes( positions_,upTo - currentBatchOffset_ ); +} |