path: root/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
author     Andreas Bille <abi@openoffice.org>    2001-05-08 11:02:45 +0000
committer  Andreas Bille <abi@openoffice.org>    2001-05-08 11:02:45 +0000
commit     b62e52a1289f4c4f14f517958fdc09151c4ac20c (patch)
tree       c81347382ce60c5fc9307439a0fd7b0a766ff2f8 /xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
parent     45436f7242a4890c1849077544e2405f4b67945d (diff)
Initial revision
XmlSearch query engine C++ version
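
For orientation, a minimal usage sketch of the public entry points defined in this file. The index directory path, the concept id, and the handling of the returned generator are illustrative assumptions, not part of the commit; the generator's iteration interface is defined elsewhere (DocGenerator.hxx) and is therefore left as a comment.

    #include <qe/XmlIndex.hxx>
    #include <qe/DocGenerator.hxx>

    using namespace xmlsearch;
    using namespace xmlsearch::qe;

    void sketch()
    {
        // Illustrative index location; the real directory is supplied by the help system.
        XmlIndex index( rtl::OUString::createFromAscii( "/path/to/help/index" ) );

        sal_Int32 concept = 42;                 // illustrative concept id
        if( index.occursInText( concept ) )
        {
            // Hit list of all documents containing the concept (0 if the concept is unknown).
            NonnegativeIntegerGenerator* docs = index.getDocumentIterator( concept );
            // ... walk the generator via its NonnegativeIntegerGenerator interface ...
            delete docs;                        // caller ownership assumed
        }

        // Name of document number 0, resolved through the index dictionary.
        rtl::OUString name = index.documentName( 0 );
    }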
Diffstat (limited to 'xmlhelp/source/cxxhelp/qe/XmlIndex.cxx')
-rw-r--r--   xmlhelp/source/cxxhelp/qe/XmlIndex.cxx   327
1 file changed, 327 insertions, 0 deletions
diff --git a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
new file mode 100644
index 000000000000..f96497db15c3
--- /dev/null
+++ b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
@@ -0,0 +1,327 @@
+/*************************************************************************
+ *
+ * $RCSfile: XmlIndex.cxx,v $
+ *
+ * $Revision: 1.1 $
+ *
+ * last change: $Author: abi $ $Date: 2001-05-08 12:02:45 $
+ *
+ * The Contents of this file are made available subject to the terms of
+ * either of the following licenses
+ *
+ * - GNU Lesser General Public License Version 2.1
+ * - Sun Industry Standards Source License Version 1.1
+ *
+ * Sun Microsystems Inc., October, 2000
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2000 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at http://www.openoffice.org/license.html.
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
+ * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2000 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+#ifndef _XMLSEARCH_QE_XMLINDEX_HXX_
+#include <qe/XmlIndex.hxx>
+#endif
+#ifndef _XMLSEARCH_QE_DOCGENERATOR_HXX_
+#include <qe/DocGenerator.hxx>
+#endif
+#ifndef _XMLSEARCH_UTIL_CONCEPTLIST_HXX_
+#include <util/ConceptList.hxx>
+#endif
+#ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_
+#include <util/RandomAccessStream.hxx>
+#endif
+#ifndef _XMLSEARCH_UTIL_DECOMPRESSOR_HXX_
+#include <util/Decompressor.hxx>
+#endif
+
+
+using namespace xmlsearch;
+using namespace xmlsearch::qe;
+
+
+// extern sal_Int32 getInteger_( const sal_Int8* );
+
+
+XmlIndex::XmlIndex( const rtl::OUString& indexDir )
+ : indexAccessor_( indexDir ),
+ dict_( 0 ),
+ documents_( 0 ),
+ concepts_( 0 ),
+ allLists_( 0 ),
+ allListsL_( 0 ),
+ positionsL_( 0 ),
+ positions_( 0 ),
+ contextsDataL_( 0 ),
+ contextsData_( 0 ),
+ contextTables_( 0 )
+{
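+ // The index directory consists of the files DOCS, CONTEXTS, POSITIONS,
+ // DOCS.TAB and OFFSETS; they are read (or opened) in the blocks below.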
+ // reading DOCS
+ {
+ allListsL_ = indexAccessor_.readByteArray( allLists_,
+ rtl::OUString::createFromAscii("DOCS") ); // reading DOCS
+ }
+
+ // reading CONTEXTS
+ {
+ contextsDataL_ = indexAccessor_.readByteArray( contextsData_,
+ rtl::OUString::createFromAscii("CONTEXTS") ); // reading CONTEXTS
+ }
+
+ // reading POSITIONS
+ {
+ positionsFile_ = indexAccessor_.getStream( rtl::OUString::createFromAscii( "POSITIONS" ),
+ rtl::OUString::createFromAscii( "r" ) );
+
+ // !!! temporary: caching the whole POSITIONS file is better than a fixed
+ // large buffer, but worse than 'intelligent' size management
+ if( ( allInCache_ = true ) ) // assignment intended: for now everything is kept in the cache
+ {
+ reset();
+ positions_ = new sal_Int8[ positionsL_ = positionsFile_->length() ];
+ positionsFile_->readBytes( positions_,positionsL_ );
+ }
+ }
+
+
+ // reading DOCS.TAB
+ {
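+ // DOCS.TAB stores two compressed integer lists, each preceded by a one-byte
+ // decompression parameter: the sorted concept ids and, per concept, the
+ // offset of its hit list within the DOCS data read above.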
+ util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "DOCS.TAB" ),
+ rtl::OUString::createFromAscii( "r" ) );
+ sal_Int8 a[4];
+ a[0] = a[1] = a[2] = 0;
+ in->readBytes( &a[3],1 );
+ sal_Int32 k1 = ::getInteger_( a );
+ util::StreamDecompressor sddocs( in );
+ sddocs.ascDecode( k1,concepts_ );
+ in->readBytes( &a[3],1 );
+ sal_Int32 k2 = ::getInteger_( a );
+ offsets_.push_back( 0 );
+ util::StreamDecompressor sdoffsets( in );
+ sdoffsets.ascDecode( k2,offsets_ );
+ delete in;
+
+// int a;
+// for( a = 0; a < offsets_.size(); ++a )
+// cout << "concepts_[" << a << "] = " << concepts_[a] << endl;
+// for( a = 0; a < offsets_.size(); ++a )
+// cout << "offsets_[" << a << "] = " << offsets_[a] << endl;
+ }
+
+ // reading OFFSETS
+ {
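+ // OFFSETS stores four compressed integer lists, each preceded by a one-byte
+ // decompression parameter: per-document name keys (resolved through dict_),
+ // micro-index offsets into POSITIONS, titles, and per-document offsets into
+ // the CONTEXTS data.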
+ util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "OFFSETS" ),
+ rtl::OUString::createFromAscii( "r" ) );
+ sal_Int8 a[4];
+ a[0] = a[1] = a[2] = 0;
+ in->readBytes( &a[3],1 );
+ sal_Int32 k1 = ::getInteger_( a );
+ util::StreamDecompressor sddocs( in );
+ sddocs.decode( k1,documents_ );
+ in->readBytes( &a[3],1 );
+ sal_Int32 k2 = ::getInteger_( a );
+ util::StreamDecompressor sdoffsets( in );
+ sdoffsets.ascDecode( k2,microIndexOffsets_ );
+ in->readBytes( &a[3],1 );
+ sal_Int32 k3 = ::getInteger_( a );
+ util::StreamDecompressor sdtitles( in );
+ sdtitles.decode( k3,titles_ );
+
+ in->readBytes( &a[3],1 );
+ sal_Int32 k4 = ::getInteger_( a );
+ // contextsOffsets_ = new IntegerArray(_documents.cardinality() + 1);
+ util::StreamDecompressor co(in);
+ // _contextsOffsets.add(0); // first, trivial offset
+ co.ascDecode( k4,contextsOffsets_ );
+
+ delete in;
+ }
+
+ // Hard-coded link names ( object serialization is hard to undo )
+ {
+ linkNames_ = new rtl::OUString[ linkNamesL_ = 8 ];
+ linkNames_[0] = rtl::OUString::createFromAscii( "help:link" );
+ linkNames_[1] = rtl::OUString::createFromAscii( "help:help-text" );
+ linkNames_[2] = rtl::OUString::createFromAscii( "text:p" );
+ linkNames_[3] = rtl::OUString::createFromAscii( "text:span" );
+ linkNames_[4] = rtl::OUString::createFromAscii( "headingheading" );
+ linkNames_[5] = rtl::OUString::createFromAscii( "office:body" );
+ linkNames_[6] = rtl::OUString::createFromAscii( "help:to-be-embedded" );
+ linkNames_[7] = rtl::OUString::createFromAscii( "office:document" );
+ }
+
+
+ {
+ contextTables_ = new ContextTables( contextsOffsets_,
+ contextsDataL_,contextsData_,
+ linkNamesL_,linkNames_ );
+ }
+}
+
+
+XmlIndex::~XmlIndex()
+{
+ delete[] allLists_;
+ delete[] contextsData_;
+ delete[] linkNames_;
+ delete[] positions_;
+ delete positionsFile_;
+ delete contextTables_;
+}
+
+
+
+void XmlIndex::reset()
+{
+ maxDocNumberInCache_ = ( allInCache_ ? ( microIndexOffsets_.size() - 1 ) : sal_Int32( -1 ) );
+}
+
+
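+// Binary search over a sorted vector; returns the index of value, or -1 if it is absent.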
+sal_Int32 binarySearch( const std::vector<sal_Int32>& arr,sal_Int32 value )
+{
+ sal_Int32 i = 0, j = sal_Int32( arr.size() ) - 1, k; // upper bound is the last valid index
+ while (i <= j)
+ if (arr[k = (i + j)/2] < value)
+ i = k + 1;
+ else if (value < arr[k])
+ j = k - 1;
+ else
+ return k;
+ return -1;
+}
+
+
+NonnegativeIntegerGenerator* XmlIndex::getDocumentIterator( sal_Int32 concept )
+{
+// #ifdef ABIDEBUG
+// cout << concept << endl;
+// #endif
+
+ sal_Int32 index = binarySearch( concepts_,concept );
+
+#ifdef ABIDEBUG
+// cout << index << " " << allListsL_ << " " << allLists_ << endl;
+
+// for( int i = 0; i < allListsL_; ++i )
+// cout << "_allList[" << i << "] = " << sal_Int32( allLists_[i] ) << endl;
+
+// for( int i = 0; i < offsets_.size(); ++i )
+// cout << "offsets[" << i << "] = " << offsets_[i] << endl;
+#endif
+
+ if( index >= 0 )
+ return new util::ConceptList( allLists_,allListsL_,offsets_[index] );
+ else
+ return 0;
+}
+
+
+bool XmlIndex::occursInText( sal_Int32 concept )
+{
+ return binarySearch( concepts_,concept) >= 0;
+}
+
+
+sal_Int8* XmlIndex::getPositions( sal_Int32& len,sal_Int32 docNo ) throw( excep::XmlSearchException )
+{
+ contextTables_->setMicroindex( docNo );
+ if( docNo > maxDocNumberInCache_ )
+ readMicroindexes( docNo );
+
+ len = positionsL_;
+ return positions_;
+}
+
+
+rtl::OUString XmlIndex::documentName( sal_Int32 docNumber ) throw( excep::XmlSearchException )
+{
+ if( docNumber < 0 || documents_.size() <= sal_uInt32( docNumber ) )
+ {
+ rtl::OUString message = rtl::OUString::createFromAscii( "XmlIndex::documentName -> " );
+ throw excep::XmlSearchException( message );
+ }
+
+ return dict_.fetch( documents_[ docNumber ] );
+}
+
+
+
+
+void XmlIndex::readMicroindexes( sal_Int32 docNo ) throw( xmlsearch::excep::IOException )
+{
+ currentBatchOffset_ = microIndexOffsets_[docNo];
+ sal_Int32 offsetLimit = currentBatchOffset_ + positionsL_;
+ sal_Int32 upTo = 0, nextDoc = docNo;
+ sal_Int32 lastOffset = 0;
+
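+ // Starting at docNo, scan forward for the first micro-index offset that no
+ // longer fits into the positions cache window (or for the end of the file).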
+ do
+ {
+ if( ++nextDoc == sal_Int32( microIndexOffsets_.size() ) )
+ lastOffset = sal_Int32( positionsFile_->length() );
+ else if( microIndexOffsets_[ nextDoc ] > offsetLimit )
+ lastOffset = microIndexOffsets_[ nextDoc ];
+ }
+ while( lastOffset == 0 );
+
+ if( lastOffset > offsetLimit )
+ {
+ upTo = microIndexOffsets_[ nextDoc - 1 ];
+ maxDocNumberInCache_ = nextDoc - 2;
+ }
+ else
+ {
+ upTo = lastOffset;
+ maxDocNumberInCache_ = nextDoc - 1;
+ }
+
+ if( maxDocNumberInCache_ < docNo )
+ { // cache too small
+ // for current microindex
+ // System.out.println("expanding cache to " + _positionsCacheSize);
+ delete[] positions_;
+ positions_ = new sal_Int8[ positionsL_ = lastOffset - currentBatchOffset_ ];
+ readMicroindexes( docNo );
+ return;
+ }
+
+ positionsFile_->seek( currentBatchOffset_ );
+ positionsFile_->readBytes( positions_,upTo - currentBatchOffset_ );
+}