/************************************************************************* * * $RCSfile: BtreeDict.cxx,v $ * * $Revision: 1.1 $ * * last change: $Author: abi $ $Date: 2001-05-08 11:54:54 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses * * - GNU Lesser General Public License Version 2.1 * - Sun Industry Standards Source License Version 1.1 * * Sun Microsystems Inc., October, 2000 * * GNU Lesser General Public License Version 2.1 * ============================================= * Copyright 2000 by Sun Microsystems, Inc. * 901 San Antonio Road, Palo Alto, CA 94303, USA * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * * * Sun Industry Standards Source License Version 1.1 * ================================================= * The contents of this file are subject to the Sun Industry Standards * Source License Version 1.1 (the "License"); You may not use this file * except in compliance with the License. You may obtain a copy of the * License at http://www.openoffice.org/license.html. * * Software provided under this License is provided on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. * See the License for the specific provisions governing your rights and * obligations concerning the Software. * * The Initial Developer of the Original Code is: Sun Microsystems, Inc. * * Copyright: 2000 by Sun Microsystems, Inc. * * All Rights Reserved. * * Contributor(s): _______________________________________ * * ************************************************************************/ #ifdef ABIDEBUG #include #endif #ifndef __SGI_STL_VECTOR #include #endif #ifndef _XMLSEARCH_DB_BTREEDICT_HXX_ #include #endif #ifndef _XMLSEARCH_DB_BLOCK_HXX_ #include #endif #ifndef _XMLSEARCH_DB_BLOCKFACTORY_HXX_ #include #endif #ifndef _XMLSEARCH_DB_BLOCKMANAGER_HXX_ #include #endif #ifndef _XMLSEARCH_DB_DBENV_HXX_ #include #endif #ifndef _XMLEARCH_UTIL_RANDOMACCESSSTREAM_HXX_ #include #endif #ifndef _XMLSEARCH_DB_DBENVIMPL_HXX_ #include #endif extern xmlsearch::util::RandomAccessStream* theFile(); const sal_Int32 xmlsearch::db::BtreeDict::ENTHEADERLEN = 6; const sal_Int32 xmlsearch::db::BtreeDict::BLOCKSIZE = 2048; const sal_Int32 xmlsearch::db::BtreeDict::HEADERLEN = 8; const sal_Int32 xmlsearch::db::BtreeDict::DATALEN = xmlsearch::db::BtreeDict::BLOCKSIZE - xmlsearch::db::BtreeDict::HEADERLEN; const sal_Int32 xmlsearch::db::BtreeDict::nBlocksLimit = 64; const sal_Int32 xmlsearch::db::BtreeDict::MaxKeyLength = 255; const sal_Int32 xmlsearch::db::BtreeDict::lastPtrIndex = 508; //!!! Careful with that number, Eugene namespace xmlsearch { namespace db { class DictBlock : public Block { friend class BtreeDict; public: DictBlock( const DBEnv* dbenv ) : Block( dbenv ) { } sal_Int32 free() const { return getFree() + firstEntry(); } sal_Int32 numberOfEntries() const { return getInteger( 0 ); } sal_Int32 nthPointer( sal_Int32 n ) const { return getInteger( 4*(n+1) ); } sal_Int32 getChildIdx( sal_Int32 index ) const { return nthPointer( BtreeDict::lastPtrIndex - index ); } sal_Int32 entryKeyLength( sal_Int32 i ) const { return getData()[i] & 0xFF; } sal_Int32 entryID( sal_Int32 i ) const { return getInteger(i + 2); } sal_Int32 entryCompression( sal_Int32 i ) const { return getData()[i+1] & 0xFF; } sal_Int32 entryLength( sal_Int32 entry ) const { return BtreeDict::ENTHEADERLEN + entryKeyLength( entry ); } sal_Int32 entryKey( sal_Int32 entry ) const { return entry + BtreeDict::ENTHEADERLEN; } sal_Int32 firstEntry() const { return 4; } sal_Int32 nextEntry( sal_Int32 entry ) const { return entry + entryLength( entry ); } void restoreKeyInBuffer( sal_Int32 entry, sal_Int8* buffer ) const { sal_Int32 howMany = entryKeyLength( entry ); sal_Int32 where = entryCompression( entry ); sal_Int32 from = entryKey( entry ); while( howMany-- > 0 ) buffer[ where++ ] = getData()[ from++ ]; } rtl::OUString restoreKey( sal_Int32 entry, sal_Int8* buffer ) const { sal_Int32 howMany = entryKeyLength( entry ); sal_Int32 where = entryCompression( entry ); sal_Int32 from = entryKey( entry ); while( howMany-- > 0 ) buffer[ where++ ] = getData()[ from++ ]; return rtl::OUString( (sal_Char*)(buffer),where,RTL_TEXTENCODING_UTF8 ); } rtl::OUString findID( sal_Int32 id ) const throw( xmlsearch::excep::XmlSearchException ) { sal_Int8 buffer[ BtreeDict::MaxKeyLength ]; sal_Int32 freeSpace = free(); for( sal_Int32 ent = firstEntry(); ent < freeSpace; ent = nextEntry( ent ) ) if( entryID( ent ) == id ) // found return restoreKey( ent,buffer ); else restoreKeyInBuffer( ent,buffer ); throw xmlsearch::excep::XmlSearchException( rtl::OUString::createFromAscii( "DictBlock::findID -> ID not found in block" ) ); } void withPrefix( const BtreeDict* owner, const rtl::OUString& prefix, sal_Int32 prefLen, std::vector& result) const throw( xmlsearch::excep::XmlSearchException ) { sal_Int8 buffer[ BtreeDict::MaxKeyLength ]; const int freeSpace = free(); int entryPtr = firstEntry(); if( isLeaf() ) while (entryPtr < freeSpace) { if( restoreKey( entryPtr,buffer).compareTo( prefix,prefix.getLength() ) == 0 ) result.push_back( entryID( entryPtr ) ); entryPtr = nextEntry(entryPtr); } else { owner->lock( getNum() ); sal_Int32 entryIndex = 0; while( entryPtr < freeSpace ) { rtl::OUString key = restoreKey( entryPtr,buffer ); if( key.getLength() > prefLen ) key = key.copy( 0,prefLen ); sal_Int32 cmp = key.compareTo(prefix); if( cmp < 0 ) { entryPtr = nextEntry( entryPtr ); ++entryIndex; } else if( cmp == 0 ) { result.push_back( entryID( entryPtr ) ); owner->accessBlock( getChildIdx( entryIndex ) )->withPrefix( owner,prefix,prefLen,result ); entryPtr = nextEntry( entryPtr ); ++entryIndex; } else { owner->unlock( getNum() ); owner->accessBlock( getChildIdx( entryIndex ) )->withPrefix( owner,prefix,prefLen,result ); return; } } owner->unlock( getNum() ); owner->accessBlock( getChildIdx( numberOfEntries() ) )->withPrefix( owner,prefix,prefLen,result ); } } void setBlockNumbers( sal_Int32* blocks ) { for( sal_Int32 e = firstEntry(); e < getFree() ; e = nextEntry(e) ) blocks[ entryID(e) ] = getNum(); } #ifdef ABIDEBUG void listBlock() const { sal_Int8 buffer[ BtreeDict::MaxKeyLength ]; sal_Int32 freeSpace = free(); sal_Int32 entryPtr = firstEntry(); if ( isLeaf() ) while( entryPtr < freeSpace ) { // cout << ( restoreKey( entryPtr,buffer ) + rtl::OUString::createFromAscii(" ") ) // << entryID( entryPtr ) // << endl; entryPtr = nextEntry( entryPtr ); } else cout << "not leaf"; } #endif protected: void doMap( BtreeDict* owner,EntryProcessor* processor ) const throw( xmlsearch::excep::XmlSearchException ) { sal_Int8 buffer[ BtreeDict::MaxKeyLength ]; sal_Int32 freeSpace = free(); sal_Int32 entryPtr = firstEntry(); if( isLeaf() ) while( entryPtr < freeSpace ) { processor->processEntry( restoreKey( entryPtr,buffer ), entryID( entryPtr ) ); entryPtr = nextEntry( entryPtr ); } else { owner->lock( getNum() ); sal_Int8 entryIdx = 0; while( entryPtr < freeSpace ) { owner->accessBlock( getChildIdx( entryIdx ) )->doMap( owner,processor ); processor->processEntry( restoreKey( entryPtr,buffer ), entryID( entryPtr ) ); entryPtr = nextEntry( entryPtr ); ++entryIdx; } owner->accessBlock( getChildIdx( entryIdx ) )->doMap( owner,processor ); owner->unlock( getNum() ); } } }; // end class DictBlock } // end namespace db } // end namespace xmlsearch using namespace xmlsearch; using namespace xmlsearch::db; using namespace xmlsearch::util; class BlockProcessorImpl : public BlockProcessor { public: BlockProcessorImpl( BtreeDict* bla ) : bla_( bla ) { } ~BlockProcessorImpl() { } void process( Block* block ) const { dynamic_cast(block)->setBlockNumbers( bla_->get_blocks() ); } private: BtreeDict* bla_; }; BtreeDict::BtreeDict( DBEnv* dbenv ) : // parameter_( parameter ), blockManager_( dbenv ? dbenv : new DBEnvImpl() ), root_( 2 ), blocks_( new sal_Int32[5664] ) { BlockProcessorImpl blProc( this ); blockManager_.mapBlocks( blProc ); } BtreeDict::~BtreeDict() { delete[] blocks_; } const DictBlock* BtreeDict::accessBlock( sal_Int32 id ) const { return dynamic_cast< const DictBlock* >( blockManager_.accessBlock( id ) ); } void BtreeDict::lock( sal_Int32 blNum ) const throw( excep::IllegalIndexException ) { blockManager_.lock( blNum ); } void BtreeDict::unlock( sal_Int32 blNum ) const throw( excep::IllegalIndexException ) { blockManager_.unlock( blNum ); } bool BtreeDict::isLocked( sal_Int32 blNum ) const throw( excep::IllegalIndexException ) { return blockManager_.isLocked( blNum ); } sal_Int32 BtreeDict::fetch( const rtl::OUString& key ) const throw( excep::XmlSearchException ) { rtl::OString searchString( key.getStr(),key.getLength(),RTL_TEXTENCODING_UTF8 ); return find( accessBlock( root_ ), reinterpret_cast< const sal_Int8* >( searchString.getStr() ), searchString.getLength() ); } rtl::OUString BtreeDict::fetch( sal_Int32 conceptID ) const throw( excep::XmlSearchException ) { return findID( blocks_[conceptID], conceptID ); } std::vector< sal_Int32 > BtreeDict::withPrefix( const rtl::OUString& prefix ) const throw( excep::XmlSearchException ) { std::vector< sal_Int32 > result; accessBlock( root_ )->withPrefix( this,prefix,prefix.getLength(),result ); return result; } sal_Int32 BtreeDict::find( const DictBlock* bl, const sal_Int8* key, sal_Int32 inputKeyLen ) const throw( excep::XmlSearchException ) { sal_Int32 entryPtr = bl->firstEntry(); sal_Int32 freeSpace = bl->free(); sal_Int32 nCharsEqual = 0; sal_Int32 compression = 0; for( sal_Int32 entryIdx = 0; ; ) { if( entryPtr == freeSpace ) return find( bl, key, inputKeyLen, bl->numberOfEntries() ); else if( compression == nCharsEqual ) { sal_Int32 keyLen = bl->entryKeyLength( entryPtr ); sal_Int32 keyPtr = bl->entryKey( entryPtr ), i; for( i = 0; i < keyLen && key[ nCharsEqual ] == bl->getData()[keyPtr + i]; i++ ) ++nCharsEqual; if( i == keyLen ) { if( nCharsEqual == inputKeyLen ) return bl->entryID( entryPtr ); } else if( ( key[ nCharsEqual ] & 0xFF ) < ( bl->getData()[ keyPtr + i ] & 0xFF) ) return find( bl,key,inputKeyLen,entryIdx ); } else if( compression < nCharsEqual ) // compression dropped return find( bl,key,inputKeyLen,entryPtr == freeSpace ? bl->numberOfEntries() : entryIdx ); do { entryPtr = bl->nextEntry( entryPtr ); ++entryIdx; } while( bl->entryCompression( entryPtr ) > nCharsEqual ); compression = bl->entryCompression( entryPtr ); } } sal_Int32 BtreeDict::find( const DictBlock* bl, const sal_Int8* key, sal_Int32 inputKeyLen, sal_Int32 index ) const throw( xmlsearch::excep::XmlSearchException ) { return bl->isLeaf() ? 0 : find( child( bl,index ),key,inputKeyLen ); } const DictBlock* BtreeDict::child( const DictBlock* bl,sal_Int32 index) const throw( excep::XmlSearchException ) { return accessBlock( bl->getChildIdx(index) ); } rtl::OUString BtreeDict::findID( sal_Int32 blNum,sal_Int32 id ) const throw( xmlsearch::excep::XmlSearchException ) { return accessBlock( blNum )->findID( id ); } #ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_ #include #endif void BtreeDict::test() { // DictBlock* block = new DictBlock( DATALEN ); // for( int i = 0; i < theFile()->length()/BLOCKSIZE; ++i ) // { // theFile()->seek( i * BLOCKSIZE ); // block->read( theFile() ); // block->listBlock(); // } // const DictBlock* bla = accessBlock(4); // if( ! bla ) // ; // cout << rtl::OUString::createFromAscii( "zero" ) << endl; // else // bla->listBlock(); } // Definitions for DBEnvImpl DBEnvImpl::DBEnvImpl() { file_ = theFile(); } DBEnvImpl::~DBEnvImpl() { } sal_Int32 DBEnvImpl::getEntryHeaderLen() const { return BtreeDict::ENTHEADERLEN; } sal_Int32 DBEnvImpl::getBlockCount() const { return file_->length() / BtreeDict::BLOCKSIZE; } sal_Int32 DBEnvImpl::getMaximumBlockCount() const { return BtreeDict::nBlocksLimit; } sal_Int32 util::DBEnvImpl::getDataLen() const { return BtreeDict::DATALEN; } sal_Int32 DBEnvImpl::getBlockLen() const { return BtreeDict::BLOCKSIZE; } void DBEnvImpl::read( sal_Int32 blNum,xmlsearch::db::Block*& block ) const { if( ! block ) block = new xmlsearch::db::DictBlock( this ); file_->seek( blNum * getBlockLen() ); block->read( file_ ); // if( block->getNum() != blNum ) // cout << rtl::OUString::createFromAscii( " aua aua " ) << endl; } void DBEnvImpl::write( sal_Int32 blNum,xmlsearch::db::Block* block ) { if( ! block ) return; } void bla() { BtreeDict source( new DBEnvImpl() ); std::vector< sal_Int32 > bla = source.withPrefix( rtl::OUString::createFromAscii( "text" ) ); for( sal_uInt32 i = 0; i < bla.size(); ++i ) ; // cout << bla[i] << endl; // source.test(); } #ifdef ABIDEBUG #include void blu() { rtl::OUString indexDir = rtl::OUString::createFromAscii( "//./home/ab106281/work/index" ); qe::QueryProcessor proc( indexDir ); vector< rtl::OUString > bla(1); bla[ 0 ] = rtl::OUString::createFromAscii( "text" ); rtl::OUString scope = rtl::OUString::createFromAscii( "headingheading" ); sal_Int32 nHits = 100; qe::QueryStatement statement( nHits, bla, scope ); proc.processQuery( statement ); } #endif