From 6c9ddd803f8944af59778da38460cfe6cda3d32d Mon Sep 17 00:00:00 2001 From: Oliver Bolte Date: Fri, 13 Feb 2009 08:59:55 +0000 Subject: CWS-TOOLING: integrate CWS ab66 2009-01-26 13:58:36 +0100 ab r266922 : #i98368# Fixed extension path handling for images 2009-01-08 13:04:55 +0100 ab r265997 : #i95412# Added missing dtor 2009-01-08 11:56:40 +0100 ab r265991 : #i95412# Applied patch --- xmlhelp/source/cxxhelp/qe/ConceptData.cxx | 105 ---- xmlhelp/source/cxxhelp/qe/ContextTables.cxx | 572 --------------------- xmlhelp/source/cxxhelp/qe/DocGenerator.cxx | 559 +++------------------ xmlhelp/source/cxxhelp/qe/Query.cxx | 393 --------------- xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx | 169 ------- xmlhelp/source/cxxhelp/qe/Search.cxx | 720 --------------------------- xmlhelp/source/cxxhelp/qe/XmlIndex.cxx | 457 ----------------- xmlhelp/source/cxxhelp/qe/makefile.mk | 9 +- 8 files changed, 65 insertions(+), 2919 deletions(-) delete mode 100644 xmlhelp/source/cxxhelp/qe/ConceptData.cxx delete mode 100644 xmlhelp/source/cxxhelp/qe/ContextTables.cxx delete mode 100644 xmlhelp/source/cxxhelp/qe/Query.cxx delete mode 100644 xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx delete mode 100644 xmlhelp/source/cxxhelp/qe/Search.cxx delete mode 100644 xmlhelp/source/cxxhelp/qe/XmlIndex.cxx (limited to 'xmlhelp/source/cxxhelp/qe') diff --git a/xmlhelp/source/cxxhelp/qe/ConceptData.cxx b/xmlhelp/source/cxxhelp/qe/ConceptData.cxx deleted file mode 100644 index 83399d370c03..000000000000 --- a/xmlhelp/source/cxxhelp/qe/ConceptData.cxx +++ /dev/null @@ -1,105 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: ConceptData.cxx,v $ - * $Revision: 1.8 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include -#include -#include - - -using namespace xmlsearch::qe; - - -const sal_Int32 ConceptData::ProxPerTerm = 10; - - -ConceptData::ConceptData( sal_Int32 id, - sal_Int32 role, - double score, - sal_Int32 queryNo, - sal_Int32 nColumns, - ContextTables* contextTables ) - : queryNo_( sal_uInt8( queryNo & 0xF ) ), - nColumns_( sal_uInt8( nColumns & 0xF ) ), - role_( sal_uInt8( role & 0xF ) ), - concept_( id ), - proximity_( nColumns * ProxPerTerm ), - penalty_( score ), - m_nRefcount( 0 ), - ctx_( contextTables ), - next_( 0 ) -{ -} - - -ConceptData::~ConceptData() -{ -} - - -void ConceptData::runBy( std::vector< Query* >& queries ) -{ - rtl::Reference< ConceptData > cd( this ); - do - { - Query* query = queries[ cd->queryNo_ ]; - query->updateEstimate( cd->role_,cd->penalty_ ); - } - while( (cd = cd->next_).is() ); -} - - -void ConceptData::addLast( ConceptData* r ) -{ - if( next_.is() ) - next_->addLast( r ); - else - next_ = r; -} - - -void ConceptData::generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos ) -{ - if( array[ queryNo_ ] != RoleFiller::STOP() ) // not 'prohibited' - { - sal_Int32 wcl = ctx_->wordContextLin( pos ); - roleFillers_.push_back( new RoleFiller( nColumns_, - this, - role_, - pos, - wcl, - pos + proximity_ ) ); - roleFillers_.back()->use( array, queryNo_ ); - } - // !!! maybe eliminate tail recursion - if( next_.is() ) - next_->generateFillers( array,pos ); -} diff --git a/xmlhelp/source/cxxhelp/qe/ContextTables.cxx b/xmlhelp/source/cxxhelp/qe/ContextTables.cxx deleted file mode 100644 index 139348aeb153..000000000000 --- a/xmlhelp/source/cxxhelp/qe/ContextTables.cxx +++ /dev/null @@ -1,572 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: ContextTables.cxx,v $ - * $Revision: 1.10 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" - -#ifndef _rtl_MEMORY_H_ -#include -#endif -#include -#ifndef _XMLSEARCH_UTIL_BYTEARRAYDECOMPRESSOR_HXX_ -#include -#endif - -using namespace xmlsearch; -using namespace xmlsearch::qe; - - - -Tables::Tables( ContextTables* p ) - : initialWordsCached_( new sal_Int32[ initialWordsCachedL_ = p->initialWordsL_ ] ), - destsCached_( new sal_Int32[ destsCachedL_ = p->destsL_ ] ), - linkTypesCached_( new sal_Int32[ linkTypesCachedL_ = p->linkTypesL_ ] ), - seqNumbersCached_( new sal_Int32[ seqNumbersCachedL_ = p->seqNumbersL_ ] ) -{ - rtl_copyMemory( (void*)initialWordsCached_, - (void*)p->initialWords_, - sizeof(sal_Int32) * p->initialWordsL_ ); - - rtl_copyMemory( (void*)destsCached_, - (void*)p->dests_, - sizeof(sal_Int32) * p->destsL_ ); - - rtl_copyMemory( (void*)linkTypesCached_, - (void*)p->linkTypes_, - sizeof(sal_Int32) * p->linkTypesL_ ); - - rtl_copyMemory( (void*)seqNumbersCached_, - (void*)p->seqNumbers_, - sizeof(sal_Int32) * p->seqNumbersL_ ); -} - - - -Tables::~Tables() -{ - delete[] seqNumbersCached_; - delete[] linkTypesCached_; - delete[] destsCached_; - delete[] initialWordsCached_; -} - - - -void Tables::setTables( ContextTables* p ) -{ - delete[] p->initialWords_; - p->initialWordsL_ = initialWordsCachedL_; - p->initialWords_ = initialWordsCached_; - initialWordsCached_ = 0; - - delete[] p->dests_; - p->destsL_ = destsCachedL_; - p->dests_ = destsCached_; - destsCached_ = 0; - - delete[] p->linkTypes_; - p->linkTypesL_ = linkTypesCachedL_; - p->linkTypes_ = linkTypesCached_; - linkTypesCached_ = 0; - - delete[] p->seqNumbers_; - p->seqNumbersL_ = seqNumbersCachedL_; - p->seqNumbers_ = seqNumbersCached_; - seqNumbersCached_ = 0; - - p->nTextNodes_ = initialWordsCachedL_; -} - - - - -ContextTables::ContextTables( const std::vector< sal_Int32 >& offsets, - sal_Int32 contextDataL,sal_Int8 *contextData, - sal_Int32 linkNamesL,rtl::OUString *linkNames ) - : lastDocNo_( -1 ), - initialWordsL_( 0 ), - destsL_( 0 ), - linkTypesL_( 0 ), - seqNumbersL_( 0 ), - markersL_( 0 ), - initialWords_( 0 ), - dests_( 0 ), - linkTypes_( 0 ), - seqNumbers_( 0 ), - markers_( 0 ), - contextDataL_( contextDataL ), - contextData_( contextData ), - linkNamesL_( linkNamesL ), - linkNames_( linkNames ), - cache_( offsets.size() ), - kTable_( 5 ), - auxArray_( 4096 ), - offsets_( offsets ) -{ - for( sal_uInt32 i = 0; i < offsets_.size(); ++i ) - cache_[i] = 0; -} - - - -ContextTables::~ContextTables() -{ - delete[] markers_; - delete[] seqNumbers_; - delete[] linkTypes_; - delete[] dests_; - delete[] initialWords_; - - for( sal_uInt32 i = 0; i < cache_.size(); ++i ) - delete cache_[i]; -} - - - -void ContextTables::setMicroindex( sal_Int32 docNo ) throw( excep::XmlSearchException ) -{ - if( docNo != lastDocNo_ ) - { // check if we need to do anything - if( cache_[ docNo ] ) - cache_[ docNo ]->setTables( this ); - else - { - sal_Int32 offset = offsets_[ docNo ]; - sal_Int32 k0 = contextData_[ offset ] & 0xFF; - util::ByteArrayDecompressor compr( contextDataL_,contextData_,offset + 1 ); - kTable_.clear(); - compr.decode( k0,kTable_ ); - // decompress initialWords into auxiliary array - auxArray_.clear(); - compr.ascDecode( kTable_[0],auxArray_ ); // _initialWords - - delete[] initialWords_; - initialWords_ = new sal_Int32[ initialWordsL_ = auxArray_.size() ]; - sal_Int32 k; - for( k = 0; k < initialWordsL_; ++k ) //?opt - initialWords_[k] = auxArray_[k]; - - nTextNodes_ = initialWordsL_; - // decompress destinations into auxiliary array - auxArray_.clear(); - compr.decode( kTable_[1],auxArray_ ); // _dests - auxArray_.push_back( -1 ); // sentinel, root - - delete[] dests_; - dests_ = new sal_Int32[ destsL_ = auxArray_.size() ]; - for( k = 0; k < destsL_; ++k ) //?opt - dests_[k] = auxArray_[k]; - - delete[] linkTypes_; - linkTypes_ = new sal_Int32[ linkTypesL_ = destsL_ - nTextNodes_ - 1 ]; - compr.decode( kTable_[2],linkTypes_ ); - - delete[] seqNumbers_; - seqNumbers_ = new sal_Int32[ seqNumbersL_ = destsL_ - 1 ]; - compr.decode( kTable_[ 3 ],seqNumbers_ ); - - cache_[docNo] = new Tables( this ); - } - - lastDocNo_ = docNo; - delete[] markers_; - markers_ = new sal_Int32[ markersL_ = destsL_ ]; - } - initialWordsIndex_ = 0; -} - - - -sal_Int32 ContextTables::linkCode( const rtl::OUString& linkName_ ) -{ - for( sal_Int32 i = 0; i < linkNamesL_; ++i ) - if( linkName_ == linkNames_[i] ) - return i; - return -1; // when not found -} - - -bool* ContextTables::getIgnoredElementsSet( sal_Int32& len, - const sal_Int32 ignoredElementsL, - const rtl::OUString* ignoredElements ) -{ - bool *result = 0; - if( ignoredElements && ignoredElementsL > 0 ) - { - for( sal_Int32 i = 0; i < ignoredElementsL; ++i ) - { - sal_Int32 code = linkCode( ignoredElements[i] ); - if( code > -1 ) - { - if( ! result ) - result = new bool[ len = linkNamesL_ ]; - - result[ code ] = true; - } - } - } - return result; -} - - - -bool ContextTables::notIgnored( sal_Int32 ctx, - sal_Int32 ignoredElementsL,bool* ignoredElements ) -{ - (void)ignoredElementsL; - - do - { - if( ignoredElements[ linkTypes_[ ctx ] ] ) - return false; - } - while( ( ctx = dests_[ ctx ] ) > -1 ); // parentContext 'hand inlined' - return true; -} - - -/* - * starting with ctx and going up the ancestry tree look for the first - * context with the given linkCode - */ - -sal_Int32 ContextTables::firstParentWithCode( const sal_Int32 pos,const sal_Int32 linkCode_ ) -{ - sal_Int32 ctx = dests_[ wordContextLin(pos) ]; // first parent of text node - const sal_Int32 shift = nTextNodes_; - const sal_Int32 limit = destsL_ - 1; - while( linkTypes_[ ctx - shift ] != linkCode_ ) - if( ( ctx = dests_[ ctx ] ) == limit ) - return -1; - return ctx; -} - - -void ContextTables::resetContextSearch() -{ - initialWordsIndex_ = 0; -} - - -sal_Int32 ContextTables::wordContextLin(sal_Int32 wordNumber) -{ - for (sal_Int32 i = initialWordsIndex_; i < nTextNodes_; ++i ) - if (initialWords_[i] > wordNumber) - { // first such i - // - 1 if wordNumbers can be the same - initialWordsIndex_ = i; // cached to speed up next search - return i - 1; - } - return nTextNodes_ - 1; -} - - -// void ContextTables::appendSegment( sal_Int32 context,rtl::OUStringBuffer& result ) -// { -// result.append( context < nTextNodes_ ? -// rtl::OUString::createFromAscii( "text()" ) : -// linkNames_[ linkTypes_[ context - nTextNodes_ ] ] ); -// result.append(sal_Unicode( '[' ) ); -// result.append( seqNumbers_[ context ] ); -// result.append(sal_Unicode( "]/" ) ); -// } - - -// /* -// * XPath (forking) location of the hit -// */ - -// void ContextTables::hitLocation( sal_Int32 termsL,rtl::OUString* terms, -// sal_Int32 matchesL,sal_Int32* matches, -// StringBuffer& result ) -// { -// const sal_Int32 N = termsL; -// std::vector< sal_Int32 > stacks( N ); -// sal_Int32* wordNumbers = new sal_Int32[N]; -// std::vector< sal_Int32 > stack; -// sal_Int32 lastInitialWordIndex = -1; -// sal_Int32 pattern = 0,context = 0,nPopped = 0,pathStart = 0,pathEnd = 0; -// for( sal_Int32 i = 0,marker = 1; i < N; i++,marker <<= 1 ) -// if ( terms[i] ) -// { -// const sal_Int32 wordNumber = matches[i*2 + 1]; -// const sal_Int32 initialWordIndex = findIndexBin(wordNumber); -// wordNumbers[i] = wordNumber - initialWords_[initialWordIndex] + 1; -// if( initialWordIndex == lastInitialWordIndex ) // save work -// ; // do nothing, path will be reused -// else -// { -// pattern |= marker; -// stack = stacks[i] = new IntegerArray(); - -// context = initialWordIndex; -// do -// { -// const sal_Int32 parent = dests_[context]; -// if( parent != -1 ) -// { -// stack.add( context ); -// markers_[context] |= marker; -// context = parent; -// } -// else -// break; -// } -// while( true ); -// lastInitialWordIndex = initialWordIndex; -// } -// } - -// // find and output common path -// // process first non-missing match - -// sal_Int32 i = 0, marker = 1, nMissing = 0; - -// // find first non-missing matching term -// // there has to be at least one if the hit was built -// // count potential leading missing terms to output appropriate elements -// // before outputting elements for matches - -// for ( ; i < N; i++,marker <<= 1 ) -// if (terms[i] != null) -// { -// result.append( rtl::OUString::createFromAscii( " 0) -// { -// context = stack.popLast(); -// if ( markers_[context] == pattern ) -// { -// markers_[context] = 0; -// appendSegment( context,result ); // belongs to common -// context = -1; // used -// ++nPopped; -// } -// else -// break; -// } -// // end of 'matches' && common path -// result.append("\">"); -// // output elements for any leading missingTerms -// while (--nMissing >= 0) -// result.append(""); - -// result.append(" 0 ) -// { -// context = stack.popLast(); -// appendSegment(context, result); -// markers_[context] = 0; -// } - -// pathEnd = result.length(); - -// result.append("\" tokenNumber=\""); -// result.append(wordNumbers[i]); -// result.append("]\"/>"); - -// break; // just the first non-zero -// } -// else -// ++nMissing; // only count leading missing terms - -// // process the remaining matches -// for (i++, marker <<= 1 ; i < N; i++, marker <<= 1) -// if (terms[i] != null) { -// result.append(" 0) { -// context = stack.popLast(); -// appendSegment(context, result); -// _markers[context] = 0; -// } -// pathEnd = result.length(); -// } -// result.append("\" tokenNumber=\""); -// result.append(wordNumbers[i]); -// result.append("]\"/>"); -// } -// else -// result.append(""); -// result.append(""); -// } - - -// /* -// * QueryHitData is initialized in the caller -// * this function fills the commonPath for all matching terms -// * and relative paths for the individual terms -// */ - -// void ContextTables::hitLocation(String[] terms, sal_Int32[] matches, QueryHitData data) { -// StringBuffer buffer = new StringBuffer(512); -// const sal_Int32 N = terms.length; -// IntegerArray[] stacks = new IntegerArray[N]; -// sal_Int32[] wordNumbers = new sal_Int32[N]; -// IntegerArray stack; -// sal_Int32 lastInitialWordIndex = -1; -// sal_Int32 pattern = 0, nPopped = 0, pathStart = 0, pathEnd = 0; -// for (sal_Int32 i = 0, marker = 1; i < N; i++, marker <<= 1) -// if (terms[i] != null) { -// const sal_Int32 wordNumber = matches[i*2 + 1]; -// const sal_Int32 initialWordIndex = findIndexBin(wordNumber); -// wordNumbers[i] = wordNumber - _initialWords[initialWordIndex] + 1; -// if (initialWordIndex == lastInitialWordIndex) // save work -// ; // do nothing, path will be reused -// else { -// pattern |= marker; -// stack = stacks[i] = new IntegerArray(); -// for (sal_Int32 ctx = initialWordIndex;;) { -// const sal_Int32 parent = _dests[ctx]; -// if (parent != -1) { -// stack.add(ctx); -// _markers[ctx] |= marker; -// ctx = parent; -// } -// else -// break; -// } -// lastInitialWordIndex = initialWordIndex; -// } -// } -// // find and output common path -// // process first match -// StringBuffer path = new StringBuffer(256); -// String previousPath = null; // we may be copying subpaths from it -// sal_Int32 i = 0, marker = 1; -// for ( ; i < N; i++, marker <<= 1) -// if (terms[i] != null) { -// sal_Int32 context = 0; -// stack = stacks[i]; -// while (stack.cardinality() > 0) { -// context = stack.popLast(); -// if (_markers[context] == pattern) { -// _markers[context] = 0; -// appendSegment(context, path); // belongs to common -// context = -1; // used -// ++nPopped; -// } -// else -// break; -// } -// data.setCommonPath(path.toString()); -// // end of 'matches' && common path -// path.setLength(0); // will now be used for relative paths -// pathStart = 0; -// if (context != -1) { -// appendSegment(context, path); -// _markers[context] = 0; -// } -// while (stack.cardinality() > 0) { -// context = stack.popLast(); -// appendSegment(context, path); -// _markers[context] = 0; -// } -// pathEnd = path.length(); -// data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]); -// break; // just the first non-zero -// } - -// // process the remaining matches -// for (i++, marker <<= 1 ; i < N; i++, marker <<= 1) -// if (terms[i] != null) { -// path.setLength(0); -// stack = stacks[i]; -// if (stack == null) // reuse previously generated path -// path.append(previousPath.substring(pathStart, pathEnd)); -// else { -// stack.pop(nPopped); -// pathStart = path.length(); -// while (stack.cardinality() > 0) { -// const sal_Int32 context = stack.popLast(); -// appendSegment(context, path); -// _markers[context] = 0; -// } -// pathEnd = path.length(); -// } -// data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]); -// } -// } - -// private sal_Int32 ContextTables::findIndexBin(const sal_Int32 wordNumber) { -// sal_Int32 i = 0, j = _nTextNodes - 1; -// while (i <= j) { -// const sal_Int32 k = (i + j) >>> 1; -// if (_initialWords[k] < wordNumber) -// i = k + 1; -// else if (_initialWords[k] > wordNumber) -// j = k - 1; -// else -// return k; -// } -// return i - 1; -// } - - /* - public void addGoverningFiller(int query, RoleFiller rf, int parent) { - // !!! for now assume just one query - GoverningContext gc = null; - if (_governingContexts[parent] == null) { - // find parent governing context - for (int c = _dests[parent]; ; c = _dests[c]) - if (_governingContexts[c] != null || c == 0) { - // System.out.println("parent found at " + c); - gc = new GoverningContext(c, rf); - break; - } - } - else - gc = new GoverningContext(_governingContexts[parent], rf); - _governingContexts[parent] = gc; - } - */ - - - - - - - - diff --git a/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx b/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx index b3aea87db3d2..0e8413ea2de2 100644 --- a/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx +++ b/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx @@ -1,495 +1,64 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: DocGenerator.cxx,v $ - * $Revision: 1.10 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include -#include - - -using namespace xmlsearch; -using namespace xmlsearch::qe; - - -const sal_Int32 NonnegativeIntegerGenerator::END = -1; -const sal_Int32 ConceptGroupGenerator::NConceptsInGroup = 16; -const sal_Int32 ConceptGroupGenerator::BitsInLabel = 4; - - -RoleFiller RoleFiller::roleFiller_; - - -RoleFiller::RoleFiller() - : m_nRefcount( 0 ), - fixedRole_( 0 ), - filled_( 0 ), - begin_( 0 ), - end_( 0 ), - parentContext_( 0 ), - limit_( 0 ), - next_( 0 ), - fillers_( 0 ), - conceptData_( 0 ) -{ -} - - -RoleFiller::RoleFiller( sal_Int32 nColumns, - ConceptData* first, - sal_Int32 role, - sal_Int32 pos, - sal_Int32 parentContext, - sal_Int32 limit ) - : m_nRefcount( 0 ), - fixedRole_( sal_uInt8( role & 0xF ) ), // primary/constitutive concept/role - next_( 0 ), - fillers_( nColumns ), - conceptData_( first ) -{ - filled_ = sal_Int16( 1 << fixedRole_ ); - begin_ = pos; // offset in file - // _end = _begin + first.getConceptLength(); - end_ = begin_ + 1; - limit_ = limit; - parentContext_ = parentContext; - next_ = 0; - for( sal_uInt32 i = 0; i < fillers_.size(); ++i ) - fillers_[i] = 0; - fillers_[ role ] = this; -} - - -RoleFiller::~RoleFiller() -{ - -} - - -void RoleFiller::scoreList( Query* query,sal_Int32 document ) -{ - sal_Int32 nColumns = query->getNColumns(); - RoleFiller* candidateHit = this; // function called for the head of list - RoleFiller* next; // lookahead: if overlap, if so, is it better - - // 'candidateHit' always points at the current candidate to be converted to a QueryHit - // 'penalty' is its penalty - // 'next' is used to explore earlier overlapping fillers - // the decision to emit a QueryHit is made when either there's no next - // or next doesn't overlap the current candidate - // the loop's logic makes sure that at emit time there's no better/earlier filler - // to overlap with the candidate - - double penalty_ = candidateHit->penalty( query,nColumns ); - - for( next = candidateHit->next_; next; next = next->next_ ) - if( next->end_ < candidateHit->begin_ ) - { // no overlap - candidateHit->makeQueryHit( query,document,penalty_ ); - candidateHit = next; - penalty_ = candidateHit->penalty( query,nColumns ); - } - else - { // !!! can be computed in two steps - double penalty2 = next->penalty( query,nColumns ); - if( penalty2 <= penalty_ ) - { // prefer next, disregard candidateHit - penalty_ = penalty2; - candidateHit = next; - } - } - candidateHit->makeQueryHit(query,document,penalty_); -} - - - - -void RoleFiller::makeQueryHit( Query* query,sal_Int32 doc,double penalty_ ) -{ - QueryHit* hit = query->maybeCreateQueryHit( penalty_,doc, - begin_,end_,parentContext_ ); - if( hit ) - { - sal_Int32 N; - sal_Int32* matches = hit->getMatches( N ); - N /= 2; - - for( sal_Int32 i = 0,j = 0; i < N; ++i ) - if( filled_ & 1 << i ) - { - matches[ j++ ] = fillers_[ i ]->getConcept(); - matches[ j++ ] = fillers_[ i ]->begin_; - } - else - j += 2; - } -} - - - -sal_Int32 RoleFiller::getConcept() -{ - return conceptData_->getConcept(); -} - - - -void RoleFiller::use( std::vector< RoleFiller*>& place,sal_Int32 query ) -{ - RoleFiller* rf = place[ query ]; - if( rf ) - { - place[ query ] = this; // put at the head of list - next_ = rf; - while( rf->limit_ >= begin_ ) - { - // check if we can grow/improve a hit - // we don't ever replace filler's fixed role - if( fixedRole_ != rf->fixedRole_ && - // in same parent context eg. PARA - rf->parentContext_ == parentContext_ ) - { - if( ( rf->filled_ & ( 1 << fixedRole_ ) ) == 0 ) - { - // not filled yet - rf->filled_ |= 1 << fixedRole_; - rf->fillers_[ fixedRole_ ] = this; - rf->end_ = end_; - } - else - rf->considerReplacementWith( this ); - } - - if( rf->next_ ) - rf = rf->next_; - else - return; - } - } - else - place[query] = this; -} - - -void RoleFiller::considerReplacementWith( RoleFiller* replacement ) -{ - // !!! simplistic for now - // needs gap and out of order - sal_Int32 role = replacement->fixedRole_; - if( replacement->getScore() > fillers_[role]->getScore() ) - fillers_[ role ] = replacement; -} - - - -double RoleFiller::penalty( Query* query,sal_Int32 nColumns ) -{ - sal_Int32 length = end_ - begin_ + 1; - double penalty_ = query->lookupPenalty( filled_ ); - // !!! here is a chance to check against query - // if hit worth scoring further - // might not be if query already has lots of good hits - for( sal_Int32 i = 0; i < nColumns; ++i ) - if( filled_ & ( 1 << i ) ) - { - penalty_ += fillers_[i]->conceptData_->getPenalty(); - //length -= _fillers[i]._conceptData.getConceptLength() + 1; - length -= 2; // !!! ??? c.length is not used ? - if( filled_ >> (i + 1) ) - for( sal_Int32 j = i + 1; j < nColumns; ++j ) - if( ( filled_ & 1 << j ) && fillers_[j]->begin_ < begin_ ) - penalty_ += query->getOutOufOrderPenalty(); - } - double result = penalty_ + length * query->getGapPenalty(); - return result < 0.0 ? 0.0 : result; // !!! quick fix -} - - - -NextDocGenerator::NextDocGenerator( ConceptData* cd,XmlIndex* env ) - : document_( 0 ), - concept_( cd ? cd->getConcept() : -1 ), - queryMask_( cd ? cd->getQueryMask() : -1 ), - terms_( cd ), - iterator_( env->getDocumentIterator( concept_ ) ) -{ -} - - - -void NextDocGeneratorHeap::reset() -{ - for( sal_Int32 i = 0; i < heapSize_; ++i ) - { - delete heap_[i]; heap_[i] = 0; - } - free_ = 0; - nonEmpty_ = false; -} - - - -void NextDocGeneratorHeap::addGenerator( NextDocGenerator* gen ) -{ - if( sal_uInt32( free_ ) == heap_.size() ) - { - heap_.push_back( 0 ); - } - - heap_[free_++] = gen; -} - - - -void NextDocGeneratorHeap::start() -{ - if( ( heapSize_ = free_ ) > 0 ) - { - for( sal_Int32 i = heapSize_ / 2; i >= 0; --i ) - heapify(i); - nonEmpty_ = true; - } - else - nonEmpty_ = false; -} - - -void NextDocGeneratorHeap::step() throw( excep::XmlSearchException ) -{ - if( heap_[0]->next() != NonnegativeIntegerGenerator::END ) - heapify(0); - else if ( heapSize_ > 1 ) - { - delete heap_[0]; - heap_[0] = heap_[--heapSize_]; - heap_[ heapSize_ ] = 0; - heapify(0); - } - else - nonEmpty_ = false; -} - - -void NextDocGeneratorHeap::heapify( sal_Int32 i ) -{ - NextDocGenerator* temp; - for( sal_Int32 r,l,smallest; ; ) - { - r = ( i + 1 ) << 1; - l = r - 1; - smallest = ( l < heapSize_ && heap_[l]->smallerThan( heap_[i] ) ) ? l : i; - if( r < heapSize_ && heap_[r]->smallerThan( heap_[ smallest ] ) ) - smallest = r; - if( smallest != i ) - { - temp = heap_[ smallest ]; - heap_[ smallest ] = heap_[ i ]; - heap_[i] = temp; - i = smallest; - } - else - break; - } -} - - -bool NextDocGeneratorHeap::atDocument( sal_Int32 document ) -{ - return nonEmpty_ && heap_[0]->getDocument() == document; -} - - - - -ConceptGroupGenerator::ConceptGroupGenerator( sal_Int32 dataL,sal_Int8* data,sal_Int32 index,sal_Int32 k ) - : last_( 0 ), - k1_( k ), - k2_( BitsInLabel ), - table_( NConceptsInGroup ), - bits_( new util::ByteArrayDecompressor( dataL,data,index ) ) -{ -} - - - -ConceptGroupGenerator::ConceptGroupGenerator() - : last_( 0 ), - k1_( 0 ), - k2_( BitsInLabel ), - table_( NConceptsInGroup ), - bits_( 0 ) -{ -} - - -ConceptGroupGenerator::~ConceptGroupGenerator() -{ - delete bits_; -} - - -void ConceptGroupGenerator::generateFillers( std::vector< RoleFiller* >& array ) -{ - cData_->generateFillers( array,last_ ); -} - - -bool ConceptGroupGenerator::next() throw( excep::XmlSearchException ) -{ - while( bits_->readNext( k1_,this ) ) - { - sal_Int32 bla = bits_->read( k2_ ); - if( ( cData_ = table_[ bla ] ).is() ) - return true; - } - return false; -} - - -sal_Int32 ConceptGroupGenerator::decodeConcepts( sal_Int32 k, - sal_Int32 shift, - sal_Int32 *concepts ) - throw( excep::XmlSearchException ) -{ - return bits_->ascendingDecode( k,shift,concepts ); -} - - - -void ConceptGroupGenerator::init( sal_Int32 bytesL,sal_Int8* bytes,sal_Int32 index,sal_Int32 k ) -{ - k1_ = k; - delete bits_; - bits_ = new util::ByteArrayDecompressor( bytesL,bytes,index ); - last_ = 0; - for( sal_Int32 i = 0;i < NConceptsInGroup; i++ ) - table_[i] = 0; -} - - - -void ConceptGroupGenerator::addTerms( sal_Int32 index,ConceptData* terms ) -{ - table_[ index ] = terms; -} - - - -void GeneratorHeap::reset() -{ - for( sal_Int32 i = 0; i < heapSize_; ++i ) - { - delete heap_[i]; - heap_[i] = 0; - } - free_ = 0; -} - - -void GeneratorHeap::addGenerator( ConceptGroupGenerator* cd ) -{ - if( sal_uInt32( free_ ) == heap_.size() ) - { - heap_.push_back( 0 ); - } - - heap_[free_++] = cd; -} - - -void GeneratorHeap::buildHeap() -{ - for( sal_Int32 i = heapSize_/2; i >= 0; i-- ) - heapify(i); -} - - -void GeneratorHeap::heapify( sal_Int32 root ) -{ - for( sal_Int32 smallest = 0; ; ) - { - const sal_Int32 right = ( root + 1 ) << 1; - const sal_Int32 left = right - 1; - smallest = ( left < heapSize_ && heap_[left]->position() < heap_[ root ]->position() ) ? left : root; - if( right< heapSize_ && heap_[right]->position() < heap_[smallest]->position() ) - smallest = right; - if( smallest != root ) - { - ConceptGroupGenerator* temp = heap_[smallest]; - heap_[smallest] = heap_[root]; - heap_[root] = temp; - root = smallest; - } - else - break; - } -} - - -bool GeneratorHeap::start( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException ) -{ - if( ( heapSize_ = free_ ) > 0 ) - { - for( sal_Int32 i = 0; i < free_; ++i ) - heap_[i]->next(); - - buildHeap(); - heap_[0]->generateFillers( array ); - return true; - } - else - return false; -} - - -bool GeneratorHeap::next( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException ) -{ - if( heapSize_ > 0 ) - { - if( ! heap_[0]->next() ) // no more - { - if( heapSize_ > 1) - { - delete heap_[0]; - heap_[0] = heap_[--heapSize_]; - heap_[heapSize_] = 0; - } - else - { - heapSize_ = 0; - return false; - } - } - heapify(0); - heap_[0]->generateFillers( array ); - return true; - } - else - return false; -} +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2008 by Sun Microsystems, Inc. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * $RCSfile: DocGenerator.cxx,v $ + * $Revision: 1.10 $ + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_xmlhelp.hxx" +#include +#include + + +using namespace xmlsearch; +using namespace xmlsearch::qe; + + +const sal_Int32 NonnegativeIntegerGenerator::END = -1; + + +RoleFiller RoleFiller::roleFiller_; + + +RoleFiller::RoleFiller() + : m_nRefcount( 0 ), + fixedRole_( 0 ), + filled_( 0 ), + begin_( 0 ), + end_( 0 ), + parentContext_( 0 ), + limit_( 0 ), + next_( 0 ), + fillers_( 0 ) +{ +} + + +RoleFiller::~RoleFiller() +{ + +} diff --git a/xmlhelp/source/cxxhelp/qe/Query.cxx b/xmlhelp/source/cxxhelp/qe/Query.cxx deleted file mode 100644 index 861df3da1ec6..000000000000 --- a/xmlhelp/source/cxxhelp/qe/Query.cxx +++ /dev/null @@ -1,393 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: Query.cxx,v $ - * $Revision: 1.13 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include -#include -#include -#include -#include - - -using namespace xmlsearch::qe; - - -sal_Int32* QueryHit::getMatches( sal_Int32& matchesL ) -{ - matchesL = matchesL_; - return matches_; -} - - -/******************************************************************************/ -/* */ -/* HitStore */ -/* */ -/******************************************************************************/ - - -HitStore::HitStore( double initialStandard,sal_Int32 limit,sal_Int32 nColumns ) - : limit_( limit ), - nColumns_( nColumns ), - index_( 0 ), - free_( 0 ), - standard_( initialStandard ), - heap_( limit ) -{ - for( sal_uInt32 i = 0; i < heap_.size(); ++i ) - heap_[i] = 0; -} - - - -HitStore::~HitStore() -{ - for( sal_uInt32 i = 0; i < heap_.size(); ++i ) - delete heap_[i]; -} - - - -bool HitStore::goodEnough( double penalty, sal_Int32 begin, sal_Int32 end ) -{ - return free_ == limit_ ? heap_[0]->worseThan( penalty,begin,end ) : true; -} - - -QueryHit* HitStore::createQueryHit( double penalty,sal_Int32 doc,sal_Int32 begin,sal_Int32 end ) -{ - QueryHit* hit = new QueryHit( nColumns_,penalty,doc,begin,end ); - if( free_ == limit_ ) - { // goodEnough'ness checked already - delete heap_[0]; - heap_[0] = hit; - heapify( 0 ); - standard_ = heap_[0]->getPenalty(); - } - else if( free_ < limit_ ) - { - heap_[ free_++ ] = hit; - if( free_ == limit_ ) - { // we have the needed number - for( sal_Int32 i = free_/2; i >= 0; --i ) // build heap - heapify( i ); - standard_ = heap_[0]->getPenalty(); - } - } - return hit; -} - - -struct CompareQueryHit -{ - bool operator()( const QueryHit* l,const QueryHit* r ) - { - return l->compareTo( r ); - } -}; - - -#include - - -QueryHit* HitStore::firstBestQueryHit() -{ - if( free_ > 0) - { - CompareQueryHit bla; - heap_.resize( free_ ); - std::stable_sort( heap_.begin(),heap_.end(),bla ); - index_ = 0; - return nextBestQueryHit(); - } - else - return 0; -} - - -QueryHit* HitStore::nextBestQueryHit() -{ - return index_ < free_ ? heap_[ index_++ ] : 0; -} - - -void HitStore::heapify( sal_Int32 i ) -{ - for( sal_Int32 r,l,worst; ; ) - { - r = (i + 1) << 1; l = r - 1; - worst = l < free_ && heap_[i]->betterThan( heap_[l] ) ? l : i; - if( r < free_ && heap_[ worst ]->betterThan( heap_[r] ) ) - worst = r; - if (worst != i) - { - QueryHit* temp = heap_[ worst ]; - heap_[ worst ] = heap_[ i ]; - heap_[i] = temp; - i = worst; // continue - } - else - break; - } -} - - -// sal_Int32 HitStore::partition( sal_Int32 p,sal_Int32 r ) -// { -// QueryHit* x = heap_[ ((p + r) >> 1) & 0x7FFFFFFF ]; -// sal_Int32 i = p - 1, j = r + 1; -// while( true ) -// { -// while( x->compareTo( heap_[--j] ) ) -// ; -// while( heap_[++i]->compareTo( x ) ) -// ; -// if( i < j ) -// { -// QueryHit* t = heap_[i]; -// heap_[i] = heap_[j]; -// heap_[j] = t; -// } -// else -// return j; -// } -// } - - -// void HitStore::quicksort( sal_Int32 p,sal_Int32 r ) -// { -// while( p < r ) -// { -// sal_Int32 q = partition( p,r ); -// quicksort(p, q); -// p = q + 1; -// } -// } - - - -/******************************************************************************/ -/* */ -/* Query */ -/* */ -/******************************************************************************/ - - -#define MissingTermPenalty 10.0 - - -Query::Query( XmlIndex* env, - sal_Int32 nColumns, - sal_Int32 nHits, - sal_Int32 missingPenaltiesL, - double* missingPenalties ) - : env_( env ), - ctx_( env ? env->getContextInfo() : 0 ), - store_( nColumns * MissingTermPenalty - 0.0001,nHits,nColumns ), - nHitsRequested_( nHits ), - nColumns_( nColumns ), - currentStandard_( nColumns * MissingTermPenalty - 0.0001 ), - missingPenaltyL_( nColumns ), - upperboundTemplateL_( nColumns ), - penaltiesL_( missingPenaltiesL ), - missingPenalty_( new double[ nColumns ] ), - upperboundTemplate_( new double[ nColumns ] ), - penalties_( missingPenalties ), - ignoredElementsL_( 0 ), - ignoredElements_( 0 ), - missingTermsPenalty_( 0.0 ) -{ - // for the EmptyQuery case (awaits arch improvement pass) - - if( missingPenalties ) - for( sal_Int32 i = 0;i < nColumns_; ++i ) - missingPenalty_[i] = missingPenalties[i]; - else - for( sal_Int32 i = 0;i < nColumns_; ++i ) - missingPenalty_[i] = MissingTermPenalty; - - makePenaltiesTable(); - // _roleFillerList = RoleFiller.STOP; -} - - -Query::~Query() -{ - delete[] missingPenalty_; - delete[] upperboundTemplate_; - delete[] penalties_; - delete[] ignoredElements_; -} - - -void Query::setIgnoredElements( const sal_Int32 ignoredElementsL,const rtl::OUString* ignoredElements ) -{ - if( ctx_ ) - ignoredElements_ = ctx_->getIgnoredElementsSet( ignoredElementsL_, - ignoredElementsL,ignoredElements ); - - if( ! ctx_ ) - { - ignoredElementsL_ = 0; - ignoredElements_ = 0; - } -} - - - -void Query::missingTerms( sal_Int32 nMissingTerms ) -{ - missingTermsPenalty_ = MissingTermPenalty * nMissingTerms; -} - - - -ConceptData* Query::makeConceptData( sal_Int32 col,sal_Int32 concept,double penalty,sal_Int32 queryNo ) -{ - return new ConceptData( concept,col,penalty,queryNo,nColumns_,env_->getContextInfo() );; -} - - -void Query::getHits( std::vector< QueryHitData* >& data,sal_Int32 n ) -{ - if( n <= 0 ) - return; - - QueryHit* qh = store_.firstBestQueryHit(); - - while( qh ) - { - data.push_back( env_->hitToData( qh ) ); - qh = data.size() < sal_uInt32( n ) ? store_.nextBestQueryHit() : 0; - } -} - - -QueryHit* Query::maybeCreateQueryHit( double penalty, - sal_Int32 doc, sal_Int32 begin, sal_Int32 end, sal_Int32 parentContext ) -{ - // hits are located using only terms actually present in text - // if B is not present, the query A B C reduces to A C and penalties - // are computed as if B did not occur in query - // to meaningfully merge results from different servers, some of which - // may have B, penalty has to be normalized to the common computing scheme - - QueryHit* res = - ( store_.goodEnough( penalty += missingTermsPenalty_,begin,end ) - && ( ! ignoredElements_ || ctx_->notIgnored( parentContext,ignoredElementsL_,ignoredElements_ ) ) ) - ? - store_.createQueryHit( penalty,doc,begin,end ) - : - 0; - return res; -} - - -void Query::makePenaltiesTable() -{ - sal_Int32 nPatterns = 1 << nColumns_; - delete[] penalties_; - penalties_ = new double[ penaltiesL_ = nPatterns ]; - for (sal_Int32 i = 0; i < nPatterns; ++i ) - penalties_[i] = computePenalty(i); -} - - -double Query::computePenalty( sal_Int32 n ) -{ - double penalty = 0.0; - for( sal_Int32 i = 0; i < nColumns_; ++i ) - if( ( n & 1 << i ) == 0 ) - penalty += missingPenalty_[i]; - return penalty; -} - - -void Query::resetForNextDocument() -{ - currentStandard_ = store_.getCurrentStandard(); - // "everything's missing" - for( sal_Int32 i = 0; i < nColumns_; i++ ) - upperboundTemplate_[i] = missingPenalty_[i]; - vote_ = false; -} - - -bool Query::vote() -{ - double sum = 0.0; - for( sal_Int32 i = 0; i < nColumns_; i++ ) - sum += upperboundTemplate_[i]; - return vote_ = (sum <= currentStandard_ ); -} - - -void Query::updateEstimate( sal_Int32 role,double penalty ) -{ - if( penalty < upperboundTemplate_[ role ] ) - upperboundTemplate_[ role ] = penalty; -} - - -/******************************************************************************/ -/* */ -/* QueryHitIterator */ -/* */ -/******************************************************************************/ - - - -QueryHitIterator::QueryHitIterator( const QueryResults* result ) - : index_( -1 ), - result_( result ) -{ -} - - -QueryHitIterator::~QueryHitIterator() -{ - delete result_; -} - - -bool QueryHitIterator::next() -{ - return accessible_ = ( ++index_ < sal_Int32( result_->queryHits_.size() ) ); -} - - -QueryHitData* QueryHitIterator::getHit( const PrefixTranslator* ) const -{ - if( accessible_ ) - return result_->queryHits_[index_]; - else - return 0; -} diff --git a/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx b/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx deleted file mode 100644 index c5f892fb603f..000000000000 --- a/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx +++ /dev/null @@ -1,169 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: QueryProcessor.cxx,v $ - * $Revision: 1.10 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include -#include - - - -using namespace std; -using namespace xmlsearch; -using namespace xmlsearch::excep; -using namespace xmlsearch::qe; - - -const double QueryProcessor::INFLpenalty = 0.0; - - -QueryProcessor::QueryProcessor( const rtl::OUString& installDir ) - throw( IOException ) - : env_( installDir ) -{ -} - - -QueryProcessor::~QueryProcessor() -{ - // delete env_; -} - - - -QueryResults* QueryProcessor::processQuery( const QueryStatement& ment ) -{ - Search search( &env_ ); - Query* query = processQuery( search,ment ); - query->setIgnoredElements( 0,0 ); - search.startSearch(); - return makeQueryResults( query,ment.getHitCount() ); -} - - -Query* QueryProcessor::processQuery( Search& search,const QueryStatement& ment ) -{ - sal_Int32 nValidTerms = 0, nMissingTerms = 0, nContentTerms = 0; - double variantPenalty = 0.0; - - const sal_Int32 nHits = ment.getHitCount(); - const rtl::OUString scope = ment.getScope(); - const vector< rtl::OUString >& terms = ment.getTerms(); - const sal_Int32 nTerms = terms.size(); - - vector< sal_Int32 > primary( nTerms ); - vector< sal_Int32 > missingTerms( nTerms ); - vector< vector< sal_Int32 > > columns( nTerms ); - - for( int i = 0; i < nTerms; ++i ) - { - const sal_Int32 lgt = terms[i].getLength(); - const sal_Unicode* str = terms[i].getStr(); - - if( str[0] == sal_Unicode('+') ) - { - // poor man's escape for query control - // not implemented yet - } - else - { - ++nContentTerms; - rtl::OUString term = terms[i].toAsciiLowerCase(); - sal_Int32 id = 0; - std::vector< sal_Int32 > ids; - if( str[0] == sal_Unicode('\"') ) - { - id = env_.fetch( term.copy( 1 ) ); // goes to BtreeDict::fetch - } - else if( str[lgt-1] == sal_Unicode( '*' ) ) - { - ids = env_.withPrefix( term.copy( 0,lgt - 1 ) ); // goes to BtreeDict::withPrefix - variantPenalty = 0.0; - } - else - { - sal_Int32 formID; - id = env_.fetch( term ); - - // std::vector< rtl::OUString > variants( morph_->getVariants( term ) ); - std::vector< rtl::OUString > variants; - - for( sal_uInt32 j = 0; j < variants.size(); ++j ) - { - formID = env_.fetch( variants[j] ); - if( formID > 0 && formID != id ) - ids.push_back( formID ); - } - variantPenalty = INFLpenalty; - } - - if( ids.size() > 0 || id > 0 ) - { - columns[ nValidTerms ] = ids; - primary[ nValidTerms++ ] = id; - } - else - { - ++nMissingTerms; - // !!! not used now (intended to fill out appropriate missing terms in QueryHits - missingTerms.push_back( nContentTerms - 1 ); - } - - } - } - - return search.addQuery( scope, - nValidTerms,nMissingTerms,nHits, - variantPenalty, - primary, - columns ); -} - - - -QueryResults::QueryResults( Query* query, sal_Int32 nHits ) -{ - if( query ) - query->getHits( queryHits_,nHits ); -} - - -QueryResults::~QueryResults() -{ - for( sal_uInt32 i = 0; i < queryHits_.size(); ++i ) - delete queryHits_[i]; -} - - -QueryResults* QueryProcessor::makeQueryResults( Query* query,sal_Int32 nHits ) -{ - return new QueryResults( query,nHits ); -} - diff --git a/xmlhelp/source/cxxhelp/qe/Search.cxx b/xmlhelp/source/cxxhelp/qe/Search.cxx deleted file mode 100644 index 493205661d64..000000000000 --- a/xmlhelp/source/cxxhelp/qe/Search.cxx +++ /dev/null @@ -1,720 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: Search.cxx,v $ - * $Revision: 1.11 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include - - -using namespace std; -using namespace xmlsearch; -using namespace xmlsearch::qe; - - - - -/******************************************************************************/ -/* */ -/* Emtpy ConceptData/Query */ -/* */ -/******************************************************************************/ - - -class EmptyConceptData - : public ConceptData -{ -public: - - virtual void generateFillers( std::vector< RoleFiller* >& , sal_Int32 ) { } - -}; // end class EmptyQuery - - - -class EmptyQuery - : public Query -{ -public: - - EmptyQuery() - : Query( 0,0,0,0,0 ), - conceptDataInstance_( ) - { - } - - ConceptData* makeConceptData( sal_Int32 col, - sal_Int32 concept, - double penalty, - sal_Int32 queryNo ) - { - (void)col; - (void)concept; - (void)penalty; - (void)queryNo; - - return &conceptDataInstance_; - } - -private: - - EmptyConceptData conceptDataInstance_; - -}; // end class EmptyQuery - - - -/******************************************************************************/ -/* */ -/* ConceptData1/Query1 */ -/* */ -/******************************************************************************/ - - -class ConceptData1 - : public ConceptData -{ -public: - - ConceptData1( sal_Int32 id, - sal_Int32 role, - double score, - sal_Int32 queryNo, - sal_Int32 nColumns, - ContextTables* ctxInfo, - sal_Int32 code ) - : ConceptData( id,role,score,queryNo,nColumns,ctxInfo ), - fieldCode_( code ) - { - } - - virtual void generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos ); - - -private: - - sal_Int32 fieldCode_; - -}; // end class ConceptData1 - - -class Query1 - : public Query -{ -public: - - Query1( XmlIndex* env, - sal_Int32 nColumns, - sal_Int32 nHits, - sal_Int32 missingPenaltiesL, - double* missingPenalties, - sal_Int32 fieldCode ) - : Query( env,nColumns,nHits,missingPenaltiesL,missingPenalties ), - searchFieldCode_( fieldCode ) - { - } - - virtual ConceptData* makeConceptData( sal_Int32 col, - sal_Int32 concept, - double score, - sal_Int32 query ); - - -private: - - sal_Int32 searchFieldCode_; -}; - - - -/********************************************************************************/ -// Impl -/********************************************************************************/ - - -void ConceptData1::generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos ) -{ - if( array[ queryNo_ ] != RoleFiller::STOP() ) - { // not 'prohibited' - // !!! candidate for a single _ctx op - sal_Int32 ancestor = ctx_->firstParentWithCode(pos,fieldCode_); - if( ancestor != -1 ) - { - - - RoleFiller* p = new RoleFiller( nColumns_, - this, - role_, - pos, - ancestor, - pos + proximity_); - p->use( array,queryNo_ ); - } - } - - if( next_.is() ) - next_->generateFillers( array,pos ); -} - - - -ConceptData* Query1::makeConceptData( sal_Int32 col, - sal_Int32 concept, - double score, - sal_Int32 query ) -{ - return new ConceptData1( concept,col,score,query,nColumns_,ctx_,searchFieldCode_ ); -} - - - -/******************************************************************************/ -/* */ -/* QueryFactoryImpl */ -/* */ -/******************************************************************************/ - - -class QueryFactoryImpl -{ -public: - - Query* makeQuery( XmlIndex* env,const rtl::OUString& context,sal_Int32 nColumns,sal_Int32 nHits); - - Query* empty() { return &emptyQueryInstance_; } - -private: - - EmptyQuery emptyQueryInstance_; - -}; // end class QueryFactoryImpl - - - - -Query* QueryFactoryImpl::makeQuery( XmlIndex* env, - const rtl::OUString& context, - sal_Int32 nColumns, - sal_Int32 nHits ) -{ - if( ! context.getLength() ) - return new Query( env,nColumns,nHits,0,0 ); - else if( context.indexOf( sal_Unicode( '|' ) ) != -1 ) - { - return 0; //t - } - else if( context.indexOf( rtl::OUString::createFromAscii( "//" ) ) != -1 ) - { - return 0; //t - } - else if( context.indexOf( sal_Unicode( '/' ) ) != -1 ) - { - return 0; //t - } - else if( context.indexOf( sal_Unicode( '@' ) ) != -1 ) - { - return 0; //t - } - else if( context.indexOf( sal_Unicode( '[' ) ) != -1 ) - { - return 0; //t - } - else - { - sal_Int32 code = env->getContextInfo()->linkCode( context ); - if( code != -1 ) - return new Query1( env,nColumns,nHits,0,0,code); - else - return &emptyQueryInstance_; - } -} - - - -Search::Search( XmlIndex* env ) - : env_( env ), - queryFactory_( 0 ), - nextDocGenHeap_(), - firstGenerator_(), - free2_( 0 ), - limit_( 0 ), - base_( 0 ), - concepts_( new sal_Int32[ ConceptGroupGenerator::NConceptsInGroup ] ), - dataL_( 0 ), - data_( 0 ) -{ -} - - - -Search::~Search() -{ - sal_uInt32 i; - Query* stopq = queryFactory_ ? queryFactory_->empty() : 0; - ConceptData* stopc = stopq ? stopq->makeConceptData( 0,0,0.0,0 ) : 0; - (void)stopc; - - for( i = 0; i < queries_.size(); ++i ) - if( queries_[i] != stopq ) - delete queries_[i]; - - delete[] concepts_; - - delete queryFactory_; -} - - - - -Query* Search::addQuery( const rtl::OUString& context, - sal_Int32 nValidTerms,sal_Int32 nMissingTerms,sal_Int32 nHits, - double variantPenalty, - const std::vector< sal_Int32 >& primary, - const std::vector< std::vector< sal_Int32 > >& columns ) -{ - // by now, scope == context - if( ! queryFactory_ ) - queryFactory_ = new QueryFactoryImpl(); - - Query* query = queryFactory_->makeQuery( env_,context,nValidTerms,nHits ); - query->missingTerms( nMissingTerms ); - queries_.push_back( query ); - - for( sal_Int32 i = 0; i < nValidTerms; ++i ) - { - if( primary[i] > 0 ) - addTerm( i,primary[i],0.0 ); - - for( sal_uInt32 j = 0; j < columns[i].size(); ++j ) - addTerm( i,columns[i][j],variantPenalty ); - } - - // start stop - query->addControlConceptData( this,queries_.size()-1 ); - return query; -} - - - -void Search::startSearch() -{ - sal_Int32 i,j; - // set up ConceptData lists - // order search terms - quicksort( 0, free2_ - 1); - - // remove duplicates - for (i = 0; i < free2_ - 1; i = j) - { - for (j = i + 1; j < free2_; j++) - { - if( conceptData_[i]->crqEquals( conceptData_[j].get() ) ) - conceptData_[j] = 0; - else - i = j; - } - } - - // create lists - for( i = 0; i < free2_ - 1; i = j ) - { - for (j = i + 1; j < free2_; j++ ) - { - if( conceptData_[j].is() ) - { - if( conceptData_[i]->cEquals( conceptData_[j].get() ) ) - { - conceptData_[i]->addLast( conceptData_[j].get() ); - conceptData_[j] = 0; - } - else - i = j; - } - } - } - - // densify - for( i = 0; i < free2_ - 1; i++) - { - if( ! conceptData_[i].is() ) - { - for( j = i + 1; j < free2_; j++) - { - if (conceptData_[j].is() ) - { - conceptData_[i] = conceptData_[j]; - conceptData_[j] = 0; - break; - } - } - } - } - - // set up new document generators - nextDocGenHeap_.reset(); - for( i = 0; i < free2_ && conceptData_[i].is(); i++) - { - NextDocGenerator* gen = new NextDocGenerator( conceptData_[i].get(),env_ ); - try - { - sal_Int32 doc; - gen->first(); - if( ( doc = gen->getDocument() ) != NonnegativeIntegerGenerator::END ) - { - /* !!! ??? is concept length used any more in any way - conceptData_[i]. - setConceptLength(_env. - getConceptLength(conceptData_[i].getConcept())); - */ - nextDocGenHeap_.addGenerator( gen ); - } - } - catch( ... ) - { - } - } - - nextDocGenHeap_.start(); - env_->reset(); - env_->resetContextSearch(); - searchDocument(); -} - - - - -void Search::addTerm( sal_Int32 col,sal_Int32 concept,double score ) -{ - if( env_->occursInText( concept ) ) - { - ConceptData* cd = queries_[queries_.size()-1]->makeConceptData( col,concept,score,queries_.size()-1 ); - if( sal_uInt32( free2_ ) == conceptData_.size() ) - { - conceptData_.push_back( 0 ); -// conceptVisitor_ = &conceptData_[0]; - } - conceptData_[ free2_++ ] = cd; - } -} - - - - - -void Search::searchDocument() -{ - std::vector< RoleFiller* > start( queries_.size() ); - do - { - try - { - switch( nextDocument( start ) ) - { - case 0: // multi group - genHeap_.start( start ); - while( genHeap_.next( start ) ) - ; - break; - - case 1: // single group - while( firstGenerator_.next() ) - firstGenerator_.generateFillers( start ); - break; - - case 2: // reached the end - return; - } - } - catch( const excep::XmlSearchException& ) - { - continue; - } - - RoleFiller* next; - for( sal_uInt32 i = 0; i < queries_.size(); ++i ) - { - if( ( next = start[i] ) != 0 && next != RoleFiller::STOP() ) - next->scoreList( queries_[i],document_ ); - else if( queries_[i]->zoned() ) - { - RoleFiller* rfs = queries_[i]->getRoleFillers(); - if( rfs && rfs != RoleFiller::STOP() ) - rfs->scoreList( queries_[i],document_ ); - } - } - genHeap_.reset(); - } - while( nextDocGenHeap_.isNonEmpty() ); - - // #i80952 -#if 0 - for( sal_uInt32 i = 0; i < start.size(); ++i ) - if( start[i] != RoleFiller::STOP() ) - delete start[i]; -#endif -} - - - -sal_Int32 Search::nextDocument( std::vector< RoleFiller* >& start ) throw( xmlsearch::excep::XmlSearchException ) -{ - while( nextDocGenHeap_.isNonEmpty() ) - { // still something to do - sal_uInt32 i; - for( i = 0; i < queries_.size(); ++i ) - if( queries_[i] ) - queries_[i]->resetForNextDocument(); - - // gather all concepts this document has - // and store associated conceptData - sal_Int32 index = 0; - document_ = nextDocGenHeap_.getDocument(); - docConcepts_.clear(); - queryMasks_.clear(); - do - { - docConcepts_.push_back( nextDocGenHeap_.getConcept() ); - queryMasks_.push_back( nextDocGenHeap_.getQueryMask() ); - ConceptData *conceptData = ( conceptData_[ index++ ] = nextDocGenHeap_.getTerms() ).get(); - conceptData->runBy( queries_ ); - nextDocGenHeap_.step(); - } - while( nextDocGenHeap_.atDocument( document_) ); - - // if there is no saturation model, some query will always vote YES - // and so every document will be opened - // even if this case, however, savings can be achieved by not generating fillers - // for some queries (not scoring, etc) - // and, with more care, creation of some GroupGenerators can be avoided - // saturating queries with lots of good hits will lead to best results - - sal_Int32 voteMask = 0; - Query* query; - for( i = 0; i < queries_.size(); ++i ) - { - query = queries_[i]; - if( query ) - { - query->saveRoleFillers( 0 ); - if( query->vote() ) - { - // normal reset - start[i] = query->zoned() ? RoleFiller::STOP() : 0; - voteMask |= 1 << i; - } - else - start[i] = RoleFiller::STOP(); // prohibit setting - } - } - - // we may eliminate some ConceptGroupGenerators - // those which would be used only by Queries which voted NO - if( voteMask != 0 ) - { // need to open up document - ConceptGroupGenerator* gen; - // !!! don't gather Fillers for disinterested Queries - if( openDocumentIndex( document_ ) ) - { // multi group - // set up all needed generators - sal_Int32 j = 0; - while( ( queryMasks_[j] & voteMask ) == 0 ) - ++j; - // assert(j < index); - sal_Int32 c = docConcepts_[j]; - sal_Int32 group = 0; - // find first group - while( /*group < maxConcepts_.size() &&*/ - c > maxConcepts_[ group ] && ++group < limit_ ) - ; - gen = makeGenerator( group ); - gen->addTerms( indexOf(c),conceptData_[j].get() ); - - for( ++j; j < index; j++ ) - if( ( queryMasks_[j] & voteMask ) > 0 ) - { - c = docConcepts_[j]; - if( c > max_ ) - { // need to find another group - // assert(group < _limit); - while( /*group < maxConcepts_.size() &&*/ - c > maxConcepts_[ group ] && ++group < limit_ ) - ; - gen = makeGenerator( group ); - } - gen->addTerms( indexOf(c),conceptData_[j].get() ); - } - return 0; - } - else - { // single group - for( sal_Int32 j = 0; j < index; j++ ) - if( queryMasks_[j] & voteMask ) - firstGenerator_.addTerms( indexOf( docConcepts_[j] ),conceptData_[j].get() ); - return 1; - } - } - } - return 2; -} - - - - -bool Search::openDocumentIndex( sal_Int32 docNo ) throw( excep::XmlSearchException ) -{ - data_ = env_->getPositions( dataL_,docNo ); - base_ = env_->getDocumentIndex( docNo ); - - startingIndex_ = 0; - sal_Int32 kk = data_[ base_ ] & 0xFF, k2; - - switch( kk >> 6 ) - { // get type - case 0: // single group, no extents - k2 = data_[base_ + 1]; - firstGenerator_.init( dataL_,data_,base_ += 2,k2 ); - // decode concept table - nConcepts_ = firstGenerator_.decodeConcepts( kk & 0x3F,0,concepts_ ); - return false; - - case 2: // multi group, no extents - { - kTable_.clear(); - offsets_.clear(); - maxConcepts_.clear(); - util::ByteArrayDecompressor compr( dataL_,data_,base_ + 1 ); - compr.decode( kk & 0x3F,kTable_ ); - - sal_Int32 last = kTable_.back(); - kTable_.pop_back(); - compr.ascDecode( last,offsets_ ); - last = kTable_.back(); - kTable_.pop_back(); - compr.ascDecode( last,maxConcepts_ ); - - base_ += 1 + compr.bytesRead(); - limit_ = maxConcepts_.size(); - } - return true; - - case 1: // single group, extents - case 3: // multi group, extents - throw excep::XmlSearchException( rtl::OUString::createFromAscii( "extents not yet implemented\n" ) ); - } - return false; -} - - - - - -ConceptGroupGenerator* Search::makeGenerator( sal_Int32 group ) - throw( excep::XmlSearchException ) -{ - sal_Int32 shift,index; - if( group > 0 ) - { - index = base_ + offsets_[ group - 1 ]; - shift = maxConcepts_[ group - 1 ]; - } - else - { - index = base_; - shift = 0; - } - - // initialize generator - ConceptGroupGenerator* gen = - new ConceptGroupGenerator( dataL_,data_,index,kTable_[ 1 + 2*group ] ); - // decode concept table - nConcepts_ = gen->decodeConcepts( kTable_[2*group],shift,concepts_ ); - - if( group < limit_ ) - max_ = concepts_[ nConcepts_ ] = maxConcepts_[ group ]; - else - max_ = concepts_[ nConcepts_ - 1 ]; - - genHeap_.addGenerator( gen ); - startingIndex_ = 0; // in _concepts; lower search index - return gen; -} - - - -sal_Int32 Search::indexOf(sal_Int32 concept) throw( excep::XmlSearchException ) -{ - sal_Int32 i = startingIndex_,j = nConcepts_,k; - while( i <= j ) - if( concepts_[ k = (i + j)/2 ] < concept ) - i = k + 1; - else if( concept < concepts_[k] ) - j = k - 1; - else - { - startingIndex_ = k + 1; - return k; - } - throw excep::XmlSearchException( rtl::OUString::createFromAscii( "indexOf not found" ) ); -} - - - - -sal_Int32 Search::partition( sal_Int32 p,sal_Int32 r ) -{ - rtl::Reference< ConceptData > x = conceptData_[ ((p + r) >> 1) & 0x7FFFFFFF ]; - sal_Int32 i = p - 1, j = r + 1; - while( true ) - { - while( x->compareWith( conceptData_[--j].get() ) ) - ; - while( conceptData_[++i]->compareWith( x.get() ) ) - ; - if( i < j ) - { - rtl::Reference< ConceptData > t = conceptData_[i]; - conceptData_[i] = conceptData_[j]; - conceptData_[j] = t; - } - else - return j; - } -} - - - -void Search::quicksort( sal_Int32 p,sal_Int32 r ) -{ - while (p < r) - { - sal_Int32 q = partition( p,r ); - quicksort(p, q); - p = q + 1; - } -} diff --git a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx deleted file mode 100644 index 707a09f1a42e..000000000000 --- a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx +++ /dev/null @@ -1,457 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2008 by Sun Microsystems, Inc. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * $RCSfile: XmlIndex.cxx,v $ - * $Revision: 1.19 $ - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_xmlhelp.hxx" -#include -#include -#include -#include -#ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_ -#include -#endif -#include -#include - -using namespace xmlsearch; -using namespace xmlsearch::excep; -using namespace xmlsearch::qe; - - -// extern sal_Int32 getInteger_( const sal_Int8* ); - - -XmlIndex::XmlIndex( const rtl::OUString& indexDir ) - throw( IOException ) - : currentBatchOffset_( 0 ), - maxDocNumberInCache_( -1 ), - indexAccessor_( indexDir ), - dict_( indexAccessor_ ), - contextTables_( 0 ), - allListsL_( 0 ), - allLists_( 0 ), - positionsL_( 0 ), - positions_( 0 ), - contextsDataL_( 0 ), - contextsData_( 0 ), - concepts_( 0 ), - documents_( 0 ) -{ - // reading DOCS - try - { - allListsL_ = indexAccessor_.readByteArray( allLists_, - rtl::OUString::createFromAscii("DOCS") ); // reading DOCS - } - catch( IOException ) - { - OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open DOCS/docs" ); - throw; - } - - // reading CONTEXTS - try - { - contextsDataL_ = indexAccessor_.readByteArray( contextsData_, - rtl::OUString::createFromAscii("CONTEXTS") ); // reading CONTEXTS - } - catch( IOException ) - { - OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open CONTEXTS/contexts" ); - delete[] allLists_; - throw; - } - - // reading POSITIONS - { - positionsFile_ = indexAccessor_.getStream( rtl::OUString::createFromAscii( "POSITIONS" ), - rtl::OUString::createFromAscii( "r" ) ); - - OSL_ENSURE( positionsFile_ != 0, "XmlIndex::XmlIndex -> cannot open POSITIONS/positions" ); - - if( positionsFile_ ) - { - //!!! temporary: better than fixed large value, worse than 'intelligent' size mgt - allInCache_ = true; - if( allInCache_ ) // yes, intended - { - reset(); - positions_ = new sal_Int8[ positionsL_ = positionsFile_->length() ]; - positionsFile_->readBytes( positions_,positionsL_ ); - } - } - else - { - delete[] allLists_; - delete[] contextsData_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no POSITIONS/positions") ); - } - } - - - // reading DOCS.TAB - { - util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "DOCS.TAB" ), - rtl::OUString::createFromAscii( "r" ) ); - - if( in ) - { - sal_Int8 a[4]; - a[0] = a[1] = a[2] = 0; - in->readBytes( &a[3],1 ); - sal_Int32 k1 = ::getInteger_( a ); - util::StreamDecompressor sddocs( in ); - sddocs.ascDecode( k1,concepts_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k2 = ::getInteger_( a ); - offsets_.push_back( 0 ); - util::StreamDecompressor sdoffsets( in ); - sdoffsets.ascDecode( k2,offsets_ ); - delete in; - } - else - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no DOCS.TAB/docs.tab") ); - } - } - - // reading OFFSETS - { - util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "OFFSETS" ), - rtl::OUString::createFromAscii( "r" ) ); - if( in ) - { - sal_Int8 a[4]; - a[0] = a[1] = a[2] = 0; - in->readBytes( &a[3],1 ); - sal_Int32 k1 = ::getInteger_( a ); - util::StreamDecompressor sddocs( in ); - sddocs.decode( k1,documents_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k2 = ::getInteger_( a ); - util::StreamDecompressor sdoffsets( in ); - sdoffsets.ascDecode( k2,microIndexOffsets_ ); - in->readBytes( &a[3],1 ); - sal_Int32 k3 = ::getInteger_( a ); - util::StreamDecompressor sdtitles( in ); - sdtitles.decode( k3,titles_ ); - - in->readBytes( &a[3],1 ); - sal_Int32 k4 = ::getInteger_( a ); - // contextsOffsets_ = new IntegerArray(_documents.cardinality() + 1); - util::StreamDecompressor co(in); - // _contextsOffsets.add(0); // first, trivial offset - co.ascDecode( k4,contextsOffsets_ ); - delete in; - } - else - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no OFFSETS/offsets") ); - } - } - - // reading linknames - { - util::RandomAccessStream* in = - indexAccessor_.getStream( rtl::OUString::createFromAscii( "LINKNAMES" ), - rtl::OUString::createFromAscii( "r" ) ); - if( ! in ) - { - delete[] allLists_; - delete[] contextsData_; - delete[] positions_; - delete positionsFile_; - throw IOException( - rtl::OUString::createFromAscii( "BtreeDict::BtreeDict -> no LINKNAMES/linknames" ) ); - } - - sal_Int32 len = in->length(); - char* bff = new char[ 1 + len ], *bff1 = new char[ 1 + len ]; - bff[ len ] = 0; - in->readBytes( reinterpret_cast( bff ),len ); - delete in; - - // Now the buffer must be densified. - int i,len1 = 0; - for( i = 0; i < len; ++i ) - { - if( bff[i] ) - bff1[ len1++ ] = bff[i]; - } - bff1[len1] = 0; - delete[] bff; - rtl::OString aStr( bff1 ); // build a string from the densified buffer; - delete[] bff1; - -// // Now determine the order -// #define NAMECOUNT 16 -// #define UNREACHABLEPLACE 100000; -// /** -// * The available names cannot be determined from LINKNAMES at current, -// * because LINKNAMES is a serialized Java-object -// * Always update LINKNAMES if index.xsl or default.xsl are modified. -// */ -// rtl::OString LN[NAMECOUNT]; -// LN[0] = "text:span"; -// LN[1] = "help:help-text"; -// LN[2] = "help:to-be-embedded"; -// LN[3] = "headingheading"; -// LN[4] = "office:body"; -// LN[5] = "text:p"; -// LN[6] = "office:document"; -// LN[7] = "help:link"; -// LN[8] = "help:key-word"; -// LN[9] = "table:table"; -// LN[10] = "table:table-header-row"; -// LN[11] = "table:table-row"; -// LN[12] = "table:table-cell"; -// LN[13] = "text:unordered-list"; -// LN[14] = "text:ordered-list"; -// LN[15] = "text:list-item"; - // Now determine the order - -#define NAMECOUNT 16 -#define UNREACHABLEPLACE 100000; - /** - * The available names cannot be determined from LINKNAMES at current, - * because LINKNAMES is a serialized Java-object - * Always update LINKNAMES if index.xsl or default.xsl are modified. - */ - - rtl::OString LN[NAMECOUNT]; - LN[0] = "helpdocument"; - LN[1] = "body"; - LN[2] = "title"; - LN[3] = "table"; - LN[4] = "tablecell"; - LN[5] = "tablerow"; - LN[6] = "list"; - LN[7] = "listitem"; - LN[8] = "item"; - LN[9] = "emph"; - LN[10] = "paragraph"; - LN[11] = "section"; - LN[12] = "bookmark"; - LN[13] = "bookmark_value"; - LN[14] = "ahelp"; - LN[15] = "link"; - - // Determine index in file - int idx[NAMECOUNT]; - /*int*/ linkNamesL_ = NAMECOUNT; - for( i = 0; i < NAMECOUNT; ++i ) - if( ( idx[i] = aStr.indexOf( LN[i] ) ) == -1 ) { - idx[i] = UNREACHABLEPLACE; - --linkNamesL_; - } - - linkNames_ = new rtl::OUString[linkNamesL_]; - for( i = 0; i < linkNamesL_; ++i ) { - // TODO what happens to first if we never hit Place? - int first = 0; - int Place = UNREACHABLEPLACE; // This is the defintely last place - for( int j = 0; j < NAMECOUNT; ++j ) - { - if( idx[j] < Place ) - Place = idx[first = j]; - } - idx[first] = UNREACHABLEPLACE; - linkNames_[i] = rtl::OUString( LN[first].getStr(),LN[first].getLength(),RTL_TEXTENCODING_UTF8 ); - } - -#undef NAMECOUNT -#undef UNREACHABLEPLACE - } // end linknames - - - { - contextTables_ = new ContextTables(contextsOffsets_, - contextsDataL_,contextsData_, - linkNamesL_,linkNames_ ); - } -} - - -XmlIndex::~XmlIndex() -{ - delete[] allLists_; - delete[] contextsData_; - delete[] linkNames_; - delete[] positions_; - delete positionsFile_; - delete contextTables_; -} - - - -void XmlIndex::reset() -{ - maxDocNumberInCache_ = allInCache_ ? static_cast(microIndexOffsets_.size()) - 1 : -1; -} - - -sal_Int32 binarySearch( const std::vector& arr,sal_Int32 value ) -{ - sal_Int32 i = 0, j = arr.size(), k; - while (i <= j) - if (arr[k = (i + j)/2] < value) - i = k + 1; - else if (value < arr[k]) - j = k - 1; - else - return k; - return -1; -} - - -NonnegativeIntegerGenerator* XmlIndex::getDocumentIterator( sal_Int32 concept ) -{ - sal_Int32 index = binarySearch( concepts_,concept ); - - if( index >= 0 ) - return new util::ConceptList( allLists_,allListsL_,offsets_[index] ); - else - return 0; -} - - -bool XmlIndex::occursInText( sal_Int32 concept ) -{ - return binarySearch( concepts_,concept) >= 0; -} - - -sal_Int8* XmlIndex::getPositions( sal_Int32& len,sal_Int32 docNo ) throw( excep::XmlSearchException ) -{ - contextTables_->setMicroindex( docNo ); - if( docNo > maxDocNumberInCache_ ) - readMicroindexes( docNo ); - - len = positionsL_; - return positions_; -} - - -rtl::OUString XmlIndex::documentName( sal_Int32 docNumber ) throw( excep::XmlSearchException ) -{ - if( docNumber < 0 || documents_.size() <= sal_uInt32( docNumber ) ) - { - rtl::OUString message = rtl::OUString::createFromAscii( "XmlIndex::documentName -> " ); - throw excep::XmlSearchException( message ); - } - - return dict_.fetch( documents_[ docNumber ] ); -} - - - - -void XmlIndex::readMicroindexes( sal_Int32 docNo ) throw( xmlsearch::excep::IOException ) -{ - currentBatchOffset_ = microIndexOffsets_[docNo]; - sal_Int32 offsetLimit = currentBatchOffset_ + positionsL_; - sal_Int32 upTo = 0, nextDoc = docNo; - sal_Int32 lastOffset = 0; - - do - { - if( ++nextDoc == sal_Int32( microIndexOffsets_.size() ) ) - lastOffset = sal_Int32( positionsFile_->length() ); - else if( microIndexOffsets_[ nextDoc ] > offsetLimit ) - lastOffset = microIndexOffsets_[ nextDoc ]; - } - while( lastOffset == 0 ); - - if( lastOffset > offsetLimit ) - { - upTo = microIndexOffsets_[ nextDoc - 1 ]; - maxDocNumberInCache_ = nextDoc - 2; - } - else - { - upTo = lastOffset; - maxDocNumberInCache_ = nextDoc - 1; - } - - if( maxDocNumberInCache_ < docNo ) - { // cache too small - // for current microindex - // System.out.println("expanding cache to " + _positionsCacheSize); - delete[] positions_; - positions_ = new sal_Int8[ positionsL_ = lastOffset - currentBatchOffset_ ]; - readMicroindexes( docNo ); - return; - } - - positionsFile_->seek( currentBatchOffset_ ); - positionsFile_->readBytes( positions_,upTo - currentBatchOffset_ ); -} - - -QueryHitData* XmlIndex::hitToData( QueryHit* hit ) -{ - sal_Int32 termsL,matchesL; - sal_Int32 *matches = hit->getMatches( matchesL ); - rtl::OUString *terms = new rtl::OUString[ termsL = matchesL >>/*>*/ 1 ]; - for( sal_Int32 i = 0; i < termsL; ++i ) - { - sal_Int32 aInt = ( i << 1 ); - if( 0 <= aInt && aInt < matchesL ) - { - sal_Int32 match = matches[ aInt ]; - if( match > 0 ) - try - { - terms[i] = fetch( match ); - } - catch( const excep::XmlSearchException& ) - { - } - } - } - - sal_Int32 document = hit->getDocument(); - QueryHitData *res = new QueryHitData( hit->getPenalty(), - documentName( document ), - termsL,terms ); - contextTables_->setMicroindex( document ); - contextTables_->resetContextSearch(); - return res; -} - - diff --git a/xmlhelp/source/cxxhelp/qe/makefile.mk b/xmlhelp/source/cxxhelp/qe/makefile.mk index e86231898cb1..249950c4f02a 100644 --- a/xmlhelp/source/cxxhelp/qe/makefile.mk +++ b/xmlhelp/source/cxxhelp/qe/makefile.mk @@ -45,14 +45,7 @@ ENABLE_EXCEPTIONS=TRUE CFLAGS+=-GR .ENDIF -SLOFILES=\ - $(SLO)$/ConceptData.obj \ - $(SLO)$/ContextTables.obj \ - $(SLO)$/DocGenerator.obj \ - $(SLO)$/Query.obj \ - $(SLO)$/QueryProcessor.obj \ - $(SLO)$/Search.obj \ - $(SLO)$/XmlIndex.obj +SLOFILES=$(SLO)$/DocGenerator.obj # --- Targets ------------------------------------------------------ -- cgit