path: root/xmlhelp/source/cxxhelp/qe
Diffstat (limited to 'xmlhelp/source/cxxhelp/qe')
-rw-r--r--  xmlhelp/source/cxxhelp/qe/ConceptData.cxx     | 105
-rw-r--r--  xmlhelp/source/cxxhelp/qe/ContextTables.cxx   | 572
-rw-r--r--  xmlhelp/source/cxxhelp/qe/DocGenerator.cxx    | 559
-rw-r--r--  xmlhelp/source/cxxhelp/qe/Query.cxx           | 393
-rw-r--r--  xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx  | 169
-rw-r--r--  xmlhelp/source/cxxhelp/qe/Search.cxx          | 720
-rw-r--r--  xmlhelp/source/cxxhelp/qe/XmlIndex.cxx        | 457
-rw-r--r--  xmlhelp/source/cxxhelp/qe/makefile.mk         |   9
8 files changed, 65 insertions, 2919 deletions
diff --git a/xmlhelp/source/cxxhelp/qe/ConceptData.cxx b/xmlhelp/source/cxxhelp/qe/ConceptData.cxx
deleted file mode 100644
index 83399d370c03..000000000000
--- a/xmlhelp/source/cxxhelp/qe/ConceptData.cxx
+++ /dev/null
@@ -1,105 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: ConceptData.cxx,v $
- * $Revision: 1.8 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <qe/ConceptData.hxx>
-#include <qe/Query.hxx>
-#include <qe/DocGenerator.hxx>
-
-
-using namespace xmlsearch::qe;
-
-
-const sal_Int32 ConceptData::ProxPerTerm = 10;
-
-
-ConceptData::ConceptData( sal_Int32 id,
- sal_Int32 role,
- double score,
- sal_Int32 queryNo,
- sal_Int32 nColumns,
- ContextTables* contextTables )
- : queryNo_( sal_uInt8( queryNo & 0xF ) ),
- nColumns_( sal_uInt8( nColumns & 0xF ) ),
- role_( sal_uInt8( role & 0xF ) ),
- concept_( id ),
- proximity_( nColumns * ProxPerTerm ),
- penalty_( score ),
- m_nRefcount( 0 ),
- ctx_( contextTables ),
- next_( 0 )
-{
-}
-
-
-ConceptData::~ConceptData()
-{
-}
-
-
-void ConceptData::runBy( std::vector< Query* >& queries )
-{
- rtl::Reference< ConceptData > cd( this );
- do
- {
- Query* query = queries[ cd->queryNo_ ];
- query->updateEstimate( cd->role_,cd->penalty_ );
- }
- while( (cd = cd->next_).is() );
-}
-
-
-void ConceptData::addLast( ConceptData* r )
-{
- if( next_.is() )
- next_->addLast( r );
- else
- next_ = r;
-}
-
-
-void ConceptData::generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos )
-{
- if( array[ queryNo_ ] != RoleFiller::STOP() ) // not 'prohibited'
- {
- sal_Int32 wcl = ctx_->wordContextLin( pos );
- roleFillers_.push_back( new RoleFiller( nColumns_,
- this,
- role_,
- pos,
- wcl,
- pos + proximity_ ) );
- roleFillers_.back()->use( array, queryNo_ );
- }
- // !!! maybe eliminate tail recursion
- if( next_.is() )
- next_->generateFillers( array,pos );
-}
diff --git a/xmlhelp/source/cxxhelp/qe/ContextTables.cxx b/xmlhelp/source/cxxhelp/qe/ContextTables.cxx
deleted file mode 100644
index 139348aeb153..000000000000
--- a/xmlhelp/source/cxxhelp/qe/ContextTables.cxx
+++ /dev/null
@@ -1,572 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: ContextTables.cxx,v $
- * $Revision: 1.10 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-
-#ifndef _rtl_MEMORY_H_
-#include <rtl/memory.h>
-#endif
-#include <qe/ContextTables.hxx>
-#ifndef _XMLSEARCH_UTIL_BYTEARRAYDECOMPRESSOR_HXX_
-#include <util/Decompressor.hxx>
-#endif
-
-using namespace xmlsearch;
-using namespace xmlsearch::qe;
-
-
-
-Tables::Tables( ContextTables* p )
- : initialWordsCached_( new sal_Int32[ initialWordsCachedL_ = p->initialWordsL_ ] ),
- destsCached_( new sal_Int32[ destsCachedL_ = p->destsL_ ] ),
- linkTypesCached_( new sal_Int32[ linkTypesCachedL_ = p->linkTypesL_ ] ),
- seqNumbersCached_( new sal_Int32[ seqNumbersCachedL_ = p->seqNumbersL_ ] )
-{
- rtl_copyMemory( (void*)initialWordsCached_,
- (void*)p->initialWords_,
- sizeof(sal_Int32) * p->initialWordsL_ );
-
- rtl_copyMemory( (void*)destsCached_,
- (void*)p->dests_,
- sizeof(sal_Int32) * p->destsL_ );
-
- rtl_copyMemory( (void*)linkTypesCached_,
- (void*)p->linkTypes_,
- sizeof(sal_Int32) * p->linkTypesL_ );
-
- rtl_copyMemory( (void*)seqNumbersCached_,
- (void*)p->seqNumbers_,
- sizeof(sal_Int32) * p->seqNumbersL_ );
-}
-
-
-
-Tables::~Tables()
-{
- delete[] seqNumbersCached_;
- delete[] linkTypesCached_;
- delete[] destsCached_;
- delete[] initialWordsCached_;
-}
-
-
-
-void Tables::setTables( ContextTables* p )
-{
- delete[] p->initialWords_;
- p->initialWordsL_ = initialWordsCachedL_;
- p->initialWords_ = initialWordsCached_;
- initialWordsCached_ = 0;
-
- delete[] p->dests_;
- p->destsL_ = destsCachedL_;
- p->dests_ = destsCached_;
- destsCached_ = 0;
-
- delete[] p->linkTypes_;
- p->linkTypesL_ = linkTypesCachedL_;
- p->linkTypes_ = linkTypesCached_;
- linkTypesCached_ = 0;
-
- delete[] p->seqNumbers_;
- p->seqNumbersL_ = seqNumbersCachedL_;
- p->seqNumbers_ = seqNumbersCached_;
- seqNumbersCached_ = 0;
-
- p->nTextNodes_ = initialWordsCachedL_;
-}
-
-
-
-
-ContextTables::ContextTables( const std::vector< sal_Int32 >& offsets,
- sal_Int32 contextDataL,sal_Int8 *contextData,
- sal_Int32 linkNamesL,rtl::OUString *linkNames )
- : lastDocNo_( -1 ),
- initialWordsL_( 0 ),
- destsL_( 0 ),
- linkTypesL_( 0 ),
- seqNumbersL_( 0 ),
- markersL_( 0 ),
- initialWords_( 0 ),
- dests_( 0 ),
- linkTypes_( 0 ),
- seqNumbers_( 0 ),
- markers_( 0 ),
- contextDataL_( contextDataL ),
- contextData_( contextData ),
- linkNamesL_( linkNamesL ),
- linkNames_( linkNames ),
- cache_( offsets.size() ),
- kTable_( 5 ),
- auxArray_( 4096 ),
- offsets_( offsets )
-{
- for( sal_uInt32 i = 0; i < offsets_.size(); ++i )
- cache_[i] = 0;
-}
-
-
-
-ContextTables::~ContextTables()
-{
- delete[] markers_;
- delete[] seqNumbers_;
- delete[] linkTypes_;
- delete[] dests_;
- delete[] initialWords_;
-
- for( sal_uInt32 i = 0; i < cache_.size(); ++i )
- delete cache_[i];
-}
-
-
-
-void ContextTables::setMicroindex( sal_Int32 docNo ) throw( excep::XmlSearchException )
-{
- if( docNo != lastDocNo_ )
- { // check if we need to do anything
- if( cache_[ docNo ] )
- cache_[ docNo ]->setTables( this );
- else
- {
- sal_Int32 offset = offsets_[ docNo ];
- sal_Int32 k0 = contextData_[ offset ] & 0xFF;
- util::ByteArrayDecompressor compr( contextDataL_,contextData_,offset + 1 );
- kTable_.clear();
- compr.decode( k0,kTable_ );
- // decompress initialWords into auxiliary array
- auxArray_.clear();
- compr.ascDecode( kTable_[0],auxArray_ ); // _initialWords
-
- delete[] initialWords_;
- initialWords_ = new sal_Int32[ initialWordsL_ = auxArray_.size() ];
- sal_Int32 k;
- for( k = 0; k < initialWordsL_; ++k ) //?opt
- initialWords_[k] = auxArray_[k];
-
- nTextNodes_ = initialWordsL_;
- // decompress destinations into auxiliary array
- auxArray_.clear();
- compr.decode( kTable_[1],auxArray_ ); // _dests
- auxArray_.push_back( -1 ); // sentinel, root
-
- delete[] dests_;
- dests_ = new sal_Int32[ destsL_ = auxArray_.size() ];
- for( k = 0; k < destsL_; ++k ) //?opt
- dests_[k] = auxArray_[k];
-
- delete[] linkTypes_;
- linkTypes_ = new sal_Int32[ linkTypesL_ = destsL_ - nTextNodes_ - 1 ];
- compr.decode( kTable_[2],linkTypes_ );
-
- delete[] seqNumbers_;
- seqNumbers_ = new sal_Int32[ seqNumbersL_ = destsL_ - 1 ];
- compr.decode( kTable_[ 3 ],seqNumbers_ );
-
- cache_[docNo] = new Tables( this );
- }
-
- lastDocNo_ = docNo;
- delete[] markers_;
- markers_ = new sal_Int32[ markersL_ = destsL_ ];
- }
- initialWordsIndex_ = 0;
-}
-
-
-
-sal_Int32 ContextTables::linkCode( const rtl::OUString& linkName_ )
-{
- for( sal_Int32 i = 0; i < linkNamesL_; ++i )
- if( linkName_ == linkNames_[i] )
- return i;
- return -1; // when not found
-}
-
-
-bool* ContextTables::getIgnoredElementsSet( sal_Int32& len,
- const sal_Int32 ignoredElementsL,
- const rtl::OUString* ignoredElements )
-{
- bool *result = 0;
- if( ignoredElements && ignoredElementsL > 0 )
- {
- for( sal_Int32 i = 0; i < ignoredElementsL; ++i )
- {
- sal_Int32 code = linkCode( ignoredElements[i] );
- if( code > -1 )
- {
- if( ! result )
- result = new bool[ len = linkNamesL_ ];
-
- result[ code ] = true;
- }
- }
- }
- return result;
-}
-
-
-
-bool ContextTables::notIgnored( sal_Int32 ctx,
- sal_Int32 ignoredElementsL,bool* ignoredElements )
-{
- (void)ignoredElementsL;
-
- do
- {
- if( ignoredElements[ linkTypes_[ ctx ] ] )
- return false;
- }
- while( ( ctx = dests_[ ctx ] ) > -1 ); // parentContext 'hand inlined'
- return true;
-}
-
-
-/*
- * starting with ctx and going up the ancestry tree look for the first
- * context with the given linkCode
- */
-
-sal_Int32 ContextTables::firstParentWithCode( const sal_Int32 pos,const sal_Int32 linkCode_ )
-{
- sal_Int32 ctx = dests_[ wordContextLin(pos) ]; // first parent of text node
- const sal_Int32 shift = nTextNodes_;
- const sal_Int32 limit = destsL_ - 1;
- while( linkTypes_[ ctx - shift ] != linkCode_ )
- if( ( ctx = dests_[ ctx ] ) == limit )
- return -1;
- return ctx;
-}
-
-
-void ContextTables::resetContextSearch()
-{
- initialWordsIndex_ = 0;
-}
-
-
-sal_Int32 ContextTables::wordContextLin(sal_Int32 wordNumber)
-{
- for (sal_Int32 i = initialWordsIndex_; i < nTextNodes_; ++i )
- if (initialWords_[i] > wordNumber)
- { // first such i
- // - 1 if wordNumbers can be the same
- initialWordsIndex_ = i; // cached to speed up next search
- return i - 1;
- }
- return nTextNodes_ - 1;
-}
-
-
-// void ContextTables::appendSegment( sal_Int32 context,rtl::OUStringBuffer& result )
-// {
-// result.append( context < nTextNodes_ ?
-// rtl::OUString::createFromAscii( "text()" ) :
-// linkNames_[ linkTypes_[ context - nTextNodes_ ] ] );
-// result.append(sal_Unicode( '[' ) );
-// result.append( seqNumbers_[ context ] );
-// result.append(sal_Unicode( "]/" ) );
-// }
-
-
-// /*
-// * XPath (forking) location of the hit
-// */
-
-// void ContextTables::hitLocation( sal_Int32 termsL,rtl::OUString* terms,
-// sal_Int32 matchesL,sal_Int32* matches,
-// StringBuffer& result )
-// {
-// const sal_Int32 N = termsL;
-// std::vector< sal_Int32 > stacks( N );
-// sal_Int32* wordNumbers = new sal_Int32[N];
-// std::vector< sal_Int32 > stack;
-// sal_Int32 lastInitialWordIndex = -1;
-// sal_Int32 pattern = 0,context = 0,nPopped = 0,pathStart = 0,pathEnd = 0;
-// for( sal_Int32 i = 0,marker = 1; i < N; i++,marker <<= 1 )
-// if ( terms[i] )
-// {
-// const sal_Int32 wordNumber = matches[i*2 + 1];
-// const sal_Int32 initialWordIndex = findIndexBin(wordNumber);
-// wordNumbers[i] = wordNumber - initialWords_[initialWordIndex] + 1;
-// if( initialWordIndex == lastInitialWordIndex ) // save work
-// ; // do nothing, path will be reused
-// else
-// {
-// pattern |= marker;
-// stack = stacks[i] = new IntegerArray();
-
-// context = initialWordIndex;
-// do
-// {
-// const sal_Int32 parent = dests_[context];
-// if( parent != -1 )
-// {
-// stack.add( context );
-// markers_[context] |= marker;
-// context = parent;
-// }
-// else
-// break;
-// }
-// while( true );
-// lastInitialWordIndex = initialWordIndex;
-// }
-// }
-
-// // find and output common path
-// // process first non-missing match
-
-// sal_Int32 i = 0, marker = 1, nMissing = 0;
-
-// // find first non-missing matching term
-// // there has to be at least one if the hit was built
-// // count potential leading missing terms to output appropriate elements
-// // before outputting elements for matches
-
-// for ( ; i < N; i++,marker <<= 1 )
-// if (terms[i] != null)
-// {
-// result.append( rtl::OUString::createFromAscii( "<Matches path=\"" ) );
-// stack = stacks[i];
-// while (stack.size() > 0)
-// {
-// context = stack.popLast();
-// if ( markers_[context] == pattern )
-// {
-// markers_[context] = 0;
-// appendSegment( context,result ); // belongs to common
-// context = -1; // used
-// ++nPopped;
-// }
-// else
-// break;
-// }
-// // end of 'matches' && common path
-// result.append("\">");
-// // output elements for any leading missingTerms
-// while (--nMissing >= 0)
-// result.append("<MissingTerm/>");
-
-// result.append("<Match term=\"");
-// result.append(terms[i]);
-// result.append("\" path=\"");
-// pathStart = result.getLength();
-// if (context != -1)
-// {
-// appendSegment(context, result);
-// markers_[context] = 0;
-// }
-// while (stack.size() > 0 )
-// {
-// context = stack.popLast();
-// appendSegment(context, result);
-// markers_[context] = 0;
-// }
-
-// pathEnd = result.length();
-
-// result.append("\" tokenNumber=\"");
-// result.append(wordNumbers[i]);
-// result.append("]\"/>");
-
-// break; // just the first non-zero
-// }
-// else
-// ++nMissing; // only count leading missing terms
-
-// // process the remaining matches
-// for (i++, marker <<= 1 ; i < N; i++, marker <<= 1)
-// if (terms[i] != null) {
-// result.append("<Match term=\"");
-// result.append(terms[i]);
-// result.append("\" path=\"");
-// stack = stacks[i];
-// if (stack == null) // reuse previously generated path
-// result.append(result.substring(pathStart, pathEnd));
-// else {
-// stack.pop(nPopped);
-// pathStart = result.length();
-// while (stack.cardinality() > 0) {
-// context = stack.popLast();
-// appendSegment(context, result);
-// _markers[context] = 0;
-// }
-// pathEnd = result.length();
-// }
-// result.append("\" tokenNumber=\"");
-// result.append(wordNumbers[i]);
-// result.append("]\"/>");
-// }
-// else
-// result.append("<MissingTerm/>");
-// result.append("</Matches>");
-// }
-
-
-// /*
-// * QueryHitData is initialized in the caller
-// * this function fills the commonPath for all matching terms
-// * and relative paths for the individual terms
-// */
-
-// void ContextTables::hitLocation(String[] terms, sal_Int32[] matches, QueryHitData data) {
-// StringBuffer buffer = new StringBuffer(512);
-// const sal_Int32 N = terms.length;
-// IntegerArray[] stacks = new IntegerArray[N];
-// sal_Int32[] wordNumbers = new sal_Int32[N];
-// IntegerArray stack;
-// sal_Int32 lastInitialWordIndex = -1;
-// sal_Int32 pattern = 0, nPopped = 0, pathStart = 0, pathEnd = 0;
-// for (sal_Int32 i = 0, marker = 1; i < N; i++, marker <<= 1)
-// if (terms[i] != null) {
-// const sal_Int32 wordNumber = matches[i*2 + 1];
-// const sal_Int32 initialWordIndex = findIndexBin(wordNumber);
-// wordNumbers[i] = wordNumber - _initialWords[initialWordIndex] + 1;
-// if (initialWordIndex == lastInitialWordIndex) // save work
-// ; // do nothing, path will be reused
-// else {
-// pattern |= marker;
-// stack = stacks[i] = new IntegerArray();
-// for (sal_Int32 ctx = initialWordIndex;;) {
-// const sal_Int32 parent = _dests[ctx];
-// if (parent != -1) {
-// stack.add(ctx);
-// _markers[ctx] |= marker;
-// ctx = parent;
-// }
-// else
-// break;
-// }
-// lastInitialWordIndex = initialWordIndex;
-// }
-// }
-// // find and output common path
-// // process first match
-// StringBuffer path = new StringBuffer(256);
-// String previousPath = null; // we may be copying subpaths from it
-// sal_Int32 i = 0, marker = 1;
-// for ( ; i < N; i++, marker <<= 1)
-// if (terms[i] != null) {
-// sal_Int32 context = 0;
-// stack = stacks[i];
-// while (stack.cardinality() > 0) {
-// context = stack.popLast();
-// if (_markers[context] == pattern) {
-// _markers[context] = 0;
-// appendSegment(context, path); // belongs to common
-// context = -1; // used
-// ++nPopped;
-// }
-// else
-// break;
-// }
-// data.setCommonPath(path.toString());
-// // end of 'matches' && common path
-// path.setLength(0); // will now be used for relative paths
-// pathStart = 0;
-// if (context != -1) {
-// appendSegment(context, path);
-// _markers[context] = 0;
-// }
-// while (stack.cardinality() > 0) {
-// context = stack.popLast();
-// appendSegment(context, path);
-// _markers[context] = 0;
-// }
-// pathEnd = path.length();
-// data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]);
-// break; // just the first non-zero
-// }
-
-// // process the remaining matches
-// for (i++, marker <<= 1 ; i < N; i++, marker <<= 1)
-// if (terms[i] != null) {
-// path.setLength(0);
-// stack = stacks[i];
-// if (stack == null) // reuse previously generated path
-// path.append(previousPath.substring(pathStart, pathEnd));
-// else {
-// stack.pop(nPopped);
-// pathStart = path.length();
-// while (stack.cardinality() > 0) {
-// const sal_Int32 context = stack.popLast();
-// appendSegment(context, path);
-// _markers[context] = 0;
-// }
-// pathEnd = path.length();
-// }
-// data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]);
-// }
-// }
-
-// private sal_Int32 ContextTables::findIndexBin(const sal_Int32 wordNumber) {
-// sal_Int32 i = 0, j = _nTextNodes - 1;
-// while (i <= j) {
-// const sal_Int32 k = (i + j) >>> 1;
-// if (_initialWords[k] < wordNumber)
-// i = k + 1;
-// else if (_initialWords[k] > wordNumber)
-// j = k - 1;
-// else
-// return k;
-// }
-// return i - 1;
-// }
-
- /*
- public void addGoverningFiller(int query, RoleFiller rf, int parent) {
- // !!! for now assume just one query
- GoverningContext gc = null;
- if (_governingContexts[parent] == null) {
- // find parent governing context
- for (int c = _dests[parent]; ; c = _dests[c])
- if (_governingContexts[c] != null || c == 0) {
- // System.out.println("parent found at " + c);
- gc = new GoverningContext(c, rf);
- break;
- }
- }
- else
- gc = new GoverningContext(_governingContexts[parent], rf);
- _governingContexts[parent] = gc;
- }
- */
-
-
-
-
-
-
-
-
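The table-driven lookup in the ContextTables code removed above maps a word position to the text node that contains it: initialWords_ holds the first word number of each text node in ascending order, wordContextLin scans it from a cached index, and the commented-out findIndexBin shows the binary-search form of the same lookup. A minimal standalone sketch of that lookup, assuming a plain sorted vector (all names here are hypothetical):

#include <algorithm>
#include <cstdint>
#include <vector>

// Index of the text node whose first word number is the largest one not
// exceeding wordNumber; -1 if every text node starts after it. This mirrors
// the removed findIndexBin()/wordContextLin() lookup over initialWords_.
static std::int32_t wordToTextNode(const std::vector<std::int32_t>& initialWords,
                                   std::int32_t wordNumber)
{
    // upper_bound finds the first entry > wordNumber; the entry before it
    // belongs to the text node containing the word.
    auto it = std::upper_bound(initialWords.begin(), initialWords.end(), wordNumber);
    return static_cast<std::int32_t>(it - initialWords.begin()) - 1;
}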
diff --git a/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx b/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx
index b3aea87db3d2..0e8413ea2de2 100644
--- a/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx
+++ b/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx
@@ -1,495 +1,64 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: DocGenerator.cxx,v $
- * $Revision: 1.10 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <qe/DocGenerator.hxx>
-#include <qe/Query.hxx>
-
-
-using namespace xmlsearch;
-using namespace xmlsearch::qe;
-
-
-const sal_Int32 NonnegativeIntegerGenerator::END = -1;
-const sal_Int32 ConceptGroupGenerator::NConceptsInGroup = 16;
-const sal_Int32 ConceptGroupGenerator::BitsInLabel = 4;
-
-
-RoleFiller RoleFiller::roleFiller_;
-
-
-RoleFiller::RoleFiller()
- : m_nRefcount( 0 ),
- fixedRole_( 0 ),
- filled_( 0 ),
- begin_( 0 ),
- end_( 0 ),
- parentContext_( 0 ),
- limit_( 0 ),
- next_( 0 ),
- fillers_( 0 ),
- conceptData_( 0 )
-{
-}
-
-
-RoleFiller::RoleFiller( sal_Int32 nColumns,
- ConceptData* first,
- sal_Int32 role,
- sal_Int32 pos,
- sal_Int32 parentContext,
- sal_Int32 limit )
- : m_nRefcount( 0 ),
- fixedRole_( sal_uInt8( role & 0xF ) ), // primary/constitutive concept/role
- next_( 0 ),
- fillers_( nColumns ),
- conceptData_( first )
-{
- filled_ = sal_Int16( 1 << fixedRole_ );
- begin_ = pos; // offset in file
- // _end = _begin + first.getConceptLength();
- end_ = begin_ + 1;
- limit_ = limit;
- parentContext_ = parentContext;
- next_ = 0;
- for( sal_uInt32 i = 0; i < fillers_.size(); ++i )
- fillers_[i] = 0;
- fillers_[ role ] = this;
-}
-
-
-RoleFiller::~RoleFiller()
-{
-
-}
-
-
-void RoleFiller::scoreList( Query* query,sal_Int32 document )
-{
- sal_Int32 nColumns = query->getNColumns();
- RoleFiller* candidateHit = this; // function called for the head of list
- RoleFiller* next; // lookahead: if overlap, if so, is it better
-
- // 'candidateHit' always points at the current candidate to be converted to a QueryHit
- // 'penalty' is its penalty
- // 'next' is used to explore earlier overlapping fillers
- // the decision to emit a QueryHit is made when either there's no next
- // or next doesn't overlap the current candidate
- // the loop's logic makes sure that at emit time there's no better/earlier filler
- // to overlap with the candidate
-
- double penalty_ = candidateHit->penalty( query,nColumns );
-
- for( next = candidateHit->next_; next; next = next->next_ )
- if( next->end_ < candidateHit->begin_ )
- { // no overlap
- candidateHit->makeQueryHit( query,document,penalty_ );
- candidateHit = next;
- penalty_ = candidateHit->penalty( query,nColumns );
- }
- else
- { // !!! can be computed in two steps
- double penalty2 = next->penalty( query,nColumns );
- if( penalty2 <= penalty_ )
- { // prefer next, disregard candidateHit
- penalty_ = penalty2;
- candidateHit = next;
- }
- }
- candidateHit->makeQueryHit(query,document,penalty_);
-}
-
-
-
-
-void RoleFiller::makeQueryHit( Query* query,sal_Int32 doc,double penalty_ )
-{
- QueryHit* hit = query->maybeCreateQueryHit( penalty_,doc,
- begin_,end_,parentContext_ );
- if( hit )
- {
- sal_Int32 N;
- sal_Int32* matches = hit->getMatches( N );
- N /= 2;
-
- for( sal_Int32 i = 0,j = 0; i < N; ++i )
- if( filled_ & 1 << i )
- {
- matches[ j++ ] = fillers_[ i ]->getConcept();
- matches[ j++ ] = fillers_[ i ]->begin_;
- }
- else
- j += 2;
- }
-}
-
-
-
-sal_Int32 RoleFiller::getConcept()
-{
- return conceptData_->getConcept();
-}
-
-
-
-void RoleFiller::use( std::vector< RoleFiller*>& place,sal_Int32 query )
-{
- RoleFiller* rf = place[ query ];
- if( rf )
- {
- place[ query ] = this; // put at the head of list
- next_ = rf;
- while( rf->limit_ >= begin_ )
- {
- // check if we can grow/improve a hit
- // we don't ever replace filler's fixed role
- if( fixedRole_ != rf->fixedRole_ &&
- // in same parent context eg. PARA
- rf->parentContext_ == parentContext_ )
- {
- if( ( rf->filled_ & ( 1 << fixedRole_ ) ) == 0 )
- {
- // not filled yet
- rf->filled_ |= 1 << fixedRole_;
- rf->fillers_[ fixedRole_ ] = this;
- rf->end_ = end_;
- }
- else
- rf->considerReplacementWith( this );
- }
-
- if( rf->next_ )
- rf = rf->next_;
- else
- return;
- }
- }
- else
- place[query] = this;
-}
-
-
-void RoleFiller::considerReplacementWith( RoleFiller* replacement )
-{
- // !!! simplistic for now
- // needs gap and out of order
- sal_Int32 role = replacement->fixedRole_;
- if( replacement->getScore() > fillers_[role]->getScore() )
- fillers_[ role ] = replacement;
-}
-
-
-
-double RoleFiller::penalty( Query* query,sal_Int32 nColumns )
-{
- sal_Int32 length = end_ - begin_ + 1;
- double penalty_ = query->lookupPenalty( filled_ );
- // !!! here is a chance to check against query
- // if hit worth scoring further
- // might not be if query already has lots of good hits
- for( sal_Int32 i = 0; i < nColumns; ++i )
- if( filled_ & ( 1 << i ) )
- {
- penalty_ += fillers_[i]->conceptData_->getPenalty();
- //length -= _fillers[i]._conceptData.getConceptLength() + 1;
- length -= 2; // !!! ??? c.length is not used ?
- if( filled_ >> (i + 1) )
- for( sal_Int32 j = i + 1; j < nColumns; ++j )
- if( ( filled_ & 1 << j ) && fillers_[j]->begin_ < begin_ )
- penalty_ += query->getOutOufOrderPenalty();
- }
- double result = penalty_ + length * query->getGapPenalty();
- return result < 0.0 ? 0.0 : result; // !!! quick fix
-}
-
-
-
-NextDocGenerator::NextDocGenerator( ConceptData* cd,XmlIndex* env )
- : document_( 0 ),
- concept_( cd ? cd->getConcept() : -1 ),
- queryMask_( cd ? cd->getQueryMask() : -1 ),
- terms_( cd ),
- iterator_( env->getDocumentIterator( concept_ ) )
-{
-}
-
-
-
-void NextDocGeneratorHeap::reset()
-{
- for( sal_Int32 i = 0; i < heapSize_; ++i )
- {
- delete heap_[i]; heap_[i] = 0;
- }
- free_ = 0;
- nonEmpty_ = false;
-}
-
-
-
-void NextDocGeneratorHeap::addGenerator( NextDocGenerator* gen )
-{
- if( sal_uInt32( free_ ) == heap_.size() )
- {
- heap_.push_back( 0 );
- }
-
- heap_[free_++] = gen;
-}
-
-
-
-void NextDocGeneratorHeap::start()
-{
- if( ( heapSize_ = free_ ) > 0 )
- {
- for( sal_Int32 i = heapSize_ / 2; i >= 0; --i )
- heapify(i);
- nonEmpty_ = true;
- }
- else
- nonEmpty_ = false;
-}
-
-
-void NextDocGeneratorHeap::step() throw( excep::XmlSearchException )
-{
- if( heap_[0]->next() != NonnegativeIntegerGenerator::END )
- heapify(0);
- else if ( heapSize_ > 1 )
- {
- delete heap_[0];
- heap_[0] = heap_[--heapSize_];
- heap_[ heapSize_ ] = 0;
- heapify(0);
- }
- else
- nonEmpty_ = false;
-}
-
-
-void NextDocGeneratorHeap::heapify( sal_Int32 i )
-{
- NextDocGenerator* temp;
- for( sal_Int32 r,l,smallest; ; )
- {
- r = ( i + 1 ) << 1;
- l = r - 1;
- smallest = ( l < heapSize_ && heap_[l]->smallerThan( heap_[i] ) ) ? l : i;
- if( r < heapSize_ && heap_[r]->smallerThan( heap_[ smallest ] ) )
- smallest = r;
- if( smallest != i )
- {
- temp = heap_[ smallest ];
- heap_[ smallest ] = heap_[ i ];
- heap_[i] = temp;
- i = smallest;
- }
- else
- break;
- }
-}
-
-
-bool NextDocGeneratorHeap::atDocument( sal_Int32 document )
-{
- return nonEmpty_ && heap_[0]->getDocument() == document;
-}
-
-
-
-
-ConceptGroupGenerator::ConceptGroupGenerator( sal_Int32 dataL,sal_Int8* data,sal_Int32 index,sal_Int32 k )
- : last_( 0 ),
- k1_( k ),
- k2_( BitsInLabel ),
- table_( NConceptsInGroup ),
- bits_( new util::ByteArrayDecompressor( dataL,data,index ) )
-{
-}
-
-
-
-ConceptGroupGenerator::ConceptGroupGenerator()
- : last_( 0 ),
- k1_( 0 ),
- k2_( BitsInLabel ),
- table_( NConceptsInGroup ),
- bits_( 0 )
-{
-}
-
-
-ConceptGroupGenerator::~ConceptGroupGenerator()
-{
- delete bits_;
-}
-
-
-void ConceptGroupGenerator::generateFillers( std::vector< RoleFiller* >& array )
-{
- cData_->generateFillers( array,last_ );
-}
-
-
-bool ConceptGroupGenerator::next() throw( excep::XmlSearchException )
-{
- while( bits_->readNext( k1_,this ) )
- {
- sal_Int32 bla = bits_->read( k2_ );
- if( ( cData_ = table_[ bla ] ).is() )
- return true;
- }
- return false;
-}
-
-
-sal_Int32 ConceptGroupGenerator::decodeConcepts( sal_Int32 k,
- sal_Int32 shift,
- sal_Int32 *concepts )
- throw( excep::XmlSearchException )
-{
- return bits_->ascendingDecode( k,shift,concepts );
-}
-
-
-
-void ConceptGroupGenerator::init( sal_Int32 bytesL,sal_Int8* bytes,sal_Int32 index,sal_Int32 k )
-{
- k1_ = k;
- delete bits_;
- bits_ = new util::ByteArrayDecompressor( bytesL,bytes,index );
- last_ = 0;
- for( sal_Int32 i = 0;i < NConceptsInGroup; i++ )
- table_[i] = 0;
-}
-
-
-
-void ConceptGroupGenerator::addTerms( sal_Int32 index,ConceptData* terms )
-{
- table_[ index ] = terms;
-}
-
-
-
-void GeneratorHeap::reset()
-{
- for( sal_Int32 i = 0; i < heapSize_; ++i )
- {
- delete heap_[i];
- heap_[i] = 0;
- }
- free_ = 0;
-}
-
-
-void GeneratorHeap::addGenerator( ConceptGroupGenerator* cd )
-{
- if( sal_uInt32( free_ ) == heap_.size() )
- {
- heap_.push_back( 0 );
- }
-
- heap_[free_++] = cd;
-}
-
-
-void GeneratorHeap::buildHeap()
-{
- for( sal_Int32 i = heapSize_/2; i >= 0; i-- )
- heapify(i);
-}
-
-
-void GeneratorHeap::heapify( sal_Int32 root )
-{
- for( sal_Int32 smallest = 0; ; )
- {
- const sal_Int32 right = ( root + 1 ) << 1;
- const sal_Int32 left = right - 1;
- smallest = ( left < heapSize_ && heap_[left]->position() < heap_[ root ]->position() ) ? left : root;
- if( right< heapSize_ && heap_[right]->position() < heap_[smallest]->position() )
- smallest = right;
- if( smallest != root )
- {
- ConceptGroupGenerator* temp = heap_[smallest];
- heap_[smallest] = heap_[root];
- heap_[root] = temp;
- root = smallest;
- }
- else
- break;
- }
-}
-
-
-bool GeneratorHeap::start( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException )
-{
- if( ( heapSize_ = free_ ) > 0 )
- {
- for( sal_Int32 i = 0; i < free_; ++i )
- heap_[i]->next();
-
- buildHeap();
- heap_[0]->generateFillers( array );
- return true;
- }
- else
- return false;
-}
-
-
-bool GeneratorHeap::next( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException )
-{
- if( heapSize_ > 0 )
- {
- if( ! heap_[0]->next() ) // no more
- {
- if( heapSize_ > 1)
- {
- delete heap_[0];
- heap_[0] = heap_[--heapSize_];
- heap_[heapSize_] = 0;
- }
- else
- {
- heapSize_ = 0;
- return false;
- }
- }
- heapify(0);
- heap_[0]->generateFillers( array );
- return true;
- }
- else
- return false;
-}
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocGenerator.cxx,v $
+ * $Revision: 1.10 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_xmlhelp.hxx"
+#include <qe/DocGenerator.hxx>
+#include <qe/Query.hxx>
+
+
+using namespace xmlsearch;
+using namespace xmlsearch::qe;
+
+
+const sal_Int32 NonnegativeIntegerGenerator::END = -1;
+
+
+RoleFiller RoleFiller::roleFiller_;
+
+
+RoleFiller::RoleFiller()
+ : m_nRefcount( 0 ),
+ fixedRole_( 0 ),
+ filled_( 0 ),
+ begin_( 0 ),
+ end_( 0 ),
+ parentContext_( 0 ),
+ limit_( 0 ),
+ next_( 0 ),
+ fillers_( 0 )
+{
+}
+
+
+RoleFiller::~RoleFiller()
+{
+
+}
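The comment block in the removed RoleFiller::scoreList describes the emission rule: walk the filler chain from the most recent span backwards, emit the current candidate as a QueryHit once the next span no longer overlaps it, and otherwise keep whichever overlapping span carries the lower penalty. A minimal sketch of that sweep over a plain container, assuming lower penalty means better (names hypothetical):

#include <cstddef>
#include <functional>
#include <vector>

// Candidate span with its score; in the removed code this is a RoleFiller.
struct Span
{
    int    begin;
    int    end;
    double penalty;   // lower is better
};

// Walk the chain (ordered from later to earlier positions, like the
// RoleFiller list), emitting a candidate whenever the next span no longer
// overlaps it, otherwise keeping the better-scoring of the two.
static void emitBestNonOverlapping(const std::vector<Span>& chain,
                                   const std::function<void(const Span&)>& emit)
{
    if (chain.empty())
        return;
    Span candidate = chain.front();
    for (std::size_t i = 1; i < chain.size(); ++i)
    {
        const Span& next = chain[i];
        if (next.end < candidate.begin)        // no overlap: candidate is final
        {
            emit(candidate);
            candidate = next;
        }
        else if (next.penalty <= candidate.penalty)
            candidate = next;                  // overlapping but better, prefer it
    }
    emit(candidate);
}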
diff --git a/xmlhelp/source/cxxhelp/qe/Query.cxx b/xmlhelp/source/cxxhelp/qe/Query.cxx
deleted file mode 100644
index 861df3da1ec6..000000000000
--- a/xmlhelp/source/cxxhelp/qe/Query.cxx
+++ /dev/null
@@ -1,393 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: Query.cxx,v $
- * $Revision: 1.13 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <qe/Query.hxx>
-#include <qe/XmlIndex.hxx>
-#include <qe/ConceptData.hxx>
-#include <qe/QueryProcessor.hxx>
-#include <qe/ContextTables.hxx>
-
-
-using namespace xmlsearch::qe;
-
-
-sal_Int32* QueryHit::getMatches( sal_Int32& matchesL )
-{
- matchesL = matchesL_;
- return matches_;
-}
-
-
-/******************************************************************************/
-/* */
-/* HitStore */
-/* */
-/******************************************************************************/
-
-
-HitStore::HitStore( double initialStandard,sal_Int32 limit,sal_Int32 nColumns )
- : limit_( limit ),
- nColumns_( nColumns ),
- index_( 0 ),
- free_( 0 ),
- standard_( initialStandard ),
- heap_( limit )
-{
- for( sal_uInt32 i = 0; i < heap_.size(); ++i )
- heap_[i] = 0;
-}
-
-
-
-HitStore::~HitStore()
-{
- for( sal_uInt32 i = 0; i < heap_.size(); ++i )
- delete heap_[i];
-}
-
-
-
-bool HitStore::goodEnough( double penalty, sal_Int32 begin, sal_Int32 end )
-{
- return free_ == limit_ ? heap_[0]->worseThan( penalty,begin,end ) : true;
-}
-
-
-QueryHit* HitStore::createQueryHit( double penalty,sal_Int32 doc,sal_Int32 begin,sal_Int32 end )
-{
- QueryHit* hit = new QueryHit( nColumns_,penalty,doc,begin,end );
- if( free_ == limit_ )
- { // goodEnough'ness checked already
- delete heap_[0];
- heap_[0] = hit;
- heapify( 0 );
- standard_ = heap_[0]->getPenalty();
- }
- else if( free_ < limit_ )
- {
- heap_[ free_++ ] = hit;
- if( free_ == limit_ )
- { // we have the needed number
- for( sal_Int32 i = free_/2; i >= 0; --i ) // build heap
- heapify( i );
- standard_ = heap_[0]->getPenalty();
- }
- }
- return hit;
-}
-
-
-struct CompareQueryHit
-{
- bool operator()( const QueryHit* l,const QueryHit* r )
- {
- return l->compareTo( r );
- }
-};
-
-
-#include <algorithm>
-
-
-QueryHit* HitStore::firstBestQueryHit()
-{
- if( free_ > 0)
- {
- CompareQueryHit bla;
- heap_.resize( free_ );
- std::stable_sort( heap_.begin(),heap_.end(),bla );
- index_ = 0;
- return nextBestQueryHit();
- }
- else
- return 0;
-}
-
-
-QueryHit* HitStore::nextBestQueryHit()
-{
- return index_ < free_ ? heap_[ index_++ ] : 0;
-}
-
-
-void HitStore::heapify( sal_Int32 i )
-{
- for( sal_Int32 r,l,worst; ; )
- {
- r = (i + 1) << 1; l = r - 1;
- worst = l < free_ && heap_[i]->betterThan( heap_[l] ) ? l : i;
- if( r < free_ && heap_[ worst ]->betterThan( heap_[r] ) )
- worst = r;
- if (worst != i)
- {
- QueryHit* temp = heap_[ worst ];
- heap_[ worst ] = heap_[ i ];
- heap_[i] = temp;
- i = worst; // continue
- }
- else
- break;
- }
-}
-
-
-// sal_Int32 HitStore::partition( sal_Int32 p,sal_Int32 r )
-// {
-// QueryHit* x = heap_[ ((p + r) >> 1) & 0x7FFFFFFF ];
-// sal_Int32 i = p - 1, j = r + 1;
-// while( true )
-// {
-// while( x->compareTo( heap_[--j] ) )
-// ;
-// while( heap_[++i]->compareTo( x ) )
-// ;
-// if( i < j )
-// {
-// QueryHit* t = heap_[i];
-// heap_[i] = heap_[j];
-// heap_[j] = t;
-// }
-// else
-// return j;
-// }
-// }
-
-
-// void HitStore::quicksort( sal_Int32 p,sal_Int32 r )
-// {
-// while( p < r )
-// {
-// sal_Int32 q = partition( p,r );
-// quicksort(p, q);
-// p = q + 1;
-// }
-// }
-
-
-
-/******************************************************************************/
-/* */
-/* Query */
-/* */
-/******************************************************************************/
-
-
-#define MissingTermPenalty 10.0
-
-
-Query::Query( XmlIndex* env,
- sal_Int32 nColumns,
- sal_Int32 nHits,
- sal_Int32 missingPenaltiesL,
- double* missingPenalties )
- : env_( env ),
- ctx_( env ? env->getContextInfo() : 0 ),
- store_( nColumns * MissingTermPenalty - 0.0001,nHits,nColumns ),
- nHitsRequested_( nHits ),
- nColumns_( nColumns ),
- currentStandard_( nColumns * MissingTermPenalty - 0.0001 ),
- missingPenaltyL_( nColumns ),
- upperboundTemplateL_( nColumns ),
- penaltiesL_( missingPenaltiesL ),
- missingPenalty_( new double[ nColumns ] ),
- upperboundTemplate_( new double[ nColumns ] ),
- penalties_( missingPenalties ),
- ignoredElementsL_( 0 ),
- ignoredElements_( 0 ),
- missingTermsPenalty_( 0.0 )
-{
- // for the EmptyQuery case (awaits arch improvement pass)
-
- if( missingPenalties )
- for( sal_Int32 i = 0;i < nColumns_; ++i )
- missingPenalty_[i] = missingPenalties[i];
- else
- for( sal_Int32 i = 0;i < nColumns_; ++i )
- missingPenalty_[i] = MissingTermPenalty;
-
- makePenaltiesTable();
- // _roleFillerList = RoleFiller.STOP;
-}
-
-
-Query::~Query()
-{
- delete[] missingPenalty_;
- delete[] upperboundTemplate_;
- delete[] penalties_;
- delete[] ignoredElements_;
-}
-
-
-void Query::setIgnoredElements( const sal_Int32 ignoredElementsL,const rtl::OUString* ignoredElements )
-{
- if( ctx_ )
- ignoredElements_ = ctx_->getIgnoredElementsSet( ignoredElementsL_,
- ignoredElementsL,ignoredElements );
-
- if( ! ctx_ )
- {
- ignoredElementsL_ = 0;
- ignoredElements_ = 0;
- }
-}
-
-
-
-void Query::missingTerms( sal_Int32 nMissingTerms )
-{
- missingTermsPenalty_ = MissingTermPenalty * nMissingTerms;
-}
-
-
-
-ConceptData* Query::makeConceptData( sal_Int32 col,sal_Int32 concept,double penalty,sal_Int32 queryNo )
-{
- return new ConceptData( concept,col,penalty,queryNo,nColumns_,env_->getContextInfo() );;
-}
-
-
-void Query::getHits( std::vector< QueryHitData* >& data,sal_Int32 n )
-{
- if( n <= 0 )
- return;
-
- QueryHit* qh = store_.firstBestQueryHit();
-
- while( qh )
- {
- data.push_back( env_->hitToData( qh ) );
- qh = data.size() < sal_uInt32( n ) ? store_.nextBestQueryHit() : 0;
- }
-}
-
-
-QueryHit* Query::maybeCreateQueryHit( double penalty,
- sal_Int32 doc, sal_Int32 begin, sal_Int32 end, sal_Int32 parentContext )
-{
- // hits are located using only terms actually present in text
- // if B is not present, the query A B C reduces to A C and penalties
- // are computed as if B did not occur in query
- // to meaningfully merge results from different servers, some of which
- // may have B, penalty has to be normalized to the common computing scheme
-
- QueryHit* res =
- ( store_.goodEnough( penalty += missingTermsPenalty_,begin,end )
- && ( ! ignoredElements_ || ctx_->notIgnored( parentContext,ignoredElementsL_,ignoredElements_ ) ) )
- ?
- store_.createQueryHit( penalty,doc,begin,end )
- :
- 0;
- return res;
-}
-
-
-void Query::makePenaltiesTable()
-{
- sal_Int32 nPatterns = 1 << nColumns_;
- delete[] penalties_;
- penalties_ = new double[ penaltiesL_ = nPatterns ];
- for (sal_Int32 i = 0; i < nPatterns; ++i )
- penalties_[i] = computePenalty(i);
-}
-
-
-double Query::computePenalty( sal_Int32 n )
-{
- double penalty = 0.0;
- for( sal_Int32 i = 0; i < nColumns_; ++i )
- if( ( n & 1 << i ) == 0 )
- penalty += missingPenalty_[i];
- return penalty;
-}
-
-
-void Query::resetForNextDocument()
-{
- currentStandard_ = store_.getCurrentStandard();
- // "everything's missing"
- for( sal_Int32 i = 0; i < nColumns_; i++ )
- upperboundTemplate_[i] = missingPenalty_[i];
- vote_ = false;
-}
-
-
-bool Query::vote()
-{
- double sum = 0.0;
- for( sal_Int32 i = 0; i < nColumns_; i++ )
- sum += upperboundTemplate_[i];
- return vote_ = (sum <= currentStandard_ );
-}
-
-
-void Query::updateEstimate( sal_Int32 role,double penalty )
-{
- if( penalty < upperboundTemplate_[ role ] )
- upperboundTemplate_[ role ] = penalty;
-}
-
-
-/******************************************************************************/
-/* */
-/* QueryHitIterator */
-/* */
-/******************************************************************************/
-
-
-
-QueryHitIterator::QueryHitIterator( const QueryResults* result )
- : index_( -1 ),
- result_( result )
-{
-}
-
-
-QueryHitIterator::~QueryHitIterator()
-{
- delete result_;
-}
-
-
-bool QueryHitIterator::next()
-{
- return accessible_ = ( ++index_ < sal_Int32( result_->queryHits_.size() ) );
-}
-
-
-QueryHitData* QueryHitIterator::getHit( const PrefixTranslator* ) const
-{
- if( accessible_ )
- return result_->queryHits_[index_];
- else
- return 0;
-}
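HitStore in the removed Query.cxx keeps at most limit_ hits in an array-backed heap with the worst hit at index 0, so once the store is full a new hit is accepted only if it beats that worst entry, which it then replaces; the worst retained penalty becomes the acceptance standard. A minimal sketch of that bounded top-N pattern using a standard priority queue, assuming lower penalty means better (names hypothetical):

#include <cstddef>
#include <queue>
#include <vector>

struct Hit
{
    double penalty;   // lower is better
    int    document;
};

// Orders the heap so the worst (highest-penalty) retained hit sits on top,
// where it is cheap to compare against and evict, matching the role that
// heap_[0] plays in the removed HitStore.
struct WorsePenaltyOnTop
{
    bool operator()(const Hit& a, const Hit& b) const { return a.penalty < b.penalty; }
};

class TopHits
{
public:
    TopHits(std::size_t limit, double initialStandard)
        : limit_(limit), initialStandard_(initialStandard) {}

    void offer(const Hit& hit)
    {
        if (heap_.size() < limit_)
            heap_.push(hit);
        else if (hit.penalty < heap_.top().penalty)   // beats the current worst
        {
            heap_.pop();
            heap_.push(hit);
        }
    }

    // Acceptance threshold, analogous to HitStore::getCurrentStandard().
    double standard() const
    {
        return heap_.size() < limit_ ? initialStandard_ : heap_.top().penalty;
    }

private:
    std::size_t limit_;
    double      initialStandard_;
    std::priority_queue<Hit, std::vector<Hit>, WorsePenaltyOnTop> heap_;
};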
diff --git a/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx b/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx
deleted file mode 100644
index c5f892fb603f..000000000000
--- a/xmlhelp/source/cxxhelp/qe/QueryProcessor.cxx
+++ /dev/null
@@ -1,169 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: QueryProcessor.cxx,v $
- * $Revision: 1.10 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <qe/QueryProcessor.hxx>
-#include <db/DBEnv.hxx>
-
-
-
-using namespace std;
-using namespace xmlsearch;
-using namespace xmlsearch::excep;
-using namespace xmlsearch::qe;
-
-
-const double QueryProcessor::INFLpenalty = 0.0;
-
-
-QueryProcessor::QueryProcessor( const rtl::OUString& installDir )
- throw( IOException )
- : env_( installDir )
-{
-}
-
-
-QueryProcessor::~QueryProcessor()
-{
- // delete env_;
-}
-
-
-
-QueryResults* QueryProcessor::processQuery( const QueryStatement& ment )
-{
- Search search( &env_ );
- Query* query = processQuery( search,ment );
- query->setIgnoredElements( 0,0 );
- search.startSearch();
- return makeQueryResults( query,ment.getHitCount() );
-}
-
-
-Query* QueryProcessor::processQuery( Search& search,const QueryStatement& ment )
-{
- sal_Int32 nValidTerms = 0, nMissingTerms = 0, nContentTerms = 0;
- double variantPenalty = 0.0;
-
- const sal_Int32 nHits = ment.getHitCount();
- const rtl::OUString scope = ment.getScope();
- const vector< rtl::OUString >& terms = ment.getTerms();
- const sal_Int32 nTerms = terms.size();
-
- vector< sal_Int32 > primary( nTerms );
- vector< sal_Int32 > missingTerms( nTerms );
- vector< vector< sal_Int32 > > columns( nTerms );
-
- for( int i = 0; i < nTerms; ++i )
- {
- const sal_Int32 lgt = terms[i].getLength();
- const sal_Unicode* str = terms[i].getStr();
-
- if( str[0] == sal_Unicode('+') )
- {
- // poor man's escape for query control
- // not implemented yet
- }
- else
- {
- ++nContentTerms;
- rtl::OUString term = terms[i].toAsciiLowerCase();
- sal_Int32 id = 0;
- std::vector< sal_Int32 > ids;
- if( str[0] == sal_Unicode('\"') )
- {
- id = env_.fetch( term.copy( 1 ) ); // goes to BtreeDict::fetch
- }
- else if( str[lgt-1] == sal_Unicode( '*' ) )
- {
- ids = env_.withPrefix( term.copy( 0,lgt - 1 ) ); // goes to BtreeDict::withPrefix
- variantPenalty = 0.0;
- }
- else
- {
- sal_Int32 formID;
- id = env_.fetch( term );
-
- // std::vector< rtl::OUString > variants( morph_->getVariants( term ) );
- std::vector< rtl::OUString > variants;
-
- for( sal_uInt32 j = 0; j < variants.size(); ++j )
- {
- formID = env_.fetch( variants[j] );
- if( formID > 0 && formID != id )
- ids.push_back( formID );
- }
- variantPenalty = INFLpenalty;
- }
-
- if( ids.size() > 0 || id > 0 )
- {
- columns[ nValidTerms ] = ids;
- primary[ nValidTerms++ ] = id;
- }
- else
- {
- ++nMissingTerms;
- // !!! not used now (intended to fill out appropriate missing terms in QueryHits
- missingTerms.push_back( nContentTerms - 1 );
- }
-
- }
- }
-
- return search.addQuery( scope,
- nValidTerms,nMissingTerms,nHits,
- variantPenalty,
- primary,
- columns );
-}
-
-
-
-QueryResults::QueryResults( Query* query, sal_Int32 nHits )
-{
- if( query )
- query->getHits( queryHits_,nHits );
-}
-
-
-QueryResults::~QueryResults()
-{
- for( sal_uInt32 i = 0; i < queryHits_.size(); ++i )
- delete queryHits_[i];
-}
-
-
-QueryResults* QueryProcessor::makeQueryResults( Query* query,sal_Int32 nHits )
-{
- return new QueryResults( query,nHits );
-}
-
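QueryProcessor::processQuery above classifies each search term by its delimiters: a leading double quote requests an exact dictionary fetch, a trailing '*' triggers prefix expansion via withPrefix, and any other term is fetched together with its morphological variants at the INFLpenalty inflection penalty. A minimal sketch of just that classification step, assuming plain std::string terms (names hypothetical):

#include <string>

enum class TermKind { Exact, Prefix, Inflected };

struct ParsedTerm
{
    TermKind    kind;
    std::string text;   // term with the quote or '*' marker stripped
};

// Mirrors the removed term parsing: "word requests an exact match, word*
// a prefix search, and anything else a normal lookup plus inflected variants.
static ParsedTerm parseTerm(const std::string& raw)
{
    if (!raw.empty() && raw.front() == '"')
        return { TermKind::Exact, raw.substr(1) };
    if (!raw.empty() && raw.back() == '*')
        return { TermKind::Prefix, raw.substr(0, raw.size() - 1) };
    return { TermKind::Inflected, raw };
}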
diff --git a/xmlhelp/source/cxxhelp/qe/Search.cxx b/xmlhelp/source/cxxhelp/qe/Search.cxx
deleted file mode 100644
index 493205661d64..000000000000
--- a/xmlhelp/source/cxxhelp/qe/Search.cxx
+++ /dev/null
@@ -1,720 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: Search.cxx,v $
- * $Revision: 1.11 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <qe/Search.hxx>
-
-
-using namespace std;
-using namespace xmlsearch;
-using namespace xmlsearch::qe;
-
-
-
-
-/******************************************************************************/
-/* */
-/*                         Empty ConceptData/Query                           */
-/* */
-/******************************************************************************/
-
-
-class EmptyConceptData
- : public ConceptData
-{
-public:
-
- virtual void generateFillers( std::vector< RoleFiller* >& , sal_Int32 ) { }
-
-}; // end class EmptyQuery
-
-
-
-class EmptyQuery
- : public Query
-{
-public:
-
- EmptyQuery()
- : Query( 0,0,0,0,0 ),
- conceptDataInstance_( )
- {
- }
-
- ConceptData* makeConceptData( sal_Int32 col,
- sal_Int32 concept,
- double penalty,
- sal_Int32 queryNo )
- {
- (void)col;
- (void)concept;
- (void)penalty;
- (void)queryNo;
-
- return &conceptDataInstance_;
- }
-
-private:
-
- EmptyConceptData conceptDataInstance_;
-
-}; // end class EmptyQuery
-
-
-
-/******************************************************************************/
-/* */
-/* ConceptData1/Query1 */
-/* */
-/******************************************************************************/
-
-
-class ConceptData1
- : public ConceptData
-{
-public:
-
- ConceptData1( sal_Int32 id,
- sal_Int32 role,
- double score,
- sal_Int32 queryNo,
- sal_Int32 nColumns,
- ContextTables* ctxInfo,
- sal_Int32 code )
- : ConceptData( id,role,score,queryNo,nColumns,ctxInfo ),
- fieldCode_( code )
- {
- }
-
- virtual void generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos );
-
-
-private:
-
- sal_Int32 fieldCode_;
-
-}; // end class ConceptData1
-
-
-class Query1
- : public Query
-{
-public:
-
- Query1( XmlIndex* env,
- sal_Int32 nColumns,
- sal_Int32 nHits,
- sal_Int32 missingPenaltiesL,
- double* missingPenalties,
- sal_Int32 fieldCode )
- : Query( env,nColumns,nHits,missingPenaltiesL,missingPenalties ),
- searchFieldCode_( fieldCode )
- {
- }
-
- virtual ConceptData* makeConceptData( sal_Int32 col,
- sal_Int32 concept,
- double score,
- sal_Int32 query );
-
-
-private:
-
- sal_Int32 searchFieldCode_;
-};
-
-
-
-/********************************************************************************/
-// Impl
-/********************************************************************************/
-
-
-void ConceptData1::generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos )
-{
- if( array[ queryNo_ ] != RoleFiller::STOP() )
- { // not 'prohibited'
- // !!! candidate for a single _ctx op
- sal_Int32 ancestor = ctx_->firstParentWithCode(pos,fieldCode_);
- if( ancestor != -1 )
- {
-
-
- RoleFiller* p = new RoleFiller( nColumns_,
- this,
- role_,
- pos,
- ancestor,
- pos + proximity_);
- p->use( array,queryNo_ );
- }
- }
-
- if( next_.is() )
- next_->generateFillers( array,pos );
-}
-
-
-
-ConceptData* Query1::makeConceptData( sal_Int32 col,
- sal_Int32 concept,
- double score,
- sal_Int32 query )
-{
- return new ConceptData1( concept,col,score,query,nColumns_,ctx_,searchFieldCode_ );
-}
-
-
-
-/******************************************************************************/
-/* */
-/* QueryFactoryImpl */
-/* */
-/******************************************************************************/
-
-
-class QueryFactoryImpl
-{
-public:
-
- Query* makeQuery( XmlIndex* env,const rtl::OUString& context,sal_Int32 nColumns,sal_Int32 nHits);
-
- Query* empty() { return &emptyQueryInstance_; }
-
-private:
-
- EmptyQuery emptyQueryInstance_;
-
-}; // end class QueryFactoryImpl
-
-
-
-
-Query* QueryFactoryImpl::makeQuery( XmlIndex* env,
- const rtl::OUString& context,
- sal_Int32 nColumns,
- sal_Int32 nHits )
-{
- if( ! context.getLength() )
- return new Query( env,nColumns,nHits,0,0 );
- else if( context.indexOf( sal_Unicode( '|' ) ) != -1 )
- {
- return 0; //t
- }
- else if( context.indexOf( rtl::OUString::createFromAscii( "//" ) ) != -1 )
- {
- return 0; //t
- }
- else if( context.indexOf( sal_Unicode( '/' ) ) != -1 )
- {
- return 0; //t
- }
- else if( context.indexOf( sal_Unicode( '@' ) ) != -1 )
- {
- return 0; //t
- }
- else if( context.indexOf( sal_Unicode( '[' ) ) != -1 )
- {
- return 0; //t
- }
- else
- {
- sal_Int32 code = env->getContextInfo()->linkCode( context );
- if( code != -1 )
- return new Query1( env,nColumns,nHits,0,0,code);
- else
- return &emptyQueryInstance_;
- }
-}
-
-
-
-Search::Search( XmlIndex* env )
- : env_( env ),
- queryFactory_( 0 ),
- nextDocGenHeap_(),
- firstGenerator_(),
- free2_( 0 ),
- limit_( 0 ),
- base_( 0 ),
- concepts_( new sal_Int32[ ConceptGroupGenerator::NConceptsInGroup ] ),
- dataL_( 0 ),
- data_( 0 )
-{
-}
-
-
-
-Search::~Search()
-{
- sal_uInt32 i;
- Query* stopq = queryFactory_ ? queryFactory_->empty() : 0;
- ConceptData* stopc = stopq ? stopq->makeConceptData( 0,0,0.0,0 ) : 0;
- (void)stopc;
-
- for( i = 0; i < queries_.size(); ++i )
- if( queries_[i] != stopq )
- delete queries_[i];
-
- delete[] concepts_;
-
- delete queryFactory_;
-}
-
-
-
-
-Query* Search::addQuery( const rtl::OUString& context,
- sal_Int32 nValidTerms,sal_Int32 nMissingTerms,sal_Int32 nHits,
- double variantPenalty,
- const std::vector< sal_Int32 >& primary,
- const std::vector< std::vector< sal_Int32 > >& columns )
-{
- // by now, scope == context
- if( ! queryFactory_ )
- queryFactory_ = new QueryFactoryImpl();
-
- Query* query = queryFactory_->makeQuery( env_,context,nValidTerms,nHits );
- query->missingTerms( nMissingTerms );
- queries_.push_back( query );
-
- for( sal_Int32 i = 0; i < nValidTerms; ++i )
- {
- if( primary[i] > 0 )
- addTerm( i,primary[i],0.0 );
-
- for( sal_uInt32 j = 0; j < columns[i].size(); ++j )
- addTerm( i,columns[i][j],variantPenalty );
- }
-
- // start stop
- query->addControlConceptData( this,queries_.size()-1 );
- return query;
-}
-
-
-
-void Search::startSearch()
-{
- sal_Int32 i,j;
- // set up ConceptData lists
- // order search terms
- quicksort( 0, free2_ - 1);
-
- // remove duplicates
- for (i = 0; i < free2_ - 1; i = j)
- {
- for (j = i + 1; j < free2_; j++)
- {
- if( conceptData_[i]->crqEquals( conceptData_[j].get() ) )
- conceptData_[j] = 0;
- else
- i = j;
- }
- }
-
- // create lists
- for( i = 0; i < free2_ - 1; i = j )
- {
- for (j = i + 1; j < free2_; j++ )
- {
- if( conceptData_[j].is() )
- {
- if( conceptData_[i]->cEquals( conceptData_[j].get() ) )
- {
- conceptData_[i]->addLast( conceptData_[j].get() );
- conceptData_[j] = 0;
- }
- else
- i = j;
- }
- }
- }
-
- // densify
- for( i = 0; i < free2_ - 1; i++)
- {
- if( ! conceptData_[i].is() )
- {
- for( j = i + 1; j < free2_; j++)
- {
- if (conceptData_[j].is() )
- {
- conceptData_[i] = conceptData_[j];
- conceptData_[j] = 0;
- break;
- }
- }
- }
- }
-
- // set up new document generators
- nextDocGenHeap_.reset();
- for( i = 0; i < free2_ && conceptData_[i].is(); i++)
- {
- NextDocGenerator* gen = new NextDocGenerator( conceptData_[i].get(),env_ );
- try
- {
- sal_Int32 doc;
- gen->first();
- if( ( doc = gen->getDocument() ) != NonnegativeIntegerGenerator::END )
- {
- /* !!! ??? is concept length used any more in any way
- conceptData_[i].
- setConceptLength(_env.
- getConceptLength(conceptData_[i].getConcept()));
- */
- nextDocGenHeap_.addGenerator( gen );
- }
- }
- catch( ... )
- {
- }
- }
-
- nextDocGenHeap_.start();
- env_->reset();
- env_->resetContextSearch();
- searchDocument();
-}
-
-
-
-
-void Search::addTerm( sal_Int32 col,sal_Int32 concept,double score )
-{
- if( env_->occursInText( concept ) )
- {
- ConceptData* cd = queries_[queries_.size()-1]->makeConceptData( col,concept,score,queries_.size()-1 );
- if( sal_uInt32( free2_ ) == conceptData_.size() )
- {
- conceptData_.push_back( 0 );
-// conceptVisitor_ = &conceptData_[0];
- }
- conceptData_[ free2_++ ] = cd;
- }
-}
-
-
-
-
-
-void Search::searchDocument()
-{
- std::vector< RoleFiller* > start( queries_.size() );
- do
- {
- try
- {
- switch( nextDocument( start ) )
- {
- case 0: // multi group
- genHeap_.start( start );
- while( genHeap_.next( start ) )
- ;
- break;
-
- case 1: // single group
- while( firstGenerator_.next() )
- firstGenerator_.generateFillers( start );
- break;
-
- case 2: // reached the end
- return;
- }
- }
- catch( const excep::XmlSearchException& )
- {
- continue;
- }
-
- RoleFiller* next;
- for( sal_uInt32 i = 0; i < queries_.size(); ++i )
- {
- if( ( next = start[i] ) != 0 && next != RoleFiller::STOP() )
- next->scoreList( queries_[i],document_ );
- else if( queries_[i]->zoned() )
- {
- RoleFiller* rfs = queries_[i]->getRoleFillers();
- if( rfs && rfs != RoleFiller::STOP() )
- rfs->scoreList( queries_[i],document_ );
- }
- }
- genHeap_.reset();
- }
- while( nextDocGenHeap_.isNonEmpty() );
-
- // #i80952
-#if 0
- for( sal_uInt32 i = 0; i < start.size(); ++i )
- if( start[i] != RoleFiller::STOP() )
- delete start[i];
-#endif
-}
-
-
-
-sal_Int32 Search::nextDocument( std::vector< RoleFiller* >& start ) throw( xmlsearch::excep::XmlSearchException )
-{
- while( nextDocGenHeap_.isNonEmpty() )
- { // still something to do
- sal_uInt32 i;
- for( i = 0; i < queries_.size(); ++i )
- if( queries_[i] )
- queries_[i]->resetForNextDocument();
-
- // gather all concepts this document has
- // and store associated conceptData
- sal_Int32 index = 0;
- document_ = nextDocGenHeap_.getDocument();
- docConcepts_.clear();
- queryMasks_.clear();
- do
- {
- docConcepts_.push_back( nextDocGenHeap_.getConcept() );
- queryMasks_.push_back( nextDocGenHeap_.getQueryMask() );
- ConceptData *conceptData = ( conceptData_[ index++ ] = nextDocGenHeap_.getTerms() ).get();
- conceptData->runBy( queries_ );
- nextDocGenHeap_.step();
- }
- while( nextDocGenHeap_.atDocument( document_) );
-
- // if there is no saturation model, some query will always vote YES
- // and so every document will be opened
-        // even in this case, however, savings can be achieved by not generating fillers
- // for some queries (not scoring, etc)
- // and, with more care, creation of some GroupGenerators can be avoided
- // saturating queries with lots of good hits will lead to best results
-
- sal_Int32 voteMask = 0;
- Query* query;
- for( i = 0; i < queries_.size(); ++i )
- {
- query = queries_[i];
- if( query )
- {
- query->saveRoleFillers( 0 );
- if( query->vote() )
- {
- // normal reset
- start[i] = query->zoned() ? RoleFiller::STOP() : 0;
- voteMask |= 1 << i;
- }
- else
- start[i] = RoleFiller::STOP(); // prohibit setting
- }
- }
-
- // we may eliminate some ConceptGroupGenerators
- // those which would be used only by Queries which voted NO
- if( voteMask != 0 )
- { // need to open up document
- ConceptGroupGenerator* gen;
- // !!! don't gather Fillers for disinterested Queries
- if( openDocumentIndex( document_ ) )
- { // multi group
- // set up all needed generators
- sal_Int32 j = 0;
- while( ( queryMasks_[j] & voteMask ) == 0 )
- ++j;
- // assert(j < index);
- sal_Int32 c = docConcepts_[j];
- sal_Int32 group = 0;
- // find first group
- while( /*group < maxConcepts_.size() &&*/
- c > maxConcepts_[ group ] && ++group < limit_ )
- ;
- gen = makeGenerator( group );
- gen->addTerms( indexOf(c),conceptData_[j].get() );
-
- for( ++j; j < index; j++ )
- if( ( queryMasks_[j] & voteMask ) > 0 )
- {
- c = docConcepts_[j];
- if( c > max_ )
- { // need to find another group
- // assert(group < _limit);
- while( /*group < maxConcepts_.size() &&*/
- c > maxConcepts_[ group ] && ++group < limit_ )
- ;
- gen = makeGenerator( group );
- }
- gen->addTerms( indexOf(c),conceptData_[j].get() );
- }
- return 0;
- }
- else
- { // single group
- for( sal_Int32 j = 0; j < index; j++ )
- if( queryMasks_[j] & voteMask )
- firstGenerator_.addTerms( indexOf( docConcepts_[j] ),conceptData_[j].get() );
- return 1;
- }
- }
- }
- return 2;
-}
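The voteMask bookkeeping in nextDocument() is an ordinary per-query bitmask: query i contributes bit (1 << i) when it votes YES for the current document, and concept entry j is later expanded into fillers only when (queryMasks_[j] & voteMask) is non-zero. A standalone illustration of that gating, using hypothetical sample data rather than the original API:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        // One bit per query that voted YES for the current document.
        std::uint32_t voteMask = 0;
        std::vector<bool> votedYes = { true, false, true };   // queries 0..2
        for (std::size_t i = 0; i < votedYes.size(); ++i)
            if (votedYes[i])
                voteMask |= std::uint32_t(1) << i;

        // Each concept entry carries the mask of queries interested in it;
        // generating fillers is only worthwhile when the two masks overlap.
        std::vector<std::uint32_t> queryMasks = { 0x2, 0x4, 0x1 };
        for (std::size_t j = 0; j < queryMasks.size(); ++j)
            std::cout << "concept " << j << ": "
                      << ((queryMasks[j] & voteMask) ? "expand" : "skip") << '\n';
        // prints: concept 0: skip, concept 1: expand, concept 2: expand
        return 0;
    }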
-
-
-
-
-bool Search::openDocumentIndex( sal_Int32 docNo ) throw( excep::XmlSearchException )
-{
- data_ = env_->getPositions( dataL_,docNo );
- base_ = env_->getDocumentIndex( docNo );
-
- startingIndex_ = 0;
- sal_Int32 kk = data_[ base_ ] & 0xFF, k2;
-
- switch( kk >> 6 )
- { // get type
- case 0: // single group, no extents
- k2 = data_[base_ + 1];
- firstGenerator_.init( dataL_,data_,base_ += 2,k2 );
- // decode concept table
- nConcepts_ = firstGenerator_.decodeConcepts( kk & 0x3F,0,concepts_ );
- return false;
-
- case 2: // multi group, no extents
- {
- kTable_.clear();
- offsets_.clear();
- maxConcepts_.clear();
- util::ByteArrayDecompressor compr( dataL_,data_,base_ + 1 );
- compr.decode( kk & 0x3F,kTable_ );
-
- sal_Int32 last = kTable_.back();
- kTable_.pop_back();
- compr.ascDecode( last,offsets_ );
- last = kTable_.back();
- kTable_.pop_back();
- compr.ascDecode( last,maxConcepts_ );
-
- base_ += 1 + compr.bytesRead();
- limit_ = maxConcepts_.size();
- }
- return true;
-
- case 1: // single group, extents
- case 3: // multi group, extents
- throw excep::XmlSearchException( rtl::OUString::createFromAscii( "extents not yet implemented\n" ) );
- }
- return false;
-}
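The header byte read in openDocumentIndex() packs two fields: the top two bits (kk >> 6) select the micro-index layout (single vs. multi group, with or without extents) and the low six bits (kk & 0x3F) parameterize the concept-table decoder. A small sketch of that unpacking, assuming only the bit layout visible in the switch above:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        std::uint8_t kk = 0x85;           // example header byte: 0b10'000101
        int layout = kk >> 6;             // 0..3, as in the switch above
        int k      = kk & 0x3F;           // decoding parameter
        std::cout << "layout=" << layout  // 2 -> multi group, no extents
                  << " k=" << k << '\n';  // k=5
        return 0;
    }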
-
-
-
-
-
-ConceptGroupGenerator* Search::makeGenerator( sal_Int32 group )
- throw( excep::XmlSearchException )
-{
- sal_Int32 shift,index;
- if( group > 0 )
- {
- index = base_ + offsets_[ group - 1 ];
- shift = maxConcepts_[ group - 1 ];
- }
- else
- {
- index = base_;
- shift = 0;
- }
-
- // initialize generator
- ConceptGroupGenerator* gen =
- new ConceptGroupGenerator( dataL_,data_,index,kTable_[ 1 + 2*group ] );
- // decode concept table
- nConcepts_ = gen->decodeConcepts( kTable_[2*group],shift,concepts_ );
-
- if( group < limit_ )
- max_ = concepts_[ nConcepts_ ] = maxConcepts_[ group ];
- else
- max_ = concepts_[ nConcepts_ - 1 ];
-
- genHeap_.addGenerator( gen );
-    startingIndex_ = 0; // lower bound into concepts_ for the next indexOf() search
- return gen;
-}
-
-
-
-sal_Int32 Search::indexOf(sal_Int32 concept) throw( excep::XmlSearchException )
-{
- sal_Int32 i = startingIndex_,j = nConcepts_,k;
- while( i <= j )
- if( concepts_[ k = (i + j)/2 ] < concept )
- i = k + 1;
- else if( concept < concepts_[k] )
- j = k - 1;
- else
- {
- startingIndex_ = k + 1;
- return k;
- }
- throw excep::XmlSearchException( rtl::OUString::createFromAscii( "indexOf not found" ) );
-}
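indexOf() is a binary search over the sorted concepts_ slice with one twist: startingIndex_ advances past each hit, so a run of lookups for ascending concept ids never re-scans the prefix it has already passed. A minimal sketch of the same idea over a std::vector; the names are illustrative, and it returns -1 where the original throws:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Binary search that remembers a lower bound between calls, assuming
    // successive lookups ask for non-decreasing values.
    struct AscendingIndex
    {
        const std::vector<int>& sorted;
        std::size_t low = 0;                        // analogue of startingIndex_

        long find(int value)                        // index of value, or -1
        {
            long i = long(low), j = long(sorted.size()) - 1;
            while (i <= j)
            {
                long k = (i + j) / 2;
                if (sorted[k] < value)      i = k + 1;
                else if (value < sorted[k]) j = k - 1;
                else { low = std::size_t(k) + 1; return k; }
            }
            return -1;
        }
    };

    int main()
    {
        std::vector<int> concepts = { 2, 5, 9, 14 };
        AscendingIndex idx{ concepts };
        assert(idx.find(5) == 1);
        assert(idx.find(14) == 3);   // this search starts at index 2, not 0
        assert(idx.find(20) == -1);
        return 0;
    }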
-
-
-
-
-sal_Int32 Search::partition( sal_Int32 p,sal_Int32 r )
-{
- rtl::Reference< ConceptData > x = conceptData_[ ((p + r) >> 1) & 0x7FFFFFFF ];
- sal_Int32 i = p - 1, j = r + 1;
- while( true )
- {
- while( x->compareWith( conceptData_[--j].get() ) )
- ;
- while( conceptData_[++i]->compareWith( x.get() ) )
- ;
- if( i < j )
- {
- rtl::Reference< ConceptData > t = conceptData_[i];
- conceptData_[i] = conceptData_[j];
- conceptData_[j] = t;
- }
- else
- return j;
- }
-}
-
-
-
-void Search::quicksort( sal_Int32 p,sal_Int32 r )
-{
- while (p < r)
- {
- sal_Int32 q = partition( p,r );
- quicksort(p, q);
- p = q + 1;
- }
-}
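partition() is the classic Hoare scheme (two indices moving inward and returning the split point), and quicksort() recurses only into the left half while iterating on the right, which keeps the recursion depth bounded. The same structure on a plain int vector, as a standalone sketch rather than the original compareWith()-based version:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    static long hoarePartition(std::vector<int>& a, long p, long r)
    {
        int x = a[(p + r) / 2];                 // pivot from the middle
        long i = p - 1, j = r + 1;
        while (true)
        {
            do { --j; } while (x < a[j]);
            do { ++i; } while (a[i] < x);
            if (i < j) std::swap(a[i], a[j]);
            else       return j;
        }
    }

    static void quicksort(std::vector<int>& a, long p, long r)
    {
        while (p < r)
        {
            long q = hoarePartition(a, p, r);
            quicksort(a, p, q);                 // recurse left ...
            p = q + 1;                          // ... iterate right
        }
    }

    int main()
    {
        std::vector<int> v = { 5, 1, 4, 1, 3 };
        quicksort(v, 0, long(v.size()) - 1);
        assert(std::is_sorted(v.begin(), v.end()));
        return 0;
    }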
diff --git a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx b/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
deleted file mode 100644
index 707a09f1a42e..000000000000
--- a/xmlhelp/source/cxxhelp/qe/XmlIndex.cxx
+++ /dev/null
@@ -1,457 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2008 by Sun Microsystems, Inc.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * $RCSfile: XmlIndex.cxx,v $
- * $Revision: 1.19 $
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_xmlhelp.hxx"
-#include <osl/diagnose.h>
-#include <qe/XmlIndex.hxx>
-#include <qe/DocGenerator.hxx>
-#include <util/ConceptList.hxx>
-#ifndef _XMLSEARCH_UTIL_RANDOMACCESSSTREAM_HXX_
-#include <util/RandomAccessStream.hxx>
-#endif
-#include <util/Decompressor.hxx>
-#include <qe/Query.hxx>
-
-using namespace xmlsearch;
-using namespace xmlsearch::excep;
-using namespace xmlsearch::qe;
-
-
-// extern sal_Int32 getInteger_( const sal_Int8* );
-
-
-XmlIndex::XmlIndex( const rtl::OUString& indexDir )
- throw( IOException )
- : currentBatchOffset_( 0 ),
- maxDocNumberInCache_( -1 ),
- indexAccessor_( indexDir ),
- dict_( indexAccessor_ ),
- contextTables_( 0 ),
- allListsL_( 0 ),
- allLists_( 0 ),
- positionsL_( 0 ),
- positions_( 0 ),
- contextsDataL_( 0 ),
- contextsData_( 0 ),
- concepts_( 0 ),
- documents_( 0 )
-{
- // reading DOCS
- try
- {
- allListsL_ = indexAccessor_.readByteArray( allLists_,
- rtl::OUString::createFromAscii("DOCS") ); // reading DOCS
- }
- catch( IOException )
- {
- OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open DOCS/docs" );
- throw;
- }
-
- // reading CONTEXTS
- try
- {
- contextsDataL_ = indexAccessor_.readByteArray( contextsData_,
- rtl::OUString::createFromAscii("CONTEXTS") ); // reading CONTEXTS
- }
- catch( IOException )
- {
- OSL_ENSURE( allLists_ != 0, "XmlIndex::XmlIndex -> cannot open CONTEXTS/contexts" );
- delete[] allLists_;
- throw;
- }
-
- // reading POSITIONS
- {
- positionsFile_ = indexAccessor_.getStream( rtl::OUString::createFromAscii( "POSITIONS" ),
- rtl::OUString::createFromAscii( "r" ) );
-
- OSL_ENSURE( positionsFile_ != 0, "XmlIndex::XmlIndex -> cannot open POSITIONS/positions" );
-
- if( positionsFile_ )
- {
-            //!!! temporary: better than a fixed large value, worse than 'intelligent' size management
- allInCache_ = true;
- if( allInCache_ ) // yes, intended
- {
- reset();
- positions_ = new sal_Int8[ positionsL_ = positionsFile_->length() ];
- positionsFile_->readBytes( positions_,positionsL_ );
- }
- }
- else
- {
- delete[] allLists_;
- delete[] contextsData_;
- throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no POSITIONS/positions") );
- }
- }
-
-
- // reading DOCS.TAB
- {
- util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "DOCS.TAB" ),
- rtl::OUString::createFromAscii( "r" ) );
-
- if( in )
- {
- sal_Int8 a[4];
- a[0] = a[1] = a[2] = 0;
- in->readBytes( &a[3],1 );
- sal_Int32 k1 = ::getInteger_( a );
- util::StreamDecompressor sddocs( in );
- sddocs.ascDecode( k1,concepts_ );
- in->readBytes( &a[3],1 );
- sal_Int32 k2 = ::getInteger_( a );
- offsets_.push_back( 0 );
- util::StreamDecompressor sdoffsets( in );
- sdoffsets.ascDecode( k2,offsets_ );
- delete in;
- }
- else
- {
- delete[] allLists_;
- delete[] contextsData_;
- delete[] positions_;
- delete positionsFile_;
- throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no DOCS.TAB/docs.tab") );
- }
- }
-
- // reading OFFSETS
- {
- util::RandomAccessStream* in = indexAccessor_.getStream( rtl::OUString::createFromAscii( "OFFSETS" ),
- rtl::OUString::createFromAscii( "r" ) );
- if( in )
- {
- sal_Int8 a[4];
- a[0] = a[1] = a[2] = 0;
- in->readBytes( &a[3],1 );
- sal_Int32 k1 = ::getInteger_( a );
- util::StreamDecompressor sddocs( in );
- sddocs.decode( k1,documents_ );
- in->readBytes( &a[3],1 );
- sal_Int32 k2 = ::getInteger_( a );
- util::StreamDecompressor sdoffsets( in );
- sdoffsets.ascDecode( k2,microIndexOffsets_ );
- in->readBytes( &a[3],1 );
- sal_Int32 k3 = ::getInteger_( a );
- util::StreamDecompressor sdtitles( in );
- sdtitles.decode( k3,titles_ );
-
- in->readBytes( &a[3],1 );
- sal_Int32 k4 = ::getInteger_( a );
- // contextsOffsets_ = new IntegerArray(_documents.cardinality() + 1);
- util::StreamDecompressor co(in);
- // _contextsOffsets.add(0); // first, trivial offset
- co.ascDecode( k4,contextsOffsets_ );
- delete in;
- }
- else
- {
- delete[] allLists_;
- delete[] contextsData_;
- delete[] positions_;
- delete positionsFile_;
- throw IOException( rtl::OUString::createFromAscii( "XmlIndex::XmlIndex -> no OFFSETS/offsets") );
- }
- }
-
- // reading linknames
- {
- util::RandomAccessStream* in =
- indexAccessor_.getStream( rtl::OUString::createFromAscii( "LINKNAMES" ),
- rtl::OUString::createFromAscii( "r" ) );
- if( ! in )
- {
- delete[] allLists_;
- delete[] contextsData_;
- delete[] positions_;
- delete positionsFile_;
- throw IOException(
- rtl::OUString::createFromAscii( "BtreeDict::BtreeDict -> no LINKNAMES/linknames" ) );
- }
-
- sal_Int32 len = in->length();
- char* bff = new char[ 1 + len ], *bff1 = new char[ 1 + len ];
- bff[ len ] = 0;
- in->readBytes( reinterpret_cast<sal_Int8*>( bff ),len );
- delete in;
-
- // Now the buffer must be densified.
- int i,len1 = 0;
- for( i = 0; i < len; ++i )
- {
- if( bff[i] )
- bff1[ len1++ ] = bff[i];
- }
- bff1[len1] = 0;
- delete[] bff;
- rtl::OString aStr( bff1 ); // build a string from the densified buffer;
- delete[] bff1;
-
-// // Now determine the order
-// #define NAMECOUNT 16
-// #define UNREACHABLEPLACE 100000;
-// /**
-// * The available names cannot be determined from LINKNAMES at current,
-// * because LINKNAMES is a serialized Java-object
-// * Always update LINKNAMES if index.xsl or default.xsl are modified.
-// */
-// rtl::OString LN[NAMECOUNT];
-// LN[0] = "text:span";
-// LN[1] = "help:help-text";
-// LN[2] = "help:to-be-embedded";
-// LN[3] = "headingheading";
-// LN[4] = "office:body";
-// LN[5] = "text:p";
-// LN[6] = "office:document";
-// LN[7] = "help:link";
-// LN[8] = "help:key-word";
-// LN[9] = "table:table";
-// LN[10] = "table:table-header-row";
-// LN[11] = "table:table-row";
-// LN[12] = "table:table-cell";
-// LN[13] = "text:unordered-list";
-// LN[14] = "text:ordered-list";
-// LN[15] = "text:list-item";
- // Now determine the order
-
-#define NAMECOUNT 16
-#define UNREACHABLEPLACE 100000;
- /**
-     * The available names cannot currently be determined from LINKNAMES,
-     * because LINKNAMES is a serialized Java object.
- * Always update LINKNAMES if index.xsl or default.xsl are modified.
- */
-
- rtl::OString LN[NAMECOUNT];
- LN[0] = "helpdocument";
- LN[1] = "body";
- LN[2] = "title";
- LN[3] = "table";
- LN[4] = "tablecell";
- LN[5] = "tablerow";
- LN[6] = "list";
- LN[7] = "listitem";
- LN[8] = "item";
- LN[9] = "emph";
- LN[10] = "paragraph";
- LN[11] = "section";
- LN[12] = "bookmark";
- LN[13] = "bookmark_value";
- LN[14] = "ahelp";
- LN[15] = "link";
-
- // Determine index in file
- int idx[NAMECOUNT];
- /*int*/ linkNamesL_ = NAMECOUNT;
- for( i = 0; i < NAMECOUNT; ++i )
- if( ( idx[i] = aStr.indexOf( LN[i] ) ) == -1 ) {
- idx[i] = UNREACHABLEPLACE;
- --linkNamesL_;
- }
-
- linkNames_ = new rtl::OUString[linkNamesL_];
- for( i = 0; i < linkNamesL_; ++i ) {
- // TODO what happens to first if we never hit Place?
- int first = 0;
-        int Place = UNREACHABLEPLACE; // This is definitely the last place
- for( int j = 0; j < NAMECOUNT; ++j )
- {
- if( idx[j] < Place )
- Place = idx[first = j];
- }
- idx[first] = UNREACHABLEPLACE;
- linkNames_[i] = rtl::OUString( LN[first].getStr(),LN[first].getLength(),RTL_TEXTENCODING_UTF8 );
- }
-
-#undef NAMECOUNT
-#undef UNREACHABLEPLACE
- } // end linknames
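The selection loop above repeatedly picks the name with the smallest first-occurrence index in the densified LINKNAMES buffer and marks its slot with UNREACHABLEPLACE, i.e. it orders the known element names by where they first appear. The same ordering can be expressed by sorting (first-index, name) pairs; a standalone sketch with made-up sample data:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
        // First-occurrence index of each known name inside the buffer
        // (-1 = not present), as the indexOf() calls above would produce.
        std::vector<std::pair<int, std::string>> byFirstIndex = {
            { 42, "paragraph" }, { 7, "body" }, { -1, "ahelp" }, { 0, "helpdocument" }
        };

        // Drop absent names, then order the rest by first occurrence.
        byFirstIndex.erase(
            std::remove_if(byFirstIndex.begin(), byFirstIndex.end(),
                           [](const std::pair<int, std::string>& p) { return p.first < 0; }),
            byFirstIndex.end());
        std::sort(byFirstIndex.begin(), byFirstIndex.end());

        for (const auto& p : byFirstIndex)
            std::cout << p.second << '\n';   // helpdocument, body, paragraph
        return 0;
    }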
-
-
- {
- contextTables_ = new ContextTables(contextsOffsets_,
- contextsDataL_,contextsData_,
- linkNamesL_,linkNames_ );
- }
-}
-
-
-XmlIndex::~XmlIndex()
-{
- delete[] allLists_;
- delete[] contextsData_;
- delete[] linkNames_;
- delete[] positions_;
- delete positionsFile_;
- delete contextTables_;
-}
-
-
-
-void XmlIndex::reset()
-{
- maxDocNumberInCache_ = allInCache_ ? static_cast<sal_Int32>(microIndexOffsets_.size()) - 1 : -1;
-}
-
-
-sal_Int32 binarySearch( const std::vector<sal_Int32>& arr,sal_Int32 value )
-{
- sal_Int32 i = 0, j = arr.size(), k;
- while (i <= j)
- if (arr[k = (i + j)/2] < value)
- i = k + 1;
- else if (value < arr[k])
- j = k - 1;
- else
- return k;
- return -1;
-}
-
-
-NonnegativeIntegerGenerator* XmlIndex::getDocumentIterator( sal_Int32 concept )
-{
- sal_Int32 index = binarySearch( concepts_,concept );
-
- if( index >= 0 )
- return new util::ConceptList( allLists_,allListsL_,offsets_[index] );
- else
- return 0;
-}
-
-
-bool XmlIndex::occursInText( sal_Int32 concept )
-{
- return binarySearch( concepts_,concept) >= 0;
-}
-
-
-sal_Int8* XmlIndex::getPositions( sal_Int32& len,sal_Int32 docNo ) throw( excep::XmlSearchException )
-{
- contextTables_->setMicroindex( docNo );
- if( docNo > maxDocNumberInCache_ )
- readMicroindexes( docNo );
-
- len = positionsL_;
- return positions_;
-}
-
-
-rtl::OUString XmlIndex::documentName( sal_Int32 docNumber ) throw( excep::XmlSearchException )
-{
- if( docNumber < 0 || documents_.size() <= sal_uInt32( docNumber ) )
- {
- rtl::OUString message = rtl::OUString::createFromAscii( "XmlIndex::documentName -> " );
- throw excep::XmlSearchException( message );
- }
-
- return dict_.fetch( documents_[ docNumber ] );
-}
-
-
-
-
-void XmlIndex::readMicroindexes( sal_Int32 docNo ) throw( xmlsearch::excep::IOException )
-{
- currentBatchOffset_ = microIndexOffsets_[docNo];
- sal_Int32 offsetLimit = currentBatchOffset_ + positionsL_;
- sal_Int32 upTo = 0, nextDoc = docNo;
- sal_Int32 lastOffset = 0;
-
- do
- {
- if( ++nextDoc == sal_Int32( microIndexOffsets_.size() ) )
- lastOffset = sal_Int32( positionsFile_->length() );
- else if( microIndexOffsets_[ nextDoc ] > offsetLimit )
- lastOffset = microIndexOffsets_[ nextDoc ];
- }
- while( lastOffset == 0 );
-
- if( lastOffset > offsetLimit )
- {
- upTo = microIndexOffsets_[ nextDoc - 1 ];
- maxDocNumberInCache_ = nextDoc - 2;
- }
- else
- {
- upTo = lastOffset;
- maxDocNumberInCache_ = nextDoc - 1;
- }
-
- if( maxDocNumberInCache_ < docNo )
- { // cache too small
- // for current microindex
- // System.out.println("expanding cache to " + _positionsCacheSize);
- delete[] positions_;
- positions_ = new sal_Int8[ positionsL_ = lastOffset - currentBatchOffset_ ];
- readMicroindexes( docNo );
- return;
- }
-
- positionsFile_->seek( currentBatchOffset_ );
- positionsFile_->readBytes( positions_,upTo - currentBatchOffset_ );
-}
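readMicroindexes() keeps a sliding window of the POSITIONS file in memory: starting at the requested document's offset it pulls in as many complete micro-indexes as fit into the current buffer, and if even the requested one does not fit it reallocates the buffer to that micro-index's exact size and retries. A standalone sketch of that grow-and-load caching policy, with hypothetical names and a toy backing store:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // offsets[d] = start of document d's micro-index; offsets.back() = file size.
    struct MicroindexCache
    {
        std::vector<long> offsets;
        std::vector<char> buffer;
        long base = 0;                 // file offset of buffer[0]
        long maxDocInCache = -1;

        template <typename ReadAt>     // ReadAt(fileOffset, dst, length)
        void load(long docNo, ReadAt readAt)
        {
            base = offsets[docNo];
            if (offsets[docNo + 1] - base > long(buffer.size()))
                buffer.resize(offsets[docNo + 1] - base);   // grow so docNo fits
            long d = docNo;            // pull in following documents while they fit
            while (d + 1 < long(offsets.size()) - 1 &&
                   offsets[d + 2] - base <= long(buffer.size()))
                ++d;
            maxDocInCache = d;
            readAt(base, buffer.data(), offsets[d + 1] - base);
        }
    };

    int main()
    {
        std::vector<char> file(100, 'x');
        MicroindexCache cache{ { 0, 10, 40, 100 }, std::vector<char>(16) };
        cache.load(1, [&](long off, char* dst, long len)
                      { std::copy(file.begin() + off, file.begin() + off + len, dst); });
        assert(cache.maxDocInCache == 1);   // doc 2's 60 bytes exceed the 30-byte window
        return 0;
    }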
-
-
-QueryHitData* XmlIndex::hitToData( QueryHit* hit )
-{
- sal_Int32 termsL,matchesL;
- sal_Int32 *matches = hit->getMatches( matchesL );
- rtl::OUString *terms = new rtl::OUString[ termsL = matchesL >>/*>*/ 1 ];
- for( sal_Int32 i = 0; i < termsL; ++i )
- {
- sal_Int32 aInt = ( i << 1 );
- if( 0 <= aInt && aInt < matchesL )
- {
- sal_Int32 match = matches[ aInt ];
- if( match > 0 )
- try
- {
- terms[i] = fetch( match );
- }
- catch( const excep::XmlSearchException& )
- {
- }
- }
- }
-
- sal_Int32 document = hit->getDocument();
- QueryHitData *res = new QueryHitData( hit->getPenalty(),
- documentName( document ),
- termsL,terms );
- contextTables_->setMicroindex( document );
- contextTables_->resetContextSearch();
- return res;
-}
-
-
diff --git a/xmlhelp/source/cxxhelp/qe/makefile.mk b/xmlhelp/source/cxxhelp/qe/makefile.mk
index e86231898cb1..249950c4f02a 100644
--- a/xmlhelp/source/cxxhelp/qe/makefile.mk
+++ b/xmlhelp/source/cxxhelp/qe/makefile.mk
@@ -45,14 +45,7 @@ ENABLE_EXCEPTIONS=TRUE
CFLAGS+=-GR
.ENDIF
-SLOFILES=\
- $(SLO)$/ConceptData.obj \
- $(SLO)$/ContextTables.obj \
- $(SLO)$/DocGenerator.obj \
- $(SLO)$/Query.obj \
- $(SLO)$/QueryProcessor.obj \
- $(SLO)$/Search.obj \
- $(SLO)$/XmlIndex.obj
+SLOFILES=$(SLO)$/DocGenerator.obj
# --- Targets ------------------------------------------------------