📄 abstractaggregator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.search.score;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2004-2007 Paolo Boldi and Sebastiano Vigna * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITfNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import java.io.IOException;import it.unimi.dsi.fastutil.ints.AbstractIntIterator;import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.search.CachingDocumentIterator;import it.unimi.dsi.mg4j.search.DocumentIterator;/** A {@link Scorer} that aggregates a number of underlying {@link it.unimi.dsi.mg4j.search.score.DelegatingScorer delegating scorers}, providing equalisation if required. *  * <p>An aggregator combines the results of several scorers following some policy (see, e.g., * {@link it.unimi.dsi.mg4j.search.score.LinearAggregator}). In doing so, often the aggregator * needs to explore the first scores returned by each scorer, and tune some internal parameters. This * procedure, <em>equalisation</em>, is supported by this class: if {@link #equalize(int)} is provided with a * positive number of samples, they will be fetched from the underlying document iterator, scored, and * passed to the implementing subclass so that equalisation information can be properly set up. *  * <p>Additionally, this class ensures that if several scorers need access to intervals,  * the document iterator to be scored is decorated with a {@link it.unimi.dsi.mg4j.search.CachingDocumentIterator}, * so that several scorer can access intervals. *  * <p>Since this class uses the same document iterator for <em>all</em> aggregated scorers, they * must be necessarily {@linkplain it.unimi.dsi.mg4j.search.score.DelegatingScorer delegating scorers}. *  * <p>Implementing subclasses must provide the following methods:  * <ul> * <li>{@link #setupEqualizationFactors()}, which is called in case equalisation is required and * must examine {@link #actualSamples} elements from {@link #sampleScore} (each element is a tuple * of scores, one for each scorer) and use that information to set the equalisation factors (if {@link #samples} * is zero, default values must be applied); * <li>{@link #score(double[])}, which must compute the equalised aggregated score using * the given array of scores (each to be thought as a score coming from the respective scorer). * </ul> *  * <p>Additionally, implementing subclasses must remember to call {@link #equalize(int)} * when generating a {@linkplain it.unimi.dsi.lang.FlyweightPrototype#copy() flyweight copy}, * so that the state of the aggregator is reproduced correctly. */public abstract class AbstractAggregator extends AbstractIntIterator implements Scorer {		/** The current document iterator. */	protected DocumentIterator documentIterator;	/** The number of underlying scorers. */	protected final int n;	/** The underlying scorers. */	protected final Scorer[] scorer;	/** The current score. */	protected final double[] currScore;	/** Whether we need caching the intervals. */	protected final boolean needsCaching;	/** Cached sample of document pointers. */	protected int[] sampleDocument;	/** Cached sample of document scores. */	protected double[][] sampleScore;	/** The number of samples for equalisation (0 means no equalisation). */	protected int samples;	/** The next sample to be returned, if smaller than {@link #actualSamples}. */	protected int currSample;	/** The actual number of samples obtained (might be less than {@link #samples} if we exhausted the document iterator). */	protected int actualSamples;		/** Creates an aggregator.	 * 	 * @param scorer the scorers.	 */	public AbstractAggregator( final Scorer[] scorer ) {		this.n = scorer.length;		this.scorer = scorer;		this.currScore = new double[ n ];		int needsIntervals = 0;		for( int i = scorer.length; i-- != 0; ) {			if ( ! ( scorer[ i ] instanceof DelegatingScorer ) ) throw new IllegalArgumentException( "An aggregator needs delegating scorers" );			if ( scorer[ i ].usesIntervals() ) needsIntervals++;		}		needsCaching = needsIntervals > 1;		actualSamples = -1;	}	public double score( final Index index ) {		throw new UnsupportedOperationException();	}		public double score() throws IOException {		// If we are still walking through the sample, return a score from there		if ( currSample <= actualSamples ) return score( sampleScore[ currSample - 1 ] );		// Otherwise, create new score array and pass it to the implementing subclass.		final double[] currScore = this.currScore;		for( int i = n; i-- != 0; ) currScore[ i ] = scorer[ i ].score(); 		return score( currScore );	}			/** Set the number of samples for equalisation.	 *  	 * @param samples the number of samples to be used to equalise scores; a value	 * of zero disables equalisation.	 */		public synchronized void equalize( int samples ) {		this.samples = samples;		if ( samples == 0 ) {			sampleDocument = null;			sampleScore = null;			actualSamples = -1;		}		else {			sampleDocument = new int[ samples ];			sampleScore = new double[ samples ][ n ];		}	}		/** Delegates to the underlying scorers.	 * 	 * @return true if at least one underlying scorer supports weights.	 */	public synchronized boolean setWeights( final Reference2DoubleMap<Index> index2weight ) {		boolean atLeastOne = false;		for( int i = n; i-- != 0; ) atLeastOne |= scorer[ i ].setWeights( index2weight );		return atLeastOne;	}		/** Delegates to the underlying scorers.	 * 	 * @return true if at least one underlying scorer uses intervals.	 */	public boolean usesIntervals() {		for( int i = n; i-- != 0; ) if ( scorer[ i ].usesIntervals() ) return true;		return false;	}	/** Delegates to the underlying scorers, possibly wrapping the argument in a	 * {@link CachingDocumentIterator}; then, if {@link #samples} is nonzero computes	 * that many document scores and invokes {@link #setupEqualizationFactors()}.	 */	public void wrap( DocumentIterator documentIterator ) throws IOException {		if ( needsCaching ) documentIterator = new CachingDocumentIterator( documentIterator );		for( int i = n; i-- != 0; ) scorer[ i ].wrap( documentIterator );		if ( samples > 0 ) {			// Let us prepare a sample.			int i;			for( i = 0; i < samples && ( sampleDocument[ i ] = documentIterator.nextDocument() ) != -1; i++ ) {				;				for( int j = n; j-- != 0; ) sampleScore[ i ][ j ] = scorer[ j ].score();			}			actualSamples = i;			currSample = 0;		}		// This must be *always* called--in the worst case, it will just set all factors to 1.		setupEqualizationFactors();		this.documentIterator = documentIterator;	}		/** Computes an aggregated score using the given array of basic scores.	 * The array is parallel to {@link #scorer}.	 *  	 * @param score an array of scores.	 * @return the aggregated scorer.	 */	protected abstract double score( double score[] );		/** Sets up the equalisation factors.	 * 	 * <p>Implementations should look into {@link #sampleScore} and set up the	 * equalisation logic. Note that this method is responsible for setting	 * up appropriate equalisation factors <em>even if no equalisation is required</em> 	 * (e.g., setting all factors to 1 ).	 */	protected abstract void setupEqualizationFactors();	public int nextDocument() throws IOException {		if ( currSample < actualSamples ) return sampleDocument[ currSample++ ];		currSample = Integer.MAX_VALUE;		return documentIterator.nextDocument();	}	public boolean hasNext() {		return currSample < actualSamples || documentIterator.hasNext();	}		public int nextInt() {		if ( ! hasNext() ) throw new UnsupportedOperationException();		try {			return nextDocument();		}		catch ( IOException e ) {			throw new RuntimeException( e );		}	}}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -