📄 countersetupvisitor.java
字号:
package it.unimi.dsi.mg4j.search.visitor;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2006-2007 Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntArrays;import it.unimi.dsi.mg4j.index.IndexIterator;import java.io.IOException;import java.util.Arrays;/** A visitor using the information collected by a * {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor} * to set up term frequencies and counters. * * <p>Term {@linkplain #frequency frequencies} and {@linkplain #count counts} are stored * in publicly accessible parallel arrays of integers indexed by <em>offsets</em>, * as defined by a {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor} provided at construction time. * Additionally, the {@linkplain #indexNumber index number} (a position into the array returned by * {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor#indices()}) and the * {@linkplain #term term} for each offset are available. * * <p>When instances of this class perform a visit, they prepare the arrays and * fill those contaning {@linkplain #frequency frequencies} and {@linkplain #indexNumber index numbers}. * It is up to an instance of {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor} * (which requires an instance of this class in its constructor) to fill * the {@linkplain #count counts} with data related to * the current document. * * <p>For a more complete picture, see {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor}. */public class CounterSetupVisitor extends AbstractDocumentIteratorVisitor { /** For each offset, the corresponding index as a position in {@link TermCollectionVisitor#indices()}. */ public int[] indexNumber; /** For each offset, the corresponding term. */ public CharSequence[] term; /** For each offset, its count. */ public int[] count; /** For each offset, its frequency. */ public int[] frequency; /** The underlying term-collection visitor. */ private final TermCollectionVisitor termCollectionVisitor; /** Creates a new counter-setup visitor based on a given term-collection visitor. * * @param termCollectionVisitor a term-collection visitor. */ public CounterSetupVisitor( TermCollectionVisitor termCollectionVisitor ) { this.termCollectionVisitor = termCollectionVisitor; prepare(); } /** Prepares the internal state of this visitor using data from the associated * {@link TermCollectionVisitor}. * * <p>Note that because of this dependency, it is essential that you * first prepare and visit with the associated {@link TermCollectionVisitor}, * and then prepare and visit with this visitor. */ public CounterSetupVisitor prepare() { count = new int[ termCollectionVisitor.numberOfPairs() ]; frequency = new int[ termCollectionVisitor.numberOfPairs() ]; indexNumber = new int[ termCollectionVisitor.numberOfPairs() ]; term = new CharSequence[ termCollectionVisitor.numberOfPairs() ]; return this; } public boolean visit( final IndexIterator indexIterator ) throws IOException { if ( indexIterator.frequency() > 0 && indexIterator.index().hasCounts ) { // We fill the frequency and index entries final int id = indexIterator.id(); // offset into all arrays this.frequency[ id ] = indexIterator.frequency(); this.indexNumber[ id ] = termCollectionVisitor.indexMap().getInt( indexIterator.index() ); this.term[ id ] = indexIterator.term(); } return true; } /** Updates the {@link #count} using the provided index iterator. * * <p>This method is usually called back by a {@link CounterCollectionVisitor} built upon * this counter-setup visitor. It simply retrieves the index iterator * {@linkplain IndexIterator#id() id} and use it as an index into * {@link #count} to store {@link IndexIterator#count()}. * * @param indexIterator an index iterator. * @throws IOException */ public void update( final IndexIterator indexIterator ) throws IOException { count[ indexIterator.id() ] = indexIterator.count(); } /** Zeroes all counters, but not frequencies. */ public void clear() { IntArrays.fill( count, 0 ); } public String toString() { return "[" + Arrays.toString( frequency ) + ", " + Arrays.toString( count ) +"]"; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -