⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multitermindexiteratortest.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package test.it.unimi.dsi.mg4j.index;

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
import it.unimi.dsi.mg4j.query.nodes.Query;
import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.mg4j.query.parser.QueryParserException;
import it.unimi.dsi.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.mg4j.search.OrDocumentIterator;
import it.unimi.dsi.mg4j.search.visitor.AbstractDocumentIteratorVisitor;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.mg4j.tool.IndexBuilder;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;

import junit.framework.TestCase;

import org.apache.commons.configuration.ConfigurationException;

import test.it.unimi.dsi.mg4j.document.StringArrayDocumentCollection;
import test.it.unimi.dsi.mg4j.search.IntArrayIndexIterator;

/**
 * Tests for {@link MultiTermIndexIterator}.
 *
 * <p>Two tests run against small indices that {@link IndexBuilder} writes to the
 * temporary directory; the third one feeds hand-written {@link IntArrayIndexIterator}
 * instances to the multi-term iterator and checks documents, intervals, counts and
 * positions one step at a time.
 */
public class MultiTermIndexIteratorTest extends TestCase {

	/** Index over the three one-word documents "a", "b" and "c", rebuilt before every test. */
	private BitStreamIndex index;

	/** Query parser created with the term processor of {@link #index}. */
	private SimpleParser simpleParser;

	/**
	 * Builds the three-document index and the parser used by {@link #testSkipBug()}.
	 */
	@Override
	public void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		String basename = buildTemporaryIndex( "a", "b", "c" );
		index = DiskBasedIndex.getInstance( basename + "-text", true, true );
		simpleParser = new SimpleParser( index.termProcessor );
	}

	/**
	 * Builds an index over the given documents using a fresh temporary basename.
	 *
	 * <p>The placeholder created by {@link File#createTempFile(String, String)} and every
	 * file left in the same directory whose name starts with the placeholder's name are
	 * registered with {@link File#deleteOnExit()}, so test runs no longer accumulate
	 * files in the temporary directory.
	 *
	 * @param documents the textual content of the documents to index, in document order.
	 * @return the basename that was passed to {@link IndexBuilder}.
	 */
	private String buildTemporaryIndex( String... documents ) throws ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		File placeholder = File.createTempFile( getClass().getSimpleName(), "test" );
		placeholder.deleteOnExit();
		String basename = placeholder.getCanonicalPath();

		new IndexBuilder( basename, new StringArrayDocumentCollection( documents ) ).run();

		// The builder writes its output next to the placeholder, using the basename as a prefix.
		File canonicalPlaceholder = new File( basename );
		File directory = canonicalPlaceholder.getParentFile();
		File[] siblings = directory == null ? null : directory.listFiles();
		if ( siblings != null ) {
			String prefix = canonicalPlaceholder.getName();
			for ( File sibling : siblings ) {
				if ( sibling.getName().startsWith( prefix ) ) {
					sibling.deleteOnExit();
				}
			}
		}
		return basename;
	}

	/**
	 * Checks that skipping straight to document 2 on the iterator built for the
	 * query "a + b + c" returns 2.
	 */
	public void testSkipBug() throws QueryParserException, QueryBuilderVisitorException, IOException {
		Query query = simpleParser.parse( "a + b + c" );
		DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor( null, index, Integer.MAX_VALUE );
		DocumentIterator documentIterator = query.accept( documentIteratorBuilderVisitor );
		try {
			assertEquals( 2, documentIterator.skipTo( 2 ) );
		}
		finally {
			// Release the iterator even when the assertion fails.
			documentIterator.dispose();
		}
	}

	/**
	 * Walks a multi-term iterator built on three in-memory index iterators and checks,
	 * document by document, the intervals, the count and the three ways of reading
	 * positions (array, caller-supplied array, {@link IntIterator}).
	 */
	public void test() throws IOException {
		// NOTE(review): the first array presumably lists document pointers and the second
		// the positions for each of those documents; confirm against IntArrayIndexIterator.
		IndexIterator i0 = new IntArrayIndexIterator( new int[] { 0, 1, 2 },
				new int[][] {
				{ 0, 3 },
				{ 0 },
				{ 0 },
				} );
		IndexIterator i1 = new IntArrayIndexIterator( new int[] { 0, 2 },
				new int[][] {
				{ 1 },
				{ 1 },
				} );
		IndexIterator i2 = new IntArrayIndexIterator( new int[] { 0, 1, 3 },
				new int[][] {
				{ 2 },
				{ 2 },
				{ 0 },
				} );
		MultiTermIndexIterator multiTermIndexIterator = (MultiTermIndexIterator)MultiTermIndexIterator.getInstance( i0, i1, i2 );
		assertEquals( 3, multiTermIndexIterator.frequency() );

		assertTrue( multiTermIndexIterator.hasNext() );
		assertTrue( multiTermIndexIterator.hasNext() ); // To increase coverage

		// Document 0: expected positions 0, 1, 2, 3.
		assertEquals( 0, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertEquals( 4, multiTermIndexIterator.count() );
		// Position reads are interleaved with interval reads on purpose: the remaining
		// intervals must still come out in order afterwards.
		int[] position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );

		position = new int[ 4 ];
		multiTermIndexIterator.positions( position );
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );

		assertEquals( Interval.valueOf( 3 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		IntIterator positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertEquals( 3, positions.nextInt() );
		assertFalse( positions.hasNext() );

		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage

		// Document 1: expected positions 0 and 2.
		assertEquals( 1, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 2, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertFalse( positions.hasNext() );

		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// Document 2: expected positions 0 and 1.
		assertEquals( 2, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertFalse( positions.hasNext() );

		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// Document 3: here we get the iterator of the underlying IndexIterator
		assertEquals( 3, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 1, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertFalse( positions.hasNext() );

		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// The end
		assertFalse( multiTermIndexIterator.hasNext() );
		assertFalse( multiTermIndexIterator.hasNext() ); // To increase coverage
	}

	/**
	 * Regression test contributed by Fabien Campagne: invoking {@code positionArray()}
	 * on the index iterators sitting under an OR of terms and multi-term iterators
	 * must complete without throwing. The test makes no assertion on the returned
	 * positions.
	 */
	public void testMG4JMultiTermPositionIssue() throws IllegalAccessException, NoSuchMethodException, ConfigurationException, IOException, InvocationTargetException, InstantiationException, ClassNotFoundException, URISyntaxException {
		String basename = buildTemporaryIndex(
				"A B C D E F F G G",
				"G A T H S K L J W L",
				"E S K D L J F K L S J D L S J D",
				"E B"
		);
		// Named differently from the field so that the field is not shadowed.
		Index positionIndex = DiskBasedIndex.getInstance( basename + "-text", true, true );

		// Equivalent textual query: "A| B+C+G|W|S+J"
		DocumentIterator iterator = OrDocumentIterator.getInstance(
				positionIndex.documents( "A" ),
				MultiTermIndexIterator.getInstance(
						positionIndex.documents( "B" ),
						positionIndex.documents( "C" ),
						positionIndex.documents( "G" )
				),
				positionIndex.documents( "W" ),
				MultiTermIndexIterator.getInstance(
						positionIndex.documents( "S" ),
						positionIndex.documents( "J" )
				) );

		try {
			// One-element array so that the anonymous visitor can read the current document.
			final int[] currDoc = new int[ 1 ];

			// A visitor invoking positionArray() on IndexIterators positioned on the current document.
			DocumentIteratorVisitor visitor = new AbstractDocumentIteratorVisitor() {
				public boolean visit( IndexIterator indexIterator ) throws IOException {
					if ( indexIterator.count() > 0 && indexIterator.document() == currDoc[ 0 ] ) {
						indexIterator.positionArray();
					}
					return true;
				}
			};

			// First pass: skip to every document in turn and visit those the iterator lands on.
			for ( int document = 0; document < positionIndex.numberOfDocuments; document++ ) {
				currDoc[ 0 ] = iterator.skipTo( document );
				if ( document == currDoc[ 0 ] ) {
					iterator.accept( visitor ); // see the visit() method of the visitor above.
				}
			}

			// Second pass: drain whatever the iterator still has to return.
			while ( iterator.hasNext() ) {
				currDoc[ 0 ] = iterator.nextDocument();
				iterator.accept( visitor );
			}
		}
		finally {
			// The original never released this iterator.
			iterator.dispose();
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -