⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indextest.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
	/**	 * Checks that the fields indexed by the given indices have been indexed correctly by performing	 * a mock index construction over the given sequence.	 * 	 * @param sequence a document sequence.	 * @param resolver the virtual document resolver used to index the collection (we assume the	 * same for all virtual fields), or <code>null</code>.	 * @param gap the virtual document gap (we assume the same for all virtual fields; it is	 * immaterial if no field is virtual).	 * @param index a list of indices that have indexed one or more fields of <code>sequence</code>.	 */	@SuppressWarnings("unchecked")	public void checkAgainstContent( DocumentSequence sequence, int[] map, VirtualDocumentResolver resolver, int gap, Index... index ) throws IOException {		DocumentIterator iterator = sequence.iterator();		DocumentFactory factory = sequence.factory();		Document document;		final int n = index.length;		final int[] field = new int[ n ];		final int[][] currMaxPos = new int[ n ][];		final int[] maxDoc = new int[ n ];		IntArrays.fill( maxDoc, -1 );		final Object2ObjectOpenHashMap<MutableString, ObjectArrayList<int[]>>[] termMap = new Object2ObjectOpenHashMap[ n ];		final IntArrayList[] payloadPointers = new IntArrayList[ n ];		final ObjectArrayList<Object>[] payloadContent = new ObjectArrayList[ n ];		for ( int i = 0; i < n; i++ ) {			field[ i ] = factory.fieldIndex( index[ i ].field );			switch ( factory.fieldType( field[ i ] ) ) {			case VIRTUAL:				currMaxPos[ i ] = new int[ resolver.numberOfDocuments() ];			case TEXT:				termMap[ i ] = new Object2ObjectOpenHashMap<MutableString, ObjectArrayList<int[]>>();				break;			case DATE:			case INT:				payloadPointers[ i ] = new IntArrayList();				payloadContent[ i ] = new ObjectArrayList<Object>();			}		}		int documentIndex = 0;		while ( ( document = iterator.nextDocument() ) != null ) {			for ( int i = 0; i < field.length; i++ ) {				switch ( factory.fieldType( field[ i ] ) ) {				case TEXT:					processDocument( document.wordReader( field[ i ] ).setReader( (Reader)document.content( field[ i ] ) ), map == null ? documentIndex : map[ documentIndex ], 0, termMap[ i ],							index[ i ].termProcessor );					break;				case VIRTUAL:					ObjectArrayList<VirtualDocumentFragment> fragments = (ObjectArrayList<VirtualDocumentFragment>)document.content( field[ i ] );					resolver.context( document );					for ( VirtualDocumentFragment fragment : fragments ) {						int d = resolver.resolve( fragment.documentSpecifier() );						if ( d != -1 ) {							if ( map != null ) d = map[ d ];							if ( maxDoc[ i ] < d ) maxDoc[ i ] = d;							currMaxPos[ i ][ d ] = processDocument( document.wordReader( field[ i ] ).setReader( new FastBufferedReader( fragment.text() ) ), d, currMaxPos[ i ][ d ], termMap[ i ],									index[ i ].termProcessor )									+ gap;						}					}					break;				case INT:				case DATE:					Object x = document.content( field[ i ] );					if ( x != null ) {						payloadPointers[ i ].add( map == null ? documentIndex : map[ documentIndex ] );						payloadContent[ i ].add( x );					}				default:				}			}			document.close();			documentIndex++;		}		iterator.close();		for ( int i = 0; i < n; i++ ) {			if ( termMap[ i ] != null ) for ( ObjectArrayList<int[]> list : termMap[ i ].values() ) {				// We sort in all cases, just to reduce the possible execution paths				Collections.sort( list, new Comparator<int[]>() {					public int compare( int[] p0, int[] p1 ) {						return p0[ 0 ] - p1[ 0 ];					}				} );				switch ( factory.fieldType( field[ i ] ) ) {				case VIRTUAL:					// We coalesce the list					ObjectArrayList<int[]> newList = new ObjectArrayList<int[]>();					for ( int k = 0; k < list.size(); ) {						int s;						for ( s = k + 1; s < list.size(); s++ )							if ( list.get( k )[ 0 ] != list.get( s )[ 0 ] ) break;						int count = 0;						for ( int t = k; t < s; t++ )							count += list.get( t ).length - 1;						int[] posting = new int[ count + 1 ];						posting[ 0 ] = list.get( k )[ 0 ];						count = 1;						for ( int t = k; t < s; t++ ) {							System.arraycopy( list.get( t ), 1, posting, count, list.get( t ).length - 1 );							count += list.get( t ).length - 1;						}						k = s;						newList.add( posting );					}					list.clear();					list.addAll( newList );					break;				default:				}			}			if ( payloadPointers[ i ] != null ) {				final int p[] = payloadPointers[ i ].elements();				final Object[] b = payloadContent[ i ].elements();				GenericSorting.quickSort( 0, payloadPointers[ i ].size(), new IntComparator() {					public int compare( int i0, int i1 ) {						return p[ i0 ] - p[ i1 ];					}				}, new Swapper() {					public void swap( int i0, int i1 ) {						final int t = p[ i0 ];						p[ i0 ] = p[ i1 ];						p[ i1 ] = t;						final Object o = b[ i0 ];						b[ i0 ] = b[ i1 ];						b[ i1 ] = o;					}				} );			}		}		for ( int i = 0; i < n; i++ ) {			assertEquals( index[ i ].toString(), factory.fieldType( field[ i ] ) == FieldType.VIRTUAL ? maxDoc[ i ] + 1 : documentIndex, index[ i ].numberOfDocuments );			switch ( factory.fieldType( field[ i ] ) ) {			case TEXT:			case VIRTUAL:				assertEquals( termMap[ i ].size(), index[ i ].numberOfTerms );				int postings = 0,				occurrences = 0;				for ( ObjectArrayList<int[]> l : termMap[ i ].values() ) {					postings += l.size();					for ( int[] p : l )						occurrences += p.length - 1;				}				assertEquals( index[ i ].toString(), postings, index[ i ].numberOfPostings );				assertEquals( occurrences, index[ i ].numberOfOccurrences );				IndexReader indexReader = index[ i ].getReader();				for ( MutableString term : new ObjectRBTreeSet<MutableString>( termMap[ i ].keySet() ).toArray( new MutableString[ termMap[ i ].size() ] ) ) {					String msg = index[ i ] + ":" + term;					IndexIterator indexIterator = indexReader.documents( term );					ObjectArrayList<int[]> list = termMap[ i ].get( term );					int k = 0;					while ( indexIterator.hasNext() ) {						assertEquals( msg, list.get( k )[ 0 ], indexIterator.nextDocument() ); // Document																								// pointer						assertEquals( msg, list.get( k ).length - 1, indexIterator.count() ); // Count						final int[] position = indexIterator.positionArray();						for ( int p = 0; p < indexIterator.count(); p++ )							assertEquals( msg, list.get( k )[ p + 1 ], position[ p ] ); // Positions						k++;					}					assertEquals( k, list.size() ); // This implicitly checks the frequency				}				indexReader.close();				break;			case INT:			case DATE:				assertEquals( index[ i ].toString(), payloadPointers[ i ].size(), index[ i ].numberOfPostings );				assertEquals( index[ i ].toString(), documentIndex != 0 ? 1 : 0, index[ i ].numberOfTerms );				assertEquals( index[ i ].toString(), -1, index[ i ].numberOfOccurrences );				if ( documentIndex != 0 ) {					IndexIterator indexIterator = index[ i ].documents( 0 );					int k = 0;					while ( indexIterator.hasNext() ) {						assertEquals( payloadPointers[ i ].getInt( k ), indexIterator.nextDocument() );						if ( factory.fieldType( field[ i ] ) == FieldType.INT ) assertEquals( ( (Number)payloadContent[ i ].get( k ) ).longValue(), ( (Number)indexIterator.payload().get() )								.longValue() );						else assertEquals( payloadContent[ i ].get( k ), indexIterator.payload().get() );						k++;					}					indexIterator.dispose();					assertEquals( k, payloadContent[ i ].size() );				}			}		}	}	public void setUp() throws IOException {		basename = File.createTempFile( this.getClass().getSimpleName(), "test" ).getCanonicalPath();	}	public void tearDown() throws IOException {		for ( Object f : FileUtils.listFiles( new File( basename ).getParentFile(), FileFilterUtils.prefixFileFilter( this.getClass().getSimpleName() ), null ) )			( (File)f ).delete();		if ( lastSequence != null ) lastSequence.close();	}	// We keep track of the last returned sequence to close it without cluttering the test code	private DocumentSequence lastSequence;	public DocumentSequence getSequence() throws ConfigurationException, IOException {		if ( lastSequence != null ) lastSequence.close();		return lastSequence = new CompositeDocumentSequence( new InputStreamDocumentSequence( this.getClass().getResourceAsStream( "documents.data" ), 10, new IdentityDocumentFactory(				new String[] { "encoding=UTF-8" } ), NUMBER_OF_DOCUMENTS ), new IntArrayDocumentCollection( INTEGER_DOCUMENT ), new DateArrayDocumentCollection( DATE_DOCUMENT ),				new MapVirtualDocumentCollection( VIRTUAL_DOCUMENT ) );	}	@SuppressWarnings("unchecked")	public DocumentSequence getEmptySequence() throws ConfigurationException, IOException {		if ( lastSequence != null ) lastSequence.close();		return lastSequence = new CompositeDocumentSequence( new StringArrayDocumentCollection(), new IntArrayDocumentCollection(), new DateArrayDocumentCollection(),				new MapVirtualDocumentCollection() );	}	public void testIndex( int quantum, int height, TermProcessor termProcessor ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException,			InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		final boolean interleaved = quantum >= 0;		if ( !interleaved ) quantum = -quantum;		// Vanilla indexing		new IndexBuilder( basename, getSequence() ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum )				.height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();		checkAgainstContent( getSequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basename + "-text" ), Index.getInstance( basename + "-int" ), Index				.getInstance( basename + "-date" ), Index.getInstance( basename + "-virtual" ) );		final String basenameZipped = basename + "-zipped";		// Vanilla indexing with zipped collection		new IndexBuilder( basename, getSequence() ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum )				.height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).zipCollectionBasename( basenameZipped ).run();		// Vanilla indexing with zipped collection		new IndexBuilder( basenameZipped, (DocumentSequence)BinIO.loadObject( basenameZipped + ".collection" ) ).termProcessor( termProcessor ).indexedFields( 0, 1, 2, 3 ).skipBufferSize( 1024 )				.pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();		// The two indices must be byte-by-byte identical (and we keep the zipped index for future		// reference)		sameIndex( basename + "-text", basenameZipped + "-text" );		sameIndex( basename + "-int", basenameZipped + "-int", "batches" );		sameIndex( basename + "-date", basenameZipped + "-date", "batches" );		sameIndex( basename + "-virtual", basenameZipped + "-virtual", "batches" );		// Indexing with just one batch		new IndexBuilder( basename + "-onebatch", getSequence() ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 )				.quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( NUMBER_OF_DOCUMENTS ).run();		// The two indices must be byte-by-byte identical		sameIndex( basename + "-text", basename + "-onebatch-text", "batches" );		sameIndex( basename + "-int", basename + "-onebatch-int", "batches" );		sameIndex( basename + "-date", basename + "-onebatch-date", "batches" );		sameIndex( basename + "-virtual", basename + "-onebatch-virtual", "batches" );	}	public void testIndex( int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException,			IllegalAccessException, InvocationTargetException, NoSuchMethodException {		testIndex( quantum, height, DowncaseTermProcessor.getInstance() );	}	public void testIndex() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException,			InvocationTargetException, NoSuchMethodException {		testIndex( 0, 0 );		testIndex( 0, 0, KILL_A_PROCESSOR );		testIndex( 1, 1 );		testIndex( 1, 2 );		testIndex( 4, 1 );		testIndex( 4, 4 );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -