📄 verifier.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 2 页 / 共 2 页
字号:
上一页 12
															}														IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );														result = indexIterator.skipTo( l.getInt( start ) );							assert indexIterator.document() == l.getInt( start ) && result == l.getInt( start ): "Trying to skip to document " + l.getInt( start ) + " (term " + t + ") moved to " + indexIterator.document() + "(skipTo() returned " + result + ")";							result = indexIterator.skipTo( Integer.MAX_VALUE );							assert ! indexIterator.hasNext() && result == Integer.MAX_VALUE: "Trying to skip beyond end of list (term " + t + ") after a skip to " + start + " returned " + result + " (hasNext()=" + indexIterator.hasNext() + ")";																				}						pl.update();					}					pl.done();				}			}			catch( Throwable e  ) {				System.err.println( "Exception during all-skip test (index=" + index[ i ] + ", term=" + t + ", start=" + start + ", end=" + end + ")" );				throw e;			} 		}				if ( ! jsapResult.getBoolean( "noComp" ) ) {			IndexReader additionalReader;			IntLinkedOpenHashSet s0 = new IntLinkedOpenHashSet();			IntOpenHashSet s1 = new IntOpenHashSet();			IntAVLTreeSet s2 = new IntAVLTreeSet();			IntIterator it;			IndexIterator indexIterator, additionalIterator;			it.unimi.dsi.mg4j.search.DocumentIterator documentIterator;			int u = 0;						try {				for (i = 0; i < index.length; i++) {					pl.expectedUpdates = numberOfTerms[ i ];					pl.start("Verifying composite iterators in " + index[i] + "...");					additionalReader = index[ i ].getReader();										for (t = 0; t < numberOfTerms[ i ]; t++) {						for (u = 0; u < numberOfTerms[ i ]; u++) {							s0.clear();							s1.clear();							// TODO: in case we have positions, we should check them, too							IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s0 );							IntIterators.pour( termLists ? 
indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s1 );							s0.retainAll( s1 );							indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );							additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );							it = s0.iterator();							documentIterator = AndDocumentIterator.getInstance( indexIterator, additionalIterator );							for( int j = s0.size(); j-- != 0; ) assert it.nextInt() == documentIterator.nextDocument();							assert ! documentIterator.hasNext();							s2.clear();							IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s2 );							IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s2 );							indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );							additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );							it = s2.iterator();							documentIterator = OrDocumentIterator.getInstance( indexIterator, additionalIterator ); 							for( int j = s2.size(); j-- != 0; ) assert it.nextInt() == documentIterator.nextDocument();							assert ! documentIterator.hasNext();						}							pl.update();					}					pl.done();					additionalReader.close();				}			}			catch( Throwable e  ) {				System.err.println( "Exception during composite iterator test (index=" + index[ i ] + ", first term=" + t + ", second term =" + u + ")" );				throw e;			}			}				if ( ! jsapResult.getBoolean( "virtual" ) && jsapResult.getBoolean( "random" ) ) {						// Random access scan			pl.expectedUpdates = index[ 0 ].numberOfDocuments;			pl.itemsName = "documents";			pl.start( "Verifying random access..." 
);			if ( allBitStreamIndices ) {				it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator();				Document document;				Reader reader;				WordReader wordReader;								final MutableString word = new MutableString(), nonWord = new MutableString();								int docCounter = 0;								while( ( document = documentIterator.nextDocument() ) != null ) {					currDoc = permutation != null ? permutation[ docCounter ] : docCounter;					for( i = 0; i < index.length; i++ ) {						Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );						if ( index[ i ].hasPayloads ) {							// TODO: write tests for the other case							if ( allBitStreamIndices ) {								IndexIterator indexIterator = indexReader[ i ].documents( 0 );								int pointer = indexIterator.skipTo( currDoc );								if ( pointer == currDoc ) {									Payload payload = indexIterator.payload();									if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload );  								}								else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );							}							else {								IndexIterator indexIterator = indexReader[ i ].documents(  0  );								if ( indexIterator.skipTo( currDoc ) == currDoc ) {									if ( ! indexIterator.payload().get().equals( content ) )										LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() );								} 								else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );							}						}						else {							// text index							pos = 0;							termsInDoc[ i ].clear();							reader = (Reader)content;							wordReader = document.wordReader( stem || index[ i ].field == null ? 
indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );							wordReader.setReader( reader );							while( wordReader.next( word, nonWord ) ) {								if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue;								if ( ( t = (int)( (BitStreamIndex)index[ i ] ).termMap.getLong( word ) ) == -1 ) LOGGER.error( index[ i ] + ": Could not find term " + word + " in term index" );								else {									if ( index[ i ].hasCounts ) termsInDoc[ i ].put( t, termsInDoc[ i ].get( t ) + 1 );									if ( index[ i ].hasPositions ) wordInPos[ i ][ pos++ ] = t;								}							}							if ( allBitStreamIndices ) {								for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) {									t = x.nextInt();									IndexIterator indexIterator = indexReader[ i ].documents( t );									int pointer = indexIterator.skipTo( currDoc );									if ( pointer == currDoc ) {										if ( index[ i ].hasCounts ) {											int c = indexIterator.count();											if ( termsInDoc[ i ].get( t ) !=  c ) 												LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) );											else {												if ( index[ i ].hasPositions ) {													indexIterator.positions( occ[ i ] );													for( int j = 0; j < c; j++ ) 														if ( wordInPos[ i ][ occ[ i ][ j ] ] != t )  															LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] );												}											}										} 									}									else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t + "(skipTo returned " + pointer + ")" );								}							}							else {								for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) {									t = x.nextInt();		
							IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );									if ( indexIterator.skipTo( currDoc ) == currDoc ) {										if ( index[ i ].hasCounts ) {											int c = indexIterator.count();											if ( termsInDoc[ i ].get( t ) !=  c ) 												LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) );											else {												if ( index[ i ].hasPositions ) {													indexIterator.positions( occ[ i ] );													for( int j = 0; j < c; j++ ) 														if ( wordInPos[ i ][ occ[ i ][ j ] ] != t )  															LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] );												}											}										}									} 									else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );								}							}						}					}					docCounter++;					document.close();					pl.update();				}			}			else {				LOGGER.warn( "Random access tests require very slow single-term scanning as not all indices are disk based" );				it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator();				Document document;				Reader reader;				WordReader wordReader;								final MutableString word = new MutableString(), nonWord = new MutableString();								int docCounter = 0;								while( ( document = documentIterator.nextDocument() ) != null ) {					currDoc = permutation != null ? permutation[ docCounter ] : docCounter;					for( i = 0; i < index.length; i++ ) {						Object content = document.content( stem || index[ i ].field == null ? 
indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );						if ( index[ i ].hasPayloads ) {							if ( allBitStreamIndices ) {								IndexIterator indexIterator = indexReader[ i ].documents( 0 );								int pointer = indexIterator.skipTo( currDoc );								if ( pointer == currDoc ) {									Payload payload = indexIterator.payload();									if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload );  								}								else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );							}							else {								IndexIterator indexIterator = indexReader[ i ].documents( "#" );								if ( indexIterator.skipTo( currDoc ) == currDoc ) {									if ( ! indexIterator.payload().get().equals( content ) )										LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() );								} 								else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );							}						}						else {							pos = 0;							reader = (Reader)content;							wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );							wordReader.setReader( reader );							while( wordReader.next( word, nonWord ) ) {								if ( word.length() == 0 || index[ i ].termProcessor != null && ! 
index[ i ].termProcessor.processTerm( word ) ) continue;								IndexIterator indexIterator = indexReader[ i ].documents( word );								if ( currDoc != indexIterator.skipTo( currDoc ) )									LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + word );								else if ( index[ i ].hasPositions ) {									indexIterator.positions( occ[ i ] );									if ( IntArrayList.wrap( occ[ i ], indexIterator.count() ).indexOf( pos ) == -1 )										LOGGER.error( index[ i ] + ": Position " + pos + " does not appear in the position list of term " + word + " in document " + currDoc );								}								pos++;							}						}					}					document.close();					pl.update();					docCounter++;				}			}			pl.done();		}				for( IndexReader ir : indexReader ) ir.close();	}}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -