📄 verifier.java
字号:
} IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ); result = indexIterator.skipTo( l.getInt( start ) ); assert indexIterator.document() == l.getInt( start ) && result == l.getInt( start ): "Trying to skip to document " + l.getInt( start ) + " (term " + t + ") moved to " + indexIterator.document() + "(skipTo() returned " + result + ")"; result = indexIterator.skipTo( Integer.MAX_VALUE ); assert ! indexIterator.hasNext() && result == Integer.MAX_VALUE: "Trying to skip beyond end of list (term " + t + ") after a skip to " + start + " returned " + result + " (hasNext()=" + indexIterator.hasNext() + ")"; } pl.update(); } pl.done(); } } catch( Throwable e ) { System.err.println( "Exception during all-skip test (index=" + index[ i ] + ", term=" + t + ", start=" + start + ", end=" + end + ")" ); throw e; } } if ( ! jsapResult.getBoolean( "noComp" ) ) { IndexReader additionalReader; IntLinkedOpenHashSet s0 = new IntLinkedOpenHashSet(); IntOpenHashSet s1 = new IntOpenHashSet(); IntAVLTreeSet s2 = new IntAVLTreeSet(); IntIterator it; IndexIterator indexIterator, additionalIterator; it.unimi.dsi.mg4j.search.DocumentIterator documentIterator; int u = 0; try { for (i = 0; i < index.length; i++) { pl.expectedUpdates = numberOfTerms[ i ]; pl.start("Verifying composite iterators in " + index[i] + "..."); additionalReader = index[ i ].getReader(); for (t = 0; t < numberOfTerms[ i ]; t++) { for (u = 0; u < numberOfTerms[ i ]; u++) { s0.clear(); s1.clear(); // TODO: in case we have positions, we should check them, too IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s0 ); IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s1 ); s0.retainAll( s1 ); indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ); additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u ); it = s0.iterator(); documentIterator = AndDocumentIterator.getInstance( indexIterator, additionalIterator ); for( int j = s0.size(); j-- != 0; ) assert it.nextInt() == documentIterator.nextDocument(); assert ! documentIterator.hasNext(); s2.clear(); IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s2 ); IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s2 ); indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ); additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u ); it = s2.iterator(); documentIterator = OrDocumentIterator.getInstance( indexIterator, additionalIterator ); for( int j = s2.size(); j-- != 0; ) assert it.nextInt() == documentIterator.nextDocument(); assert ! documentIterator.hasNext(); } pl.update(); } pl.done(); additionalReader.close(); } } catch( Throwable e ) { System.err.println( "Exception during composite iterator test (index=" + index[ i ] + ", first term=" + t + ", second term =" + u + ")" ); throw e; } } if ( ! jsapResult.getBoolean( "virtual" ) && jsapResult.getBoolean( "random" ) ) { // Random access scan pl.expectedUpdates = index[ 0 ].numberOfDocuments; pl.itemsName = "documents"; pl.start( "Verifying random access..." ); if ( allBitStreamIndices ) { it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator(); Document document; Reader reader; WordReader wordReader; final MutableString word = new MutableString(), nonWord = new MutableString(); int docCounter = 0; while( ( document = documentIterator.nextDocument() ) != null ) { currDoc = permutation != null ? permutation[ docCounter ] : docCounter; for( i = 0; i < index.length; i++ ) { Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) ); if ( index[ i ].hasPayloads ) { // TODO: write tests for the other case if ( allBitStreamIndices ) { IndexIterator indexIterator = indexReader[ i ].documents( 0 ); int pointer = indexIterator.skipTo( currDoc ); if ( pointer == currDoc ) { Payload payload = indexIterator.payload(); if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload ); } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t ); } else { IndexIterator indexIterator = indexReader[ i ].documents( 0 ); if ( indexIterator.skipTo( currDoc ) == currDoc ) { if ( ! indexIterator.payload().get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() ); } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t ); } } else { // text index pos = 0; termsInDoc[ i ].clear(); reader = (Reader)content; wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) ); wordReader.setReader( reader ); while( wordReader.next( word, nonWord ) ) { if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue; if ( ( t = (int)( (BitStreamIndex)index[ i ] ).termMap.getLong( word ) ) == -1 ) LOGGER.error( index[ i ] + ": Could not find term " + word + " in term index" ); else { if ( index[ i ].hasCounts ) termsInDoc[ i ].put( t, termsInDoc[ i ].get( t ) + 1 ); if ( index[ i ].hasPositions ) wordInPos[ i ][ pos++ ] = t; } } if ( allBitStreamIndices ) { for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) { t = x.nextInt(); IndexIterator indexIterator = indexReader[ i ].documents( t ); int pointer = indexIterator.skipTo( currDoc ); if ( pointer == currDoc ) { if ( index[ i ].hasCounts ) { int c = indexIterator.count(); if ( termsInDoc[ i ].get( t ) != c ) LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) ); else { if ( index[ i ].hasPositions ) { indexIterator.positions( occ[ i ] ); for( int j = 0; j < c; j++ ) if ( wordInPos[ i ][ occ[ i ][ j ] ] != t ) LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] ); } } } } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t + "(skipTo returned " + pointer + ")" ); } } else { for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) { t = x.nextInt(); IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ); if ( indexIterator.skipTo( currDoc ) == currDoc ) { if ( index[ i ].hasCounts ) { int c = indexIterator.count(); if ( termsInDoc[ i ].get( t ) != c ) LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) ); else { if ( index[ i ].hasPositions ) { indexIterator.positions( occ[ i ] ); for( int j = 0; j < c; j++ ) if ( wordInPos[ i ][ occ[ i ][ j ] ] != t ) LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] ); } } } } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t ); } } } } docCounter++; document.close(); pl.update(); } } else { LOGGER.warn( "Random access tests require very slow single-term scanning as not all indices are disk based" ); it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator(); Document document; Reader reader; WordReader wordReader; final MutableString word = new MutableString(), nonWord = new MutableString(); int docCounter = 0; while( ( document = documentIterator.nextDocument() ) != null ) { currDoc = permutation != null ? permutation[ docCounter ] : docCounter; for( i = 0; i < index.length; i++ ) { Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) ); if ( index[ i ].hasPayloads ) { if ( allBitStreamIndices ) { IndexIterator indexIterator = indexReader[ i ].documents( 0 ); int pointer = indexIterator.skipTo( currDoc ); if ( pointer == currDoc ) { Payload payload = indexIterator.payload(); if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload ); } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t ); } else { IndexIterator indexIterator = indexReader[ i ].documents( "#" ); if ( indexIterator.skipTo( currDoc ) == currDoc ) { if ( ! indexIterator.payload().get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() ); } else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t ); } } else { pos = 0; reader = (Reader)content; wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) ); wordReader.setReader( reader ); while( wordReader.next( word, nonWord ) ) { if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue; IndexIterator indexIterator = indexReader[ i ].documents( word ); if ( currDoc != indexIterator.skipTo( currDoc ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + word ); else if ( index[ i ].hasPositions ) { indexIterator.positions( occ[ i ] ); if ( IntArrayList.wrap( occ[ i ], indexIterator.count() ).indexOf( pos ) == -1 ) LOGGER.error( index[ i ] + ": Position " + pos + " does not appear in the position list of term " + word + " in document " + currDoc ); } pos++; } } } document.close(); pl.update(); docCounter++; } } pl.done(); } for( IndexReader ir : indexReader ) ir.close(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -