📄 indextest.java
字号:
testIndex( 8, 1 ); testIndex( 8, 4 ); testIndex( -1, 1 ); testIndex( -1, 2 ); testIndex( -4, 1 ); testIndex( -4, 4 ); testIndex( -8, 1 ); testIndex( -8, 4 ); } public void testRemappedIndex( int quantum, int height, TermProcessor termProcessor ) throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { final boolean interleaved = quantum >= 0; if ( !interleaved ) quantum = -quantum; final String basenameMapped = basename + "-map"; int[] map = IntIterators.unwrap( BinIO.asIntIterator( new DataInputStream( this.getClass().getResourceAsStream( "documents.permutation.data" ) ) ) ); String mapFile = File.createTempFile( this.getClass().getSimpleName(), "map" ).toString(); BinIO.storeInts( map, mapFile ); // Remapped index new IndexBuilder( basenameMapped, getSequence() ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).mapFile( mapFile ).run(); checkAgainstContent( getSequence(), map, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basenameMapped + "-text" ), Index.getInstance( basenameMapped + "-int" ), Index .getInstance( basenameMapped + "-date" ), Index.getInstance( basenameMapped + "-virtual" ) ); // Remapped index, one batch new IndexBuilder( basenameMapped + "-onebatch", getSequence() ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ) .quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( NUMBER_OF_DOCUMENTS ).mapFile( mapFile ).run(); sameIndex( basenameMapped + "-text", basenameMapped + "-onebatch-text", "batches" ); sameIndex( basenameMapped + "-int", basenameMapped + "-onebatch-int", "batches" ); sameIndex( basenameMapped + "-date", basenameMapped + "-onebatch-date", "batches" ); sameIndex( basenameMapped + "-virtual", basenameMapped + "-onebatch-virtual", "batches" ); } public void testRemappedIndex( int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { testRemappedIndex( quantum, height, DowncaseTermProcessor.getInstance() ); } public void testRemappedIndex() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { testRemappedIndex( 0, 0 ); testRemappedIndex( 0, 0, KILL_A_PROCESSOR ); testRemappedIndex( 1, 1 ); testRemappedIndex( 1, 2 ); testRemappedIndex( 4, 1 ); testRemappedIndex( 4, 4 ); testRemappedIndex( 8, 1 ); testRemappedIndex( 8, 4 ); } public void testPartitionConcatenate( int quantum, int height ) throws Exception { final boolean interleaved = quantum >= 0; if ( !interleaved ) quantum = -quantum; // Vanilla indexing new IndexBuilder( basename, getSequence() ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ) .virtualDocumentResolver( 3, RESOLVER ).run(); // We partition Reference2ReferenceArrayMap<Component, Coding> flags = new Reference2ReferenceArrayMap<Component, Coding>(); flags.putAll( DEFAULT_STANDARD_INDEX ); if ( interleaved ) flags.put( Component.POSITIONS, Coding.GOLOMB ); BinIO.storeObject( DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy" ); new PartitionDocumentally( basename + "-text", basename + "-text-part", DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy", 0, 1024, flags, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-int", basename + "-int-part", DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy", 0, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-date", basename + "-date-part", DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy", 0, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-virtual", basename + "-virtual-part", DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy", 0, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); // For the text part, we need term maps to call sameIndex() String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); for ( String index : localIndex ) BinIO.storeObject( createMap(index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION ); sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() ); sameContent( basename + "-int", basename + "-int-part" ); sameContent( basename + "-date", basename + "-date-part" ); localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); for ( String index : localIndex ) BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION ); sameContent( basename + "-virtual", basename + "-virtual-part", new FileLinesCollection( basename + "-virtual" + TERMS_EXTENSION, "UTF-8" ).iterator() ); localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Concatenate( basename + "-text-merged", localIndex, false, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-text", basename + "-text-merged", "batches" ); localIndex = new Properties( basename + "-int-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Concatenate( basename + "-int-merged", localIndex, false, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-int", basename + "-int-merged", "batches" ); localIndex = new Properties( basename + "-date-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Concatenate( basename + "-date-merged", localIndex, false, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-date", basename + "-date-merged", "batches" ); localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Concatenate( basename + "-virtual-merged", localIndex, false, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-virtual", basename + "-virtual-merged", "batches" ); } public void testPartitionConcatenate() throws Exception { testPartitionConcatenate( 0, 0 ); testPartitionConcatenate( 1, 1 ); testPartitionConcatenate( 1, 2 ); testPartitionConcatenate( 4, 1 ); testPartitionConcatenate( 4, 4 ); testPartitionConcatenate( 8, 1 ); testPartitionConcatenate( 8, 4 ); testPartitionConcatenate( -1, 1 ); testPartitionConcatenate( -1, 2 ); testPartitionConcatenate( -4, 1 ); testPartitionConcatenate( -4, 4 ); testPartitionConcatenate( -8, 1 ); testPartitionConcatenate( -8, 4 ); } public void testPartitionMerge( int quantum, int height ) throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, Exception { final boolean interleaved = quantum >= 0; if ( !interleaved ) quantum = -quantum; // Vanilla indexing new IndexBuilder( basename, getSequence() ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ) .virtualDocumentResolver( 3, RESOLVER ).run(); // Now we use a crazy strategy moving around documents using modular arithmetic final DocumentalPartitioningStrategy modulo3 = new Modulo3DocumentalClusteringStrategy( NUMBER_OF_DOCUMENTS ); BinIO.storeObject( modulo3, basename + "-strategy" ); new PartitionDocumentally( basename + "-text", basename + "-text-part", modulo3, basename + "-strategy", 0, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-int", basename + "-int-part", modulo3, basename + "-strategy", 0, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-date", basename + "-date-part", modulo3, basename + "-strategy", 0, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionDocumentally( basename + "-virtual", basename + "-virtual-part", modulo3, basename + "-strategy", 0, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); for ( String index : localIndex ) BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION ); sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() ); sameContent( basename + "-int", basename + "-int-part" ); sameContent( basename + "-date", basename + "-date-part" ); localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); for ( String index : localIndex ) BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION ); sameContent( basename + "-virtual", basename + "-virtual-part", new FileLinesCollection( basename + "-virtual" + TERMS_EXTENSION, "UTF-8" ).iterator() ); localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Merge( basename + "-text-merged", localIndex, false, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-text", basename + "-text-merged", "batches" ); localIndex = new Properties( basename + "-int-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Merge( basename + "-int-merged", localIndex, false, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-int", basename + "-int-merged", "batches" ); localIndex = new Properties( basename + "-date-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Merge( basename + "-date-merged", localIndex, false, 1024, DEFAULT_PAYLOAD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-date", basename + "-date-merged", "batches" ); localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); new Merge( basename + "-virtual-merged", localIndex, false, 1024, DEFAULT_STANDARD_INDEX, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run(); sameIndex( basename + "-virtual", basename + "-virtual-merged", "batches" ); } public void testPartitionMerge() throws Exception { testPartitionMerge( 0, 0 ); testPartitionMerge( 1, 1 ); testPartitionMerge( 1, 2 ); testPartitionMerge( 4, 1 ); testPartitionMerge( 4, 4 ); testPartitionMerge( 8, 1 ); testPartitionMerge( 8, 4 ); testPartitionMerge( -1, 1 ); testPartitionMerge( -1, 2 ); testPartitionMerge( -4, 1 ); testPartitionMerge( -4, 4 ); testPartitionMerge( -8, 1 ); testPartitionMerge( -8, 4 ); } public void testLexicalPartitioning( boolean interleaved ) throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, Exception { // Vanilla indexing new IndexBuilder( basename, getSequence() ).interleaved( interleaved ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).virtualDocumentResolver( 3, RESOLVER ).run(); // Now we use a crazy strategy moving around documents using modular arithmetic final LexicalPartitioningStrategy uniform = LexicalStrategies.uniform( 3, DiskBasedIndex.getInstance( basename + "-text" ) ); BinIO.storeObject( uniform, basename + "-strategy" ); new PartitionLexically( basename + "-text", basename + "-text-part", uniform, basename + "-strategy", 1024, DEFAULT_LOG_INTERVAL ).run(); new PartitionLexically( basename + "-virtual", basename + "-virtual-part", uniform, basename + "-strategy", 1024, DEFAULT_LOG_INTERVAL ).run(); String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); for ( String index : localIndex ) BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION ); sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() ); sameContent( basename + "-virtual", basename + "-virtual-part" ); } public void testLexicalPartitioning() throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, Exception { testLexicalPartitioning( true ); testLexicalPartitioning( false ); } public void testEmpty( int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { final boolean interleaved = quantum >= 0; if ( !interleaved ) quantum = -quantum; // Vanilla indexing new IndexBuilder( basename, getEmptySequence() ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ) .virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run(); checkAgainstContent( getEmptySequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basename + "-text" ), Index.getInstance( basename + "-int" ), Index .getInstance( basename + "-date" ), Index.getInstance( basename + "-virtual" ) ); // Permuted indexing String mapFile = File.createTempFile( this.getClass().getSimpleName(), "permutation" ).toString(); new IndexBuilder( basename + "-mapped", getEmptySequence() ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ) .virtualDocumentResolver( 3, RESOLVER ).mapFile( mapFile ).documentsPerBatch( 20 ).run(); sameIndex( basename + "-text", basename + "-mapped-text" ); sameIndex( basename + "-int", basename + "-mapped-int" ); sameIndex( basename + "-date", basename + "-mapped-date" ); sameIndex( basename + "-virtual", basename + "-mapped-virtual" ); } public void testEmpty() throws Exception { testEmpty( 0, 0 ); testEmpty( 1, 1 ); testEmpty( 1, 2 ); testEmpty( 4, 1 ); testEmpty( 4, 4 ); testEmpty( 8, 1 ); testEmpty( 8, 4 ); testEmpty( -1, 1 ); testEmpty( -1, 2 ); testEmpty( -8, 1 ); testEmpty( -8, 4 ); testEmpty( -8, 1 ); testEmpty( -8, 4 ); } public void testLoadOptions( int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { final boolean interleaved = quantum >= 0; if ( !interleaved ) quantum = -quantum; // Vanilla indexing new IndexBuilder( basename, getSequence() ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ) .virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run(); for ( String options : new String[] { "inmemory=1", "mapped=1", "offsetstep=0", "offsetstep=-2" } ) checkAgainstContent( getSequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basename + "-text?" + options ), Index.getInstance( basename + "-int?" + options ), Index.getInstance( basename + "-date?" + options ), Index.getInstance( basename + "-virtual?" + options ) ); } public void testLoadOptions() throws Exception { testLoadOptions( 0, 0 ); testLoadOptions( 1, 1 ); testLoadOptions( -1, 1 ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -