📄 diskbasedindex.java
字号:
catch( Exception ignore ) {} File indexFile = new File( basename + INDEX_EXTENSION ); if ( ! indexFile.exists() ) throw new FileNotFoundException( "Cannot find index file " + indexFile.getName() ); final Map<Component,Coding> flags = CompressionFlags.valueOf( properties.getStringArray( Index.PropertyKeys.CODING ), null ); final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS ); final int numberOfTerms = properties.getInt( Index.PropertyKeys.TERMS ); final long numberOfPostings= properties.getLong( Index.PropertyKeys.POSTINGS ); final long numberOfOccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES ); final int maxCount = properties.getInt( Index.PropertyKeys.MAXCOUNT, -1 ); final String field = properties.getString( Index.PropertyKeys.FIELD ); if ( termMap != null && termMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the term map (" + termMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" ); if ( prefixMap != null && prefixMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the prefix map (" + prefixMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" ); final Payload payload = (Payload)( properties.containsKey( Index.PropertyKeys.PAYLOADCLASS ) ? Class.forName( properties.getString( Index.PropertyKeys.PAYLOADCLASS ) ).newInstance() : null ); final Coding frequencyCoding = flags.get( Component.FREQUENCIES ); final Coding pointerCoding = flags.get( Component.POINTERS ); final Coding countCoding = flags.get( Component.COUNTS ); final Coding positionCoding = flags.get( Component.POSITIONS ); if ( countCoding == null && positionCoding != null ) throw new IllegalArgumentException( "Index " + basename + " has positions but no counts (this can't happen)" ); // Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary. 
IntList sizes = null; // TODO: quick patch to avoid loading sizes in case of payloads. if ( payload == null && ( documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE ) ) { sizes = DiskBasedIndex.readSizes( new InputBitStream( basename + DiskBasedIndex.SIZES_EXTENSION ), numberOfDocuments ); if ( sizes.size() != numberOfDocuments ) throw new IllegalStateException( "The length of the size list (" + sizes.size() + ") is not equal to the number of documents (" + numberOfDocuments + ")" ); } // Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version. final LongList offsets; // TODO: quick patch to avoid loading sizes in case of payloads. if ( payload == null && randomAccess ) { int offsetStep = queryProperties != null && queryProperties.get( UriKeys.OFFSETSTEP ) != null ? Integer.parseInt( queryProperties.get( UriKeys.OFFSETSTEP ) ) : DEFAULT_OFFSET_STEP; if ( offsetStep < 0 ) { // Memory-mapped offsetStep = -offsetStep; final long length = new File( basename + DiskBasedIndex.OFFSETS_EXTENSION ).length(); offsets = LongLists.synchronize( new SemiExternalOffsetList( new InputBitStream( new ByteBufferInputStream( new FileInputStream( basename + DiskBasedIndex.OFFSETS_EXTENSION ).getChannel().map( MapMode.READ_ONLY, 0, length ) ) ), offsetStep, numberOfTerms + 1 ) ); } else { offsets = offsetStep == 0? 
DiskBasedIndex.readOffsets( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION ), numberOfTerms ) : LongLists.synchronize( new SemiExternalOffsetList( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ), offsetStep, numberOfTerms + 1 ) ); } if ( offsets.size() != numberOfTerms + 1 ) throw new IllegalStateException( "The length of the offset list (" + offsets.size() + ") is not equal to the number of terms plus one (" + numberOfTerms + " + 1)" ); } else offsets = null; final int quantum = properties.getInt( BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1 ); final int height = properties.getInt( BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1 ); final int bufferSize = properties.getInt( BitStreamIndex.PropertyKeys.BUFFERSIZE, BitStreamIndex.DEFAULT_BUFFER_SIZE ); final TermProcessor termProcessor = Index.getTermProcessor( properties ); final boolean highPerformance = indexClass != null && FileHPIndex.class.isAssignableFrom( indexClass ); if ( queryProperties != null && queryProperties.containsKey( UriKeys.INMEMORY ) ) { /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) ) return new SqrtSkipInMemoryIndex( BinIO.loadBytes( indexFile.toString() ), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, frequencyCoding, pointerCoding, countCoding, positionCoding, termProcessor, field, properties, termMap, prefixMap, sizes, offsets );*/ return highPerformance ? 
new InMemoryHPIndex( BinIO.loadBytes( indexFile.toString() ), BinIO.loadBytes( basename + POSITIONS_EXTENSION ), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ) : new InMemoryIndex( BinIO.loadBytes( indexFile.toString() ), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ); } else if ( queryProperties != null && queryProperties.containsKey( UriKeys.MAPPED ) ) { final File positionsFile = new File( basename + POSITIONS_EXTENSION ); final ByteBuffer index = new FileInputStream( indexFile ).getChannel().map( MapMode.READ_ONLY, 0, indexFile.length() ); return highPerformance ? new MemoryMappedHPIndex( index, new FileInputStream( positionsFile ).getChannel().map( MapMode.READ_ONLY, 0, positionsFile.length() ), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ) : new MemoryMappedIndex( index, numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ); } /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) ) return new SqrtSkipFileIndex( basename.toString(), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, frequencyCoding, pointerCoding, countCoding, positionCoding, termProcessor, field, properties, termMap, prefixMap, sizes, offsets, indexFile );*/ return highPerformance ? 
new FileHPIndex( basename.toString(), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ) : new FileIndex( basename.toString(), numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize, termProcessor, field, properties, termMap, prefixMap, sizes, offsets ); } /** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing reasonable term and prefix maps from the basename. * * @param basename the basename of the index. * @param properties the properties obtained by stemming <code>basename</code>. * @param randomAccess whether the index should be accessible randomly. * @param documentSizes if true, document sizes will be loaded. * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded. * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>. * @throws IllegalAccessException * @throws InstantiationException * * @see #getInstance(CharSequence, Properties, StringMap, PrefixMap, boolean, boolean, EnumMap) */ public static BitStreamIndex getInstance( final CharSequence basename, final Properties properties, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException { StringMap<? extends CharSequence> termMap = null; PrefixMap<? 
extends CharSequence> prefixMap = null; if ( maps ) { // TODO: check this logic termMap = DiskBasedIndex.loadStringMap( basename + DiskBasedIndex.TERMMAP_EXTENSION ); if ( termMap != null && termMap instanceof PrefixMap ) return getInstance( basename, properties, termMap, (PrefixMap<?>)termMap, randomAccess, documentSizes, queryProperties ); prefixMap = DiskBasedIndex.loadPrefixMap( basename + DiskBasedIndex.PREFIXMAP_EXTENSION ); if ( termMap != null ) return getInstance( basename, properties, termMap, prefixMap, randomAccess, documentSizes, queryProperties ); if ( prefixMap != null ) return getInstance( basename, properties, prefixMap, prefixMap, randomAccess, documentSizes, queryProperties ); } return getInstance( basename, properties, null, prefixMap, randomAccess, documentSizes, queryProperties ); } /** Returns a new disk-based index, possibly guessing reasonable term and prefix maps from the basename. * * <p>If there is a term map file (basename stemmed with <samp>.termmap</samp>), it is used as term map and, * in case it implements {@link PrefixMap}. Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>) * and, if it implements {@link StringMap} and no term map has been found, we use it as prefix map. * * @param basename the basename of the index. * @param randomAccess whether the index should be accessible randomly (e.g., if it will * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index). * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes * might be loaded anyway because the compression method for positions requires it). * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this * feature might not be available with some kind of index). * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>. 
*/
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        // Build the properties from the basename plus the properties extension, then delegate.
        final Properties indexProperties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
        return getInstance( basename, indexProperties, randomAccess, documentSizes, maps, queryProperties );
    }

    /** Returns a new disk-based index whose properties are built from the basename, possibly guessing
     * reasonable term and prefix maps and using no query properties.
     *
     * <p>This method creates a {@link Properties} object from <code>basename</code> followed by
     * {@link DiskBasedIndex#PROPERTIES_EXTENSION} and delegates to the overload accepting preloaded
     * properties, passing <code>null</code> as query properties.
     *
     * @param basename the basename of the index.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
     * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
     * might be loaded anyway because the compression method for positions requires it).
     * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
     * feature might not be available with some kind of index).
* @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap) */ public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException { return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, null ); } /** Returns a new disk-based index, guessing reasonable term and prefix maps from the basename. * * @param basename the basename of the index. * @param randomAccess whether the index should be accessible randomly (e.g., if it will * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index). * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes * might be loaded anyway because the compression method for positions requires it). */ public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException { return getInstance( basename, randomAccess, documentSizes, true ); } /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename, * and loading document sizes only if it is necessary. * * @param basename the basename of the index. * @param randomAccess whether the index should be accessible randomly (e.g., if it will * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index). 
*/
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        final boolean loadDocumentSizes = false;
        return getInstance( basename, randomAccess, loadDocumentSizes );
    }

    /** Returns a new local index with random access requested, asking for term and prefix maps to be
     * guessed from the basename and without explicitly requesting document sizes.
     *
     * <p>This is a shorthand for the two-argument overload with <code>randomAccess</code> set to true.
     *
     * @param basename the basename of the index.
     */
    public static BitStreamIndex getInstance( final CharSequence basename ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        final boolean wantRandomAccess = true;
        return getInstance( basename, wantRandomAccess );
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -