📄 index.java
字号:
/** Returns a new index using the given URI.
 *
 * <p>The URI is interpreted as follows:
 * <ul>
 * <li>if it starts with <samp>mg4j:</samp>, host and port are extracted and a remote index
 * is obtained from {@link IndexServer} (<code>maps</code> and any query part are not used on this path);
 * <li>if it starts with <samp>file:</samp>, it is parsed as a {@link URI}: the path is used as
 * index basename and the query part as a list of options;
 * <li>otherwise, everything before the first <samp>?</samp> (or the whole string, if there is none)
 * is used as index basename, and everything after it as a list of options.
 * </ul>
 *
 * <p>Options are <samp>;</samp>-separated <samp>key=value</samp> pairs; each key must
 * match one of the {@link UriKeys}.
 *
 * @param uri the URI defining the index.
 * @param randomAccess whether the index should be accessible randomly.
 * @param documentSizes if true, document sizes will be loaded.
 * @param maps passed through to {@link DiskBasedIndex}; the three-argument overload passes
 * <code>true</code> and describes that as &ldquo;searching dynamically for term and prefix maps&rdquo;.
 * @return the index defined by <code>uri</code>.
 * @throws IllegalArgumentException if an option is not of the form <samp>key=value</samp>, or its key is unknown.
 * @throws URISyntaxException if a <samp>mg4j:</samp> or <samp>file:</samp> URI cannot be parsed.
 * @throws ClassNotFoundException if the index class named in the property file cannot be loaded
 * (in particular, if the property is missing altogether).
 */
public static Index getInstance( final CharSequence uri, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	final String uriString = uri.toString();

	// The mg4j scheme denotes a remote index.
	if ( uriString.startsWith( "mg4j:" ) ) {
		final URI u = new URI( uriString );
		return IndexServer.getIndex( u.getHost(), u.getPort(), randomAccess, documentSizes );
	}

	// In all other cases we need a basename for the property file and, possibly, a query part.
	final String basename, query;

	if ( uriString.startsWith( "file:" ) ) {
		final URI u = new URI( uriString );
		basename = u.getPath();
		query = u.getQuery();
	}
	else {
		final int questionMarkPos = uriString.indexOf( '?' );
		if ( questionMarkPos == -1 ) {
			basename = uriString;
			query = null;
		}
		else {
			basename = uriString.substring( 0, questionMarkPos );
			query = uriString.substring( questionMarkPos + 1 );
		}
	}

	LOGGER.debug( "Searching for an index with basename " + basename + "..." );
	final Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
	LOGGER.debug( "Properties: " + properties );

	// We parse the key/value pairs appearing in the query part.
	final EnumMap<UriKeys,String> queryProperties = new EnumMap<UriKeys,String>( UriKeys.class );
	if ( query != null ) {
		for( String keyValue: query.split( ";" ) ) {
			final String[] piece = keyValue.split( "=" );
			if ( piece.length != 2 ) throw new IllegalArgumentException( "Malformed key/value pair: " + keyValue );

			// Convert to standard keys
			boolean found = false;
			for( UriKeys key: UriKeys.values() ) {
				if ( PropertyBasedDocumentFactory.sameKey( key, piece[ 0 ] ) ) {
					queryProperties.put( key, piece[ 1 ] );
					found = true;
					break;
				}
			}

			if ( ! found ) throw new IllegalArgumentException( "Unknown key: " + piece[ 0 ] );
		}
	}

	// A missing property yields a placeholder name, so that Class.forName() fails with a readable message.
	String className = properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" );
	// Temporary patch
	if ( "it.unimi.dsi.mg4j.index.SkipFileIndex".equals( className ) ) className = FileIndex.class.getName();
	final Class<?> indexClass = Class.forName( className );

	// It is a cluster
	if ( IndexCluster.class.isAssignableFrom( indexClass ) ) return IndexCluster.getInstance( basename, randomAccess, documentSizes, queryProperties );

	// Now we dispatch to DiskBasedIndex.getInstance().
	return DiskBasedIndex.getInstance( basename, properties, randomAccess, documentSizes, maps, queryProperties );
}

/** Returns a new index using the given URI, searching dynamically for term and prefix maps.
 *
 * @param uri the URI defining the index.
 * @param randomAccess whether the index should be accessible randomly.
 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
 * might be loaded anyway because the compression method for positions requires it).
 * @return the index defined by <code>uri</code>.
 * @see #getInstance(CharSequence, boolean, boolean, boolean)
 */
public static Index getInstance( final CharSequence uri, final boolean randomAccess, final boolean documentSizes ) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	return getInstance( uri, randomAccess, documentSizes, true );
}

/** Returns a new index using the given URI, searching dynamically for term and prefix maps and loading
 * document sizes only if it is necessary.
 *
 * @param uri the URI defining the index.
 * @param randomAccess whether the index should be accessible randomly.
 * @return the index defined by <code>uri</code>.
 * @see #getInstance(CharSequence, boolean, boolean)
 */
public static Index getInstance( final CharSequence uri, final boolean randomAccess ) throws ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	return getInstance( uri, randomAccess, false );
}

/** Returns a new index using the given URI, searching dynamically for term and prefix maps, loading offsets but loading
 * document sizes only if it is necessary.
 *
 * @param uri the URI defining the index.
 * @return the index defined by <code>uri</code>.
 * @see #getInstance(CharSequence, boolean)
 */
public static Index getInstance( final CharSequence uri ) throws ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	return getInstance( uri, true );
}

/** An iterator returning no documents based on this index.
 *
 * <P>Note that {@link #accept(DocumentIteratorVisitor)} does nothing
 * and returns true, whereas {@link #acceptOnTruePaths(DocumentIteratorVisitor)}
 * throws an {@link IllegalStateException}.
 *
 * <P>All methods that would need a current document (e.g., {@link #document()},
 * {@link #count()} or {@link #positions()}) throw an {@link IllegalStateException}.
 */
protected class EmptyIndexIterator extends IntIterators.EmptyIterator implements IndexIterator, Serializable {
	private static final long serialVersionUID = 0;

	public int document() { throw new IllegalStateException(); }

	public ReferenceSet<Index> indices() { return Index.this.singletonSet; }

	public IntervalIterator intervalIterator() { throw new IllegalStateException(); }

	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() { throw new IllegalStateException(); }

	public IntervalIterator intervalIterator( final Index index ) { throw new IllegalStateException(); }

	public int nextDocument() { return -1; }

	public int skipTo( final int n ) { return Integer.MAX_VALUE; }

	public int frequency() { return 0; }

	public Payload payload() { throw new IllegalStateException(); }

	public int count() { throw new IllegalStateException(); }

	public IntIterator positions() { throw new IllegalStateException(); }

	public int positions( final int[] positions ) { throw new IllegalStateException(); }

	public int[] positionArray() { throw new IllegalStateException(); }

	public void dispose() {}

	public Index index() { return Index.this; }

	public boolean accept( DocumentIteratorVisitor visitor ) { return true; }

	public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) { throw new IllegalStateException(); }

	public String term() { return null; }

	public void term( final CharSequence term ) { /* No-op allowed by contract. */ }

	public int id() { return -1; }

	public void id( final int id ) { /* No-op allowed by contract. */ }

	public IntervalIterator iterator() { return intervalIterator(); }

	public int termNumber() { return -1; }
}

/** A singleton for an iterator returning no documents based on this index. */
public final EmptyIndexIterator emptyIndexIterator = new EmptyIndexIterator();

/** Creates and returns a new {@link IndexReader} based on this index, using
 * the default buffer size. After that, you can use the reader to read this index.
 *
 * @return a new {@link IndexReader} to read this index.
 */
public IndexReader getReader() throws IOException {
	return getReader( -1 );
}

/** Creates and returns a new {@link IndexReader} based on this index. After that, you
 * can use the reader to read this index.
 *
 * @param bufferSize the size of the buffer to be used accessing the reader, or -1
 * for a default buffer size.
 * @return a new {@link IndexReader} to read this index.
 */
public abstract IndexReader getReader( final int bufferSize ) throws IOException;

/** Closes a reader created by one of the <code>documents()</code> methods after
 * the lookup on it has failed.
 *
 * <p>Failures while closing are only logged, so that the caller can rethrow the exception
 * that caused the lookup to fail.
 *
 * @param indexReader the reader to be closed.
 */
private static void closeAfterFailure( final IndexReader indexReader ) {
	try {
		indexReader.close();
	}
	catch ( IOException e ) {
		// The lookup failure the caller is about to rethrow is the interesting one.
		LOGGER.debug( "Cannot close index reader after a failed lookup", e );
	}
}

/** Creates a new {@link IndexReader} for this index and uses it to return
 * an index iterator over the documents containing a term.
 *
 * <p>Since the reader is created from scratch, it is essential
 * to {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() dispose} the
 * returned iterator after usage. See {@link IndexReader#documents(int)}
 * for a method with the same semantics, but making reader reuse possible.
 *
 * <p>If the lookup returns {@link #emptyIndexIterator}, or if it throws an exception,
 * the reader is closed before this method completes.
 *
 * @param term a term.
 * @return an index iterator over the documents containing <code>term</code>.
 * @throws IOException if an exception occurred while accessing the index.
 * @throws UnsupportedOperationException if this index is not accessible by term
 * number.
 * @see IndexReader#documents(int)
 */
public IndexIterator documents( final int term ) throws IOException {
	final IndexReader indexReader = getReader();
	final IndexIterator indexIterator;

	// Nobody else knows about the reader yet: if the lookup fails we must close it here.
	try {
		indexIterator = indexReader.documents( term );
	}
	catch ( IOException e ) {
		closeAfterFailure( indexReader );
		throw e;
	}
	catch ( RuntimeException e ) {
		closeAfterFailure( indexReader );
		throw e;
	}

	// The shared empty iterator is not bound to the reader, which would otherwise never be closed.
	if ( indexIterator == emptyIndexIterator ) indexReader.close();
	return indexIterator;
}

/** Creates a new {@link IndexReader} for this index and uses it to return
 * an index iterator over the documents containing a term; the term is
 * given explicitly, and the index {@linkplain StringMap term map} is used, if present.
 *
 * <p>Since the reader is created from scratch, it is essential
 * to {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() dispose} the
 * returned iterator after usage. See {@link IndexReader#documents(int)}
 * for a method with the same semantics, but making reader reuse possible.
 *
 * <p>Unless the {@linkplain Index#termProcessor term processor} of
 * this index is <code>null</code>, words coming from a query will
 * have to be processed before being used with this method.
 *
 * <p>If the lookup returns {@link #emptyIndexIterator}, or if it throws an exception,
 * the reader is closed before this method completes.
 *
 * @param term a term.
 * @return an index iterator over the documents containing <code>term</code>.
 * @throws IOException if an exception occurred while accessing the index.
 * @throws UnsupportedOperationException if the {@linkplain StringMap term map} is not
 * available for this index.
 * @see IndexReader#documents(CharSequence)
 */
public IndexIterator documents( final CharSequence term ) throws IOException {
	final IndexReader indexReader = getReader();
	final IndexIterator indexIterator;

	// Nobody else knows about the reader yet: if the lookup fails we must close it here.
	try {
		indexIterator = indexReader.documents( term );
	}
	catch ( IOException e ) {
		closeAfterFailure( indexReader );
		throw e;
	}
	catch ( RuntimeException e ) {
		closeAfterFailure( indexReader );
		throw e;
	}

	// The shared empty iterator is not bound to the reader, which would otherwise never be closed.
	if ( indexIterator == emptyIndexIterator ) indexReader.close();
	return indexIterator;
}

/** Creates a number of instances of {@link IndexReader} for this index and uses them to return
 * a document iterator over the documents containing a set of terms defined
 * by a prefix; the prefix is given explicitly, and unless the index has a
 * {@linkplain PrefixMap prefix map}, an {@link UnsupportedOperationException}
 * will be thrown.
 *
 * <p>This method is not provided by {@link IndexReader} because it requires the
 * creation of several index readers at the same time. These readers must be
 * {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() disposed} afterwards.
 *
 * @param prefix a prefix.
 * @param limit a limit on the number of terms that will be used to resolve
 * the prefix query; if the terms starting with <code>prefix</code> are more than
 * <code>limit</code>, a {@link TooManyTermsException} will be thrown.
 * @return an index iterator over the documents containing a term starting with <code>prefix</code>.
 * @throws IOException if an exception occurred while accessing the index.
 * @throws UnsupportedOperationException if this index cannot resolve prefixes.
 * @throws TooManyTermsException if there are more than <code>limit</code> terms starting with <code>prefix</code>.
 */
public abstract IndexIterator documents( CharSequence prefix, int limit ) throws IOException, TooManyTermsException;

/** Set the index used as a key to retrieve intervals from iterators generated from this index.
 *
 * <P>This setter is a compromise between clarity of design and efficiency.
 * Each index iterator is based on an index, and when that index is passed
 * to {@link DocumentIterator#intervalIterator(Index)}, intervals corresponding
 * to the positions of the term in the current document are returned. Analogously,
 * {@link it.unimi.dsi.mg4j.search.DocumentIterator#indices()} returns a singleton
 * set containing the index. However, when composing indices into clusters,
 * often iterators generated by a local index must act as if they really belong
 * to the global index. This method allows to set the index that is used as
 * a key to return intervals, and that is contained in {@link #singletonSet}.
 *
 * <P>Note that setting this value will only influence {@linkplain IndexReader index readers}
 * created afterwards.
 *
 * @param newKeyIndex the new index to be used as a key for interval retrieval.
 */
public void keyIndex( Index newKeyIndex ) {
	keyIndex = newKeyIndex;
	// Keep the cached singleton set in sync with the new key.
	singletonSet = ReferenceSets.singleton( keyIndex );
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -