📄 index.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
	public static Index getInstance( final CharSequence uri, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		/* If the scheme is mg4j, then we are creating a remote		 * index. If it is null, we assume it is a property file and load it. Otherwise, we		 * assume it is a valid property file URI and try to download it. */				final String uriString = uri.toString();		if ( uriString.startsWith( "mg4j:" ) ) {			final URI u = new URI( uriString );			return IndexServer.getIndex( u.getHost(), u.getPort(), randomAccess, documentSizes );		}		final String basename, query;				if ( uriString.startsWith( "file:" ) ) {			final URI u = new URI( uriString );			basename = u.getPath();			query = u.getQuery();		}		else {			final int questionMarkPos = uriString.indexOf( '?' );			basename = questionMarkPos == -1 ? uriString : uriString.substring( 0, questionMarkPos );			query = questionMarkPos == -1 ? null : uriString.substring( questionMarkPos + 1 );		}				LOGGER.debug( "Searching for an index with basename " + basename + "..." );		Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );		LOGGER.debug( "Properties: " + properties );							// We parse the key/value pairs appearing in the query part.		
final EnumMap<UriKeys,String> queryProperties = new EnumMap<UriKeys,String>( UriKeys.class );		if ( query != null ) {			String[] keyValue = query.split( ";" );			for( int i = 0; i < keyValue.length; i++ ) {				String[] piece = keyValue[ i ].split( "=" );				if ( piece.length != 2 ) throw new IllegalArgumentException( "Malformed key/value pair: "  + keyValue[ i ] );				// Convert to standard keys				boolean found = false;				for( UriKeys key: UriKeys.values() )  					if ( found = PropertyBasedDocumentFactory.sameKey( key, piece[ 0 ] ) ) {						queryProperties.put( key, piece[ 1 ] );						break;					}				if ( ! found ) throw new IllegalArgumentException( "Unknown key: " + piece[ 0 ] );			}		}		String className = properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" );		// Temporary patch		if ( "it.unimi.dsi.mg4j.index.SkipFileIndex".equals( className ) ) className = FileIndex.class.getName();		Class<?> indexClass = Class.forName( className );		// It is a cluster		if ( IndexCluster.class.isAssignableFrom( indexClass ) )			return IndexCluster.getInstance( basename, randomAccess, documentSizes, queryProperties );							// Now we dispatch to DiskBasedIndex.getInstance().		return DiskBasedIndex.getInstance( basename, properties, randomAccess, documentSizes, maps, queryProperties );	}	/** Returns a new index using the given URI, searching dynamically for term and prefix maps.	 * 	 * @param uri the URI defining the index.	 * @param randomAccess whether the index should be accessible randomly.	 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes	 * might be loaded anyway because the compression method for positions requires it).	 
* @see #getInstance(CharSequence, boolean, boolean, boolean)	 */	public static Index getInstance( final CharSequence uri, final boolean randomAccess, final boolean documentSizes ) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		return getInstance( uri, randomAccess, documentSizes, true );	}	/** Returns a new index using the given URI, searching dynamically for term and prefix maps and loading	 * document sizes only if it is necessary.   	 * 	 * @param uri the URI defining the index.	 * @param randomAccess whether the index should be accessible randomly.	 * @see #getInstance(CharSequence, boolean, boolean)	 */	public static Index getInstance( final CharSequence uri, final boolean randomAccess ) throws ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		return getInstance( uri, randomAccess, false );	}	/** Returns a new index using the given URI, searching dynamically for term and prefix maps, loading offsets but loading	 * document sizes only if it is necessary.   	 * 	 * @param uri the URI defining the index.	 * @see #getInstance(CharSequence, boolean)	 */	public static Index getInstance( final CharSequence uri ) throws ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		return getInstance( uri, true );	}	/** An iterator returning no documents based on this index. 	 * 	 * <P>Note that {@link #accept(DocumentIteratorVisitor)} does nothing	 * and returns true, whereas {@link #acceptOnTruePaths(DocumentIteratorVisitor)}	 * throws an {@link IllegalStateException}.	 
*/	protected class EmptyIndexIterator extends IntIterators.EmptyIterator implements IndexIterator, Serializable {		private static final long serialVersionUID = 0;		public int document() { throw new IllegalStateException(); }		public ReferenceSet<Index> indices() { return Index.this.singletonSet; }		public IntervalIterator intervalIterator() { throw new IllegalStateException(); }		public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() { throw new IllegalStateException(); }		public IntervalIterator intervalIterator( final Index index ) { throw new IllegalStateException(); }		public int nextDocument() { return -1; }		public int skipTo( final int n ) { return Integer.MAX_VALUE; }		public int frequency() { return 0; }		public Payload payload() { throw new IllegalStateException(); }		public int count() { throw new IllegalStateException(); }		public IntIterator positions() { throw new IllegalStateException(); }		public int positions( final int[] positions ) { throw new IllegalStateException(); }		public int[] positionArray() { throw new IllegalStateException(); }		public void dispose() {}		public Index index() { return Index.this; };		public boolean accept( DocumentIteratorVisitor visitor ) { return true; }		public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) { throw new IllegalStateException(); }		public String term() { return null; }		public void term( final CharSequence term ) { /* No-op allowed by contract. */ }		public int id() { return -1; }		public void id( final int id ) { /* No-op allowed by contract. */ }		public IntervalIterator iterator() { return intervalIterator(); }		public int termNumber() { return -1; }	}	/** A singleton for an iterator returning no documents based on this index. */	public final EmptyIndexIterator emptyIndexIterator = new EmptyIndexIterator();		/** Creates and returns a new {@link IndexReader} based on this index, using	 * the default buffer size. After that, you can use the reader to read this index.	 
* 	 * @return a new {@link IndexReader} to read this index.	 */	public IndexReader getReader() throws IOException {		return getReader( -1 );	}		/** Creates and returns a new {@link IndexReader} based on this index. After that, you	 *  can use the reader to read this index.	 * 	 * @param bufferSize the size of the buffer to be used accessing the reader, or -1	 * for a default buffer size.	 * @return a new {@link IndexReader} to read this index.	 */	public abstract IndexReader getReader( final int bufferSize ) throws IOException;		/** Creates a new {@link IndexReader} for this index and uses it to return 	 * an index iterator over the documents containing a term.	 *	 * <p>Since the reader is created from scratch, it is essential	 * to {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() dispose} the	 * returned iterator after usage. See {@link IndexReader#documents(int)}	 * for a method with the same semantics, but making reader reuse possible.	 * 	 * @param term a term.	 * @throws IOException if an exception occurred while accessing the index.	 * @throws UnsupportedOperationException if this index is not accessible by term	 * number.	 * @see IndexReader#documents(int)	 */	public IndexIterator documents( final int term ) throws IOException {		final IndexReader indexReader = getReader();		final IndexIterator indexIterator = indexReader.documents( term );		if ( indexIterator == emptyIndexIterator ) indexReader.close();		return indexIterator;	}	/** Creates a new {@link IndexReader} for this index and uses it to return 	 * an index iterator over the documents containing a term; the term is	 *  given explicitly, and the index {@linkplain StringMap term map} is used, if present.	 *	 * <p>Since the reader is created from scratch, it is essential	 * to {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() dispose} the	 * returned iterator after usage. 
See {@link IndexReader#documents(int)}	 * for a method with the same semantics, but making reader reuse possible.	 * 	 * <p>Unless the {@linkplain Index#termProcessor term processor} of	 * this index is <code>null</code>, words coming from a query will	 * have to be processed before being used with this method.	 * 	 * @param term a term.	 * @throws IOException if an exception occurred while accessing the index.	 * @throws UnsupportedOperationException if the {@linkplain StringMap term map} is not 	 * available for this index.	 * @see IndexReader#documents(CharSequence)	 */	public IndexIterator documents( final CharSequence term ) throws IOException {		final IndexReader indexReader = getReader();		final IndexIterator indexIterator = indexReader.documents( term );		if ( indexIterator == emptyIndexIterator ) indexReader.close();		return indexIterator;	}	/** Creates a number of instances of {@link IndexReader} for this index and uses them to return 	 * a document iterator over the documents containing a set of terms defined	 *  by a prefix; the prefix is given explicitly, and unless the index has a 	 *  {@linkplain PrefixMap prefix map}, an {@link UnsupportedOperationException}	 *  will be thrown. 	 *	 * <p>This method is not provided by {@link IndexReader} because it requires the	 * creation of several index readers at the same time. These readers must be	 * {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() disposed} afterwards.	 * 	 * @param prefix a prefix.	 * @param limit a limit on the number of terms that will be used to resolve	 * the prefix query; if the terms starting with <code>prefix</code> are more than	 * <code>limit</code>, a {@link TooManyTermsException} will be thrown. 	 * @throws IOException if an exception occurred while accessing the index.	 * @throws UnsupportedOperationException if this index cannot resolve prefixes.	 * @throws TooManyTermsException if there are more than <code>limit</code> terms starting with <code>prefix</code>.	 
*/
	public abstract IndexIterator documents( CharSequence prefix, int limit ) throws IOException, TooManyTermsException;

	/** Sets the index used as a key to retrieve intervals from iterators generated from this index.
	 * 
	 * <P>This setter is a compromise between clarity of design and efficiency.
	 * Each index iterator is based on an index, and when that index is passed
	 * to {@link DocumentIterator#intervalIterator(Index)}, intervals corresponding
	 * to the positions of the term in the current document are returned. Analogously,
	 * {@link it.unimi.dsi.mg4j.search.DocumentIterator#indices()} returns a singleton
	 * set containing the index. However, when composing indices into clusters, 
	 * iterators generated by a local index must often act as if they really belonged
	 * to the global index. This method makes it possible to set the index that is used as
	 * a key to return intervals, and that is contained in {@link #singletonSet}.
	 *
	 * <P>Note that setting this value will only influence {@linkplain IndexReader index readers}
	 * created afterwards.
	 * 
	 * @param newKeyIndex the new index to be used as a key for interval retrieval.
	 */
	public void keyIndex( Index newKeyIndex ) {
		this.keyIndex = newKeyIndex;
		// Keep the singleton set in sync with the key index.
		this.singletonSet = ReferenceSets.singleton( newKeyIndex );
	}
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -