⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dispatchingdocumentfactory.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package it.unimi.dsi.mg4j.document;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Paolo Boldi  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.io.BinIO;import it.unimi.dsi.fastutil.objects.Object2IntMap;import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;import it.unimi.dsi.fastutil.objects.Object2ObjectLinkedOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.io.FastBufferedReader;import it.unimi.dsi.io.NullReader;import it.unimi.dsi.io.WordReader;import it.unimi.dsi.util.Properties;import java.io.IOException;import java.io.InputStream;import java.io.Serializable;import java.util.Iterator;import java.util.Map;import org.apache.commons.configuration.ConfigurationException;/** A document factory that actually dispatches the task of building documents to various factories *  according to some strategy. *  * <p>The strategy is specified as (an object embedding) a method that determines which factory * should be used on the basis of the metadata that are provided to the {@link #getDocument(InputStream, Reference2ObjectMap)} * method. 
Since the strategy usually has to resolve the names of metadata, it is also passed
 * this factory, so that the correct
 * {@link it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory#resolve(Enum,Reference2ObjectMap)} method can be invoked.
 *
 * <p>Moreover, at construction one must specify, for each subfactory and for each field of this
 * factory, which field of the subfactory should be used. Note that to guarantee sequential access,
 * fields specified for each subfactory should appear in increasing order.
 */
public class DispatchingDocumentFactory extends PropertyBasedDocumentFactory {
	private static final long serialVersionUID = 1L;
	/** Compile-time switch for diagnostic output; it is <code>false</code>, so debug printing is disabled. */
	private static final boolean DEBUG = false;

	/** Case-insensitive keys for metadata.
	 *
	 *  @see PropertyBasedDocumentFactory.MetadataKeys
	 */
	public static enum MetadataKeys {
		/** The property containing the (comma-separated) sequence of field names. */
		FIELDNAME,
		/** The property containing the key that should be checked (e.g., mimetype). */
		KEY,
		/** The property containing a comma-separated sequence of colon-separated value/document-factory-name pairs. */
		RULE,
		/** The property containing a comma-separated list with as many items as there are factories; each item is
		 *  a colon-separated list of as many integers as there are fields. The <var>k</var>-th integer in the <var>f</var>-th
		 *  list is the number of the field of the <var>f</var>-th factory that should be used to extract field number <var>k</var>,
		 *  or -1 if the field should be empty. */
		MAP
	}

	/** The value to be used in <code>RULE</code> to introduce the default factory. If it is not used, no default factory is
	 *  provided for documents that do not match. */
	public final static String OTHERWISE_IN_RULE = "?";

	/** A strategy that decides which factory is appropriate using the document metadata.
*/		public static interface DispatchingStrategy extends Serializable {		/** Decides the index of the factory to be used for the given metadata, possibly using		 *  a factory to resolve property names.		 * 		 * @param metadata the metadata of the document to be produced.		 * @param factory the factory used to resolve metadata names.		 * @return the factory index.		 */		public int factoryNumber( Reference2ObjectMap<Enum<?>,Object> metadata, PropertyBasedDocumentFactory factory );	};		/** A strategy that is based on trying to match the value of the metadata with a given key with respect to a	 *  certain set of values.	 */	public static class StringBasedDispatchingStrategy implements DispatchingStrategy {		private static final long serialVersionUID = 1L;		/** The key to be resolved. */		private final Enum<?> key;		/** The values that should be used for comparisons. */		private final Object2IntMap<String> value;		/** The strategy works as follows: the property named <code>key</code> is resolved; if this property		 *  is not set, the default return value of <var>value</var> is returned. 		 *  Otherwise, its value is compared, using the <code>equals</code>,		 *  method with the elements of the <code>value</code> set, and the corresponding integer is returned.		 * 		 * @param key the key to be resolved.		 * @param value the map of values.		 */		public StringBasedDispatchingStrategy( final Enum<?> key, final Object2IntMap<String> value ) {			this.key = key;			this.value = value;		}				public int factoryNumber( final Reference2ObjectMap<Enum<?>,Object> metadata, final PropertyBasedDocumentFactory factory ) {			final Object val = factory.resolve( key, metadata );			if ( DEBUG ) System.out.println( "key " + key + " resolved using " + metadata + " into " + val );			return value.getInt( val );		}			};		/** The number of subfactories used. */	private int n;	/** The subfactories used. */	private DocumentFactory[] documentFactory;	/** The number of fields of this factory. 
*/	private int numberOfFields;	/** The names of the fields. */	private String[] fieldName;	/** The types of the fields. */	private FieldType[] fieldType;	/** The array specifying how subfactory fields should be mapped into fields of this factory. More precisely,	 *  <code>rename[f][k]</code> specifies which field of factory <code>documentFactory[f]</code> should be used	 *  to return the field named <code>fieldName[k]</code>: it is assumed that the type of the field in the subfactory	 *  is correct (i.e., that <code>documentFactory[f].fieldType(k)==fieldType[k]</code>). The value -1 is used to	 *  return an empty textual field (i.e., a word reader on an empty string).	 */	private int[][] rename;	/** The strategy to be used. */	private DispatchingStrategy strategy;	/** If a {@link StringBasedDispatchingStrategy} should be used, this field represents the property key to be checked. 	 *  Otherwise, this is <code>null</code>. */	private Enum<?> dispatchingKey;	/** If a {@link StringBasedDispatchingStrategy} should be used, this field represents the map from values to factories. */	private Object2ObjectLinkedOpenHashMap<String,Class<? 
extends DocumentFactory>> value2factoryClass;				private void init( final DocumentFactory[] documentFactory, final String[] fieldName, 			final FieldType[] fieldType, final int[][] rename, final DispatchingStrategy strategy ) {			n = documentFactory.length;			this.documentFactory = documentFactory;			numberOfFields = fieldName.length;			this.fieldName = fieldName;			this.fieldType = fieldType;			this.rename = rename;			this.strategy = strategy;	}		// TODO: All IllegalArgumentException where ConfigurationException; check that now it's OK	private void checkAttributes() {		if ( fieldName.length != fieldType.length || rename.length != documentFactory.length || documentFactory.length != n || fieldName.length != numberOfFields ) throw new IllegalArgumentException( "Length mismatch in defining the dispatching factory");		for ( int f = 0; f < n; f++ ) {			if ( rename[ f ].length != numberOfFields ) throw new IllegalArgumentException( "The number of fields (" + numberOfFields + ") does not match the mapping rule for factory " + documentFactory[ f ].getClass().getName() );			for ( int k = 0; k < numberOfFields; k++ ) {				if ( rename[ f ][ k ] < -1 || rename[ f ][ k ] >= documentFactory[ f ].numberOfFields() )					throw new IllegalArgumentException( rename[ f ][ k ] + " is not a field of factory " + documentFactory[ f ] );				if ( rename[ f ][ k ] >= 0 && fieldType[ k ] != documentFactory[ f ].fieldType( rename[ f ][ k ] ) )					throw new IllegalArgumentException( "Field " + rename[ f ][ k ] + " of factory " + documentFactory[ f ] + " has a type different from the type of the field it is mapped to" );			}					}		if ( n == 0 || numberOfFields == 0 ) throw new IllegalArgumentException( "Zero factories or fields specified" );		if ( strategy == null ) throw new IllegalArgumentException( "No strategy was specified" );	}	private void setExtraArguments( final Object xtraPars ) throws IllegalArgumentException {		if ( value2factoryClass == null ) throw new IllegalArgumentException( 
"No " + MetadataKeys.RULE + " property was specified for the dispatching factory" );		n = value2factoryClass.values().size();		documentFactory = new DocumentFactory[ n ];		Iterator<Class<? extends DocumentFactory>> it = value2factoryClass.values().iterator();		for ( int f = 0; f < n; f++ ) {			Class<? extends DocumentFactory> documentFactoryClass = it.next();			try {				if ( xtraPars == null )					documentFactory[ f ] = documentFactoryClass.newInstance();				else					documentFactory[ f ] = documentFactoryClass.getConstructor( xtraPars.getClass() ).newInstance( xtraPars );			} catch ( Exception e ) {				throw new IllegalArgumentException( e );			}		}		fieldType = new FieldType[ numberOfFields ];		if ( rename == null ) throw new IllegalArgumentException( "No " + MetadataKeys.MAP + " property was specified for the dispatching factory" );		for ( int f = 0; f < n; f++ ) {			for ( int k = 0; k < numberOfFields; k++ ) {				int kk = rename[ f ][ k ];				if ( kk >= 0 && fieldType[ k ] != null && fieldType[ k ] != documentFactory[ f ].fieldType( kk ) ) 					throw new IllegalArgumentException( "Mismatch between field types for field " + f + ", relative to the remapping of factory " + documentFactory[ f ].getClass().getName() + " (the type used to be " + fieldType[ k ] + ", but now we want it to be " + documentFactory[ f ].fieldType( kk ) + ")" );				if ( kk >= 0 ) fieldType[ k ] = documentFactory[ f ].fieldType( kk );			} 		}		for ( int f = 0; f < numberOfFields; f++ ) 			if ( fieldType[ f ] == null ) throw new IllegalArgumentException( "The type of field " + fieldName[ f ] + " could not be deduced, because it is never mapped to" );		if ( dispatchingKey == null ) throw new IllegalArgumentException( "No " + MetadataKeys.KEY + " property was specified for the dispatching factory" );		Object2IntMap<String> value2int = new Object2IntOpenHashMap<String>();		value2int.defaultReturnValue( -1 );		for( Map.Entry<String,Class<? 
extends DocumentFactory>> e : value2factoryClass.entrySet() ) {			int k;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -