📄 dispatchingdocumentfactory.java
字号:
package it.unimi.dsi.mg4j.document;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Paolo Boldi * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.io.BinIO;import it.unimi.dsi.fastutil.objects.Object2IntMap;import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;import it.unimi.dsi.fastutil.objects.Object2ObjectLinkedOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.io.FastBufferedReader;import it.unimi.dsi.io.NullReader;import it.unimi.dsi.io.WordReader;import it.unimi.dsi.util.Properties;import java.io.IOException;import java.io.InputStream;import java.io.Serializable;import java.util.Iterator;import java.util.Map;import org.apache.commons.configuration.ConfigurationException;/** A document factory that actually dispatches the task of building documents to various factories * according to some strategy. * * <p>The strategy is specified as (an object embedding) a method that determines which factory * should be used on the basis of the metadata that are provided to the {@link #getDocument(InputStream, Reference2ObjectMap)} * method. 
Since the strategy will usually have to resolve metadata names, it is also passed
 * this factory, so that the correct
 * {@link it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory#resolve(Enum,Reference2ObjectMap)} method can be invoked.
 *
 * <p>Moreover, at construction one must specify, for each subfactory and for each field of this
 * factory, which field of the subfactory should be used. Note that to guarantee sequential access,
 * fields specified for each subfactory should appear in increasing order.
 */
public class DispatchingDocumentFactory extends PropertyBasedDocumentFactory {
	private static final long serialVersionUID = 1L;
	private static final boolean DEBUG = false;

	/** Case-insensitive keys for metadata.
	 *
	 * @see PropertyBasedDocumentFactory.MetadataKeys
	 */
	public enum MetadataKeys {
		/** The property containing the (comma-separated) sequence of field names. */
		FIELDNAME,
		/** The property containing the key that should be checked (e.g., mimetype). */
		KEY,
		/** The property containing a comma-separated sequence of colon-separated pairs value/document factory name. */
		RULE,
		/** The property containing a comma-separated list with as many items as there are factories; each item will be
		 * a colon-separated list of as many integers as there are fields. The <var>k</var>-th integer in the <var>f</var>-th
		 * list is the number of the field of the <var>f</var>-th factory that should be used to extract field number <var>k</var>,
		 * or -1 if the field should be empty.
		 */
		MAP
	}

	/** The value to be used in <code>RULE</code> to introduce the default factory. Otherwise, no default factory is
	 * provided for documents that do not match.
	 */
	public static final String OTHERWISE_IN_RULE = "?";

	/** A strategy that decides which factory is appropriate using the document metadata. 
*/ public static interface DispatchingStrategy extends Serializable { /** Decides the index of the factory to be used for the given metadata, possibly using * a factory to resolve property names. * * @param metadata the metadata of the document to be produced. * @param factory the factory used to resolve metadata names. * @return the factory index. */ public int factoryNumber( Reference2ObjectMap<Enum<?>,Object> metadata, PropertyBasedDocumentFactory factory ); }; /** A strategy that is based on trying to match the value of the metadata with a given key with respect to a * certain set of values. */ public static class StringBasedDispatchingStrategy implements DispatchingStrategy { private static final long serialVersionUID = 1L; /** The key to be resolved. */ private final Enum<?> key; /** The values that should be used for comparisons. */ private final Object2IntMap<String> value; /** The strategy works as follows: the property named <code>key</code> is resolved; if this property * is not set, the default return value of <var>value</var> is returned. * Otherwise, its value is compared, using the <code>equals</code>, * method with the elements of the <code>value</code> set, and the corresponding integer is returned. * * @param key the key to be resolved. * @param value the map of values. */ public StringBasedDispatchingStrategy( final Enum<?> key, final Object2IntMap<String> value ) { this.key = key; this.value = value; } public int factoryNumber( final Reference2ObjectMap<Enum<?>,Object> metadata, final PropertyBasedDocumentFactory factory ) { final Object val = factory.resolve( key, metadata ); if ( DEBUG ) System.out.println( "key " + key + " resolved using " + metadata + " into " + val ); return value.getInt( val ); } }; /** The number of subfactories used. */ private int n; /** The subfactories used. */ private DocumentFactory[] documentFactory; /** The number of fields of this factory. */ private int numberOfFields; /** The names of the fields. 
*/
	private String[] fieldName;
	/** The types of the fields. */
	private FieldType[] fieldType;
	/** The array specifying how subfactory fields should be mapped into fields of this factory. More precisely,
	 * <code>rename[f][k]</code> specifies which field of factory <code>documentFactory[f]</code> should be used
	 * to return the field named <code>fieldName[k]</code>: the type of that subfactory field must match
	 * (i.e., <code>documentFactory[f].fieldType(rename[f][k])==fieldType[k]</code>). The value -1 is used to
	 * return an empty textual field (i.e., a word reader on an empty string).
	 */
	private int[][] rename;
	/** The strategy to be used. */
	private DispatchingStrategy strategy;
	/** If a {@link StringBasedDispatchingStrategy} should be used, this field represents the property key to be checked.
	 * Otherwise, this is <code>null</code>.
	 */
	private Enum<?> dispatchingKey;
	/** If a {@link StringBasedDispatchingStrategy} should be used, this field represents the map from values
	 * to the classes of the factories that handle them.
	 */
	private Object2ObjectLinkedOpenHashMap<String,Class<? 
extends DocumentFactory>> value2factoryClass; private void init( final DocumentFactory[] documentFactory, final String[] fieldName, final FieldType[] fieldType, final int[][] rename, final DispatchingStrategy strategy ) { n = documentFactory.length; this.documentFactory = documentFactory; numberOfFields = fieldName.length; this.fieldName = fieldName; this.fieldType = fieldType; this.rename = rename; this.strategy = strategy; } // TODO: All IllegalArgumentException where ConfigurationException; check that now it's OK private void checkAttributes() { if ( fieldName.length != fieldType.length || rename.length != documentFactory.length || documentFactory.length != n || fieldName.length != numberOfFields ) throw new IllegalArgumentException( "Length mismatch in defining the dispatching factory"); for ( int f = 0; f < n; f++ ) { if ( rename[ f ].length != numberOfFields ) throw new IllegalArgumentException( "The number of fields (" + numberOfFields + ") does not match the mapping rule for factory " + documentFactory[ f ].getClass().getName() ); for ( int k = 0; k < numberOfFields; k++ ) { if ( rename[ f ][ k ] < -1 || rename[ f ][ k ] >= documentFactory[ f ].numberOfFields() ) throw new IllegalArgumentException( rename[ f ][ k ] + " is not a field of factory " + documentFactory[ f ] ); if ( rename[ f ][ k ] >= 0 && fieldType[ k ] != documentFactory[ f ].fieldType( rename[ f ][ k ] ) ) throw new IllegalArgumentException( "Field " + rename[ f ][ k ] + " of factory " + documentFactory[ f ] + " has a type different from the type of the field it is mapped to" ); } } if ( n == 0 || numberOfFields == 0 ) throw new IllegalArgumentException( "Zero factories or fields specified" ); if ( strategy == null ) throw new IllegalArgumentException( "No strategy was specified" ); } private void setExtraArguments( final Object xtraPars ) throws IllegalArgumentException { if ( value2factoryClass == null ) throw new IllegalArgumentException( "No " + MetadataKeys.RULE + " property was 
specified for the dispatching factory" ); n = value2factoryClass.values().size(); documentFactory = new DocumentFactory[ n ]; Iterator<Class<? extends DocumentFactory>> it = value2factoryClass.values().iterator(); for ( int f = 0; f < n; f++ ) { Class<? extends DocumentFactory> documentFactoryClass = it.next(); try { if ( xtraPars == null ) documentFactory[ f ] = documentFactoryClass.newInstance(); else documentFactory[ f ] = documentFactoryClass.getConstructor( xtraPars.getClass() ).newInstance( xtraPars ); } catch ( Exception e ) { throw new IllegalArgumentException( e ); } } fieldType = new FieldType[ numberOfFields ]; if ( rename == null ) throw new IllegalArgumentException( "No " + MetadataKeys.MAP + " property was specified for the dispatching factory" ); for ( int f = 0; f < n; f++ ) { for ( int k = 0; k < numberOfFields; k++ ) { int kk = rename[ f ][ k ]; if ( kk >= 0 && fieldType[ k ] != null && fieldType[ k ] != documentFactory[ f ].fieldType( kk ) ) throw new IllegalArgumentException( "Mismatch between field types for field " + f + ", relative to the remapping of factory " + documentFactory[ f ].getClass().getName() + " (the type used to be " + fieldType[ k ] + ", but now we want it to be " + documentFactory[ f ].fieldType( kk ) + ")" ); if ( kk >= 0 ) fieldType[ k ] = documentFactory[ f ].fieldType( kk ); } } for ( int f = 0; f < numberOfFields; f++ ) if ( fieldType[ f ] == null ) throw new IllegalArgumentException( "The type of field " + fieldName[ f ] + " could not be deduced, because it is never mapped to" ); if ( dispatchingKey == null ) throw new IllegalArgumentException( "No " + MetadataKeys.KEY + " property was specified for the dispatching factory" ); Object2IntMap<String> value2int = new Object2IntOpenHashMap<String>(); value2int.defaultReturnValue( -1 ); for( Map.Entry<String,Class<? extends DocumentFactory>> e : value2factoryClass.entrySet() ) { int k;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -