⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dispatchingdocumentfactory.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
			for ( k = 0; k < n; k++ ) 				if ( e.getValue() == documentFactory[ k ].getClass() ) {					if ( e.getKey().equals( OTHERWISE_IN_RULE ) ) value2int.defaultReturnValue( k );					else value2int.put( e.getKey(), k );					break;				}			if ( k == n ) throw new IllegalArgumentException( "Mismatch in the rule mapping " + e.getKey() + " to " + e.getValue() );		}		System.out.println( "Building a strategy mapping " + dispatchingKey + " to " + value2int );		strategy = new StringBasedDispatchingStrategy( dispatchingKey, value2int );	}	/** Creates a new dispatching factory. 	 * 	 * @param documentFactory the array of subfactories.	 * @param fieldName the names of this factory's fields.	 * @param fieldType the types of this factory's fields. 	 * @param rename the way fields of this class are mapped to fields of the subfactories.	 * @param strategy the strategy to decide which factory should be used.	 */	public DispatchingDocumentFactory( final DocumentFactory[] documentFactory, final String[] fieldName, 			final FieldType[] fieldType, final int[][] rename, final DispatchingStrategy strategy ){		init( documentFactory, fieldName, fieldType, rename, strategy );		checkAttributes();	} 	public DispatchingDocumentFactory copy() {		final DocumentFactory[] documentFactory = new DocumentFactory[ this.documentFactory.length ];		for( int i = documentFactory.length; i-- != 0; ) documentFactory[ i ] = this.documentFactory[ i ].copy();		return new DispatchingDocumentFactory( documentFactory, fieldName, fieldType, rename, strategy );	}		public DispatchingDocumentFactory( final Properties properties ) throws ConfigurationException {		super( properties );		setExtraArguments( properties );		checkAttributes();	}	public DispatchingDocumentFactory( final String[] property ) throws ConfigurationException {		super( property );		setExtraArguments( property );		checkAttributes();	}	public DispatchingDocumentFactory( final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) {		super( defaultMetadata );		checkAttributes(); // Will certainly fail because the configuration is actually missing	}	public DispatchingDocumentFactory() {		super();		checkAttributes(); // Will certainly fail because the configuration is actually missing	}		@SuppressWarnings("unchecked")	@Override	protected boolean parseProperty( final String key, final String[] values, final Reference2ObjectMap<Enum<?>,Object> metadata ) throws ConfigurationException {		if ( sameKey( MetadataKeys.FIELDNAME, key ) ) {			fieldName = values;			numberOfFields = fieldName.length;			return true;		} 		else if ( sameKey( MetadataKeys.KEY, key ) ) {			final String dispatchingKeyName = ensureJustOne( key, values );			final int lastDot = dispatchingKeyName.lastIndexOf( '.' );			try {				dispatchingKey = Enum.valueOf( (Class<Enum>)Class.forName( dispatchingKeyName.substring( 0, lastDot ) ),						dispatchingKeyName.substring( lastDot + 1) );			}			catch ( ClassNotFoundException e ) {				throw new IllegalArgumentException( "The class specified in the key " + dispatchingKeyName + " cannot be found" );			} 			return true;		}		else if ( sameKey( MetadataKeys.RULE, key ) ) {			String[] rules = values;			value2factoryClass = new Object2ObjectLinkedOpenHashMap<String,Class<? extends DocumentFactory>>();			int i, m = rules.length;			for ( i = 0; i < m; i++ ) {				int pos = rules[ i ].indexOf( ':' );				if ( pos <= 0 || pos == rules[ i ].length() - 1 ) throw new ConfigurationException( "Rule " + rules[ i ] + " does not contain a colon or it is malformed" );				if ( rules[ i ].indexOf( ':', pos + 1 ) >= 0 ) throw new ConfigurationException( "Rule " + rules[ i ] + " contains too many colons" );				String factoryName = rules[ i ].substring( pos + 1 );				Class<? extends DocumentFactory> factoryClass = null;				try {					factoryClass = (Class<? extends DocumentFactory>)Class.forName( factoryName );					if ( ! ( DocumentFactory.class.isAssignableFrom( factoryClass ) ) ) throw new ClassNotFoundException();				} catch ( ClassNotFoundException e ) {					throw new ConfigurationException( "ParsingFactory " + factoryName + " is invalid; maybe the package name is missing" );				}				value2factoryClass.put( rules[ i ].substring( 0, pos ), factoryClass );			}			m = value2factoryClass.values().size();			return true;					}		else if ( sameKey( MetadataKeys.MAP, key ) ) {			String[] pieces = values;			int i, m = pieces.length;			rename = new int[ m ][];			for ( i = 0; i < m; i++ ) {				String[] subpieces = pieces[ i ].split( ":" );				if ( i > 0 && subpieces.length != rename[ 0 ].length ) throw new ConfigurationException( "Length mismatch in the map " + values );				rename[ i ] = new int[ subpieces.length ];				for ( int k = 0; k < subpieces.length; k++ ) {					try {						rename[ i ][ k ] = Integer.parseInt( subpieces[ k ] );					} catch ( NumberFormatException e ) {						throw new ConfigurationException( "Number format exception in the map " + values );					}				}			}		}		return super.parseProperty( key, values, metadata );	}			public int numberOfFields() {		return numberOfFields;	}	public String fieldName( final int field ) {		ensureFieldIndex( field );		return fieldName[ field ];	}	public int fieldIndex( final String fieldName ) {		for ( int k = 0; k < numberOfFields; k++ ) 			if ( this.fieldName[ k ].equals( fieldName ) ) return k;		return -1;	}	public FieldType fieldType( final int field ) {		ensureFieldIndex( field );		return fieldType[ field ];	}	/** A word reader that is returned when a null field should be returned. */	final private WordReader nullReader = new FastBufferedReader();	public Document getDocument( final InputStream rawContent, final Reference2ObjectMap<Enum<?>,Object> metadata ) throws IOException {				final int factoryIndex = strategy.factoryNumber( metadata, this );		System.out.println( "The strategy returned " + factoryIndex );		if ( factoryIndex < 0 || factoryIndex >= n ) throw new IllegalArgumentException();				System.out.println( "Going to parse a document with " + metadata + ", using " + documentFactory[ factoryIndex ].getClass().getName() );				final DocumentFactory factory = documentFactory[ factoryIndex ];		final Document document = factory.getDocument( rawContent, metadata );				return new AbstractDocument() {			public CharSequence title() {				return document.title();			}						public String toString() {				return document.toString();			}			public CharSequence uri() {				return document.uri();			}			public Object content( final int field ) throws IOException {				ensureFieldIndex( field );				if ( rename[ factoryIndex ][ field ] < 0 ) return NullReader.getInstance();				return document.content( rename[ factoryIndex ][ field ] );			}						public WordReader wordReader( final int field ) {				ensureFieldIndex( field );				if ( rename[ factoryIndex ][ field ] < 0 ) return nullReader;				return document.wordReader( rename[ factoryIndex ][ field ] ); 			}			public void close() throws IOException {				super.close();				document.close();			}		};	}			public static void main( final String[] arg ) throws IOException, ConfigurationException {		//PdfDocumentFactory pdfFactory = new PdfDocumentFactory();		//HtmlDocumentFactory htmlFactory = new HtmlDocumentFactory();		//IdentityDocumentFactory idFactory = new IdentityDocumentFactory();		//Object2IntMap map = new Object2IntOpenHashMap(		//		new String[] { "application/pdf", "text/html" },		//		new int[] { 0, 1 }		//	);		//map.defaultReturnValue( 2 );		//DispatchingStrategy strategy = new StringBasedDispatchingStrategy( MetadataKeys.MIMETYPE, map	);				Properties p = new Properties();		p.addProperty( MetadataKeys.FIELDNAME.name().toLowerCase(), "text,title" );		p.addProperty( MetadataKeys.KEY.name().toLowerCase(), PropertyBasedDocumentFactory.MetadataKeys.MIMETYPE.name() );		p.addProperty( MetadataKeys.RULE.name().toLowerCase(), "application/pdf:it.unimi.dsi.mg4j.document.PdfDocumentFactory,text/html:it.unimi.dsi.mg4j.document.HtmlDocumentFactory,?:it.unimi.dsi.mg4j.document.IdentityDocumentFactory" );		p.addProperty( MetadataKeys.MAP.name().toLowerCase(), "0:-1,0:1,0:-1" );		p.addProperty( MetadataKeys.MAP.name().toLowerCase(), "0:-1,0:1,0:-1" );		p.addProperty( MetadataKeys.MAP.name().toLowerCase(), "0:-1,0:1,0:-1" );		p.addProperty( PropertyBasedDocumentFactory.MetadataKeys.ENCODING.name().toLowerCase(), "iso-8859-1" );				DispatchingDocumentFactory factory = new DispatchingDocumentFactory( p ); 		DocumentCollection dc = new FileSetDocumentCollection( arg, factory );		BinIO.storeObject( dc, "test.collection" );	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -