⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 javamaildocumentcollection.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.document;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Paolo Boldi and Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.io.BinIO;import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.io.FastBufferedReader;import it.unimi.dsi.io.NullReader;import it.unimi.dsi.io.WordReader;import it.unimi.dsi.lang.MutableString;import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory.MetadataKeys;import it.unimi.dsi.util.Properties;import java.io.IOException;import java.io.InputStream;import java.io.Reader;import java.io.Serializable;import java.io.StringReader;import java.lang.reflect.InvocationTargetException;import java.nio.charset.Charset;import java.text.ParseException;import java.util.Date;import javax.mail.Address;import javax.mail.Folder;import javax.mail.Message;import javax.mail.MessagingException;import javax.mail.Session;import javax.mail.Store;import javax.mail.URLName;import javax.mail.internet.AddressException;import javax.mail.internet.MailDateFormat;import org.apache.commons.configuration.ConfigurationException;import org.apache.log4j.Logger;import com.martiansoftware.jsap.FlaggedOption;import com.martiansoftware.jsap.JSAP;import com.martiansoftware.jsap.JSAPException;import com.martiansoftware.jsap.JSAPResult;import com.martiansoftware.jsap.Parameter;import com.martiansoftware.jsap.SimpleJSAP;import com.martiansoftware.jsap.UnflaggedOption;/** A {@link it.unimi.dsi.mg4j.document.DocumentCollection} corresponding to *  a Javamail {@link javax.mail.Store}. *   *  <p>This class is very simple: for instance, it will not understand correctly *  multipart MIME messages, which will seen as without content. You are invited *  to extend it. *   *  <p>This implementation is an example of a document collection that does not use a *  factory: more precisely, there is an internal class that act as a wired factory. This *  structure is made necessary by the fact that Javamail provide no means to parse messages *  starting from an {@link java.io.InputStream}, which makes a separate implementation *  of {@link it.unimi.dsi.mg4j.document.DocumentFactory#getDocument(InputStream,Reference2ObjectMap)} *  impossible. *   *  <p>Note that to be able to use this class you must configure properly Javamail: *  this involves setting up a <samp>javamail.properties</samp> file describing the *  providers you want to use for the various access schemes. GNU Javamail, for instance, contains *  providers for files, IMAP, POP, etc.  */public class JavamailDocumentCollection extends AbstractDocumentCollection implements Serializable {	private final static Logger LOGGER = Util.getLogger( JavamailDocumentCollection.class );	/** A special date (actually, 1 January 1970) representing no date. */	public final static Date NO_DATE = new Date( 0 );		private static final long serialVersionUID = 2L;	/** Our only session . */	private final static Session SESSION = Session.getDefaultInstance( new java.util.Properties() );	/** The number of messages. */	private final int numberOfMessages;	/** The factory to be used by this collection. */	private final JavamailDocumentFactory factory;	/** The URL for the store. */	private final String storeUrl;	/** The folder name. */	private final String folderName;	/** The javamail store we are reading. */	private final transient Store store;	/** The javamail folder we are reading. */	private final transient Folder folder;		/** Builds a document collection corresponding to a given store URL and folder name.	 * 	 *  <p><strong>Beware.</strong> This class is not suited for large mbox files!	 * 	 * @param storeUrl the javamail URL of the store.	 * @param folderName the folder name.	 * @param factory the factory that will be used to create documents.	 * @throws MessagingException 	 */	protected JavamailDocumentCollection( final String storeUrl, final String folderName, final JavamailDocumentFactory factory ) throws MessagingException {		this.storeUrl = storeUrl;		this.folderName = folderName;		this.factory = factory;		this.store = SESSION.getStore( new URLName( storeUrl ) );		store.connect();				this.folder = store.getDefaultFolder().getFolder( folderName );		folder.open( Folder.READ_ONLY );				this.numberOfMessages = folder.getMessageCount();	}	public JavamailDocumentCollection( final String storeUrl, final String folderName ) throws MessagingException {		this( storeUrl, folderName, new JavamailDocumentFactory() );	}	public JavamailDocumentCollection( final String storeUrl, final String folderName, final Properties properties ) throws MessagingException, ConfigurationException {		this( storeUrl, folderName, new JavamailDocumentFactory( properties ) );	}	public JavamailDocumentCollection( final String storeUrl, final String folderName, final String[] property ) throws MessagingException, ConfigurationException {		this( storeUrl, folderName, new JavamailDocumentFactory( property ) );	}	public JavamailDocumentCollection( final String storeUrl, final String folderName, final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) throws MessagingException {		this( storeUrl, folderName, new JavamailDocumentFactory( defaultMetadata ) );	}	public JavamailDocumentCollection copy() {		try {			return new JavamailDocumentCollection( storeUrl, folderName, factory.copy() );		}		catch ( MessagingException e ) {			throw new RuntimeException( e );		}	}			private final static class JavamailDocumentFactory extends PropertyBasedDocumentFactory {		private static final long serialVersionUID = 1L;		/** The field names (each also corresponds to a header, except for the 0-th). */		private static final String[] FIELD_NAME = { "body", "subject", "from", "to", "date", "cc", "bcc", "content-type" };		/** The field types. */		private static final FieldType[] FIELD_TYPE = { FieldType.TEXT, FieldType.TEXT, FieldType.TEXT, FieldType.TEXT, FieldType.DATE, FieldType.TEXT, FieldType.TEXT, FieldType.TEXT };		/** The map from field names to field indices. */		private static final Object2IntOpenHashMap<String> FIELD2INDEX;		static {			FIELD2INDEX = new Object2IntOpenHashMap<String>( FIELD_NAME.length, .5f );			FIELD2INDEX.defaultReturnValue( -1 );			for( int i = 0; i < FIELD_NAME.length; i++ ) FIELD2INDEX.put( FIELD_NAME[ i ], i );		}		/** The word reader used for all documents. */		private WordReader wordReader = new FastBufferedReader();		protected boolean parseProperty( final String key, final String[] values, final Reference2ObjectMap<Enum<?>,Object> metadata ) throws ConfigurationException {			if ( sameKey( MetadataKeys.ENCODING, key) ) {				metadata.put( MetadataKeys.ENCODING, Charset.forName( ensureJustOne( key, values ) ).toString() );				return true;			}						return super.parseProperty( key, values, metadata );		}				public JavamailDocumentFactory() {			init();		}				public JavamailDocumentFactory( final Properties properties ) throws ConfigurationException {			super( properties );			init();		}		public JavamailDocumentFactory( final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) {			super( defaultMetadata );			init();		}		public JavamailDocumentFactory( final String[] property ) throws ConfigurationException {			super( property );			init();		}				private void init() {			wordReader = new FastBufferedReader();		}		public JavamailDocumentFactory copy() {			return new JavamailDocumentFactory( defaultMetadata );		}				public int numberOfFields() {			return FIELD_NAME.length;		}				public String fieldName( final int field ) {			ensureFieldIndex( field );			return FIELD_NAME[ field ];		}				public FieldType fieldType( final int field ) {			ensureFieldIndex( field );			return FIELD_TYPE[ field ];		}				public int fieldIndex( final String fieldName ) {			return FIELD2INDEX.getInt( fieldName );		}				public Document getDocument( final InputStream rawContent, final Reference2ObjectMap<Enum<?>,Object> metadata )  {			throw new UnsupportedOperationException();			}	}			public DocumentFactory factory() {		return factory;	}		public int size() {		return numberOfMessages;	}	public void close() throws IOException {		super.close();		try {			folder.close( false );			store.close();		}		catch( MessagingException e ) {			throw new IOException( e.toString() );		}	}		private Object readResolve() throws MessagingException, IOException {		super.close(); // To avoid spurious warnings about unclosed collected objects.		return new JavamailDocumentCollection( storeUrl, folderName, factory );	}		public Document document( final int index ) throws IOException {			try {				return new AbstractDocument() {					// Can you believe that? Javamail numbers messages from 1...					final Message message = folder.getMessage( index + 1 );					public CharSequence title() {						final String subject; 						try {							subject = message.getSubject();						}						catch ( MessagingException e ) {							throw new RuntimeException( e.toString() );						}						if ( subject == null ) return (CharSequence)factory.resolve( MetadataKeys.TITLE, factory.defaultMetadata );						else return subject; 					}										public CharSequence uri() {						try {							return folder.getURLName() + "#" + message.getMessageNumber();						}						catch ( MessagingException e ) {							throw new RuntimeException( e );						} 					}					private Reader joinAddresses( final Address address[] ) {						if ( address == null ) return NullReader.getInstance();						final MutableString s = new MutableString();						if ( address != null ) {							for( int i = 0; i < address.length; i++ ) {								if ( i > 0 ) s.append( ", " );								s.append( address[ i ] );							}						}						return new FastBufferedReader( s );					}										public Object content( final int field ) throws IOException {						factory.ensureFieldIndex( field );						try {							switch ( field ) {							case 0: // body								// TODO: analyze multipart messages								Object content = null;								try {									content = message.getContent();								}								catch( Exception e ) {									LOGGER.warn( "Message " + message.getMessageNumber() + " cannot be decoded; content will be empty", e );								}																if ( content != null && content instanceof String ) return new StringReader( (String)content );																return NullReader.getInstance();							case 1: // subject								return message.getSubject() == null ? NullReader.getInstance() : new StringReader( message.getSubject() );							case 2: // from								return joinAddresses( message.getFrom() );							case 3: // to 								return joinAddresses( message.getRecipients( Message.RecipientType.TO ) );							case 4: // date								final String[] date = message.getHeader( "date" );								if ( date == null || date.length == 0 ) return NO_DATE;								final MailDateFormat mailDateFormat = new MailDateFormat();								try {									return mailDateFormat.parse( date[ 0 ] );								}								catch ( ParseException e ) {									LOGGER.warn( "Error parsing date " + date[ 0 ] );									return NO_DATE;								}							case 5: // cc								return joinAddresses( message.getRecipients( Message.RecipientType.CC ) );							case 6: // bcc								return joinAddresses( message.getRecipients( Message.RecipientType.BCC ) );							case 7: // content-type								return new StringReader( message.getContentType() );							}						}						catch ( MessagingException e ) {							// A simple error							if ( e instanceof AddressException ) {								LOGGER.warn( "Error while parsing address", e );								return NullReader.getInstance();							}							throw new IOException( e.toString() );						}						throw new IllegalStateException();					}					public WordReader wordReader( final int field ) {						factory.ensureFieldIndex( field );						return factory.wordReader; 					}				};			}			catch ( MessagingException e ) {				throw new IOException( e.toString() );			}		}		public Reference2ObjectMap<Enum<?>,Object> metadata( final int index ) {		ensureDocumentIndex( index );		final Reference2ObjectArrayMap<Enum<?>,Object> metadata = new Reference2ObjectArrayMap<Enum<?>,Object>( 2 );		metadata.put( MetadataKeys.TITLE, "Message #" + index );		metadata.put( MetadataKeys.URI, storeUrl + folder + "#" + index );		return metadata;	}		public InputStream stream( final int index ) throws IOException {		ensureDocumentIndex( index );		try {			// Can you believe that? Javamail numbers messages from 1...			return folder.getMessage( index + 1 ).getInputStream();		}		catch ( MessagingException e ) {			throw new IOException( e.toString() );		}	}		public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException {		SimpleJSAP jsap = new SimpleJSAP( JavamailDocumentCollection.class.getName(), "Saves a serialised mbox collection based on a given mbox file.",				new Parameter[] {					new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),					new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),					new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ),					new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." )				}		);				JSAPResult jsapResult = jsap.parse( arg );		if ( jsap.messagePrinted() ) return;				BinIO.storeObject( new JavamailDocumentCollection( jsapResult.getString( "storeUrl" ), jsapResult.getString( "folder" ), jsapResult.getStringArray( "property" ) ), jsapResult.getString( "collection" ) );	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -