📄 javamaildocumentcollection.java
字号:
package it.unimi.dsi.mg4j.document;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Paolo Boldi and Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.io.BinIO;import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.io.FastBufferedReader;import it.unimi.dsi.io.NullReader;import it.unimi.dsi.io.WordReader;import it.unimi.dsi.lang.MutableString;import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory.MetadataKeys;import it.unimi.dsi.util.Properties;import java.io.IOException;import java.io.InputStream;import java.io.Reader;import java.io.Serializable;import java.io.StringReader;import java.lang.reflect.InvocationTargetException;import java.nio.charset.Charset;import java.text.ParseException;import java.util.Date;import javax.mail.Address;import javax.mail.Folder;import javax.mail.Message;import javax.mail.MessagingException;import javax.mail.Session;import javax.mail.Store;import javax.mail.URLName;import javax.mail.internet.AddressException;import javax.mail.internet.MailDateFormat;import org.apache.commons.configuration.ConfigurationException;import org.apache.log4j.Logger;import com.martiansoftware.jsap.FlaggedOption;import com.martiansoftware.jsap.JSAP;import com.martiansoftware.jsap.JSAPException;import com.martiansoftware.jsap.JSAPResult;import com.martiansoftware.jsap.Parameter;import com.martiansoftware.jsap.SimpleJSAP;import com.martiansoftware.jsap.UnflaggedOption;/** A {@link it.unimi.dsi.mg4j.document.DocumentCollection} corresponding to * a Javamail {@link javax.mail.Store}. * * <p>This class is very simple: for instance, it will not understand correctly * multipart MIME messages, which will seen as without content. You are invited * to extend it. * * <p>This implementation is an example of a document collection that does not use a * factory: more precisely, there is an internal class that act as a wired factory. This * structure is made necessary by the fact that Javamail provide no means to parse messages * starting from an {@link java.io.InputStream}, which makes a separate implementation * of {@link it.unimi.dsi.mg4j.document.DocumentFactory#getDocument(InputStream,Reference2ObjectMap)} * impossible. * * <p>Note that to be able to use this class you must configure properly Javamail: * this involves setting up a <samp>javamail.properties</samp> file describing the * providers you want to use for the various access schemes. GNU Javamail, for instance, contains * providers for files, IMAP, POP, etc. */public class JavamailDocumentCollection extends AbstractDocumentCollection implements Serializable { private final static Logger LOGGER = Util.getLogger( JavamailDocumentCollection.class ); /** A special date (actually, 1 January 1970) representing no date. */ public final static Date NO_DATE = new Date( 0 ); private static final long serialVersionUID = 2L; /** Our only session . */ private final static Session SESSION = Session.getDefaultInstance( new java.util.Properties() ); /** The number of messages. */ private final int numberOfMessages; /** The factory to be used by this collection. */ private final JavamailDocumentFactory factory; /** The URL for the store. */ private final String storeUrl; /** The folder name. */ private final String folderName; /** The javamail store we are reading. */ private final transient Store store; /** The javamail folder we are reading. */ private final transient Folder folder; /** Builds a document collection corresponding to a given store URL and folder name. * * <p><strong>Beware.</strong> This class is not suited for large mbox files! * * @param storeUrl the javamail URL of the store. * @param folderName the folder name. * @param factory the factory that will be used to create documents. * @throws MessagingException */ protected JavamailDocumentCollection( final String storeUrl, final String folderName, final JavamailDocumentFactory factory ) throws MessagingException { this.storeUrl = storeUrl; this.folderName = folderName; this.factory = factory; this.store = SESSION.getStore( new URLName( storeUrl ) ); store.connect(); this.folder = store.getDefaultFolder().getFolder( folderName ); folder.open( Folder.READ_ONLY ); this.numberOfMessages = folder.getMessageCount(); } public JavamailDocumentCollection( final String storeUrl, final String folderName ) throws MessagingException { this( storeUrl, folderName, new JavamailDocumentFactory() ); } public JavamailDocumentCollection( final String storeUrl, final String folderName, final Properties properties ) throws MessagingException, ConfigurationException { this( storeUrl, folderName, new JavamailDocumentFactory( properties ) ); } public JavamailDocumentCollection( final String storeUrl, final String folderName, final String[] property ) throws MessagingException, ConfigurationException { this( storeUrl, folderName, new JavamailDocumentFactory( property ) ); } public JavamailDocumentCollection( final String storeUrl, final String folderName, final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) throws MessagingException { this( storeUrl, folderName, new JavamailDocumentFactory( defaultMetadata ) ); } public JavamailDocumentCollection copy() { try { return new JavamailDocumentCollection( storeUrl, folderName, factory.copy() ); } catch ( MessagingException e ) { throw new RuntimeException( e ); } } private final static class JavamailDocumentFactory extends PropertyBasedDocumentFactory { private static final long serialVersionUID = 1L; /** The field names (each also corresponds to a header, except for the 0-th). */ private static final String[] FIELD_NAME = { "body", "subject", "from", "to", "date", "cc", "bcc", "content-type" }; /** The field types. */ private static final FieldType[] FIELD_TYPE = { FieldType.TEXT, FieldType.TEXT, FieldType.TEXT, FieldType.TEXT, FieldType.DATE, FieldType.TEXT, FieldType.TEXT, FieldType.TEXT }; /** The map from field names to field indices. */ private static final Object2IntOpenHashMap<String> FIELD2INDEX; static { FIELD2INDEX = new Object2IntOpenHashMap<String>( FIELD_NAME.length, .5f ); FIELD2INDEX.defaultReturnValue( -1 ); for( int i = 0; i < FIELD_NAME.length; i++ ) FIELD2INDEX.put( FIELD_NAME[ i ], i ); } /** The word reader used for all documents. */ private WordReader wordReader = new FastBufferedReader(); protected boolean parseProperty( final String key, final String[] values, final Reference2ObjectMap<Enum<?>,Object> metadata ) throws ConfigurationException { if ( sameKey( MetadataKeys.ENCODING, key) ) { metadata.put( MetadataKeys.ENCODING, Charset.forName( ensureJustOne( key, values ) ).toString() ); return true; } return super.parseProperty( key, values, metadata ); } public JavamailDocumentFactory() { init(); } public JavamailDocumentFactory( final Properties properties ) throws ConfigurationException { super( properties ); init(); } public JavamailDocumentFactory( final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) { super( defaultMetadata ); init(); } public JavamailDocumentFactory( final String[] property ) throws ConfigurationException { super( property ); init(); } private void init() { wordReader = new FastBufferedReader(); } public JavamailDocumentFactory copy() { return new JavamailDocumentFactory( defaultMetadata ); } public int numberOfFields() { return FIELD_NAME.length; } public String fieldName( final int field ) { ensureFieldIndex( field ); return FIELD_NAME[ field ]; } public FieldType fieldType( final int field ) { ensureFieldIndex( field ); return FIELD_TYPE[ field ]; } public int fieldIndex( final String fieldName ) { return FIELD2INDEX.getInt( fieldName ); } public Document getDocument( final InputStream rawContent, final Reference2ObjectMap<Enum<?>,Object> metadata ) { throw new UnsupportedOperationException(); } } public DocumentFactory factory() { return factory; } public int size() { return numberOfMessages; } public void close() throws IOException { super.close(); try { folder.close( false ); store.close(); } catch( MessagingException e ) { throw new IOException( e.toString() ); } } private Object readResolve() throws MessagingException, IOException { super.close(); // To avoid spurious warnings about unclosed collected objects. return new JavamailDocumentCollection( storeUrl, folderName, factory ); } public Document document( final int index ) throws IOException { try { return new AbstractDocument() { // Can you believe that? Javamail numbers messages from 1... final Message message = folder.getMessage( index + 1 ); public CharSequence title() { final String subject; try { subject = message.getSubject(); } catch ( MessagingException e ) { throw new RuntimeException( e.toString() ); } if ( subject == null ) return (CharSequence)factory.resolve( MetadataKeys.TITLE, factory.defaultMetadata ); else return subject; } public CharSequence uri() { try { return folder.getURLName() + "#" + message.getMessageNumber(); } catch ( MessagingException e ) { throw new RuntimeException( e ); } } private Reader joinAddresses( final Address address[] ) { if ( address == null ) return NullReader.getInstance(); final MutableString s = new MutableString(); if ( address != null ) { for( int i = 0; i < address.length; i++ ) { if ( i > 0 ) s.append( ", " ); s.append( address[ i ] ); } } return new FastBufferedReader( s ); } public Object content( final int field ) throws IOException { factory.ensureFieldIndex( field ); try { switch ( field ) { case 0: // body // TODO: analyze multipart messages Object content = null; try { content = message.getContent(); } catch( Exception e ) { LOGGER.warn( "Message " + message.getMessageNumber() + " cannot be decoded; content will be empty", e ); } if ( content != null && content instanceof String ) return new StringReader( (String)content ); return NullReader.getInstance(); case 1: // subject return message.getSubject() == null ? NullReader.getInstance() : new StringReader( message.getSubject() ); case 2: // from return joinAddresses( message.getFrom() ); case 3: // to return joinAddresses( message.getRecipients( Message.RecipientType.TO ) ); case 4: // date final String[] date = message.getHeader( "date" ); if ( date == null || date.length == 0 ) return NO_DATE; final MailDateFormat mailDateFormat = new MailDateFormat(); try { return mailDateFormat.parse( date[ 0 ] ); } catch ( ParseException e ) { LOGGER.warn( "Error parsing date " + date[ 0 ] ); return NO_DATE; } case 5: // cc return joinAddresses( message.getRecipients( Message.RecipientType.CC ) ); case 6: // bcc return joinAddresses( message.getRecipients( Message.RecipientType.BCC ) ); case 7: // content-type return new StringReader( message.getContentType() ); } } catch ( MessagingException e ) { // A simple error if ( e instanceof AddressException ) { LOGGER.warn( "Error while parsing address", e ); return NullReader.getInstance(); } throw new IOException( e.toString() ); } throw new IllegalStateException(); } public WordReader wordReader( final int field ) { factory.ensureFieldIndex( field ); return factory.wordReader; } }; } catch ( MessagingException e ) { throw new IOException( e.toString() ); } } public Reference2ObjectMap<Enum<?>,Object> metadata( final int index ) { ensureDocumentIndex( index ); final Reference2ObjectArrayMap<Enum<?>,Object> metadata = new Reference2ObjectArrayMap<Enum<?>,Object>( 2 ); metadata.put( MetadataKeys.TITLE, "Message #" + index ); metadata.put( MetadataKeys.URI, storeUrl + folder + "#" + index ); return metadata; } public InputStream stream( final int index ) throws IOException { ensureDocumentIndex( index ); try { // Can you believe that? Javamail numbers messages from 1... return folder.getMessage( index + 1 ).getInputStream(); } catch ( MessagingException e ) { throw new IOException( e.toString() ); } } public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException { SimpleJSAP jsap = new SimpleJSAP( JavamailDocumentCollection.class.getName(), "Saves a serialised mbox collection based on a given mbox file.", new Parameter[] { new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ), new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ), new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ), new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." ) } ); JSAPResult jsapResult = jsap.parse( arg ); if ( jsap.messagePrinted() ) return; BinIO.storeObject( new JavamailDocumentCollection( jsapResult.getString( "storeUrl" ), jsapResult.getString( "folder" ), jsapResult.getStringArray( "property" ) ), jsapResult.getString( "collection" ) ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -