📄 dbximport.java
字号:
/**
 * <p>Title: StandBayeMail </p>
 * <p>Description: A bayesian spam filter</p>
 * <p>Copyright: Copyright (c) 2004 by Luca M. Viola</p>
 * <p>Company: 3AM.it</p>
 * @author Luca M. Viola <luca@3am.it>
 * @version 1.0
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package oeimport;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Base64;
import java.util.Date;
import java.util.Locale;
import java.util.StringTokenizer;

/**
 * Converts an Outlook Express dbx message store into mbox-style text.
 *
 * <p>The whole dbx file is loaded into memory, the message tree is walked to
 * collect the pointers to the "message info" structures, and for every message
 * that has stored text the linked chain of text chunks is followed and written
 * out after a {@code From ???@??? <date>} separator line. A base64 encoded-word
 * {@code Subject:} header ({@code =?charset?B?...?=}) is replaced by its decoded
 * form.
 *
 * <p>Message bytes are carried through as ISO-8859-1 characters and written
 * with the same charset, so every byte of the stored message reaches the output
 * unchanged regardless of the platform default charset.
 *
 * <p>Structural errors in the file are reported on {@code System.err} and
 * terminate the JVM with exit status 1.
 */
public class dbxImport
{
  /** Highest index number (exclusive) allowed inside an indexed-info structure. */
  private static final int MAX_INDEX = 0x20;

  /** Index number under which a message info stores the address of its text. */
  private static final int MESSAGE_ADDRESS_INDEX = 0x04;

  /** Size in bytes of one node of the message tree. */
  private static final int TREE_NODE_SIZE = 0x27c;

  /** Largest entry count accepted in a tree node. */
  private static final int MAX_NODE_ENTRIES = 0x33;

  /** File offset of the number of entries in the tree. */
  private static final int HEADER_ENTRY_COUNT_OFFSET = 0x0c4;

  /** File offset of the pointer to the root node of the tree. */
  private static final int HEADER_TREE_ROOT_OFFSET = 0x0e4;

  /** Bytes in front of the text of a message chunk (length at +8, next pointer at +12). */
  private static final int CHUNK_HEADER_SIZE = 16;

  /** Complete content of the dbx file. */
  private byte[] buffer;

  /** Pointers to the message-info structures, filled in by {@link #readTree}. */
  private int[] indinf_MsgPointers;

  /** Number of entries announced by the file header. */
  private int treeEntries;

  /**
   * Reads the dbx file {@code filename} and writes its messages in mbox format.
   *
   * <p>The input is read completely before the output is opened. I/O failures
   * are reported with a stack trace on {@code System.err}, as before.
   *
   * @param filename path of the dbx file to read
   * @param filedest path of the file to write, or the empty string to write
   *                 to standard output
   */
  public dbxImport( String filename,String filedest )
  {
    try
    {
      buffer = readWholeFile(filename);
      if (buffer.length < HEADER_TREE_ROOT_OFFSET + 4)
      {
        fail("File too short to be a dbx file: " + filename);
      }

      // Number of entries in the dbx tree and position of its root node
      int entries = low2hiEndianInt(buffer, HEADER_ENTRY_COUNT_OFFSET);
      int treepos = low2hiEndianInt(buffer, HEADER_TREE_ROOT_OFFSET);

      // We decide whether the output is a file or the stdout
      boolean toFile = !filedest.equals("");
      OutputStream sink;
      if (toFile)
      {
        sink = new FileOutputStream(filedest);
      }
      else
      {
        sink = System.out;
      }

      // ISO-8859-1 maps chars 0..255 to the same byte values, which keeps the
      // message bytes intact on their way out
      PrintStream pw = new PrintStream(sink, false, StandardCharsets.ISO_8859_1.name());
      try
      {
        // If there is a tree and it has some entries we start
        if (treepos > 0 && entries > 0)
        {
          exportMessages(pw, entries, treepos);
        }
      }
      finally
      {
        // PrintStream never throws on write; checkError() flushes and reports
        if (pw.checkError())
        {
          System.err.println("Error while writing the output");
        }
        // Standard output is only flushed, never closed
        if (toFile)
        {
          pw.close();
        }
      }
    }
    catch( IOException ie )
    {
      ie.printStackTrace();
    }
  }

  /**
   * Command line entry point.
   *
   * @param args the dbx file to read, optionally followed by the file to write
   */
  public static void main( String [] args )
  {
    if( args==null || args.length<1 || args.length>2 )
    {
      System.err.println("Usage: dbxImport \"filename.dbx\" [[\"filedest\"] | [> filedest]]");
      System.exit(1);
    }
    String filename = args[0];
    String filedest = (args.length == 2) ? args[1] : "";
    new dbxImport(filename,filedest);
  }

  /** Prints {@code message} on standard error and terminates the JVM with status 1. */
  private static void fail(String message)
  {
    System.err.println(message);
    System.exit(1);
  }

  /**
   * Loads a file completely into memory.
   *
   * @param filename path of the file to read
   * @return the content of the file
   * @throws IOException if the file cannot be read completely or does not fit
   *                     into a single byte array
   */
  private static byte[] readWholeFile(String filename) throws IOException
  {
    try (RandomAccessFile raf = new RandomAccessFile(filename, "r"))
    {
      long len = raf.length();
      if (len > Integer.MAX_VALUE)
      {
        throw new IOException("File too large to import: " + filename);
      }
      byte[] data = new byte[(int) len];
      // read() may stop early; readFully() fills the array or throws
      raf.readFully(data);
      return data;
    }
  }

  /**
   * Reads a 32-bit little-endian integer from {@code buffer}.
   *
   * <p>The dbx file stores its integers and file pointers lowest byte first.
   * Each byte is masked with {@code 0xff} because Java bytes are signed.
   *
   * @param buffer bytes to read from
   * @param start  position of the lowest-order byte
   * @return the value as a Java int
   */
  private static int low2hiEndianInt(byte [] buffer,int start )
  {
    return ((buffer[start + 3] & 0xff) << 24)
         | ((buffer[start + 2] & 0xff) << 16)
         | ((buffer[start + 1] & 0xff) << 8)
         |  (buffer[start] & 0xff);
  }

  /**
   * Recursively scans the message tree and stores every message-info pointer
   * found in {@link #indinf_MsgPointers}.
   *
   * <p>A node is an array of 32-bit values: [2] is the pointer to the first
   * child, [3] the pointer to the parent, bits 8..15 of [4] the number of
   * entries, [5] the number of values below the first child; from [6] on each
   * entry takes three values (message-info pointer, child pointer, number of
   * values below that child).
   *
   * @param ins      ignored; the node is always read from {@code address}
   * @param parent   address the node must name as its parent
   * @param address  file position of the node to read
   * @param position number of pointers already stored in front of this subtree
   * @param values   number of pointers this subtree is expected to contribute
   */
  private void readTree( int ins,int parent,int address,int position,int values)
  {
    // If we try to scan more entries than the maximum in the tree
    // there is a mistake
    if (position + values > treeEntries)
    {
      fail("Too many tree entries to read !");
    }

    // Reads all the data of the tree node
    int[] node = new int[TREE_NODE_SIZE >> 2];
    for (int k = 0; k < node.length; k++)
    {
      node[k] = low2hiEndianInt(buffer, address + (k * 4));
    }
    int afterNode = address + node.length * 4;

    // The parent recorded in the node must be the one we came from
    if (node[3] != parent)
    {
      fail("Wrong parent !");
    }

    int entries = (node[4] >> 8) & 0xff;
    if (entries > MAX_NODE_ENTRIES)
    {
      fail("Wrong value for entries !");
    }

    // Counts the values found in this subtree; checked against 'values' below
    int found = 0;

    // If this node has a first child we scan that branch first
    if (node[2] != 0)
    {
      readTree(afterNode, address, node[2], position, node[5]);
      found += node[5];
    }

    for (int i = 0; i < entries; ++i)
    {
      int pos = 6 + i * 3;

      // If there is a message info, store its pointer in the next free slot
      if (node[pos] != 0)
      {
        int slot = position + (++found);
        if (slot > treeEntries)
        {
          fail("To many values !");
        }
        indinf_MsgPointers[slot - 1] = node[pos];
      }

      // If this entry has a child we scan the subtree
      if (node[pos + 1] != 0)
      {
        readTree(afterNode, address, node[pos + 1], position + found, node[pos + 2]);
        found += node[pos + 2];
      }
    }

    // The number of values found must match what the parent announced
    if (found != values)
    {
      fail("Wrong number of values found!");
    }
  }

  /**
   * Collects the message pointers from the tree and writes every stored
   * message to {@code pw}.
   *
   * @param pw      destination of the mbox text
   * @param entries number of entries announced by the file header
   * @param treepos file position of the root node of the tree
   */
  private void exportMessages(PrintStream pw, int entries, int treepos)
  {
    // We reserve space for the pointers to the message info structures
    indinf_MsgPointers = new int[entries];
    treeEntries = entries;

    // We start scanning the tree from the root
    readTree(treepos, 0, treepos, 0, entries);

    // Date for the mbox separator line: the fields of Date.toString()
    // without the time zone, whatever the local zone is called
    String fromdate =
        new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy", Locale.US).format(new Date());

    for (int i = 0; i < entries; i++)
    {
      int textchunk_p = findMessageAddress(indinf_MsgPointers[i]);

      String msgbody = "";
      if (textchunk_p > 0)
      {
        if (i > 0)
        {
          pw.println("\r\n");
        }
        // Outputs the message header in mbox format
        pw.println("From ???@??? " + fromdate);
        msgbody = readMessageText(textchunk_p);
      }
      pw.print(withDecodedSubject(msgbody));
    }
  }

  /**
   * Extracts the address of the first text chunk from an indexed-info
   * structure.
   *
   * <p>Layout used here: a 32-bit length of the data area at offset 4, the
   * number of index entries in the byte at offset 10, the entries themselves
   * (32 bits each) from offset 12, and the data area right behind them. The
   * low 7 bits of an entry are the index number; bit 7 tells whether the
   * remaining 24 bits are the value itself (direct) or an offset into the data
   * area (indirect).
   *
   * @param indinf_p file position of the indexed-info structure
   * @return the address of the first text chunk, or 0 when the message has
   *         no stored text
   */
  private int findMessageAddress(int indinf_p)
  {
    int indinf_len = low2hiEndianInt(buffer, indinf_p + 4);
    int indinf_ent = buffer[indinf_p + 10] & 0xff;
    // Points to the index entries of this indexed info
    int indinf_buff = indinf_p + 12;
    // Points to the data area of this indexed info
    int indinf_data = indinf_buff + (indinf_ent << 2);

    int[] begin = new int[MAX_INDEX];
    int[] length = new int[MAX_INDEX];
    int present = 0;
    boolean isIndirect = false;
    int lastIndirect = 0;

    for (int j = 0; j < indinf_ent; ++j)
    {
      int entryPos = indinf_buff + (j << 2);
      int value = low2hiEndianInt(buffer, entryPos);
      boolean isDirect = (value & 0x80) != 0;
      int index = value & 0x7f;
      // Unsigned shift: the upper 24 bits are an unsigned value or offset
      value >>>= 8;
      if (index >= MAX_INDEX)
      {
        fail("Index too big");
      }
      if (isDirect)
      {
        // The value is the three bytes that follow the index byte
        begin[index] = entryPos + 1;
        length[index] = 3;
      }
      else
      {
        begin[index] = indinf_data + value;
        length[index] = 0;
        // An indirect value ends where the next indirect value begins, so the
        // length that becomes known here is the one of the previous entry
        if (isIndirect)
        {
          length[lastIndirect] = (indinf_data + value) - begin[lastIndirect];
        }
        isIndirect = true;
        lastIndirect = index;
      }
      present |= 1 << index;
    }
    // The last indirect value runs to the end of the data area
    if (isIndirect)
    {
      length[lastIndirect] = (indinf_data + indinf_len) - begin[lastIndirect];
    }

    int value = 0;
    if ((present & (1 << MESSAGE_ADDRESS_INDEX)) != 0)
    {
      int data = begin[MESSAGE_ADDRESS_INDEX];
      int size = length[MESSAGE_ADDRESS_INDEX];
      if (data > 0)
      {
        value = low2hiEndianInt(buffer, data);
        // Keep only the bytes that belong to the value
        if (size < 4)
        {
          value &= (1 << (size << 3)) - 1;
        }
      }
    }
    return value;
  }

  /**
   * Follows the linked chain of text chunks of one message and returns the
   * concatenated text.
   *
   * <p>Each byte becomes the character with the same numeric value
   * (ISO-8859-1). A chain with more chunks or more text than the file can
   * hold is treated as a loop and reported as an error.
   *
   * @param first file position of the first chunk
   * @return the text of the message
   */
  private String readMessageText(int first)
  {
    StringBuilder text = new StringBuilder();
    int maxChunks = buffer.length / CHUNK_HEADER_SIZE + 1;
    int chunks = 0;
    int next = first;
    do
    {
      if (++chunks > maxChunks || text.length() > buffer.length)
      {
        fail("Corrupt message chain");
      }
      int txtl = low2hiEndianInt(buffer, next + 8);
      text.append(new String(buffer, next + CHUNK_HEADER_SIZE, txtl,
                             StandardCharsets.ISO_8859_1));
      next = low2hiEndianInt(buffer, next + 12);
    }
    // Until there are no more pieces to this message
    while (next != 0);
    return text.toString();
  }

  /**
   * Replaces a base64 encoded-word subject line by its decoded form.
   *
   * <p>The text from the first {@code "Subject:"} to the end of that line is
   * replaced by {@code "Subject: " + decoded + "\r\n"}. The message is returned
   * unchanged when there is no such line, when the line is not terminated, or
   * when the subject is not a decodable base64 encoded word.
   *
   * @param msgbody complete text of the message
   * @return the message, with the subject line decoded where applicable
   */
  private static String withDecodedSubject(String msgbody)
  {
    int spos = msgbody.indexOf("Subject:");
    if (spos == -1)
    {
      return msgbody;
    }
    int spos2 = msgbody.indexOf("\n", spos);
    if (spos2 == -1)
    {
      return msgbody;
    }
    String decoded = decodeBase64Subject(msgbody.substring(spos + 8, spos2));
    if (decoded == null)
    {
      return msgbody;
    }
    return msgbody.substring(0, spos)
         + "Subject: " + decoded.trim() + "\r\n"
         + msgbody.substring(spos2 + 1);
  }

  /**
   * Decodes a subject of the form {@code =?charset?B?payload?=}.
   *
   * <p>The decoded bytes are returned as ISO-8859-1 characters, i.e. unchanged
   * in value, so they are written out in whatever charset the encoded word
   * declared.
   *
   * @param subject text following {@code "Subject:"} on the header line
   * @return the decoded subject, or {@code null} when {@code subject} is not a
   *         base64 encoded word or its payload is malformed
   */
  private static String decodeBase64Subject(String subject)
  {
    String trimmed = subject.trim();
    if (!trimmed.startsWith("=?"))
    {
      return null;
    }
    // Splitting on '?' yields: "=", charset, encoding, payload, "="
    StringTokenizer st = new StringTokenizer(trimmed, "?");
    if (st.countTokens() < 4)
    {
      return null;
    }
    st.nextToken();
    st.nextToken();
    String encoding = st.nextToken();
    String payload = st.nextToken();
    if (!"B".equalsIgnoreCase(encoding))
    {
      return null;
    }
    try
    {
      byte[] decoded = Base64.getMimeDecoder().decode(payload);
      return new String(decoded, StandardCharsets.ISO_8859_1);
    }
    catch (IllegalArgumentException malformed)
    {
      // Not valid base64: leave the subject line as it is
      return null;
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -