⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mainfetch.java

📁 一个java实现的有界面的email发送程序。可以从网络上抓取email。也可以从文件中读取email
💻 JAVA
字号:
package com.code10.fetch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.code10.access.DbAccess;
import com.code10.basecomponent.Entry;
import com.code10.basecomponent.UrlEntry;

import com.code10.basecomponent.strDeal;

/**
 * Crawls pages starting from one URL and collects the e-mail addresses found
 * on them.
 *
 * <p>Two worker threads are started by {@link #start()}:
 * <ul>
 *   <li>{@code ThreadFetchURL} takes URLs from {@code queueStrTmp1} in FIFO
 *       order, downloads each page, extracts its links and appends the new
 *       ones to both queues, until it meets an entry whose layer equals
 *       {@code intLayer} or the queue runs dry;</li>
 *   <li>{@code ThreadParse} takes URLs from {@code queueStrTmp2}, downloads
 *       each page, extracts e-mail addresses into {@code aryRs} and, once the
 *       fetch thread has ended and the queue is drained, hands the result to
 *       {@link DbAccess#insertInto}.</li>
 * </ul>
 *
 * <p>{@link #getEntry()} blocks until the parse thread has finished.
 */
public class MainFetch {
	/** Value of {@link #intStatus} once the parse thread has finished. */
	private static final int STATUS_DONE = 1;

	private static final Logger logger = Logger.getLogger(MainFetch.class.getName());

	/**
	 * HTTP client shared by every instance. All requests are serialized by
	 * locking this object in {@link #getContent(String)}.
	 */
	private static final HttpClient client = new HttpClient();

	/** Layer at which the fetch thread stops expanding links. */
	private final int intLayer;
	/** URLs whose links still have to be extracted (consumed by the fetch thread). */
	private final Queue<UrlEntry> queueStrTmp1;
	/** URLs whose e-mails still have to be extracted (produced by the fetch thread, consumed by the parse thread). */
	private final Queue<UrlEntry> queueStrTmp2;
	/** Case-folded form of every URL ever queued; touched only by the constructor and the fetch thread. */
	private final Set<String> seenUrls;
	/** Collected results; written only by the parse thread until {@link #intStatus} is published. */
	private final ArrayList<Entry> aryRs;
	/** 0 while running, {@link #STATUS_DONE} afterwards; volatile so that {@link #getEntry()} sees the update. */
	private volatile int intStatus = 0;
	private ThreadFetchURL threadFetchURL;
	private ThreadParse threadParse;

	/**
	 * Prepares a crawl; nothing is downloaded until {@link #start()} is called.
	 *
	 * @param strStartURL URL of the first page; also passed to {@code UrlEntry.setStrHost}
	 * @param intLayer    layer number at which link extraction stops (the start URL is layer 0)
	 */
	public MainFetch(String strStartURL, int intLayer){
		client.getParams().setContentCharset("GB2312");
		logger.setLevel(Level.ALL);
		UrlEntry.setStrHost(strStartURL);
		this.intLayer = intLayer;

		UrlEntry startEntry = new UrlEntry();
		startEntry.setUrl(strStartURL);
		startEntry.setILayer(0);

		// Concurrent queues: queueStrTmp2 is filled by one thread and drained by another.
		queueStrTmp1 = new ConcurrentLinkedQueue<UrlEntry>();
		queueStrTmp2 = new ConcurrentLinkedQueue<UrlEntry>();
		seenUrls = new HashSet<String>();
		aryRs = new ArrayList<Entry>();

		queueStrTmp1.add(startEntry);
		queueStrTmp2.add(startEntry);
		String startUrl = startEntry.getUrl();
		if (startUrl != null) {
			seenUrls.add(urlKey(startUrl));
		}
	}

	/** Starts the fetch thread and then the parse thread. */
	public void start(){
		logger.debug("开始抓取 .....");
		threadFetchURL = new ThreadFetchURL();
		threadFetchURL.start();
		threadParse = new ThreadParse();
		threadParse.start();
	}

	/**
	 * Waits until the parse thread has finished and returns the collected entries.
	 *
	 * <p>The wait is not cancellable: an interrupt received while waiting is
	 * remembered and the caller's interrupt flag is restored before returning.
	 * If {@link #start()} was never called this method does not return.
	 *
	 * @return the internal result list (not a copy)
	 */
	public ArrayList<Entry> getEntry(){
		boolean interrupted = false;
		while (intStatus != STATUS_DONE) {
			try {
				Thread.sleep(1000);
			} catch (InterruptedException e) {
				// Keep waiting, but do not lose the interrupt request.
				interrupted = true;
			}
		}
		if (interrupted) {
			Thread.currentThread().interrupt();
		}
		return this.aryRs;
	}

	/** Worker that expands links layer by layer. */
	private class ThreadFetchURL extends Thread{

		@Override
		public void run() {
			logger.debug("ThreadFetchURL is running!");
			while (true) {
				logger.debug("ThreadFetchURL : in while");
				UrlEntry current = getURL1();
				if (current == null) {
					// This thread is the only producer of queueStrTmp1, so an
					// empty queue means there is nothing left to expand.
					break;
				}
				String url = current.getUrl();
				if (url == null) {
					continue;
				}
				if (current.getILayer() == intLayer) {
					// Requested depth reached: stop expanding links.
					break;
				}
				String strContent = getContent(url);
				if (strContent == null) {
					continue;
				}
				parseAndaddURL(strContent, current);
			}
		}
	}

	/** Worker that extracts e-mails and finally stores them through {@link DbAccess}. */
	private class ThreadParse extends Thread{

		@Override
		public void run() {
			logger.debug("ThreadParse : emailparse is starting...");
			try {
				while (true) {
					logger.debug("ThreadParse : emailparse while is running...");
					UrlEntry current = getURL2();
					if (current == null) {
						if (threadFetchURL.isAlive()) {
							// More URLs may still arrive; wait a little.
							try {
								Thread.sleep(100);
							} catch (InterruptedException e) {
								// Treat an interrupt as a request to stop early;
								// what has been collected so far is still imported below.
								Thread.currentThread().interrupt();
								logger.warn("ThreadParse : interrupted while waiting for URLs", e);
								break;
							}
							continue;
						}
						// The fetcher may have queued its last URLs between our
						// poll and the isAlive() check, so look once more.
						if (queueStrTmp2.isEmpty()) {
							break;
						}
						continue;
					}
					String url = current.getUrl();
					if (url == null) {
						continue;
					}
					String strTemp = getContent(url);
					if (strTemp == null) {
						continue;
					}
					parseAndAddEmail(strTemp, current);
				}

				logger.debug("*******************************************\n" +
						"抓取结束,开始导入数据库......\n");
				DbAccess dba = new DbAccess();
				dba.insertInto(aryRs);
				logger.debug("成功导入数据库 \n" +
						"*******************************************\n");
			} finally {
				// Always release callers blocked in getEntry(), even if the import failed.
				intStatus = STATUS_DONE;
			}
		}
	}

	/**
	 * Downloads the page at the given URL.
	 *
	 * <p>Requests are serialized on the shared {@code client} (the original
	 * code serialized them per instance) and the connection is always released.
	 *
	 * @param strURL address of the page
	 * @return the response body, or {@code null} when the status is not 200 or the request failed
	 */
	private String getContent(String strURL){
		synchronized (client) {
			GetMethod getMethod = null;
			try {
				logger.debug("ThreadFetchURL : start get the URL : " + strURL);
				getMethod = new GetMethod(strURL);
				client.executeMethod(getMethod);
				if (getMethod.getStatusCode() == HttpStatus.SC_OK) {
					// The body must be read before the connection is released in finally.
					return getMethod.getResponseBodyAsString();
				}
			} catch (HttpException e) {
				logger.warn("ThreadFetchURL : get the URL : " + strURL + " failed! (HttpException)", e);
			} catch (IOException e) {
				logger.warn("ThreadFetchURL : get the URL : " + strURL + " failed! (IOException)", e);
			} catch (Exception e) {
				// Best effort: a single bad URL must not end the crawl.
				logger.warn("ThreadFetchURL : get the URL : " + strURL + " failed! (Exception)", e);
			} finally {
				if (getMethod != null) {
					getMethod.releaseConnection();
				}
			}
			return null;
		}
	}

	/**
	 * Extracts the links from a page and queues every one that has not been
	 * queued before. Called only from the fetch thread.
	 *
	 * @param strContent page content
	 * @param urlParent  entry of the page the content was downloaded from
	 */
	private void parseAndaddURL(String strContent , UrlEntry urlParent) {
		ArrayList<String> links = strDeal.parseURL(strContent);
		logger.debug("ThreadFetchURL : in parseAndaddURL");
		for (String link : links) {
			// Drop links that are off-site or not legal URLs.
			if (!UrlEntry.isInWebSite(link) || !UrlEntry.isLegURL(link)) {
				continue;
			}
			UrlEntry child = new UrlEntry();
			child.setStrParentUrl(urlParent.getUrl());
			child.setIntParentNum(urlParent.getIntSelfNum());
			// NOTE(review): the start entry is initialised with setILayer while
			// children use setLayer; both are assumed to feed getILayer - confirm in UrlEntry.
			child.setLayer(urlParent.getILayer() + 1);
			child.creatCompeleteURL(link);

			String childUrl = child.getUrl();
			// Set.add returns false for a URL that was queued earlier, including
			// ones already taken off the queue, so pages are not crawled twice.
			if (childUrl == null || !seenUrls.add(urlKey(childUrl))) {
				continue;
			}
			logger.debug("==========================================================\n");
			child.setIntSelfNum(urlParent.getIntSonNum() + 1);
			logger.debug("ThreadFetchURL : 层数 :" + child.getILayer() + "  父URL编号 :" +
					child.getIntParentNum() + "  编号 :" + child.getIntSelfNum() + "  内容 :" + child.getUrl());
			queueStrTmp1.add(child);
			queueStrTmp2.add(child);
		}
	}

	/**
	 * Extracts the e-mail addresses from a page and appends the ones not yet
	 * present to the result list (with an empty user name). Called only from
	 * the parse thread.
	 *
	 * @param strContent page content
	 * @param urlEntry   entry of the page the content was downloaded from (used for logging)
	 */
	private void parseAndAddEmail(String strContent ,UrlEntry urlEntry){
		ArrayList<String> emails = strDeal.parseEmail(strContent);
		logger.debug("ThreadParse : in parseAndAddEmail");
		for (String email : emails) {
			Entry entry = new Entry();
			entry.setEmail(email);
			entry.setUsername("");
			if (!isExistToo(entry.getEmail())) {
				logger.debug("ThreadParse : Parsing the URL : " + urlEntry.getIntSelfNum() + "  " + urlEntry.getUrl());
				logger.debug("ThreadParse : the geted email is : " + email);
				aryRs.add(entry);
				logger.debug("ThreadParse : the size of aryRs is " + aryRs.size());
			}
		}
		logger.debug("ThreadParse : out parseAndAddEmail");
	}

	/**
	 * Takes the next URL whose links must be extracted.
	 *
	 * @return the next entry, or {@code null} when the queue is empty
	 */
	private UrlEntry getURL1(){
		return queueStrTmp1.poll();
	}

	/**
	 * Takes the next URL whose e-mails must be extracted.
	 *
	 * @return the next entry, or {@code null} when the queue is empty
	 */
	private UrlEntry getURL2(){
		return queueStrTmp2.poll();
	}

	/** Case-insensitive key under which a URL is remembered in {@link #seenUrls}. */
	private static String urlKey(String url){
		return url.toLowerCase(Locale.ROOT);
	}

	/**
	 * Tells whether a URL is currently waiting in the link-extraction queue.
	 *
	 * @param strUrl URL to look for (compared ignoring case)
	 * @return {@code true} if a pending entry has that URL; {@code false} otherwise, including for {@code null}
	 */
	public boolean isExist(String strUrl){
		for (UrlEntry pending : queueStrTmp1) {
			String pendingUrl = pending.getUrl();
			if (pendingUrl != null && pendingUrl.equalsIgnoreCase(strUrl)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tells whether an e-mail address is already in the result list.
	 *
	 * @param strEmail address to look for (compared ignoring case)
	 * @return {@code true} if a collected entry has that address; {@code false} otherwise, including for {@code null}
	 */
	public boolean isExistToo(String strEmail){
		for (Entry collected : aryRs) {
			String collectedEmail = collected.getEmail();
			if (collectedEmail != null && collectedEmail.equalsIgnoreCase(strEmail)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Currently does nothing; the forwarding of the message to the GUI
	 * ({@code MainFrame.getInstance().appendMsg(str)}) is disabled.
	 *
	 * @param str message that would be shown
	 */
	public  synchronized void appendMsg(String str){
	}

	/**
	 * Currently does nothing. The earlier implementation, which called
	 * {@code Thread.stop()} on both worker threads, is disabled.
	 */
	public void stopFetch(){
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -