⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 abouturl.java

📁 这个是我们做土地局的一个项目。感觉不错。欢迎下载
💻 JAVA
字号:
package huayi.gg.lyg;
import java.net.*;
import java.io.*;
import java.util.*;
import java.util.regex.*;  
import huayi.gg.dbConn.dbConn;
/**
 * Crawls web pages starting from a seed URL and stores every e-mail address
 * found in <code>mailto:</code> links into the <code>T_MAIL_MAIL</code> table.
 *
 * Typical use: call {@link #setStr(String)} with the seed URL, then
 * {@link #GetAllEmail()} to crawl, then {@link #updateDatabase()} to remove
 * duplicate rows.
 *
 * Database access goes through the project's <code>dbConn</code> helper; only
 * its <code>executeUpdate(String)</code> and <code>close()</code> methods are
 * used here.
 */
public class AboutURL
{
	/** Deepest link level that is still fetched; the seed page is level 0. */
	private static final int MAX_DEPTH = 4;

	/** Finds <code>mailto:</code> targets; group 1 is the raw address text. */
	private static final Pattern EMAIL_PATTERN = Pattern.compile(
		"mailto:[\"]?([^>^<^%]*[^/^:].(?:com|cn|org|net|il))(?:\\\"|\\'|\\s)",
		Pattern.CASE_INSENSITIVE);

	/** Finds absolute <code>http://</code> link targets; group 1 is the URL. */
	private static final Pattern URL_PATTERN = Pattern.compile(
		"href=[\"]?(http://[^>^<]*[^\"].(?:com|cn|org|net|html|shtml|htm|jsp|php|asp))(?:\\\"|\\'|\\s)",
		Pattern.CASE_INSENSITIVE);

	/** Seed URL the crawl starts from. */
	private String str;

	/**
	 * Sets the seed URL used by {@link #GetAllEmail()}.
	 *
	 * @param string URL of the first page to fetch
	 */
	public void setStr(String string)
	{
		this.str = string;
	}

	/**
	 * Crawls depth-first from the seed URL. Every page from the seed (level 0)
	 * down to level {@value #MAX_DEPTH} is fetched and scanned for e-mail
	 * addresses; links are followed from levels 0 to 3 only.
	 *
	 * Pages are not de-duplicated, so a URL reachable through several paths is
	 * fetched once per path; duplicate rows are cleaned up afterwards by
	 * {@link #updateDatabase()}.
	 */
	public void GetAllEmail()
	{
		crawl(str, 0);
	}

	/**
	 * Fetches one page, stores its e-mail addresses and recurses into its
	 * links while the depth limit has not been reached.
	 *
	 * @param pageUrl URL of the page to process
	 * @param depth   link distance of this page from the seed page
	 */
	private void crawl(String pageUrl, int depth)
	{
		String document = getDocumentAt(pageUrl);
		lookemail(document);
		if (depth >= MAX_DEPTH)
		{
			// Deepest level: harvest addresses only, do not follow links.
			return;
		}
		// May be empty, e.g. when the page could not be fetched.
		Vector links = lookurl(document);
		for (int i = 0; i < links.size(); i++)
		{
			crawl(links.get(i).toString(), depth + 1);
		}
	}

	/**
	 * Extracts the host part of a URL. The commented-out filter that once used
	 * it suggests it was meant to tell whether a page belongs to a given site.
	 *
	 * @param hostname a URL string (despite the parameter name)
	 * @return the host of the URL, or a single space if the URL is malformed
	 */
	public String GetHostName(String hostname)
	{
		String host = " ";
		try
		{
			host = new URL(hostname).getHost();
		}
		catch (MalformedURLException e)
		{
			e.printStackTrace();
		}
		return host;
	}

	/**
	 * Downloads a page and returns its content as text, one <code>'\n'</code>
	 * after every line read.
	 *
	 * Failures are reported on standard output and never thrown: a malformed
	 * URL yields an empty string, an I/O error yields whatever was read before
	 * the error occurred. The stream is decoded with the platform default
	 * charset.
	 *
	 * @param urlString URL of the page to download
	 * @return the text read from the page, possibly empty; never null
	 */
	public String getDocumentAt(String urlString)
	{
		StringBuffer document = new StringBuffer();
		BufferedReader reader = null;
		try
		{
			URL url = new URL(urlString);
			URLConnection conn = url.openConnection();
			reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
			String line;
			while ((line = reader.readLine()) != null)
			{
				document.append(line).append('\n');
			}
		}
		catch (MalformedURLException e)
		{
			System.out.println("Unable to connect to URL: " + urlString);
		}
		catch (IOException e)
		{
			System.out.println("IOException when connecting to URL: " + urlString);
		}
		finally
		{
			// Close on every path so a failed read does not leak the connection.
			if (reader != null)
			{
				try
				{
					reader.close();
				}
				catch (IOException ignored)
				{
					// The content has already been read (or the read already failed
					// and was reported); a close failure adds nothing actionable.
				}
			}
		}
		return document.toString();
	}

	/**
	 * Finds every <code>mailto:</code> address in a page and inserts each one
	 * as a row of <code>T_MAIL_MAIL</code>.
	 *
	 * A single <code>mailto:</code> target may list several addresses
	 * separated by <code>';'</code>; each non-empty part is trimmed and stored
	 * separately. No check for existing rows is made here.
	 *
	 * @param document page text to scan; null is treated as "nothing to scan"
	 */
	public void lookemail(String document)
	{
		if (document == null)
		{
			return;
		}
		dbConn db = new dbConn();
		try
		{
			Matcher matcher = EMAIL_PATTERN.matcher(document);
			while (matcher.find())
			{
				String[] addresses = matcher.group(1).split(";");
				for (int i = 0; i < addresses.length; i++)
				{
					String address = addresses[i].trim();
					if (address.length() == 0)
					{
						continue;
					}
					db.executeUpdate("insert into T_MAIL_MAIL(email) values('"
						+ escapeSqlLiteral(address) + "')");
				}
			}
		}
		finally
		{
			// Release the connection even if an insert fails.
			db.close();
		}
	}

	/**
	 * Doubles single quotes so scraped text cannot terminate the SQL string
	 * literal it is embedded in.
	 *
	 * NOTE(review): this is the standard-SQL escape only. If dbConn offers a
	 * parameterized/prepared statement API, use that instead; databases that
	 * also treat backslash as an escape character need more than this.
	 *
	 * @param value text to embed between single quotes in a SQL statement
	 * @return the text with every <code>'</code> replaced by <code>''</code>
	 */
	private static String escapeSqlLiteral(String value)
	{
		return value.replaceAll("'", "''");
	}

	/**
	 * Collects every absolute <code>http://</code> link target found in
	 * <code>href</code> attributes of a page. Links are not filtered by host.
	 *
	 * @param document page text to scan; null yields an empty result
	 * @return a new Vector of URL strings in order of appearance, never null
	 */
	public Vector lookurl(String document)
	{
		Vector urls = new Vector();
		if (document == null)
		{
			return urls;
		}
		Matcher matcher = URL_PATTERN.matcher(document);
		while (matcher.find())
		{
			urls.addElement(matcher.group(1));
		}
		return urls;
	}

	/**
	 * Removes duplicate addresses from <code>T_MAIL_MAIL</code> by copying the
	 * distinct values into <code>T_MAIL_MAILTEMP</code>, emptying
	 * <code>T_MAIL_MAIL</code> and copying the values back.
	 *
	 * NOTE(review): the four statements are issued one by one; whether they
	 * run inside a single transaction depends on dbConn - confirm before
	 * relying on atomicity.
	 */
	public void updateDatabase()
	{
		dbConn db = new dbConn();
		try
		{
			db.executeUpdate("delete from T_MAIL_MAILTEMP");
			db.executeUpdate("insert into T_MAIL_MAILTEMP select distinct email from T_MAIL_MAIL");
			db.executeUpdate("delete from T_MAIL_MAIL");
			db.executeUpdate("insert into T_MAIL_MAIL select email from T_MAIL_MAILTEMP");
		}
		finally
		{
			// Previously never closed; release it the same way lookemail does.
			db.close();
		}
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -