📄 abouturl.java
字号:
package huayi.gg.lyg;
import java.net.*;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import huayi.gg.dbConn.dbConn;
/**
 * Small crawler that starts at one URL, follows {@code href="http://..."} links and stores
 * every address found in {@code mailto:} links in table {@code T_MAIL_MAIL} via {@code dbConn}.
 *
 * <p>Raw collections are kept deliberately: the rest of the file uses no generics, so the
 * class stays compilable at the same language level as before.
 */
public class AboutURL
{
    /** Number of link levels followed below the start page (the start page itself is level 0). */
    private static final int MAX_LINK_DEPTH = 4;

    /** Matches {@code mailto:} targets; group 1 is the address (or a ';'-separated list). */
    private static final Pattern EMAIL_PATTERN = Pattern.compile(
        "mailto:[\"]?([^>^<^%]*[^/^:].(?:com|cn|org|net|il))(?:\\\"|\\'|\\s)",
        Pattern.CASE_INSENSITIVE);

    /** Matches absolute {@code http://} link targets; group 1 is the URL. */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "href=[\"]?(http://[^>^<]*[^\"].(?:com|cn|org|net|html|shtml|htm|jsp|php|asp))(?:\\\"|\\'|\\s)",
        Pattern.CASE_INSENSITIVE);

    /** Start URL of the crawl, set through {@link #setStr(String)}. */
    private String str;

    /**
     * Sets the URL at which {@link #GetAllEmail()} starts crawling.
     *
     * @param string the start URL
     */
    public void setStr(String string)
    {
        this.str = string;
    }

    /**
     * Crawls from the start URL and stores every e-mail address found on the start page and
     * on all pages reachable within {@link #MAX_LINK_DEPTH} link hops.
     *
     * <p>Pages are processed level by level and each distinct URL is downloaded only once.
     * Because a URL is always first seen at its smallest distance from the start page, the
     * set of pages scanned is the same as with exhaustive nested loops over every link, but
     * without re-downloading a page each time it is linked again.
     */
    public void GetAllEmail()
    {
        Set visited = new HashSet();
        Vector currentLevel = new Vector();
        visited.add(str);
        currentLevel.addElement(str);

        for (int depth = 0; depth <= MAX_LINK_DEPTH && !currentLevel.isEmpty(); depth++)
        {
            Vector nextLevel = new Vector();
            for (int i = 0; i < currentLevel.size(); i++)
            {
                String document = getDocumentAt((String) currentLevel.get(i));
                lookemail(document);

                // Pages on the deepest level are scanned for addresses only.
                if (depth == MAX_LINK_DEPTH)
                {
                    continue;
                }

                // The link list may be empty, e.g. when the download failed.
                Vector links = lookurl(document);
                for (int j = 0; j < links.size(); j++)
                {
                    String link = links.get(j).toString();
                    if (visited.add(link))
                    {
                        nextLevel.addElement(link);
                    }
                }
            }
            currentLevel = nextLevel;
        }
    }

    /**
     * Returns the host part of a URL.
     *
     * @param hostname an absolute URL string
     * @return the host reported by {@link URL#getHost()}, or a single space when the
     *         string cannot be parsed as a URL
     */
    public String GetHostName(String hostname)
    {
        try
        {
            return new URL(hostname).getHost();
        }
        catch (MalformedURLException e)
        {
            System.err.println("Malformed URL: " + hostname + " (" + e.getMessage() + ")");
            return " ";
        }
    }

    /**
     * Downloads the document behind a URL as text, one {@code '\n'} appended per line read.
     * The bytes are decoded with the platform default charset.
     *
     * @param urlString the URL to fetch
     * @return the text read so far; empty when the URL is malformed or the connection
     *         fails before any line was read
     */
    public String getDocumentAt(String urlString)
    {
        StringBuffer document = new StringBuffer();
        BufferedReader reader = null;
        try
        {
            URL url = new URL(urlString);
            URLConnection conn = url.openConnection();
            reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String line;
            while ((line = reader.readLine()) != null)
            {
                document.append(line).append('\n');
            }
        }
        catch (MalformedURLException e)
        {
            System.out.println("Unable to connect to URL: " + urlString);
        }
        catch (IOException e)
        {
            System.out.println("IOException when connecting to URL: " + urlString);
        }
        finally
        {
            // Close here so the stream is released even when reading fails half-way.
            if (reader != null)
            {
                try
                {
                    reader.close();
                }
                catch (IOException ignored)
                {
                    // Nothing useful can be done if closing fails; the text read is kept.
                }
            }
        }
        return document.toString();
    }

    /**
     * Finds the e-mail addresses on one page and inserts each of them into
     * {@code T_MAIL_MAIL}. Duplicates are not filtered here; see {@link #updateDatabase()}.
     * No database connection is opened when the page contains no address.
     *
     * @param document page text to scan; {@code null} is treated as an empty page
     */
    public void lookemail(String document)
    {
        Vector emails = extractEmails(document);
        if (emails.isEmpty())
        {
            return;
        }

        dbConn db = new dbConn();
        try
        {
            for (int i = 0; i < emails.size(); i++)
            {
                // The address comes from an arbitrary web page, so it must not be able to
                // terminate the SQL string literal.
                // NOTE(review): only executeUpdate(String) is visible on dbConn here; switch to
                // a parameterized statement if dbConn offers one.
                String email = escapeSqlLiteral((String) emails.get(i));
                db.executeUpdate("insert into T_MAIL_MAIL(email) values('" + email + "')");
            }
        }
        finally
        {
            db.close();
        }
    }

    /**
     * Collects every absolute {@code http://} link target on a page. No host filtering is
     * applied, so links to other sites are included.
     *
     * @param document page text to scan; {@code null} is treated as an empty page
     * @return the matched URLs as strings, in document order; never {@code null}
     */
    public Vector lookurl(String document)
    {
        Vector urls = new Vector();
        if (document == null)
        {
            return urls;
        }
        Matcher matcher = URL_PATTERN.matcher(document);
        while (matcher.find())
        {
            urls.addElement(matcher.group(1));
        }
        return urls;
    }

    /**
     * Removes duplicate rows from {@code T_MAIL_MAIL} by copying the distinct addresses to
     * {@code T_MAIL_MAILTEMP}, emptying {@code T_MAIL_MAIL} and copying them back.
     */
    public void updateDatabase()
    {
        dbConn db = new dbConn();
        try
        {
            db.executeUpdate("delete from T_MAIL_MAILTEMP");
            db.executeUpdate("insert into T_MAIL_MAILTEMP select distinct email from T_MAIL_MAIL");
            db.executeUpdate("delete from T_MAIL_MAIL");
            db.executeUpdate("insert into T_MAIL_MAIL select email from T_MAIL_MAILTEMP");
        }
        finally
        {
            // Released the same way lookemail releases its connection.
            db.close();
        }
    }

    /** Extracts all mailto addresses from a page, splitting ';'-separated lists into single addresses. */
    private static Vector extractEmails(String document)
    {
        Vector emails = new Vector();
        if (document == null)
        {
            return emails;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(document);
        while (matcher.find())
        {
            String[] parts = matcher.group(1).split(";");
            for (int p = 0; p < parts.length; p++)
            {
                String address = parts[p].trim();
                if (address.length() > 0)
                {
                    emails.addElement(address);
                }
            }
        }
        return emails;
    }

    /** Doubles every single quote so the value can be embedded in a SQL string literal. */
    private static String escapeSqlLiteral(String value)
    {
        return value.replaceAll("'", "''");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -