⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 linkiter.java

📁 分析www.sina.com 和www.qq.com 新闻
💻 JAVA
字号:
import java.awt.event.*;
import java.net.*;
import java.io.*;
import java.util.regex.*; 
/**
 * Downloads the home pages of www.163.com and www.qq.com and merges selected
 * news sections of both into a single HTML digest page.
 *
 * <p>Typical use: call {@link #println()} to download both pages (they are
 * kept in {@link #sTotalString} and {@link #sTotalString_qq} and saved to
 * {@code D:/test1.html} and {@code D:/test3.html}), then call
 * {@link #Analyse1()} to build the digest, which is stored back into
 * {@link #sTotalString} and saved to {@code D:/test2.html}.
 *
 * <p>Text is decoded and encoded with the platform default charset, exactly
 * as before. NOTE(review): pages served in a different encoding will be
 * garbled on such platforms - confirm the intended charset before changing it.
 *
 * <p>Instances keep mutable state in plain fields and are not synchronized.
 */
public class LinkIter {

    /** Matches one news column of the 163.com page. */
    private static final Pattern NETEASE_SECTION =
            Pattern.compile("<div class=\"col w2\"><h4>(.*?)</a></span></div></div>");

    /** Maximum number of 163.com columns used; one heading per column. */
    private static final String[] SECTION_TITLES = {
        "新闻中心", "娱乐频道", "体育新闻", "股市新闻", "科技新闻频道"
    };

    /**
     * qq.com patterns, index-aligned with {@link #SECTION_TITLES}. The
     * {@code \t} escapes stand for the tab characters present in the page
     * markup the patterns were written against.
     */
    private static final Pattern[] QQ_SECTIONS = {
        // qq.com news-centre block
        Pattern.compile("<div id=\"NewsInfo\">(.*?)<div id=\"map\" class=\"undis\"></div>"),
        // qq.com entertainment block
        Pattern.compile("<br></div></div>\t<div class=\"imgRArea\">(.*?)<div class=\"imgArea\">\t<div class=\"imgLAreapic\">"),
        // qq.com sports block
        Pattern.compile("<div id=\"SportsInfo\">(.*?)<div class=\"res_b\"></div>\t\t\t</div>\t\t\t<div class=\"proArea\">"),
        // qq.com stock block
        Pattern.compile("<div id=\"stocksuggest\" style=\"height:auto;z-Index:10000;background:#fff;padding:0 ;border:1px solid #6993C6; min-height:100px;width:220px;position:absolute;display:none;\">(.*?)<a href=\"http://book.qq.com\">"),
        // qq.com technology block
        Pattern.compile("<h4 class=\"s\" name=\"d\"><a href=\"http://tech.qq.com/\">科技</a></h4>(.*?)<div class=\"imgArea\">  <div class=\"imgLAreapic\">")
    };

    /** Markup placed before / after the title of every section heading. */
    private static final String SECTION_HEADER_PREFIX =
            "<TABLE class=grey3d1 cellSpacing=0 cellPadding=3 width=780 align=center bgColor=#ffffff>"
            + " <TBODY><TR><TD noWrap align=center bgcolor=#0033FF>";
    private static final String SECTION_HEADER_SUFFIX = "</TD></TR></TBODY></TABLE>";

    /** Opening markup of the generated digest page. */
    private static final String PAGE_HEADER =
            "<HTML><HEAD><TITLE>毕业设计</TITLE><BODY leftMargin=50 topMargin=0 marginheight=0 marginwidth=0>"
            + "<table align=center><tr align=center><td align=center>";

    /** Closing markup (footer table) of the generated digest page. */
    private static final String PAGE_FOOTER =
            "<TABLE class=grey3d3 cellSpacing=0 cellPadding=0 width=780 align=center "
            + "bgColor=#ffffff border=0><TBODY><TR><TD align=middle><HR width=100% color=#aeaec1 SIZE=0>"
            + "</TD></TR><TR><TD align=center><FONT color=#003b97><BR>"
            + "版权所有2008-2008 @ 黄龙键 </FONT><BR><FONT color=#ffffff size=3>78864.58|65831 </FONT>"
            + "</TD></TR></TBODY></TABLE></td></tr></table></BODY></HTML>";

    /** Scratch value: last line read while downloading, or the last generated digest. */
    String sCurrentLine = "";

    /** The 163.com page after {@link #println()}; the generated digest after {@link #Analyse1()}. */
    String sTotalString = "";

    /** The qq.com page after {@link #println()}. */
    String sTotalString_qq = "";

    // Handles of the most recent download. They are kept as fields only for
    // compatibility; they are closed before println() returns.
    InputStream l_urlStream;
    java.io.BufferedReader l_reader = null;
    HttpURLConnection l_connection = null;

    /**
     * Downloads both home pages, stores them in {@link #sTotalString} and
     * {@link #sTotalString_qq}, and saves copies to {@code D:/test1.html} and
     * {@code D:/test3.html}.
     *
     * <p>Each call replaces the previously stored page instead of appending to
     * it. A page field is assigned as soon as its download completes, so it is
     * available to {@link #Analyse1()} even when saving the copy fails. The two
     * sites are handled independently; a failure is reported on the console
     * and never thrown.
     */
    public void println() {
        // Fetch the 163.com page.
        try {
            sTotalString = download("http://www.163.com/");
            writeFile("D:/test1.html", sTotalString);
            System.out.println("game over!");
        } catch (Exception e) {
            System.out.println("Sorry!");
            System.err.println("LinkIter: fetching http://www.163.com/ failed: " + e);
        }

        // Fetch the qq.com page.
        try {
            sTotalString_qq = download("http://www.qq.com/");
            writeFile("D:/test3.html", sTotalString_qq);
            System.out.println("qq game over!");
        } catch (Exception e) {
            System.out.println("qq Sorry!");
            System.err.println("LinkIter: fetching http://www.qq.com/ failed: " + e);
        }
    }

    /**
     * Builds the digest page from the two downloaded pages.
     *
     * <p>For each of the first five 163.com columns found in
     * {@link #sTotalString}, the digest receives a heading, the column itself,
     * and every match of the corresponding qq.com pattern found in
     * {@link #sTotalString_qq}, each followed by a newline. The result,
     * wrapped in the page header and footer, replaces {@link #sTotalString}
     * (and {@link #sCurrentLine}) and is then saved to {@code D:/test2.html}.
     * A failure to save is reported on the console and never thrown; the
     * fields are already updated at that point.
     *
     * <p>Because the input field is overwritten with the output, calling this
     * method again without a fresh {@link #println()} analyses the digest
     * itself rather than the original page.
     */
    public void Analyse1() {
        Matcher neteaseMatcher = NETEASE_SECTION.matcher(sTotalString);
        StringBuilder body = new StringBuilder();

        // Columns beyond the known headings were ignored before as well.
        for (int section = 0; section < SECTION_TITLES.length && neteaseMatcher.find(); section++) {
            body.append(SECTION_HEADER_PREFIX)
                .append(SECTION_TITLES[section])
                .append(SECTION_HEADER_SUFFIX);
            body.append(neteaseMatcher.group()).append("\n");

            // Append the matching qq.com blocks for the same topic.
            Matcher qqMatcher = QQ_SECTIONS[section].matcher(sTotalString_qq);
            while (qqMatcher.find()) {
                body.append(qqMatcher.group()).append("\n");
            }
        }

        sCurrentLine = PAGE_HEADER + body + PAGE_FOOTER;
        sTotalString = sCurrentLine;

        try {
            writeFile("D:/test2.html", sTotalString);
            System.out.print("文件结束");
        } catch (Exception e) {
            System.out.println("分析方法");
            System.err.println("LinkIter: saving D:/test2.html failed: " + e);
        }
    }

    /**
     * Downloads {@code address} and returns its text with all line breaks
     * removed. The section patterns rely on this: {@code .} does not match
     * line terminators, so a block can only match on a single line.
     *
     * @param address absolute HTTP URL to fetch
     * @return the page content, lines joined without separators
     * @throws IOException if the URL is malformed, or connecting or reading fails
     */
    private String download(String address) throws IOException {
        l_connection = null;
        l_urlStream = null;
        l_reader = null;
        StringBuilder page = new StringBuilder();
        try {
            URL url = new URL(address);
            l_connection = (HttpURLConnection) url.openConnection();
            l_connection.connect();
            l_urlStream = l_connection.getInputStream();
            l_reader = new BufferedReader(new InputStreamReader(l_urlStream));
            while ((sCurrentLine = l_reader.readLine()) != null) {
                page.append(sCurrentLine);
            }
            return page.toString();
        } finally {
            // Release the network resources whether or not the read succeeded.
            closeQuietly(l_reader);
            closeQuietly(l_urlStream);
            if (l_connection != null) {
                l_connection.disconnect();
            }
        }
    }

    /**
     * Writes {@code content} to {@code path}, replacing any existing file, and
     * always closes the file handle.
     *
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeFile(String path, String content) throws IOException {
        // Opening without the append flag truncates, which replaces the old
        // delete / createNewFile / open-for-append sequence.
        FileOutputStream out = new FileOutputStream(new File(path));
        try {
            out.write(content.getBytes());
        } finally {
            out.close();
        }
    }

    /** Closes {@code resource} if it is non-null, ignoring any close failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close during cleanup.
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -