⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 spider.java

📁 采集网页信息,写入MySQL数据库
💻 JAVA
字号:

import java.net.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.*;
import java.io.IOException;
import java.sql.*;

/**
 * Command-line scraper: downloads the Yahoo Finance historical-price pages
 * for one ticker symbol, extracts the right-aligned table cells, writes them
 * to a tab-separated text file and bulk-loads that file into a MySQL table.
 *
 * <p>Typical use is {@link #main(String[])}, which runs {@link #alllink()},
 * {@link #insertdb()} and {@link #inserintodb()} in that order.
 *
 * <p>The ticker symbol is held in the static field {@link #company}, so all
 * instances share it.
 */
public class Spider
{
    /** File written by {@link #insertdb()}. */
    private static final String DUMP_FILE = "E:\\stockdate.txt";

    /** Statement executed by {@link #inserintodb()}; it reads the file written by {@link #insertdb()}. */
    private static final String LOAD_SQL =
            "LOAD DATA LOCAL INFILE 'E://stockdate.txt' INTO TABLE yahoostock";

    /** Matches the content of one right-aligned table cell. */
    private static final Pattern CELL_PATTERN = Pattern.compile("align=\"right\">(.*?)</td>");

    /** Matches a {@code y=<offset>} query parameter that ends an href attribute. */
    private static final Pattern OFFSET_PATTERN = Pattern.compile("y=(.*?)\">");

    /** Row offset step between two consecutive result pages. */
    private static final int PAGE_SIZE = 66;

    /** Number of consecutive cells that make up one output row. */
    private static final int CELLS_PER_ROW = 7;

    /** English month abbreviations; index + 1 is the month number. */
    private static final String[] MONTHS = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };

    String sourceURL; // URL of the first page to scrape (base URL + company)
    String sourceContent; // content of the most recently fetched page
    static String company; // company (ticker symbol) being scraped
    ArrayList<String> matchContent = new ArrayList<String>(); // cells extracted so far, in page order

    /**
     * Prompts for a company symbol on standard input, then scrapes, dumps and
     * loads its data. Does nothing if no symbol could be read.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        try {
            System.out.print("Please give the company:");
            BufferedReader stdin =
                    new BufferedReader(new InputStreamReader(System.in));
            company = stdin.readLine();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // readLine() yields null at end of input; a blank symbol is equally useless.
        if (company == null || company.trim().length() == 0) {
            System.err.println("No company symbol given; nothing to do.");
            return;
        }
        company = company.trim();

        Spider urls21 = new Spider("http://finance.yahoo.com/q/hp?s=");
        urls21.alllink();
        urls21.insertdb();
        urls21.inserintodb();
    }

    /** Creates a spider with no source URL set. */
    public Spider()
    {
    }

    /**
     * Creates a spider whose start URL is the given prefix followed by the
     * current value of {@link #company}.
     *
     * @param sourceURL1 URL prefix to which the company symbol is appended
     */
    public Spider(String sourceURL1)
    {
        sourceURL = sourceURL1 + company;
    }

    /**
     * Downloads a page and stores its text in {@link #sourceContent}.
     *
     * <p>Lines are joined without separators, so the single-line regular
     * expressions used elsewhere in this class can match across what were
     * line breaks in the page. If the URL is malformed or the download fails,
     * the stack trace is printed and {@code sourceContent} is set to the empty
     * string, so that a previously fetched page is not parsed a second time.
     *
     * @param URLStr address of the page to download
     */
    public void getSourceContent(String URLStr)
    {
        StringBuffer sb = new StringBuffer();
        BufferedReader br = null;
        try {
            URL newURL = new URL(URLStr);
            br = new BufferedReader(new InputStreamReader(newURL.openStream()));
            String temp;
            while ((temp = br.readLine()) != null) {
                sb.append(temp);
            }
            sourceContent = sb.toString();
        } catch (IOException e) {
            // MalformedURLException is an IOException, so both failure modes land here.
            sourceContent = "";
            e.printStackTrace();
        } finally {
            closeAndReport(br);
        }
    }

    /**
     * Extracts the right-aligned table cells from {@link #sourceContent},
     * prints each one and appends it to {@link #matchContent}.
     *
     * <p>Does nothing when no page has been fetched yet.
     */
    public void getMatchContent()
    {
        if (sourceContent == null) {
            return;
        }
        Matcher match = CELL_PATTERN.matcher(sourceContent);
        // NOTE(review): the first matching cell is consumed here and never
        // recorded. This is kept from the original implementation; presumably
        // the first right-aligned cell on the page is not price data - confirm.
        if (!match.find()) {
            return;
        }
        while (match.find()) {
            System.out.println(match.group(1));
            matchContent.add(match.group(1));
        }
    }

    /**
     * Scrapes the start page and every following result page.
     *
     * <p>The first "Next" link on the start page supplies the query-string
     * prefix (everything up to and including {@code y=}). The text captured
     * by the second "Next" match is scanned for {@code y=<n>"&gt;} parameters
     * and the last numeric one is taken as the highest row offset. Pages are
     * then fetched at offsets 66, 132, ... up to that value.
     */
    public void alllink()
    {
        getSourceContent(sourceURL);
        getMatchContent();
        if (sourceContent == null) {
            return;
        }

        // The symbol is quoted so that characters such as '^' or '.' in a
        // ticker are matched literally instead of acting as regex operators.
        Pattern nextPattern = Pattern.compile(
                "<a href=\"/q/hp\\?s=" + Pattern.quote(String.valueOf(company)) + "(.*?)\">Next</a>");
        Matcher nextMatcher = nextPattern.matcher(sourceContent);
        if (!nextMatcher.find()) {
            return; // single page, nothing more to fetch
        }

        String query = nextMatcher.group(1).replaceAll("amp;", "");
        int index = query.indexOf("y=");
        if (index < 0) {
            return; // link carries no offset parameter, so paging is impossible
        }
        String link = query.substring(0, index + 2);

        int last = 0;
        if (nextMatcher.find()) {
            Matcher offsets = OFFSET_PATTERN.matcher(nextMatcher.group(1));
            while (offsets.find()) {
                try {
                    last = Integer.parseInt(offsets.group(1));
                } catch (NumberFormatException ignored) {
                    // The lazy capture can span unrelated markup; skip
                    // anything that is not a plain number.
                }
            }
        }

        for (int i = 1; i <= last / PAGE_SIZE; i++) {
            String linkURL = sourceURL + link + Integer.toString(i * PAGE_SIZE);
            getSourceContent(linkURL);
            getMatchContent();
        }
    }

    /**
     * Writes the collected cells to the dump file as tab-separated rows.
     *
     * <p>Every seven consecutive cells form one row. The row starts with the
     * company symbol, the first cell is converted with {@link #todate(String)},
     * and rows are separated by {@code "\r\n"} (no trailing separator). With
     * no collected cells an empty file is written. I/O failures are reported
     * on standard error.
     */
    public void insertdb()
    {
        StringBuilder context = new StringBuilder();
        int counter = 0;
        for (String cell : matchContent) {
            int column = counter % CELLS_PER_ROW;
            if (column == 0) {
                if (counter != 0) {
                    context.append("\r\n");
                }
                context.append(company).append("\t").append(todate(cell)).append("\t");
            } else if (column == CELLS_PER_ROW - 1) {
                context.append(cell); // last column: no trailing tab
            } else {
                context.append(cell).append("\t");
            }
            counter++;
        }

        BufferedWriter output = null;
        try {
            output = new BufferedWriter(new FileWriter(DUMP_FILE));
            output.write(context.toString());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(output);
        }
    }

    /**
     * Bulk-loads the dump file into the {@code yahoostock} table of the local
     * {@code stock} MySQL database.
     *
     * <p>Driver, connection and SQL failures are reported on standard error;
     * the statement and connection are closed whenever they were opened.
     */
    public void inserintodb()
    {
        try {
            Class.forName("com.mysql.jdbc.Driver").newInstance();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        Connection connection = null;
        Statement stmt = null;
        try {
            // NOTE(review): database credentials are hard-coded here.
            connection = DriverManager.getConnection("jdbc:mysql://localhost/stock", "root", "123456");
            stmt = connection.createStatement();
            // LOAD DATA produces no ResultSet, so it must not go through executeQuery().
            stmt.executeUpdate(LOAD_SQL);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Rearranges a {@code day-Mon-year} date into {@code year + month + day}.
     *
     * <p>An English three-letter month abbreviation (any letter case) is
     * replaced by its two-digit number; any other month text is kept as is.
     * Day and year are copied unchanged, so {@code "4-Jan-10"} becomes
     * {@code "10014"}.
     *
     * @param date text with three parts separated by {@code '-'}
     * @return the rearranged text
     * @throws ArrayIndexOutOfBoundsException if {@code date} has fewer than
     *         three {@code '-'}-separated parts
     */
    public String todate(String date)
    {
        String[] parts = date.split("-");
        String month = parts[1];
        for (int i = 0; i < MONTHS.length; i++) {
            if (MONTHS[i].equalsIgnoreCase(month)) {
                month = (i < 9 ? "0" : "") + (i + 1);
                break;
            }
        }
        return parts[2] + month + parts[0];
    }

    /** Closes the resource if it was opened, printing any failure to standard error. */
    private static void closeAndReport(Closeable resource)
    {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -