⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 spide.java

📁 中文自动分类。使用spider抓取网络信息
💻 JAVA
字号:
package text_category;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Map;

import com.opensymphony.xwork2.ActionContext;
import com.opensymphony.xwork2.ActionSupport;
import com.opensymphony.xwork2.ModelDriven;

/**
 * Struts 2 action that fetches the text of a document identified by
 * {@link #getUrl() url} and exposes it through {@link #getContent() content}.
 *
 * <p>The {@code url} value is first interpreted as a URL; if it cannot be
 * parsed as one, it is treated as a path on the local file system.
 *
 * <p>NOTE(review): {@code url} is settable through {@link #setUrl(String)} and
 * is presumably bound from request parameters. Because non-URL values are
 * opened as local files (and {@code file:} URLs are accepted), a caller who
 * controls {@code url} can read arbitrary files or reach internal hosts.
 * Confirm that access to this action is restricted, or add an allow-list.
 */
public class Spide extends ActionSupport implements ModelDriven{

	/** Text of the most recently loaded document. */
	private String content;

	/** Location (URL or local file path) of the document to load. */
	private String url;

	/** Status message produced by {@link #execute()}. */
	private String hintString;

	/**
	 * Loads the document at {@code url}, stores its text in {@code content},
	 * and copies both {@code url} and {@code content} into the session map.
	 *
	 * @return {@code INPUT} if the document could not be loaded or is empty,
	 *         otherwise {@code SUCCESS}
	 * @throws Exception declared for compatibility with the action contract
	 */
	public String execute() throws Exception
	{
		String document = loadDocument(url);
		// The original test was "length() < 0", which can never be true, so an
		// empty document was reported as a success. Treat empty as a failure.
		if (document == null || document.length() == 0)
		{
			return INPUT;
		}
		setContent(document);
		setHintString(getText("Spider.success"));

		Map attributes = ActionContext.getContext().getSession();
		attributes.put("url", url);
		attributes.put("content", content);

		return SUCCESS;
	}

	/**
	 * Reads the whole document at the given location as text.
	 *
	 * <p>Every line read is followed by a single {@code "\n"}, so the original
	 * line terminators are normalised and a trailing newline is always present
	 * for non-empty input. Bytes are decoded with the platform default charset.
	 *
	 * @param url a URL, or a local file path if the value is not a valid URL
	 * @return the document text, or {@code null} if {@code url} is
	 *         {@code null} or the document could not be opened or read
	 */
	public String loadDocument(String url)
	{
		if (url == null)
		{
			return null;
		}

		BufferedReader in = null;
		try {
			in = new BufferedReader(openReader(url));

			StringBuilder textBuf = new StringBuilder();
			String line;
			while ((line = in.readLine()) != null) {
				textBuf.append(line).append("\n");
			}
			return textBuf.toString();
		} catch (Exception e) {
			// Deliberately broad: callers rely on null for any failure,
			// including runtime exceptions raised while opening the source.
			return null;
		} finally {
			// Always release the stream/connection, even when reading fails.
			closeQuietly(in);
		}
	}

	/**
	 * Opens a character reader on {@code source}, trying it as a URL first and
	 * falling back to a local file when it is not in URL format.
	 */
	private Reader openReader(String source) throws IOException
	{
		URL sourceUrl;
		try {
			sourceUrl = new URL(source);
		} catch (MalformedURLException notAUrl) {
			// Not in URL format: treat the value as a local file path.
			InputStream is = new FileInputStream(source);
			return new InputStreamReader(is);
		}

		URLConnection connection = sourceUrl.openConnection();
		return new InputStreamReader(connection.getInputStream());
	}

	/** Closes {@code reader} if it is non-null, ignoring any close failure. */
	private void closeQuietly(Reader reader)
	{
		if (reader == null)
		{
			return;
		}
		try {
			reader.close();
		} catch (IOException ignored) {
			// Nothing useful can be done if close fails; the read result stands.
		}
	}

	/** @return the location of the document to load */
	public String getUrl() {
		return url;
	}

	/** @param url the location (URL or local file path) of the document to load */
	public void setUrl(String url) {
		this.url = url;
	}

	/** @return the status message set by the last call to {@link #execute()} */
	public String getHintString() {
		return hintString;
	}

	/** @param hintString the status message to expose */
	public void setHintString(String hintString) {
		this.hintString = hintString;
	}

	/** @return the text of the most recently loaded document */
	public String getContent() {
		return content;
	}

	/** @param content the document text to expose */
	public void setContent(String content) {
		this.content = content;
	}

	/**
	 * Returns the model object for {@code ModelDriven}.
	 *
	 * <p>NOTE(review): this returns the current {@code url} string, which is
	 * {@code null} until {@link #setUrl(String)} has been called — confirm
	 * that a String model is what the framework configuration expects.
	 *
	 * @return the current {@code url} value
	 */
	public Object getModel()
	{
		return url;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -