⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 downloadthread.java

📁 基于JAVA的网络蜘蛛系统,使用JAVA实现抓取网络资源的网络蜘蛛。通过一个入口网址来扫描整个互联网的网址
💻 JAVA
字号:
package issa.webspider;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.List;

/**
 * Crawler worker: repeatedly takes a URL from the {@link InternalQueues}
 * waiting list, downloads it, queues every link reported by
 * {@link HtmlParser} and hands the downloaded content to the project's
 * {@code FileWriter} (the one in this package; {@code java.io.FileWriter} is
 * not imported here).
 *
 * <p>
 * The loop ends when the worker thread is interrupted. I/O failures for a
 * single URL are reported on {@code System.err} and the worker moves on to the
 * next URL.
 */
public class DownloadThread implements Runnable {

	/** Loop flag; cleared when the worker thread is interrupted. */
	private boolean isRunning = true;

	/**
	 * Processes URLs from the waiting list until the thread is interrupted.
	 */
	@Override
	public void run() {
		while (isRunning) {
			try {
				URL url = InternalQueues.getFromWaitingList();
				Reader reader = download(url);
				if (reader != null) {
					try {
						HtmlParser parser = new HtmlParser(url);
						List<URL> links = parser.getLinks();
						addLinks(links);
						FileWriter fileWriter = new FileWriter(url);
						fileWriter.writeToFile(reader);
					} finally {
						// Always release the network stream, even when
						// parsing or writing fails.
						closeQuietly(reader);
					}
				}
			} catch (InterruptedException e) {
				// Interruption is the shutdown signal: restore the flag for
				// whoever owns this thread and leave the loop instead of
				// swallowing the request and spinning on.
				Thread.currentThread().interrupt();
				isRunning = false;
			} catch (IOException e) {
				System.err.println(e);
			}
		}

	}

	/**
	 * Adds every given link to the shared waiting list.
	 *
	 * @param links
	 *            links to enqueue
	 */
	private void addLinks(List<URL> links) {
		for (URL link : links) {
			InternalQueues.addToWaitingList(link);
		}
	}

	/**
	 * Debugging aid that prints each link to standard output, separated by
	 * rulers. Not called from within this class at present.
	 *
	 * @param links
	 *            links to print
	 */
	private void displayLinks(List<URL> links) {
		for (URL link : links) {
			System.out
					.println("-------------------------------------------------------------------------------");
			System.out.println(link);
		}
		System.out
				.println("\n*******************************************************************************\n");
	}

	/**
	 * Opens the URL and returns its content as a character stream.
	 *
	 * @param url
	 *            resource to fetch
	 * @return a reader over the content, which the caller must close, or
	 *         {@code null} when the content type is not accepted (see
	 *         {@link #matchContentTypes(URLConnection)}), when the content
	 *         was delivered as a {@code String} (it is printed instead), or
	 *         when no usable representation was available
	 * @throws IOException
	 *             if connecting or reading the content fails
	 */
	private Reader download(URL url) throws IOException {
		URLConnection uc = url.openConnection();
		System.out.println("URL: " + url + " Content Type: "
				+ uc.getContentType());
		if (!matchContentTypes(uc)) {
			// getContentType() already connected; do not leave it dangling.
			release(uc);
			return null;
		}
		Class[] types = { String.class, Reader.class, InputStream.class };
		Object o = uc.getContent(types);

		if (o instanceof String) {
			System.out.println(o);
			return null;
		} else if (o instanceof Reader) {
			return (Reader) o;
		} else if (o instanceof InputStream) {
			// NOTE(review): bytes are decoded with the platform default
			// charset; the charset from the Content-Type header is ignored.
			InputStream in = new BufferedInputStream((InputStream) o);
			return new InputStreamReader(in);
		} else if (o == null) {
			System.out.println("None of the requested types were available.");
			release(uc);
			return null;
		} else {
			System.out.println("Error: unexpected type " + o.getClass());
			release(uc);
			return null;
		}
	}

	/**
	 * Tells whether the connection reports a content type this worker
	 * handles: {@code text/html} or {@code text/plain}, compared
	 * case-insensitively and ignoring any parameters that follow.
	 *
	 * @param connection
	 *            connection whose content type is inspected
	 * @return {@code true} for an accepted type; {@code false} otherwise,
	 *         including when the content type is unknown ({@code null})
	 */
	private boolean matchContentTypes(URLConnection connection) {
		String type = connection.getContentType();
		if (type == null) {
			// Unknown content type: reject instead of failing with an NPE.
			return false;
		}
		return startsWithIgnoreCase(type, "text/html")
				|| startsWithIgnoreCase(type, "text/plain");
	}

	/** Locale-independent, case-insensitive {@code startsWith}. */
	private static boolean startsWithIgnoreCase(String value, String prefix) {
		return value.regionMatches(true, 0, prefix, 0, prefix.length());
	}

	/** Closes the reader, reporting (not propagating) a failure to close. */
	private static void closeQuietly(Reader reader) {
		try {
			reader.close();
		} catch (IOException e) {
			System.err.println(e);
		}
	}

	/** Disconnects an HTTP connection whose response will not be read. */
	private static void release(URLConnection connection) {
		if (connection instanceof HttpURLConnection) {
			((HttpURLConnection) connection).disconnect();
		}
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -