📄 downloadthread.java
字号:
package issa.webspider;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.List;
/**
 * Crawler worker. Each loop iteration takes one URL from
 * {@code InternalQueues.getFromWaitingList()}, downloads it when its content
 * type is {@code text/html} or {@code text/plain}, adds the links reported by
 * {@code HtmlParser} for that URL back to the waiting list, and passes the
 * downloaded content to {@code FileWriter.writeToFile(Reader)}.
 *
 * <p>The loop ends when the worker thread is interrupted. I/O failures for a
 * single URL are reported on {@code System.err} and the loop moves on.
 */
public class DownloadThread implements Runnable {

    /** Loop flag for {@link #run()}; cleared when the worker is interrupted. */
    private boolean isRunning = true;

    /**
     * Processes URLs from the waiting list until the thread is interrupted.
     */
    @Override
    public void run() {
        while (isRunning) {
            try {
                URL url = InternalQueues.getFromWaitingList();
                Reader reader = download(url);
                if (reader != null) {
                    try {
                        HtmlParser parser = new HtmlParser(url);
                        List<URL> links = parser.getLinks();
                        addLinks(links);
                        FileWriter fileWriter = new FileWriter(url);
                        fileWriter.writeToFile(reader);
                    } finally {
                        // Always release the stream (and its connection), even
                        // when parsing or writing fails. Closing an already
                        // closed Reader has no effect.
                        closeQuietly(reader);
                    }
                }
            } catch (InterruptedException e) {
                // Restore the interrupt status for whoever runs this Runnable
                // and leave the loop instead of silently continuing.
                Thread.currentThread().interrupt();
                isRunning = false;
            } catch (IOException e) {
                System.err.println(e);
            }
        }
    }

    /**
     * Adds every given link to the waiting list.
     *
     * @param links links to enqueue
     */
    private void addLinks(List<URL> links) {
        for (URL link : links) {
            InternalQueues.addToWaitingList(link);
        }
    }

    /**
     * Debugging aid: prints each link to standard output, separated by rules.
     * Not invoked by {@link #run()}.
     *
     * @param links links to print
     */
    private void displayLinks(List<URL> links) {
        for (URL link : links) {
            System.out
                    .println("-------------------------------------------------------------------------------");
            System.out.println(link);
        }
        System.out
                .println("\n*******************************************************************************\n");
    }

    /**
     * Opens the URL and returns a reader over its content.
     *
     * @param url the resource to fetch
     * @return a reader over the content, or {@code null} when the content type
     *         is not accepted or the content is not available as a stream
     * @throws IOException if the connection or content retrieval fails
     */
    private Reader download(URL url) throws IOException {
        URLConnection uc = url.openConnection();
        String contentType = uc.getContentType();
        System.out.println("URL: " + url + " Content Type: "
                + contentType);
        if (!matchContentTypes(uc)) {
            release(uc);
            return null;
        }
        Class[] types = { String.class, Reader.class, InputStream.class };
        Object o = uc.getContent(types);
        if (o instanceof String) {
            System.out.println(o);
            release(uc);
            return null;
        } else if (o instanceof Reader) {
            return (Reader) o;
        } else if (o instanceof InputStream) {
            return toReader((InputStream) o, contentType);
        } else if (o == null) {
            System.out.println("None of the requested types were available.");
            release(uc);
            return null;
        } else {
            System.out.println("Error: unexpected type " + o.getClass());
            release(uc);
            return null;
        }
    }

    /**
     * Tells whether the connection's content type is {@code text/html} or
     * {@code text/plain}. The comparison ignores case, and an unknown
     * ({@code null}) content type is rejected rather than dereferenced.
     *
     * @param connection the connection to inspect
     * @return {@code true} if the content type is accepted
     */
    private boolean matchContentTypes(URLConnection connection) {
        String type = connection.getContentType();
        if (type == null) {
            return false;
        }
        return startsWithIgnoreCase(type, "text/html")
                || startsWithIgnoreCase(type, "text/plain");
    }

    /** Case-insensitive variant of {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Wraps the stream in a buffered reader, decoding with the charset named in
     * the content type when there is one and it is supported; otherwise the
     * platform default charset is used.
     */
    private static Reader toReader(InputStream in, String contentType) {
        InputStream buffered = new BufferedInputStream(in);
        String charset = declaredCharset(contentType);
        if (charset != null) {
            try {
                return new InputStreamReader(buffered, charset);
            } catch (java.io.UnsupportedEncodingException e) {
                // Unknown or malformed charset name: fall back to the default.
            }
        }
        return new InputStreamReader(buffered);
    }

    /**
     * Extracts the value of the {@code charset} parameter from a content type
     * such as {@code text/html; charset=UTF-8}.
     *
     * @return the charset name, or {@code null} if none is declared
     */
    private static String declaredCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        String[] parts = contentType.split(";");
        // parts[0] is the media type itself; parameters follow.
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            if (startsWithIgnoreCase(param, "charset=")) {
                String value = param.substring("charset=".length()).trim();
                if (value.length() >= 2 && value.startsWith("\"")
                        && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                return value.length() == 0 ? null : value;
            }
        }
        return null;
    }

    /** Disconnects HTTP connections whose content will not be read. */
    private static void release(URLConnection connection) {
        if (connection instanceof HttpURLConnection) {
            ((HttpURLConnection) connection).disconnect();
        }
    }

    /** Closes the reader, reporting (not propagating) any failure. */
    private static void closeQuietly(Reader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            System.err.println(e);
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -