📄 downloadpage.java
字号:
package com.blogool.crawl;
import java.io.*;
import java.util.*;
import org.flytinge.HttpListGet;
import org.flytinge.HttpListGetItem;
import org.flytinge.SuperContentHandle;
import com.blogool.crawl.lib.*;
/**
 * Command-line entry point that walks the category tree loaded from
 * {@code d:/libox1/cats2.xml}, builds one {@link HttpListGetItem} per item URL
 * that has no sufficiently large file in the output directory yet, and hands
 * the resulting list to {@link HttpListGet}.
 */
public class DownloadPage {

    /**
     * Files in the output directory smaller than this many bytes are deleted
     * and their URLs are queued again (presumably they are truncated or error
     * pages -- the threshold comes from the original code).
     */
    private static final long MIN_KEPT_FILE_SIZE = 4096;

    /**
     * Candidate proxies in {@code host:port} form. They are parsed in
     * {@link #main(String[])} but only used if the {@code setProxyList} call
     * there is re-enabled.
     */
    private static final String[] PROXY_ADDRESSES = {
        // disabled: "70.187.193.125:8080",
        "128.208.4.199:3124",
        "132.239.17.226:3124",
        "35.9.27.27:3124",
        "128.2.223.65:3128",
        "128.8.126.111:3127",
        "128.8.126.112:3124",
        "128.10.19.52:3124",
        // disabled: "130.37.198.244:3124",
        "141.213.4.201:3124",
        "141.213.4.202:3124",
        "155.225.2.72:3128",
        "128.193.33.8:3127",
        "198.82.160.220:3124",
    };

    /**
     * Builds the download queue and starts the downloader.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Cat root = Util.loadCat(new File("d:/libox1/cats2.xml"));
        // Instantiating the handle also runs its static initializer, which
        // creates the output directory before it is scanned below.
        DownloadProductContentHandle handle = new DownloadProductContentHandle();

        Set<String> downloaded = collectDownloadedNames(DownloadProductContentHandle.OUTPUT_PATH);
        List<HttpListGetItem> hlgItems = buildQueue(root, downloaded, handle);

        List<String> proxies = new ArrayList<String>();
        List<Integer> ports = new ArrayList<Integer>();
        for (int i = 0; i < PROXY_ADDRESSES.length; i++) {
            String[] hostAndPort = PROXY_ADDRESSES[i].split("\\:");
            proxies.add(hostAndPort[0]);
            ports.add(Integer.parseInt(hostAndPort[1]));
        }

        System.out.println("All Item count:" + hlgItems.size());
        HttpListGet hlg = new HttpListGet(hlgItems);
        // Proxy use is currently switched off; re-enable the next line to use the list above.
        //hlg.setProxyList(proxies, ports);
        hlg.start();
    }

    /**
     * Scans the output directory, deleting regular files smaller than
     * {@link #MIN_KEPT_FILE_SIZE} and returning the names of the files kept.
     *
     * @param outputDir directory holding previously saved pages
     * @return names of the files that are kept; empty if the directory cannot be listed
     */
    private static Set<String> collectDownloadedNames(File outputDir) {
        Set<String> names = new HashSet<String>();
        File[] files = outputDir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.err.println("Cannot list output directory: " + outputDir);
            return names;
        }
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (!f.isFile()) {
                // File.length() is unspecified for directories, so skip anything that is not a plain file.
                continue;
            }
            if (f.length() < MIN_KEPT_FILE_SIZE) {
                if (!f.delete()) {
                    System.err.println("Could not delete undersized file: " + f);
                }
            } else {
                names.add(f.getName());
            }
        }
        return names;
    }

    /**
     * Creates one download request for every item found two levels below
     * {@code root} whose URL is non-null and whose file name (as computed by
     * {@code Util.getItemFileName}) is not in {@code downloaded}.
     *
     * @param root       root of the category tree
     * @param downloaded file names that must not be queued again
     * @param handle     handler attached to every created request
     * @return the requests, in tree traversal order
     */
    private static List<HttpListGetItem> buildQueue(Cat root, Set<String> downloaded,
            DownloadProductContentHandle handle) {
        List<HttpListGetItem> queue = new ArrayList<HttpListGetItem>();
        for (int i = 0; i < root.getCats().size(); i++) {
            Cat top = root.getCats().get(i);
            for (int j = 0; j < top.getCats().size(); j++) {
                List<Item> items = top.getCats().get(j).getItems();
                if (items == null) {
                    continue;
                }
                for (Item item : items) {
                    String url = item.getUrl();
                    // Check for a missing URL before it is used to derive the file name.
                    if (url == null) {
                        continue;
                    }
                    if (downloaded.contains(Util.getItemFileName(url))) {
                        continue;
                    }
                    HttpListGetItem request = new HttpListGetItem();
                    // NOTE(review): "</html>" is presumably the end-of-content marker -- confirm in HttpListGetItem.
                    request.setEnds("</html>");
                    request.setHandle(handle);
                    request.setUrl(url);
                    queue.add(request);
                }
            }
        }
        return queue;
    }

    /**
     * Content handler that stores each received page as a file inside
     * {@code d:/libox1/items}, named by {@code Util.getItemFileName}.
     */
    public static class DownloadProductContentHandle implements SuperContentHandle {

        /** Directory into which pages are written. */
        private static final File OUTPUT_PATH = new File("d:/libox1/items");

        static {
            try {
                // Re-check isDirectory() because mkdirs() also returns false when the directory already exists.
                if (!OUTPUT_PATH.exists() && !OUTPUT_PATH.mkdirs() && !OUTPUT_PATH.isDirectory()) {
                    System.err.println("Could not create output directory: " + OUTPUT_PATH);
                }
            } catch (Exception e) {
                // Keep class initialization from failing; later saves will report their own errors.
                e.printStackTrace();
            }
        }

        /**
         * Saves {@code content} to the file derived from the item's URL.
         * Failures are reported on standard error and otherwise ignored so
         * that one bad page does not abort the remaining downloads.
         *
         * @param item    request whose URL determines the file name
         * @param content page content to store
         */
        public void handle(HttpListGetItem item, String content) {
            try {
                String name = Util.getItemFileName(item.getUrl());
                File target = new File(OUTPUT_PATH, name);
                Util.saveContent(content, target);
            } catch (Exception e) {
                System.err.println("Failed to save page for URL: " + (item == null ? null : item.getUrl()));
                e.printStackTrace();
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -