⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 downloadimage.java

📁 Light in the box 抓取程序。 使用HttpClient
💻 JAVA
字号:
package com.blogool.crawl;

import java.io.*;
import java.util.*;

import org.flytinge.HttpDataGetItem;
import org.flytinge.HttpDataListGet;
import org.flytinge.HttpListGet;
import org.flytinge.HttpListGetItem;
import org.flytinge.SuperContentHandle;
import org.flytinge.SuperDataHandle;

import com.blogool.crawl.lib.*;

/**
 * Crawler step that downloads the images referenced by a category catalog.
 *
 * <p>A catalog XML file is loaded with {@code Util.loadCat}, the items of its
 * second-level categories are scanned for image URLs, and every image whose
 * file name is not yet present in the output directory of
 * {@link DownloadImageHandle} is queued on an {@code HttpDataListGet}.
 *
 * <p>Only two category levels are visited (the children of the root and their
 * children); items attached to any other level are not scanned.
 */
public class DownloadImage {

	/**
	 * HTTP proxies in {@code host:port} form.
	 *
	 * <p>Currently unused: the call that handed them to the downloader
	 * ({@code HttpDataListGet.setProxyList(hosts, ports)}) is disabled. To use
	 * them again, split each entry on {@code ':'} into a host list and an
	 * integer port list and pass both to that method before {@code start()}.
	 * The entry {@code 155.225.2.72:3128} is intentionally kept twice, exactly
	 * as in the original list.
	 */
	private static final String[] PROXYS = {
		"128.208.4.199:3124",
		"132.239.17.226:3124",
		"35.9.27.27:3124",
		"128.2.223.65:3128",
		"128.8.126.111:3127",
		"128.8.126.112:3124",
		"128.10.19.52:3124",
		"141.213.4.201:3124",
		"141.213.4.202:3124",
		"155.225.2.72:3128",
		"128.193.33.8:3127",
		"198.82.160.220:3124",
		"155.225.2.72:3128",
	};

	/**
	 * Queues every image URL of every item in {@code d:/libox1/cats4.xml}
	 * that is not already in the output directory, then starts the download.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		List<HttpDataGetItem> hlgItems = collectDownloads(new File("d:/libox1/cats4.xml"), false);
		System.out.println(hlgItems.size());
		startDownload(hlgItems);
	}

	/**
	 * Alternative entry point: queues only the first image URL of each item in
	 * {@code d:/libox1/cats2.xml}, skipping items that have no URL of their own,
	 * then starts the download.
	 *
	 * @param args ignored
	 */
	public static void main1(String[] args) {
		List<HttpDataGetItem> hlgItems = collectDownloads(new File("d:/libox1/cats2.xml"), true);
		startDownload(hlgItems);
	}

	/**
	 * Rewrites an image URL from the {@code /images/s/} directory to the
	 * {@code /images/l/} directory (presumably small to large variant; confirm
	 * against the target site).
	 *
	 * @param url image URL; must not be {@code null}
	 * @return the URL with every {@code "/images/s/"} replaced by {@code "/images/l/"}
	 */
	public static String changeUrl(String url) {
		// Literal replacement: no regular expression is needed for a fixed path segment.
		return url.replace("/images/s/", "/images/l/");
	}

	/**
	 * Loads the catalog and builds the list of download requests.
	 *
	 * @param catalogFile    catalog XML file passed to {@code Util.loadCat}
	 * @param firstImageOnly {@code true} to queue only the first image URL of each
	 *                       item and to skip items whose own URL is {@code null};
	 *                       {@code false} to queue all image URLs of every item
	 * @return the requests to download, in catalog order; never {@code null}
	 */
	private static List<HttpDataGetItem> collectDownloads(File catalogFile, boolean firstImageOnly) {
		Cat root = Util.loadCat(catalogFile);
		DownloadImageHandle handle = new DownloadImageHandle();
		Set<String> known = existingFileNames();
		List<HttpDataGetItem> queue = new ArrayList<HttpDataGetItem>();

		// getItems() may be null, so the category lists are guarded the same way.
		List<Cat> topLevel = root.getCats();
		if (topLevel == null) {
			return queue;
		}
		for (Cat top : topLevel) {
			List<Cat> children = top.getCats();
			if (children == null) {
				continue;
			}
			for (Cat child : children) {
				List<Item> items = child.getItems();
				if (items == null) {
					continue;
				}
				for (Item item : items) {
					String[] imageUrls = item.getImageUrls();
					if (imageUrls == null) {
						continue;
					}
					if (firstImageOnly) {
						if (imageUrls.length > 0 && item.getUrl() != null) {
							enqueue(imageUrls[0], known, handle, queue);
						}
					} else {
						for (String imageUrl : imageUrls) {
							enqueue(imageUrl, known, handle, queue);
						}
					}
				}
			}
		}
		return queue;
	}

	/**
	 * Returns the names of the files already present in the output directory.
	 * Files are accepted regardless of size, so a truncated download is treated
	 * as present.
	 *
	 * @return a mutable set of file names; empty when the directory cannot be listed
	 */
	private static Set<String> existingFileNames() {
		Set<String> names = new HashSet<String>();
		File[] files = DownloadImageHandle.OUTPUT_PATH.listFiles();
		// listFiles() returns null when the path is not a directory or an I/O error occurs.
		if (files == null) {
			return names;
		}
		for (File file : files) {
			names.add(file.getName());
		}
		return names;
	}

	/**
	 * Adds a download request for {@code imageUrl} unless a file with the same
	 * name is already on disk or has already been queued in this run.
	 *
	 * @param imageUrl image URL as stored in the catalog
	 * @param known    file names to skip; the new file name is added to it
	 * @param handle   handler that stores the downloaded bytes
	 * @param queue    list receiving the new request
	 */
	private static void enqueue(String imageUrl, Set<String> known,
			DownloadImageHandle handle, List<HttpDataGetItem> queue) {
		String fileName = Util.getImageFileName(imageUrl);
		// Recording the name here prevents the same target file from being
		// downloaded several times when items share an image.
		if (!known.add(fileName)) {
			return;
		}
		HttpDataGetItem request = new HttpDataGetItem();
		request.setEnds(null);
		request.setHandle(handle);
		request.setUrl(changeUrl(imageUrl));
		queue.add(request);
	}

	/**
	 * Prints the number of queued requests and starts the downloader.
	 *
	 * @param hlgItems requests to download
	 */
	private static void startDownload(List<HttpDataGetItem> hlgItems) {
		System.out.println("All Item count:" + hlgItems.size());
		HttpDataListGet hlg = new HttpDataListGet(hlgItems);
		hlg.start();
	}

	/**
	 * Download callback that stores each fetched image in the output directory
	 * under the file name derived from its URL by {@code Util.getImageFileName}.
	 */
	public static class DownloadImageHandle implements SuperDataHandle {
		/** Directory that receives the downloaded images. */
		private static final File OUTPUT_PATH = new File("d:/libox1/imagesl_bak");

		static {
			try {
				// mkdirs() signals failure through its return value, not an exception.
				if (!OUTPUT_PATH.exists() && !OUTPUT_PATH.mkdirs()) {
					System.err.println("Cannot create output directory: " + OUTPUT_PATH);
				}
			} catch (SecurityException e) {
				// Keep class initialization from failing; later file operations will report the problem.
				e.printStackTrace();
			}
		}

		/**
		 * Saves the downloaded bytes to a file in the output directory.
		 * Failures are printed and otherwise ignored so that one bad image does
		 * not propagate an exception to the caller.
		 *
		 * @param item  the request that was downloaded; its URL determines the file name
		 * @param bytes the downloaded content
		 */
		public void handle(HttpDataGetItem item, byte[] bytes) {
			try {
				String name = Util.getImageFileName(item.getUrl());
				File target = new File(OUTPUT_PATH, name);
				Util.saveData(bytes, target);
			} catch (Exception e) {
				e.printStackTrace();
			}
		}

	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -