⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 getrealcount.java

📁 Light in the box 抓取程序。 使用HttpClient
💻 JAVA
字号:
package com.blogool.crawl;

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.flytinge.ContentHandle;
import org.flytinge.HttpGet;

import com.blogool.crawl.lib.Cat;

public class GetRealCount {
	public static void main(String[] args) {
		Cat root = Util.loadCat(new File("d:/libox1/cats.xml"));
		ExitThread et = new ExitThread(root, new File("d:/libox1/cats1.xml"));
		//注册退出事件
		Runtime.getRuntime().addShutdownHook(et);
		
		for (int i = 0; i < root.getCats().size(); i ++) {
			Cat cat = root.getCats().get(i);
			List<Cat> list = cat.getCats();
			for (int j = 0; j < list.size(); j ++) {
				Cat c = list.get(j);
				System.out.println("handle cat:" + c.getCatName());
				String url = c.getUrl();
				HandleGetRealCount hgrc = new HandleGetRealCount(c);
				HttpGet hg = new HttpGet(url, "</html>", hgrc);
				hg.start();
			}
		}
	}
	
	//获取真实商品个数和记录流行商品
	static class HandleGetRealCount implements ContentHandle {
		private Cat cat;
		private String content;
		
		private static Pattern pSize = Pattern.compile("<div class=\"sea_r_part3_left text_bold\">\\d+ \\- \\d+ of (\\d+) products</div>");
		private static Pattern pPopular = Pattern.compile("<div id=\"product_list_single\".+?>\\s*<a href=\"(.+?)\".+?>");
		public HandleGetRealCount(Cat c) {
			this.cat = c;
		}
		
		public void handle(String content) {
			this.content = content;
			
			//修正数量
			Matcher m = pSize.matcher(content);
			if (m.find()) {
				int max = Integer.parseInt(m.group(1));
				cat.setSize(max);
				cat.setUrl(cat.getUrl());
			} else {
				System.out.println("Cat" + cat.getCatName() + "\t\t\t:::error::: use default: 1");
				cat.setSize(1);
			}
			//流行商品解析
			StringBuilder sb = new StringBuilder();
			m = pPopular.matcher(content);
			while (m.find()) {
				sb.append(m.group(1));
				sb.append(",");
			}
			
			String ids = sb.toString();
			if (ids.length() > 0)
				cat.setPopularIds(ids);
		}
		
	}
	
	//退出之前保存
	public static class ExitThread extends Thread {
		private File file;
		private Cat root;
		
		public ExitThread(Cat cat, File f) {
			this.file = f;
			this.root = cat;
		}
		
		public void run() {
			Util.saveCat(root, file);
		}
	}
	
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -