⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 exporttohtml.java

📁 Light in the box 抓取程序。 使用HttpClient
💻 JAVA
字号:
package com.blogool.export;

import java.io.*;
import java.sql.*;
import java.text.NumberFormat;
import java.util.*;

import com.blogool.crawl.lib.*;
import com.blogool.crawl.*;

/**
 * Exports a crawled category tree as a set of static HTML pages.
 *
 * <p>
 * Running {@link #main(String[])} produces, inside {@link #OUTPUT_DIR}:
 * <ul>
 * <li><code>index.html</code> - top-level category names with links to every
 * second-level category page;</li>
 * <li><code>&lt;category name&gt;.html</code> - one product table per
 * second-level category;</li>
 * <li><code>descs.html</code> - the tag-stripped description of every exported
 * product, each under an anchor named after the item id.</li>
 * </ul>
 *
 * <p>
 * Category names, product names and prices are written into the HTML exactly
 * as they are stored on the {@code Cat}/{@code Item} objects; no HTML escaping
 * is applied. Files are written with the platform default charset.
 */
public class ExportToHtml {
	/** Directory the category XML is read from and every HTML page is written to. */
	private static final String OUTPUT_DIR = "d:/libox1/";

	/** Opening tag and header row of the product table on a category page. */
	private static final String TABLE_HEADER = "<table width='95%' border='0' cellpadding='0' cellspacing='1' bgcolor='#FFB91F'><tr bgcolor='#FFFFFF'><td width='40%'>Product Name</td><td width='20%'>Image</td><td width='10%'>List Price($)</td><td width='10%'>Price($)</td><td width='20%'>Description</td><td width='10%'>Limit Number</td></tr>";

	/** Descriptions longer than this are cut and followed by a "MORE" link. */
	private static final int MAX_DESC_LENGTH = 200;

	/** Only referenced by the disabled random-drop line in {@link #insertProduct}. */
	private static Random r = new Random();

	/** Accumulates the body of <code>descs.html</code> while products are exported. */
	private static final StringBuilder SB_DESC = new StringBuilder();

	/**
	 * Loads the category tree from <code>cats4.xml</code> in {@link #OUTPUT_DIR}
	 * (via {@code com.blogool.crawl.Util.loadCat}) and writes the index page, one
	 * page per second-level category, and the shared descriptions page.
	 *
	 * @param arg
	 *            ignored
	 * @throws Exception
	 *             if the category file cannot be loaded or a page cannot be
	 *             written
	 */
	public static void main(String[] arg) throws Exception {
		Cat root = com.blogool.crawl.Util.loadCat(new File(OUTPUT_DIR
				+ "cats4.xml"));

		root.setId(33);

		StringBuilder sbIndex = new StringBuilder();
		List<Cat> topLevel = root.getCats();
		if (topLevel != null) {
			for (Cat top : topLevel) {
				sbIndex.append(top.getCatName()).append("<br>");

				List<Cat> children = top.getCats();
				if (children == null) {
					// A top-level category without sub-categories has no pages of its own.
					continue;
				}
				for (Cat cat : children) {
					sbIndex.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;")
							.append("<a href='").append(cat.getCatName())
							.append(".html'>").append(cat.getCatName())
							.append("</a><br>");

					writeFile(OUTPUT_DIR + cat.getCatName() + ".html",
							buildCategoryPage(cat));
				}
			}
		}

		writeFile(OUTPUT_DIR + "index.html", sbIndex.toString());
		// SB_DESC was filled as a side effect of building the category pages.
		writeFile(OUTPUT_DIR + "descs.html", SB_DESC.toString());
	}

	/**
	 * Renders the page for one category: a "Home" link, the category name and a
	 * table with one row per item. Each item gets {@code cat} set as its parent
	 * before it is rendered.
	 */
	private static String buildCategoryPage(Cat cat) {
		StringBuilder page = new StringBuilder();
		page.append("<a href=index.html>Home</a><br>");
		page.append("Category:").append(cat.getCatName());
		page.append(TABLE_HEADER);

		List<Item> items = cat.getItems();
		if (items != null) {
			for (Item item : items) {
				item.setParent(cat);
				insertProduct(page, item);
			}
		}
		page.append("</table>");
		return page.toString();
	}

	/**
	 * Writes {@code content} to {@code path}, closing the writer even when the
	 * write fails.
	 */
	private static void writeFile(String path, String content)
			throws IOException {
		FileWriter fw = new FileWriter(path);
		try {
			fw.write(content);
		} finally {
			fw.close();
		}
	}

	/**
	 * Counts the products under a category, including those of all nested
	 * sub-categories.
	 *
	 * @param cat
	 *            the category to count; may be {@code null}
	 * @return the total number of items, or 0 when {@code cat} is {@code null}
	 */
	public static int getItemCount(Cat cat) {
		if (cat == null)
			return 0;

		int result = 0;
		List<Cat> children = cat.getCats();
		if (children != null) {
			for (Cat child : children) {
				result += getItemCount(child);
			}
		}
		List<Item> items = cat.getItems();
		if (items != null) {
			result += items.size();
		}
		return result;
	}

	/**
	 * Runs a single-parameter query and returns the first column of its first
	 * row as an integer.
	 *
	 * @param conn
	 *            connection to run the query on
	 * @param sql
	 *            query text with exactly one <code>?</code> placeholder
	 * @param key
	 *            value bound to the placeholder
	 * @return the id, or {@code null} when the query yields no row or fails
	 *         (the failure is printed to standard error)
	 */
	public static Integer getId(Connection conn, String sql, String key) {
		PreparedStatement pstmt = null;
		java.sql.ResultSet result = null;
		try {
			pstmt = conn.prepareStatement(sql);
			pstmt.setObject(1, key);
			result = pstmt.executeQuery();
			if (result.next()) {
				return result.getInt(1);
			}
		} catch (Exception ex) {
			// Best-effort lookup: report the problem and fall through to null.
			ex.printStackTrace();
		} finally {
			// Release in reverse order of acquisition. A failing close() is
			// deliberately ignored so it cannot mask the lookup result.
			try {
				if (result != null) {
					result.close();
				}
			} catch (SQLException ignored) {
			}
			try {
				if (pstmt != null) {
					pstmt.close();
				}
			} catch (SQLException ignored) {
			}
		}
		return null;
	}

	/**
	 * Parses a price such as <code>"US$ 12.50"</code> or <code>"12.50"</code>.
	 * Surrounding whitespace and a case-insensitive <code>US$</code> prefix are
	 * ignored.
	 *
	 * @param price
	 *            the price text; may be {@code null}
	 * @return the numeric price, or -1 when {@code price} is {@code null} or is
	 *         not a number
	 */
	public static float getPrice(String price) {
		if (price == null)
			return -1;

		// Locale.ROOT keeps the prefix match independent of the default locale.
		String text = price.trim().toLowerCase(java.util.Locale.ROOT);
		if (text.startsWith("us$"))
			text = text.substring("us$".length());
		text = text.trim();

		try {
			return Float.parseFloat(text);
		} catch (NumberFormatException e) {
			System.err.println("Unparseable price: '" + price + "'");
			return -1;
		}
	}

	/**
	 * Strips HTML tags from {@code content} and replaces every
	 * <code>&amp;nbsp;</code> entity with a plain space.
	 *
	 * @param content
	 *            HTML fragment; may be {@code null}
	 * @return the text without tags, or {@code null} when {@code content} is
	 *         {@code null}
	 */
	public static String dropHtmlFlag(String content) {
		if (content == null)
			return null;
		// (?s) lets '.' match line terminators so tags spanning several lines
		// are removed as well.
		return content.replaceAll("(?s)</?.+?>", "").replaceAll("&nbsp;", " ");
	}

	/**
	 * Maps an image URL to the relative path used by the exported pages:
	 * <code>&lt;first two characters of the MD5 of the file name&gt;/&lt;file
	 * name&gt;</code>. The file name and its MD5 come from
	 * {@code Util.getImageFileName} and {@code Util.md5Encoding}.
	 *
	 * @param imageUrl
	 *            source image URL; may be {@code null}
	 * @return the relative image path, or {@code null} when {@code imageUrl} is
	 *         {@code null}
	 */
	public static String getImageUrl(String imageUrl) {
		if (imageUrl == null)
			return null;

		String name = Util.getImageFileName(imageUrl);
		String folder = Util.md5Encoding(name).substring(0, 2);
		return folder + "/" + name;
	}

	/**
	 * Normalises a price string to <code>US$</code> followed by the amount
	 * rounded to whole cents.
	 *
	 * @param prize
	 *            price text accepted by {@link #getPrice(String)}
	 * @return the formatted price; <code>"US$-1.0"</code> when the input cannot
	 *         be parsed
	 */
	public static String changePrize(String prize) {
		double p = getPrice(prize);
		// p = p * (0.75 + Math.random() * 0.05);

		// Round instead of truncating: the float 19.99 widens to 19.98999...,
		// which a plain (int) cast of p * 100 would turn into 19.98.
		double cents = Math.round(p * 100);
		return "US$" + (cents / 100);
	}

	/**
	 * Appends one table row for {@code item} to {@code sb} and appends the
	 * item's full, tag-stripped description to the shared descriptions page.
	 *
	 * <p>
	 * Missing (null) product name, list price, description and limit number are
	 * replaced on {@code item} with an empty string, a missing unit price with
	 * <code>"0"</code>. An item without any image URL gets an empty image cell.
	 *
	 * @param sb
	 *            buffer holding the category page being built
	 * @param item
	 *            the product to render; its null text fields are overwritten
	 */
	public static void insertProduct(StringBuilder sb, Item item) {
		// Disabled: randomly drop one product in five.
		// if (Math.abs(r.nextInt()) % 5 == 0) return;

		if (item.getProductName() == null)
			item.setProductName("");
		if (item.getListPrice() == null)
			item.setListPrice("");
		if (item.getDescription() == null)
			item.setDescription("");
		if (item.getLimitNumber() == null)
			item.setLimitNumber("");
		if (item.getUnitPrice() == null)
			item.setUnitPrice("0");

		String desc = dropHtmlFlag(item.getDescription());

		String[] urls = item.getImageUrls();
		String image = null;
		if (urls != null && urls.length > 0) {
			image = getImageUrl(urls[0]);
		}

		sb.append("<tr>");
		sb.append("<td>").append(item.getProductName()).append("</td>");

		sb.append("<td>");
		if (image != null) {
			// The link opens the file under imagesl/, the inline image is
			// taken from images/.
			sb.append("<a target=_blank href='imagesl/").append(image)
					.append("'><img border=0 src='images/").append(image)
					.append("'></a>");
		}
		sb.append("</td>");

		sb.append("<td>").append(item.getListPrice()).append("</td>");
		sb.append("<td>").append(changePrize(item.getUnitPrice())).append(
				"</td>");
		sb.append("<td>").append(dropHtmlText(desc, item)).append("</td>");
		sb.append("<td>").append(item.getLimitNumber()).append("</td>");
		// Six cells, matching the six header columns.
		sb.append("</tr>").append("\r\n");

		SB_DESC.append("<br><br><br><div>");
		SB_DESC.append("<a name='").append(item.getId()).append("'>Item #")
				.append(item.getId()).append("</a><br><br><br>");
		SB_DESC.append(desc);
		SB_DESC.append("</div>");
	}

	/**
	 * Shortens a description for the product table. Text longer than
	 * {@link #MAX_DESC_LENGTH} characters is cut at that length and followed by
	 * a "MORE" link to the item's anchor in <code>descs.html</code>.
	 *
	 * @param desc
	 *            tag-free description; may be {@code null}
	 * @param item
	 *            the product whose id is used for the "MORE" link; only read
	 *            when the description is actually shortened
	 * @return the possibly shortened description, or an empty string when
	 *         {@code desc} is {@code null}
	 */
	public static String dropHtmlText(String desc, Item item) {
		if (desc == null)
			return "";
		if (desc.length() > MAX_DESC_LENGTH) {
			desc = desc.substring(0, MAX_DESC_LENGTH);
			desc += "...<a href='descs.html#" + item.getId() + "'>MORE</a>";
		}
		return desc;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -