⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 util.java

📁 Light in the box 抓取程序。 使用HttpClient
💻 JAVA
字号:
package com.blogool.crawl;

import java.io.*;

import com.blogool.crawl.lib.Cat;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.DomDriver;

/**
 * Static helpers used by the crawler: MD5 and Base64 encoding, XML
 * persistence of {@link Cat} trees through XStream, simple text/binary file
 * I/O, and derivation of file names from URLs and category names.
 *
 * <p>
 * All I/O helpers are best-effort: failures are reported with
 * {@code printStackTrace()} and never propagated to the caller.
 */
public class Util {
	/**
	 * Page count setting. It is not referenced inside this class; presumably
	 * the crawler reads it as a number of pages to process (TODO confirm
	 * against callers). Kept non-final because callers may assign it.
	 */
	public static int PAGE_COUNT = 20;

	/** Upper-case hexadecimal digits, indexed by nibble value. */
	private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5',
			'6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

	/**
	 * Characters that {@link #modifyName(String)} replaces with '-':
	 * {@code ? ( ) / \ * < > | " '}.
	 */
	private static final java.util.regex.Pattern UNSAFE_NAME_CHARS = java.util.regex.Pattern
			.compile("[?()/\\\\*<>|\"']");

	/**
	 * Computes the MD5 digest of a string as 32 lower-case hex characters.
	 *
	 * <p>
	 * The string is converted to bytes with the platform default charset, so
	 * the digest of non-ASCII text can differ between platforms.
	 *
	 * @param source
	 *            text to hash; may be {@code null}
	 * @return the lower-case hex digest, or {@code null} when {@code source}
	 *         is {@code null} or the MD5 algorithm is unavailable
	 */
	public static String md5Encoding(String source) {
		if (source == null) {
			return null;
		}
		try {
			java.security.MessageDigest md5 = java.security.MessageDigest
					.getInstance("MD5");
			byte[] digest = md5.digest(source.getBytes());
			StringBuilder hex = new StringBuilder(digest.length * 2);
			for (int i = 0; i < digest.length; i++) {
				hex.append(byteHEX(digest[i]));
			}
			return hex.toString().toLowerCase();
		} catch (java.security.NoSuchAlgorithmException ex) {
			// Report instead of silently swallowing; callers still get null.
			ex.printStackTrace();
			return null;
		}
	}

	/**
	 * Converts one byte to its two-character upper-case hexadecimal form.
	 *
	 * @param ib
	 *            the byte to convert
	 * @return two hex digits, high nibble first (for example {@code "0A"})
	 */
	public static String byteHEX(byte ib) {
		// Masking with 0x0F discards the sign-extension bits of the
		// int-promoted byte.
		char[] ob = { HEX_DIGITS[(ib >>> 4) & 0x0F], HEX_DIGITS[ib & 0x0F] };
		return new String(ob);
	}

	/**
	 * Base64-encodes the bytes of a string (platform default charset).
	 *
	 * <p>
	 * NOTE(review): {@code sun.misc.BASE64Encoder} is an internal, unsupported
	 * JDK class. {@code java.util.Base64} is the supported replacement, but
	 * its output for long input may be wrapped differently - verify against
	 * existing encoded names before switching.
	 *
	 * @param s
	 *            text to encode; may be {@code null}
	 * @return the Base64 text, or {@code null} when {@code s} is {@code null}
	 */
	public static String getBASE64(String s) {
		if (s == null) {
			return null;
		}
		return new sun.misc.BASE64Encoder().encode(s.getBytes());
	}

	/**
	 * Decodes Base64 text back into a string (platform default charset).
	 *
	 * @param s
	 *            Base64 text; may be {@code null}
	 * @return the decoded text, or {@code null} when {@code s} is
	 *         {@code null} or decoding fails with an {@link IOException}
	 */
	public static String getUnBASE64(String s) {
		if (s == null) {
			return null;
		}
		try {
			byte[] decoded = new sun.misc.BASE64Decoder().decodeBuffer(s);
			return new String(decoded);
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Serializes a category tree to an XML file with XStream. Failures are
	 * printed and otherwise ignored.
	 *
	 * @param root
	 *            the object to serialize
	 * @param f
	 *            the destination file
	 */
	public static void saveCat(Cat root, File f) {
		// Uses XStream's default driver to write the data.
		XStream sm = new XStream();
		FileOutputStream ops = null;
		try {
			ops = new FileOutputStream(f);
			sm.toXML(root, ops);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndReport(ops);
		}
	}

	/**
	 * Reads a category tree from an XML file with XStream (DOM driver).
	 *
	 * <p>
	 * NOTE(review): deserializing XML with XStream from an untrusted source
	 * is unsafe; this should only be used on files the program wrote itself.
	 *
	 * @param f
	 *            the XML file to read
	 * @return the deserialized {@link Cat}, or {@code null} on any failure
	 */
	public static Cat loadCat(File f) {
		XStream sm = new XStream(new DomDriver());
		FileInputStream fis = null;
		try {
			fis = new FileInputStream(f);
			return (Cat) sm.fromXML(fis);
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			closeAndReport(fis);
		}
	}

	/**
	 * Maintenance entry point: renames every file in {@code d:/libox1/test}
	 * from its Base64-encoded name to the decoded name.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		File f = new File("d:/libox1/test");
		File[] files = f.listFiles();
		if (files == null) {
			// listFiles() returns null when the path is not a readable
			// directory.
			System.err.println("Cannot list directory: " + f.getPath());
			return;
		}
		for (int i = 0; i < files.length; i++) {
			String fn = Util.getUnBASE64(files[i].getName());
			if (fn == null || fn.length() == 0) {
				// Undecodable name: leave the file untouched.
				System.err.println("Skipping (cannot decode name): "
						+ files[i].getPath());
				continue;
			}
			if (!files[i].renameTo(new File(f, fn))) {
				System.err.println("Rename failed: " + files[i].getPath()
						+ " -> " + fn);
			}
		}
	}

	/**
	 * Writes text to a file, replacing any existing content. Uses the
	 * platform default charset ({@link FileWriter}). Failures are printed and
	 * otherwise ignored.
	 *
	 * @param content
	 *            the text to write
	 * @param f
	 *            the destination file
	 */
	public static void saveContent(String content, File f) {
		BufferedWriter bw = null;
		try {
			bw = new BufferedWriter(new FileWriter(f));
			bw.write(content);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndReport(bw);
		}
	}

	/**
	 * Writes raw bytes to a file, replacing any existing content. Failures
	 * are printed and otherwise ignored.
	 *
	 * @param bytes
	 *            the data to write
	 * @param f
	 *            the destination file
	 */
	public static void saveData(byte[] bytes, File f) {
		OutputStream os = null;
		try {
			os = new BufferedOutputStream(new FileOutputStream(f));
			os.write(bytes);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndReport(os);
		}
	}

	/**
	 * Reads a text file, terminating every line with {@code "\n"}.
	 *
	 * @param f
	 *            the file to read
	 * @return the text read; on failure, whatever was read before the error
	 *         (possibly the empty string)
	 */
	public static String getContentOrignal(File f) {
		return readLines(f, "\n");
	}

	/**
	 * Reads a text file with all line terminators removed, i.e. the lines
	 * are concatenated directly. Prints a progress message to standard
	 * output first.
	 *
	 * @param f
	 *            the file to read; must not be {@code null}
	 * @return the concatenated lines; on failure, whatever was read before
	 *         the error (possibly the empty string)
	 */
	public static String getContent(File f) {
		System.out.println("获取文件" + f.getPath() + "内容.");
		return readLines(f, "");
	}

	/**
	 * Replaces characters that are unsafe in file names
	 * ({@code ? ( ) / \ * < > | " '}) with {@code '-'}.
	 *
	 * @param name
	 *            the category name
	 * @return the sanitized name, or {@code null} (after printing a message)
	 *         when {@code name} is {@code null} or blank
	 */
	public static String modifyName(String name) {
		if (name == null || name.trim().length() == 0) {
			System.out.println("Cat name is null");
			return null;
		}
		// A regex replacement is required here: String.replace() would look
		// for the character-class text literally and never match.
		return UNSAFE_NAME_CHARS.matcher(name).replaceAll("-");
	}

	/**
	 * Finds the first second-level category whose URL or name contains the
	 * given text, ignoring case. Only the children of {@code root}'s direct
	 * children are examined. An empty {@code fileName} matches the first
	 * such category that has a non-null URL or name.
	 *
	 * @param fileName
	 *            the text to look for; may be {@code null}
	 * @param root
	 *            the root of the category tree; may be {@code null}
	 * @return the first matching category, or {@code null} when there is no
	 *         match or an argument is {@code null}
	 */
	public static Cat getCatByFileName(String fileName, Cat root) {
		if (fileName == null || root == null || root.getCats() == null) {
			return null;
		}
		String needle = fileName.toLowerCase();

		for (int i = 0; i < root.getCats().size(); i++) {
			Cat parent = root.getCats().get(i);
			if (parent == null || parent.getCats() == null) {
				continue;
			}
			for (int j = 0; j < parent.getCats().size(); j++) {
				Cat child = parent.getCats().get(j);
				if (child == null) {
					continue;
				}
				if (containsLowerCased(child.getUrl(), needle)
						|| containsLowerCased(child.getCatName(), needle)) {
					return child;
				}
			}
		}
		return null;
	}

	/**
	 * Returns the part of a URL after its last underscore.
	 *
	 * @param url
	 *            the URL; surrounding whitespace is ignored
	 * @return the text after the last {@code '_'}, or {@code null} when
	 *         {@code url} is {@code null}, blank, or has no underscore
	 */
	public static String getItemFileName(String url) {
		return textAfterLast(url, "_");
	}

	/**
	 * Returns the part of a URL after its last slash.
	 *
	 * @param url
	 *            the URL; surrounding whitespace is ignored
	 * @return the text after the last {@code '/'}, or {@code null} when
	 *         {@code url} is {@code null}, blank, or has no slash
	 */
	public static String getImageFileName(String url) {
		return textAfterLast(url, "/");
	}

	/** Closes a resource if it is non-null, printing any failure. */
	private static void closeAndReport(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads a file line by line (platform default charset), appending
	 * {@code lineTerminator} after each line; failures are printed.
	 */
	private static String readLines(File f, String lineTerminator) {
		StringBuilder sb = new StringBuilder();
		BufferedReader br = null;
		try {
			br = new BufferedReader(new FileReader(f));
			String line;
			while ((line = br.readLine()) != null) {
				sb.append(line).append(lineTerminator);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndReport(br);
		}
		return sb.toString();
	}

	/** Null-safe test that lower-cased {@code text} contains {@code lowerNeedle}. */
	private static boolean containsLowerCased(String text, String lowerNeedle) {
		return text != null && text.toLowerCase().indexOf(lowerNeedle) >= 0;
	}

	/**
	 * Returns the trimmed text after the last {@code separator}, or
	 * {@code null} when the input is null/blank or has no separator.
	 */
	private static String textAfterLast(String url, String separator) {
		if (url == null) {
			return null;
		}
		String trimmed = url.trim();
		if (trimmed.length() == 0) {
			return null;
		}
		int pos = trimmed.lastIndexOf(separator);
		return pos >= 0 ? trimmed.substring(pos + 1) : null;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -