⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 searchresultdata.java

📁 利用多线程从搜索引擎下载网页并提取数据到数据库。
💻 JAVA
字号:
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

/**
 * 
 * 
 * @author James Don
 * @date 2007-11-9
 */

public class SearchResultData {

	private Connection cnn;

	public SearchResultData(Connection cnn) {
		this.cnn = cnn;
	}

	public synchronized void insert(String profile_url) {

		int search_engine_id = 2; // 2 for google
		String profile_cached_url = "";

		PreparedStatement ps;
		try {
			ps = cnn
					.prepareStatement("insert into search_results(search_engine_id,profile_url,profile_cached_url) select ?,?,?");
			ps.setInt(1, search_engine_id);
			ps.setString(2, profile_url);
			ps.setString(3, profile_cached_url);
			ps.execute();
		} catch (SQLException e) {
			System.out.println(e.toString());
		}

	}

	public synchronized void updateCacheUrl(String profile_cached_url,
			int searchEngineID, int rowID) {
		PreparedStatement ps;
		try {
			ps = cnn
					.prepareStatement("update search_results set search_engine_id=?, profile_cached_url=? where id=?");
			ps.setInt(1, searchEngineID);
			ps.setString(2, profile_cached_url);
			ps.setInt(3, rowID);
			ps.execute();
		} catch (SQLException e) {
			System.out.println(e.toString());
		}
	}

	public synchronized void updateCacheUrlDownloadTagOK(int tag, int rowID) {
		PreparedStatement ps;
		try {
			ps = cnn
					.prepareStatement("update search_results set download_tag=? where id=?");
			ps.setInt(1, tag);
			ps.setInt(2, rowID);
			ps.execute();
		} catch (SQLException e) {
			System.out.println(e.toString());
		}
	}

	public synchronized void updateCacheUrlDownloadTagFailed(int tag, int rowID) {
		PreparedStatement ps;
		try {
			ps = cnn
					.prepareStatement("update search_results set download_tag=download_tag+? where id=?");
			ps.setInt(1, tag);
			ps.setInt(2, rowID);
			ps.execute();
		} catch (SQLException e) {
			System.out.println(e.toString());
		}
	}

	public synchronized int insert(Map<String, String> links)
			throws SQLException {
		int search_engine_id = 2; // 2 for google
		PreparedStatement ps;
		int insertCount = 0;
		String error = "";
		for (Iterator<Entry<String, String>> it = (Iterator<Entry<String, String>>) links
				.entrySet().iterator(); it.hasNext();) {
			Entry<String, String> entry = (Entry<String, String>) it.next();
			ps = cnn
					.prepareStatement("insert into search_results(search_engine_id,profile_url,profile_cached_url) select ?,?,?");
			ps.setInt(1, search_engine_id);
			ps.setString(2, entry.getKey());
			ps.setString(3, entry.getValue());
			ps.execute();
			insertCount += 1;

		}
		System.out.println(error);
		return insertCount;
	}

	public synchronized int insert(Set<String> links,int search_engine_id) throws SQLException {
		PreparedStatement ps;
		int insertCount = 0;
		String error = "";
		ps = cnn
				.prepareStatement("insert into search_results(profile_url,profile_cached_url,search_engine_id) select ?,?,?");
		for (String s : links) {
			// System.out.println(s);
			int start, end;
			start = s.indexOf("www");
			//end = s.indexOf("+http");
			end = s.indexOf("&d=");
		
			if (start > 0 && end > start) {
				try {

					ps
							.setString(1, "http://"
									+ s.substring(start, end));
					ps.setString(2, s);
					ps.setInt(3, search_engine_id);
					ps.execute();
					insertCount += 1;
				} catch (com.mysql.jdbc.exceptions.MySQLIntegrityConstraintViolationException e) {
					// System.out.println("Error Code: " + e.getErrorCode());
					//e.printStackTrace();
					if (e.getErrorCode() != 1062)
						throw e;
				}
			}
			// com.mysql.jdbc.exceptions.MySQLIntegrityConstraintViolationException:
			// Duplicate
			// entry 'www.linkedin.com/pub/0/3/b' for key 2Establish a database
			// connection spe
		}

		return insertCount;
	}

	public synchronized void updateCacheUrlFail(int tag, int rowID) {
		PreparedStatement ps;
		try {
			ps = cnn
					.prepareStatement("update search_results set search_engine_id=search_engine_id+? where search_engine_id<0 and id=?");
			ps.setInt(1, tag);
			ps.setInt(2, rowID);
			ps.execute();
		} catch (SQLException e) {
			e.printStackTrace();
		}

	}

	public static ResultSet getUrls(int maxCount) throws SQLException {
		CallableStatement ps;
		// ResultSet rs;
		ps = DataAccess.getConnection().prepareCall("{ call getUrls(?) }");
		ps.setInt(1, maxCount);
		return ps.executeQuery();
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -