⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 datatransfer.java

📁 利用多线程从搜索引擎下载网页并提取数据到数据库。
💻 JAVA
字号:
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Bookkeeping and batch-dump helper for copying the <code>people</code> table and
 * its dependent tables between two MySQL connections.
 *
 * <p>An instance remembers, per table, an id value read from the remote database,
 * plus the person-id watermark and batch number used by {@link #dump()}. The
 * instance fields can be persisted to and restored from {@link #sFile} with Java
 * serialization; the static connections are not part of the serialized state.
 */
public class DataTransfer implements Serializable {

	/** Table name to id value; seeded with -1 by initVars() and overwritten by getMaxIDs(). */
	Map<String, Integer> tables;
	/** Numeric suffix of the dump file names; incremented by every successful dump(). */
	int batchNo;
	/** Exclusive lower bound of the person ids covered by the next dump(). */
	int max_person_id;
	/** Width of the id range exported by a single dump(). */
	static final int rowsPerBatch = 400000;
	/** File that holds the serialized DataTransfer state. */
	static final String sFile = "f:\\DataTransfer.dat";
	/** File name prefix for dumps of the people table. */
	static final String dumpFile1 = "R:\\dump_people";
	/** File name prefix for dumps of the tables keyed by person_id. */
	static final String dumpFile2 = "R:\\dump_others";
	/** Connection the max ids are read from; must be assigned before use. */
	static Connection cnnRemote;
	/** Connection the marker rows are written to and hasMore() queries. */
	static Connection cnnLocal;
	private static final long serialVersionUID = 1234567890L;

	/**
	 * Reads the max id of every tracked table from the remote connection, inserts a
	 * row carrying that id into the same table on the local connection, records the
	 * max id of <code>people</code> as the dump watermark and serializes the state.
	 *
	 * @param args ignored
	 * @throws IllegalStateException if initConnection() left a connection unset
	 */
	public static void main(String[] args) {

		try {
			DataTransfer dt = new DataTransfer();
			initConnection();
			// Fail with a clear message instead of a NullPointerException deep
			// inside the JDBC helpers when the connections were never wired up.
			requireConnection(cnnRemote, "cnnRemote");
			requireConnection(cnnLocal, "cnnLocal");
			dt.initVars();
			dt.getMaxIDs(cnnRemote);
			dt.insertRows(cnnLocal);
			dt.max_person_id = dt.tables.get("people").intValue();
			DataTransfer.writeObject(dt);
		} catch (SQLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

	/**
	 * Hook for assigning {@link #cnnRemote} and {@link #cnnLocal}.
	 *
	 * <p>The assignments are disabled in this file, so the method currently leaves
	 * both connections unset. NOTE(review): DataAccess / DataAccessLocal are not
	 * visible here; re-enable the lines below once they are available.
	 *
	 * @throws SQLException declared for the connection factories
	 */
	public static void initConnection() throws SQLException {
		// deserialize();

		// cnnRemote = DataAccess.getConnection();
		// cnnLocal = DataAccessLocal.getConnection();
	}

	/**
	 * Calls {@link #dump()} repeatedly for as long as the local <code>people</code>
	 * table contains rows above the current watermark.
	 *
	 * @throws SQLException if the watermark query fails
	 * @throws IOException if a dump command cannot be run or reports failure
	 */
	public void dumpAll() throws SQLException, IOException {
		while (hasMore()) {
			dump();
		}
	}

	/**
	 * Tells whether the local <code>people</code> table has at least one row whose
	 * id is greater than {@link #max_person_id}.
	 */
	private boolean hasMore() throws SQLException {
		requireConnection(cnnLocal, "cnnLocal");
		Statement stmt = cnnLocal.createStatement();
		try {
			ResultSet rs = stmt
					.executeQuery("select count(*) from people where id>"
							+ max_person_id);
			rs.next();
			return rs.getInt(1) > 0;
		} finally {
			// Closing the statement also closes the result set it produced.
			stmt.close();
		}
	}

	/**
	 * Exports one batch and advances the watermark.
	 *
	 * <p>Runs mysqldump (-t: no CREATE TABLE statements, -K: wrap inserts in
	 * disable/enable keys, -w: row filter) for <code>people</code> rows with id in
	 * (max_person_id, max_person_id + rowsPerBatch] and for the rows of the four
	 * dependent tables whose person_id lies in the same range. Each dump file is
	 * then handed to <code>wiz</code> (presumably a zip tool; confirm). The
	 * commands run one after another, each through the platform shell because they
	 * rely on quoting and output redirection. {@link #max_person_id} and
	 * {@link #batchNo} change only when all four commands succeeded.
	 *
	 * @throws IOException if a command cannot be started, exits with a non-zero
	 *         status, or the wait for it is interrupted
	 */
	public void dump() throws IOException {
		String personIDLow = String.valueOf(max_person_id);
		String personIDHigh = String.valueOf(max_person_id + rowsPerBatch);

		// NOTE(review): the database credentials are embedded in the command
		// lines below. They are kept to preserve behavior, but should move to
		// configuration; failure messages deliberately omit the command text.
		String file1 = dumpFile1 + String.valueOf(batchNo);
		String cmd1 = "mysqldump -ucpaths -pbuckin8t0r -t -K cpaths_profiles people  \"-wid>"
				+ personIDLow + " and id<=" + personIDHigh + "\" > " + file1;
		String cmd11 = "wiz " + file1 + " " + file1 + ".zip";

		// The dependent tables go to their own file; reusing dumpFile1 here would
		// overwrite the people dump that was just written.
		String file2 = dumpFile2 + String.valueOf(batchNo);
		String cmd2 = "mysqldump -ucpaths -pbuckin8t0r -t -K cpaths_profiles activity_groups  educational_destinations honors work_destinations \"-wperson_id>"
				+ personIDLow
				+ " and person_id<="
				+ personIDHigh
				+ "\" > "
				+ file2;
		String cmd22 = "wiz " + file2 + " " + file2 + ".zip";

		runShellCommand(cmd1, "dump of people to " + file1);
		runShellCommand(cmd11, "compression of " + file1);
		runShellCommand(cmd2, "dump of dependent tables to " + file2);
		runShellCommand(cmd22, "compression of " + file2);

		max_person_id += rowsPerBatch;
		batchNo += 1;

	}

	/**
	 * Runs one command line through the platform shell and waits for it to finish.
	 *
	 * <p>A shell is required because the command lines use quoting and
	 * <code>&gt;</code> redirection, which Runtime.exec(String) does not interpret.
	 * Whatever the child still prints (its stderr, and stdout when not redirected)
	 * is forwarded to System.err so the child can never block on a full pipe.
	 *
	 * @param commandLine shell command line to execute
	 * @param label description used in error messages instead of the command line
	 * @throws IOException if the shell cannot be started, the command exits with a
	 *         non-zero status, or the waiting thread is interrupted
	 */
	private static void runShellCommand(String commandLine, String label)
			throws IOException {
		boolean windows = System.getProperty("os.name", "").startsWith("Windows");
		ProcessBuilder builder = windows
				? new ProcessBuilder("cmd", "/c", commandLine)
				: new ProcessBuilder("sh", "-c", commandLine);
		builder.redirectErrorStream(true);

		Process proc = builder.start();
		try {
			// The commands read nothing from stdin.
			proc.getOutputStream().close();

			InputStream childOutput = proc.getInputStream();
			byte[] buf = new byte[4096];
			int n;
			while ((n = childOutput.read(buf)) != -1) {
				System.err.write(buf, 0, n);
			}
			System.err.flush();

			int exitCode = proc.waitFor();
			if (exitCode != 0) {
				throw new IOException(label + " failed with exit status "
						+ exitCode);
			}
		} catch (InterruptedException e) {
			// Keep the interrupt visible to callers further up the stack.
			Thread.currentThread().interrupt();
			throw new IOException("Interrupted while waiting for " + label);
		} finally {
			// Releases the process streams; harmless once the child has exited.
			proc.destroy();
		}
	}

	/**
	 * Restores the state previously stored by {@link #writeObject(DataTransfer)}.
	 *
	 * @return the instance deserialized from {@link #sFile}
	 * @throws ClassNotFoundException if the serialized class cannot be resolved
	 * @throws IOException if the file is missing or unreadable
	 */
	public static DataTransfer readObject() throws ClassNotFoundException, IOException {
		FileInputStream fis = new FileInputStream(sFile);
		try {
			ObjectInputStream ois = new ObjectInputStream(fis);
			return (DataTransfer) ois.readObject();
		} finally {
			// Close the file even when the header or the object cannot be read.
			fis.close();
		}
	}

	/**
	 * Serializes the given instance to {@link #sFile}, replacing any previous file.
	 *
	 * @param dt state to persist
	 * @throws IOException if the file cannot be written
	 */
	public static void writeObject(DataTransfer dt) throws IOException {
		FileOutputStream fos = new FileOutputStream(sFile);
		try {
			ObjectOutputStream oos = new ObjectOutputStream(fos);
			oos.writeObject(dt);
			oos.flush();
		} finally {
			fos.close();
		}
	}

	/** Resets the batch number to 100 and registers the tracked tables with id -1. */
	private void initVars() {
		batchNo = 100;
		tables = new HashMap<String, Integer>();
		tables.put("activity_groups", -1);
		tables.put("educational_destinations", -1);
		tables.put("honors", -1);
		tables.put("people", -1);
		tables.put("work_destinations", -1);
	}

	/**
	 * Inserts, into every tracked table, one row whose id is the value currently
	 * recorded for that table in {@link #tables}.
	 *
	 * @param cnn connection to insert through
	 * @throws SQLException if any statement fails
	 */
	public void insertRows(Connection cnn) throws SQLException {
		for (Map.Entry<String, Integer> entry : tables.entrySet()) {
			insertARow(cnn, entry.getKey(), entry.getValue());
		}
	}

	/**
	 * Replaces the value recorded for every tracked table with that table's
	 * current <code>max(id)</code>.
	 *
	 * @param cnn connection to query
	 * @throws SQLException if any query fails
	 */
	public void getMaxIDs(Connection cnn) throws SQLException {
		for (Map.Entry<String, Integer> entry : tables.entrySet()) {
			entry.setValue(getAMaxID(cnn, entry.getKey()));
		}
	}

	/**
	 * Inserts a single row with the given id into <code>table</code>, with the
	 * table's keys disabled for the duration of the insert.
	 */
	private void insertARow(Connection cnnLocal2, String table, int id)
			throws SQLException {
		Statement stmt = cnnLocal2.createStatement();
		try {
			stmt.execute("alter table " + table + " disable keys");
			try {
				stmt.execute("insert into " + table + "(id) select " + id);
			} finally {
				// Never leave the keys disabled when the insert fails.
				stmt.execute("alter table " + table + " enable keys");
			}
		} finally {
			stmt.close();
		}
	}

	/**
	 * Returns <code>max(id)</code> of <code>table</code>; getInt reports 0 when
	 * the value is SQL NULL, which is what max() yields for an empty table.
	 */
	private int getAMaxID(Connection cnnRemote2, String table)
			throws SQLException {

		Statement stmt = cnnRemote2.createStatement();
		try {
			ResultSet rs = stmt.executeQuery("select max(id) from " + table);
			rs.next();
			return rs.getInt(1);
		} finally {
			stmt.close();
		}
	}

	/** Rejects an unset connection with a message naming the offending field. */
	private static void requireConnection(Connection cnn, String fieldName) {
		if (cnn == null) {
			throw new IllegalStateException(fieldName
					+ " is not initialised; assign it in initConnection()");
		}
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -