⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sqlworkloadmanager.java

📁 VHDL制作的ann的code
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * Encog Neural Network and Bot Library for Java v1.x * http://www.heatonresearch.com/encog/ * http://code.google.com/p/encog-java/ *  * Copyright 2008, Heaton Research Inc., and individual contributors. * See the copyright.txt in the distribution for a full listing of  * individual contributors. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */package org.encog.bot.spider.workload.sql;import java.net.MalformedURLException;import java.net.URL;import java.sql.ResultSet;import java.sql.SQLException;import java.util.concurrent.CountDownLatch;import java.util.concurrent.Semaphore;import java.util.concurrent.TimeUnit;import java.util.logging.Level;import java.util.logging.Logger;import org.encog.bot.spider.Spider;import org.encog.bot.spider.workload.WorkloadError;import org.encog.bot.spider.workload.WorkloadManager;import org.encog.util.db.DBError;import org.encog.util.db.RepeatableStatement;import org.encog.util.db.RepeatableConnection;/** * SQLWorkloadManager: This workload manager stores the URL lists in an SQL * database. 
This workload manager uses two tables, which can be created as
 * follows:
 * 
 * CREATE TABLE 'spider_host' ( 'host_id' int(10) unsigned NOT NULL
 * auto_increment, 'host' varchar(255) NOT NULL default '', 'status' varchar(1)
 * NOT NULL default '', 'urls_done' int(11) NOT NULL, 'urls_error' int(11) NOT
 * NULL, PRIMARY KEY ('host_id') )
 * 
 * CREATE TABLE 'spider_workload' ( 'workload_id' int(10) unsigned NOT NULL
 * auto_increment, 'host' int(10) unsigned NOT NULL, 'url' varchar(2083) NOT
 * NULL default '', 'status' varchar(1) NOT NULL default '', 'depth' int(10)
 * unsigned NOT NULL, 'url_hash' int(11) NOT NULL, 'source_id' int(11) NOT NULL,
 * PRIMARY KEY ('workload_id'), KEY 'status' ('status'), KEY 'url_hash'
 * ('url_hash'), KEY 'host' ('host') )
 */
public class SQLWorkloadManager implements WorkloadManager {

    /**
     * The logger. Note that the logger name is a hard-coded string and does
     * not match this class's package (org.encog.bot.spider.workload.sql).
     */
    private static Logger logger = Logger.getLogger(
            "com.heatonresearch.httprecipes.spider.workload.sql.SQLWorkloadManager");

    /**
     * The value used to reduce a URL's String hash code into the stored URL
     * hash. Despite the name, computeHash applies it with the remainder
     * operator (%) rather than a bitwise AND, so the resulting hash can be
     * negative.
     */
    public static final int HASH_MASK = 0xffff;

    /**
     * The SQL holder to use.
     * NOTE(review): initialized directly with new SQLHolder() rather than via
     * createSQLHolder() - confirm whether subclasses are expected to replace
     * it elsewhere.
     */
    private final SQLHolder holder = new SQLHolder();

    /**
     * Prepared statement to clear the workload; the first statement run by
     * clear().
     */
    private RepeatableStatement stmtClear;

    /**
     * Prepared statement to clear the hosts; the second statement run by
     * clear().
     */
    private RepeatableStatement stmtClear2;

    /**
     * Prepared statement to add a workload entry. add() executes it with the
     * host id, the URL, a waiting status, the depth, the URL hash and the
     * source id (0 when there is no source).
     */
    private RepeatableStatement stmtAdd;

    /**
     * Prepared statement to add a host. add() executes it with the host name,
     * a waiting status and two zero values when the host is not yet known
     * (presumably the urls_done / urls_error counters - confirm against the
     * SQL).
     */
    private RepeatableStatement stmtAdd2;

    /**
     * Prepared statement to get work.
     */
    private RepeatableStatement stmtGetWork;

    /**
     * Prepared statement to get work.
     * NOTE(review): how this differs from stmtGetWork is not documented -
     * confirm against the SQL holder.
     */
    private RepeatableStatement stmtGetWork2;

    /**
     * Prepared statement for the "workload empty" query.
     * NOTE(review): the name suggests this tests whether the workload is
     * empty rather than emptying it - confirm.
     */
    private RepeatableStatement stmtWorkloadEmpty;

    /**
     * Prepared statement for the status of a URL.
     * NOTE(review): this comment previously said "get" although the field
     * name says "set" - confirm which one the SQL does.
     */
    private RepeatableStatement stmtSetWorkloadStatus;

    /**
     * Prepared statement to set the status of a URL.
     */
    private RepeatableStatement stmtSetWorkloadStatus2;

    /**
     * Prepared statement to get the depth of a URL. getDepth() runs it with
     * the URL's hash and reads the URL from column 1 and the depth from
     * column 2 of each row.
     */
    private RepeatableStatement stmtGetDepth;

    /**
     * Prepared statement to get the source of a URL.
     */
    private RepeatableStatement stmtGetSource;

    /**
     * Prepared statement to resume.
     */
    private RepeatableStatement stmtResume;

    /**
     * Prepared statement to resume.
     * NOTE(review): how this differs from stmtResume is not documented -
     * confirm against the SQL holder.
     */
    private RepeatableStatement stmtResume2;

    /**
     * Prepared statement to get a URL's id.
     */
    private RepeatableStatement stmtGetWorkloadID;

    /**
     * Prepared statement to get a host id.
     */
    private RepeatableStatement stmtGetHostID;

    /**
     * Prepared statement to get the next host.
     */
    private RepeatableStatement stmtGetNextHost;

    /**
     * Prepared statement to set a host's status. add() executes it with the
     * new status followed by the host id.
     */
    private RepeatableStatement stmtSetHostStatus;

    /**
     * Prepared statement to get a host. getHost() runs it with a host id and
     * reads the host name from column 1.
     */
    private RepeatableStatement stmtGetHost;

    /**
     * Only one thread at a time is allowed to add to the workload. add()
     * acquires this before touching the database and releases it when done.
     */
    private Semaphore addLock;

    /**
     * Is there any work? add() counts this latch down after a URL has been
     * added.
     */
    private CountDownLatch workLatch;

    /**
     * The maximum size a URL can be. convertURL() treats -1 as "no limit".
     */
    private int maxURLSize;

    /**
     * The maximum size that a host can be; add() truncates host names to
     * this length.
     */
    private int maxHostSize;

    /**
     * Used to obtain the next URL. close() closes it and resets it to null.
     */
    private RepeatableStatement.Results workResultSet = null;

    /**
     * Used to obtain the next host.
     */
    private RepeatableStatement.Results hostResultSet = null;

    /**
     * A connection to a JDBC database. close() closes it when it is not null.
     */
    private RepeatableConnection connection;

    /**
     * The current host. Set by add() when no current host has been chosen
     * yet.
     */
    private String currentHost;

    /**
     * The ID of the current host. -1 means no current host has been chosen
     * yet; add() then adopts the host of the URL being added.
     */
    private int currentHostID = -1;

    /**
     * Add the specified URL to the workload.
     * 
     * @param url
     *            The URL to be added.
     * @param source
     *            The page that contains this URL.
     * @param depth
     *            The depth of this URL.
     * @return True if the URL was added, false otherwise.
* @throws WorkloadException	 */	public boolean add(final URL url, final URL source, final int depth) {		boolean result = false;		try {			this.addLock.acquire();			if (!contains(url)) {				final String strURL = truncate(url.toString(), this.maxURLSize);				final String strHost = truncate(url.getHost(), this.maxHostSize)						.toLowerCase();				result = true;				// get the host				int hostID = getHostID(url, false);				if (hostID == -1) {					this.stmtAdd2.execute(strHost, Status.STATUS_WAITING, 0, 0);					hostID = getHostID(url, true);				}				// need to set the current host for the first time?				if (this.currentHostID == -1) {					this.currentHostID = hostID;					this.currentHost = strHost;					this.stmtSetHostStatus.execute(Status.STATUS_PROCESSING,							this.currentHostID);				}				// now add workload element				if (source != null) {					final int sourceID = getWorkloadID(source, true);					this.stmtAdd.execute(hostID, strURL, Status.STATUS_WAITING,							depth, computeHash(url), sourceID);				} else {					this.stmtAdd.execute(hostID, strURL, Status.STATUS_WAITING,							depth, computeHash(url), 0);				}				this.workLatch.countDown();			}		} catch (final InterruptedException e) {			throw new WorkloadError(e);					} catch (final SQLException e) {			throw new WorkloadError(e);		} finally {			this.addLock.release();		}		return result;	}	/**	 * Clear the workload.	 */	public void clear() {		this.stmtClear.execute();		this.stmtClear2.execute();	}	/**	 * Close the workload manager.	 */	public void close() {		if (this.workResultSet != null) {			try {				this.workResultSet.close();			} catch (final Exception e) {				logger						.log(Level.SEVERE, "Error trying to close workload result set, ignoring...");			}			this.workResultSet = null;		}		if (this.connection != null) {			this.connection.close();		}	}	/**	 * Compute a hash for a URL.	 * 	 * @param url	 *            The URL to compute the hash for.	 * @return The hash code.	 
*/	private int computeHash(final URL url) {		final String str = url.toString().trim();		int result = str.hashCode();		result = result % SQLWorkloadManager.HASH_MASK;		return result;	}	/**	 * Determine if the workload contains the specified URL.	 * 	 * @param url	 *            The URL to search the workload for.	 * @return True of the workload contains the specified URL. @	 */	public boolean contains(final URL url) {		try {			return getWorkloadID(url, false) != -1;		} catch (final SQLException e) {			throw new WorkloadError(e);		}	}	/**	 * Convert the specified String to a URL. If the string is too long or has	 * other issues, throw a WorkloadException.	 * 	 * @param aurl	 *            A String to convert into a URL.	 * @return The URL. @ Thrown if, The String could not be converted.	 */	public URL convertURL(final String aurl) {		URL result = null;		final String url = aurl.trim();		if (this.maxURLSize != -1 && url.length() > this.maxURLSize) {			throw new WorkloadError("URL size is too big, must be under "					+ this.maxURLSize + " bytes.");		}		try {			result = new URL(url);		} catch (final MalformedURLException e) {			throw new WorkloadError(e);		}		return result;	}	/**	 * Create the correct type of SQL holder for this workload managers.	 * This will likely be overridden by subclasses.	 * @return A SQL holder.	 */	public SQLHolder createSQLHolder() {		return new SQLHolder();	}	/**	 * Return the size of the specified column.	 * 	 * @param table	 *            The table that contains the column.	 * @param column	 *            The column to get the size for.	 * @return The size of the column.	 
*/	public int getColumnSize(final String table, final String column) {		try {			final ResultSet rs = this.connection.getConnection().getMetaData()					.getColumns(null, null, table, null);			while (rs.next()) {				final String c = rs.getString("COLUMN_NAME");				final int size = rs.getInt("COLUMN_SIZE");				if (c.equalsIgnoreCase(column)) {					return size;				}			}			return -1;		} catch (final SQLException e) {			throw new DBError(e);		}	}	/**	 * @return the connection	 */	public RepeatableConnection getConnection() {		return this.connection;	}	/**	 * Get the current host.	 * 	 * @return The current host.	 */	public String getCurrentHost() {		return this.currentHost;	}	/**	 * Get the depth of the specified URL.	 * 	 * @param url	 *            The URL to get the depth of.	 * @return The depth of the specified URL. @ Thrown if the depth could not	 *         be found.	 */	public int getDepth(final URL url) {		RepeatableStatement.Results rs = null;		try {			rs = this.stmtGetDepth.executeQuery(computeHash(url));			while (rs.getResultSet().next()) {				final String u = rs.getResultSet().getString(1);				if (u.equals(url.toString())) {					return rs.getResultSet().getInt(2);				}			}			return 1;		} catch (final SQLException e) {			throw new WorkloadError(e);		} finally {			if (rs != null) {				rs.close();			}		}	}	/**	 * Get the host name associated with the specified host id.	 * 	 * @param hostID	 *            The host id to look up.	 * @return The name of the host. @ Thrown if unable to obtain the host name.	 */	private String getHost(final int hostID) {		RepeatableStatement.Results rs = null;		try {			rs = this.stmtGetHost.executeQuery(hostID);			if (!rs.getResultSet().next()) {				throw new WorkloadError("Can't find previously created host.");			}			return rs.getResultSet().getString(1);		} catch (final SQLException e) {			throw new WorkloadError(e);		} finally {			if (rs != null) {				rs.close();			}		}	}	/**	 * Get the id for the specified host name.	 
* 	 * @param host	 *            The host to lookup.	 * @param require	 *            Should an exception be thrown if the host is not located.	 * @return The id of the specified host name. @ Thrown if the host id is not	 *         found, and is required.	 * @throws SQLException	 *             Thrown if a SQL error occurs.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -