📄 jdbcwordsdatasource.java
字号:
/*
* ====================================================================
*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 Nick Lothian. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* developers of Classifier4J (http://classifier4j.sf.net/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The name "Classifier4J" must not be used to endorse or promote
* products derived from this software without prior written
* permission. For written permission, please contact
* http://sourceforge.net/users/nicklothian/.
*
* 5. Products derived from this software may not be called
* "Classifier4J", nor may "Classifier4J" appear in their names
* without prior written permission. For written permission, please
* contact http://sourceforge.net/users/nicklothian/.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*/
package net.sf.classifier4J.bayesian;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import net.sf.classifier4J.ICategorisedClassifier;
/**
*
* <p>A JDBC based datasource. It requires a table of the following structure (tested in MySQL 4):
*
* <pre>
* CREATE TABLE word_probability (
* word VARCHAR(255) NOT NULL,
* category VARCHAR(20) NOT NULL,
* match_count INT DEFAULT 0 NOT NULL,
* nonmatch_count INT DEFAULT 0 NOT NULL,
* PRIMARY KEY(word, category)
* )
* </pre>
*
*</p>
*<p>It will truncate any word longer than 255 characters to 255 characters</p>
*
* @author Nick Lothian
* @author Peter Leschev
*
*/
public class JDBCWordsDataSource implements ICategorisedWordsDataSource {
IJDBCConnectionManager connectionManager;
private Log log = LogFactory.getLog(this.getClass());
/**
* Create a JDBCWordsDataSource using the DEFAULT_CATEGORY ("DEFAULT")
*
* @param cm The connection manager to use
*/
public JDBCWordsDataSource(IJDBCConnectionManager cm) throws WordsDataSourceException {
this.connectionManager = cm;
createTable();
}
public WordProbability getWordProbability(String category, String word) throws WordsDataSourceException {
WordProbability wp = null;
String method = "getWordProbability()";
int matchingCount = 0;
int nonMatchingCount = 0;
Connection conn = null;
try {
conn = connectionManager.getConnection();
PreparedStatement ps = conn.prepareStatement("SELECT match_count, nonmatch_count FROM word_probability WHERE word = ? AND category = ?");
ps.setString(1, word);
ps.setString(2, category);
ResultSet rs = ps.executeQuery();
if (rs.next()) {
matchingCount = rs.getInt("match_count");
nonMatchingCount = rs.getInt("nonmatch_count");
}
wp = new WordProbability(word, matchingCount, nonMatchingCount);
} catch (SQLException e) {
throw new WordsDataSourceException("Problem obtaining WordProbability from database", e);
} finally {
if (conn != null) {
try {
connectionManager.returnConnection(conn);
} catch (SQLException e1) {
// ignore
}
}
}
if (log.isDebugEnabled()) {
log.debug(method + " WordProbability loaded [" + wp + "]");
}
return wp;
}
public WordProbability getWordProbability(String word) throws WordsDataSourceException {
return getWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word);
}
private void updateWordProbability(String category, String word, boolean isMatch) throws WordsDataSourceException {
String fieldname = "nonmatch_count";
if (isMatch) {
fieldname = "match_count";
}
// truncate word at 255 characters
if (word.length() > 255) {
word = word.substring(0, 254);
}
Connection conn = null;
try {
conn = connectionManager.getConnection();
PreparedStatement insertStatement = conn.prepareStatement("INSERT INTO word_probability (word, category) VALUES (?, ?)");
PreparedStatement selectStatement = conn.prepareStatement("SELECT 1 FROM word_probability WHERE word = ? AND category = ?");
PreparedStatement updateStatement = conn.prepareStatement("UPDATE word_probability SET " + fieldname + " = " + fieldname + " + 1 WHERE word = ? AND category = ?");
selectStatement.setString(1, word);
selectStatement.setString(2, category);
ResultSet rs = selectStatement.executeQuery();
if (!rs.next()) {
// word is not in table
// insert the word
insertStatement.setString(1, word);
insertStatement.setString(2, category);
insertStatement.execute();
}
// update the word count
updateStatement.setString(1, word);
updateStatement.setString(2, category);
updateStatement.execute();
} catch (SQLException e) {
throw new WordsDataSourceException("Problem updating WordProbability", e);
} finally {
if (conn != null) {
try {
connectionManager.returnConnection(conn);
} catch (SQLException e1) {
// ignore
}
}
}
}
public void addMatch(String category, String word) throws WordsDataSourceException {
if (category == null) {
throw new IllegalArgumentException("category cannot be null");
}
updateWordProbability(category, word, true);
}
public void addMatch(String word) throws WordsDataSourceException {
updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, true);
}
public void addNonMatch(String category, String word) throws WordsDataSourceException {
if (category == null) {
throw new IllegalArgumentException("category cannot be null");
}
updateWordProbability(category, word, false);
}
public void addNonMatch(String word) throws WordsDataSourceException {
updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, false);
}
/**
* Create the word_probability table if it does not already
* exist. Tested successfully with MySQL 4 & HSQLDB. See
* comments in code for Axion 1.0M1 issues.
*
*
* @throws WordsDataSourceException
*/
private void createTable() throws WordsDataSourceException {
Connection con = null;
try {
con = connectionManager.getConnection();
// check if the word_probability table exists
DatabaseMetaData dbm = con.getMetaData();
ResultSet rs = dbm.getTables(null, null, "word_probability", null);
if (!rs.next()) {
// the table does not exist
Statement stmt = con.createStatement();
// Under Axion 1.0M1, use
// stmt.executeUpdate( "CREATE TABLE word_probability ( "
// + " word VARCHAR(255) NOT NULL,"
// + " category VARCHAR(20) NOT NULL,"
// + " match_count INTEGER NOT NULL,"
// + " nonmatch_count INTEGER NOT NULL, "
// + " PRIMARY KEY(word, category) ) ");
stmt.executeUpdate("CREATE TABLE word_probability ( "
+ " word VARCHAR(255) NOT NULL,"
+ " category VARCHAR(20) NOT NULL,"
+ " match_count INT DEFAULT 0 NOT NULL,"
+ " nonmatch_count INT DEFAULT 0 NOT NULL, "
+ " PRIMARY KEY(word, category) ) ");
}
} catch (SQLException e) {
throw new WordsDataSourceException("Problem creating table", e); // we can't recover from this
} finally {
if (con != null) {
try {
connectionManager.returnConnection(con);
} catch (SQLException e1) {
// ignore
}
}
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -