📄 databaseloader.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* DatabaseLoader.java
* Copyright (C) 2004 Stefan Mutter
*
*/
package weka.core.converters;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Attribute;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.Option;
import java.io.IOException;
import java.sql.*;
import java.util.Hashtable;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Enumeration;
import java.util.Vector;
/**
* Reads from a database.
* Can read a database in batch or incremental mode.
* In inremental mode MySQL and HSQLDB are supported.
* For all other DBMS set a pseudoincremental mode is used:
* In pseudo incremental mode the instances are read into main memory all at once and then incrementally provided to the user.
* For incremental loading the rows in the database table have to be ordered uniquely.
* The reason for this is that every time only a single row is fetched by extending the user" query by a LIMIT clause.
* If this extension is impossible instances will be loaded pseudoincrementally. To ensure that every row is fetched exaclty once, they have to ordered.
* Therefore a (primary) key is necessary.This approach is chosen, instead of using JDBC driver facilities, because the latter one differ betweeen different drivers.
* If you use the DatabaseSaver and save instances by generating automatically a primary key (its name is defined in DtabaseUtils), this primary key will
* be used for ordering but will not be part of the output. The user defined SQL query to extract the instances should not contain LIMIT and ORDER BY clauses (see -Q option).
* In addition, for incremental loading, you can define in the DatabaseUtils file how many distinct values a nominal attribute is allowed to have. If this number is exceeded, the column will become a string attribute.
* In batch mode no string attributes will be created.
*
* Available options are:
* -Q the query to specify which tuples to load<br>
* The query must have the form:
* SELECT *|<column-list> FROM <table> [WHERE}
* (default: SELECT * FROM Results0).<p>
*
* -P comma separted list of columns that are a unqiue key <br>
* Only needed for incremental loading, if it cannot be detected automatically<p>
*
* -I <br>
* Sets incremental loading
*
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
* @version $Revision: 1.1 $
* @see Loader
*/
public class DatabaseLoader extends AbstractLoader implements BatchConverter, IncrementalConverter, DatabaseConverter, OptionHandler {
/** The header information that is retrieved in the beginning of incremental loading */
protected Instances m_structure;
/** Used in pseudoincremental mode. The whole dataset from which instances will be read incrementally.*/
private Instances m_datasetPseudoInc;
/** Set of instances that equals m_structure except that the auto_generated_id column is not included as an attribute*/
private Instances m_oldStructure;
/** The database connection */
private DatabaseConnection m_DataBaseConnection;
/** The user defined query to load instances. (form: SELECT *|<column-list> FROM <table> [WHERE <condition>]) */
private String m_query = "Select * from Results0";;
/** Flag indicating that pseudo incremental mode is used (all instances load at once into main memeory and then incrementally from main memory instead of the database) */
private boolean m_pseudoIncremental;
/** Limit when an attribute is treated as string attribute and not as a nominal one because it has to many values. */
private int m_nominalToStringLimit;
/** The number of rows obtained by m_query, eg the size of the ResultSet to load*/
private int m_rowCount;
/** Indicates how many rows has already been loaded incrementally */
private int m_counter;
/** Decides which SQL statement to limit the number of rows should be used. DBMS dependent. Algorithm just tries several possibilities. */
private int m_choice;
/** Flag indicating that incremental process wants to read first instance*/
private boolean m_firstTime;
/** Flag indicating that incremental mode is chosen (for command line use only)*/
private boolean m_inc;
/** Contains the name of the columns that uniquely define a row in the ResultSet. Ensures a unique ordering of instances for indremental loading.*/
private FastVector m_orderBy;
/** Stores the index of a nominal value */
private Hashtable [] m_nominalIndexes;
/** Stores the nominal value*/
private FastVector [] m_nominalStrings;
/** Name of the primary key column that will allow unique ordering necessary for incremental loading. The name is specified in the DatabaseUtils file.*/
private String m_idColumn;
/* Type mapping used for reading*/
public static final int STRING = 0;
public static final int BOOL = 1;
public static final int DOUBLE = 2;
public static final int BYTE = 3;
public static final int SHORT = 4;
public static final int INTEGER = 5;
public static final int LONG = 6;
public static final int FLOAT = 7;
public static final int DATE = 8;
/** The property file for the database connection */
protected static String PROPERTY_FILE
= "weka/experiment/DatabaseUtils.props";
/** Properties associated with the database connection */
protected static Properties PROPERTIES;
/** the JDBC URL to use */
protected String m_URL = null;
/** the database user to use */
protected String m_User = null;
/** the database password to use */
protected String m_Password = null;
/** the keys for unique ordering */
protected String m_Keys = null;
/** reads the property file */
static {
try {
PROPERTIES = Utils.readProperties(PROPERTY_FILE);
} catch (Exception ex) {
System.err.println("Problem reading properties. Fix before continuing.");
System.err.println(ex);
}
}
/**
* Constructor
*/
public DatabaseLoader() throws Exception{
reset();
m_pseudoIncremental=false;
String props=PROPERTIES.getProperty("nominalToStringLimit");
m_nominalToStringLimit = Integer.parseInt(props);
m_idColumn=PROPERTIES.getProperty("idColumn");
}
/**
* Returns a string describing this Loader
* @return a description of the Loader suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Reads Instances from a Database";
}
/** Resets the Loader ready to read a new data set
* @throws Exception if an error occurs while disconnecting from the database
*/
public void reset() throws Exception{
resetStructure();
if(m_DataBaseConnection != null && m_DataBaseConnection.isConnected())
m_DataBaseConnection.disconnectFromDatabase();
m_DataBaseConnection = new DatabaseConnection();
// don't lose previously set connection data!
if (m_URL != null)
m_DataBaseConnection.setDatabaseURL(m_URL);
if (m_User != null)
m_DataBaseConnection.setUsername(m_User);
if (m_Password != null)
m_DataBaseConnection.setPassword(m_Password);
m_orderBy = new FastVector();
// don't lose previously set key columns!
if (m_Keys != null)
setKeys(m_Keys);
m_inc = false;
}
/** Resets the structure of instances*/
public void resetStructure(){
m_structure = null;
m_datasetPseudoInc = null;
m_oldStructure = null;
m_rowCount = 0;
m_counter = 0;
m_choice = 0;
m_firstTime = true;
setRetrieval(NONE);
}
/**
* Sets the query to execute against the database
* @param q the query to execute
*/
public void setQuery(String q) {
q = q.replaceAll("[fF][rR][oO][mM]","FROM");
q = q.replaceFirst("[sS][eE][lL][eE][cC][tT]","SELECT");
m_query = q;
}
/**
* Gets the query to execute against the database
* @return the query
*/
public String getQuery() {
return m_query;
}
/**
* the tip text for this property
* @return the tip text
*/
public String queryTipText(){
return "The query that should load the instances."
+"\n The query has to be of the form SELECT <column-list>|* FROM <table> [WHERE <conditions>]";
}
/**
* Sets the key columns of a database table
* @param keys a String containing the key columns in a comma separated list.
*/
public void setKeys(String keys){
m_Keys = keys;
m_orderBy.removeAllElements();
StringTokenizer st = new StringTokenizer(keys, ",");
while (st.hasMoreTokens()) {
String column = st.nextToken();
column = column.replaceAll(" ","");
m_orderBy.addElement(column);
}
}
/**
* Gets the key columns' name
* @return name of the key columns'
*/
public String getKeys(){
StringBuffer key = new StringBuffer();
for(int i = 0;i < m_orderBy.size(); i++){
key.append((String)m_orderBy.elementAt(i));
if(i != m_orderBy.size()-1)
key.append(", ");
}
return key.toString();
}
/**
* the tip text for this property
* @return the tip text
*/
public String keysTipText(){
return "For incremental loading a unique identiefer has to be specified."
+"\nIf the query includes all columns of a table (SELECT *...) a primary key"
+"\ncan be detected automatically depending on the JDBC driver. If that is not possible"
+"\nspecify the key columns here in a comma separated list.";
}
/**
* Sets the database URL
* @param string with the database URL
*/
public void setUrl(String url){
m_URL = url;
m_DataBaseConnection.setDatabaseURL(url);
}
/**
* Gets the URL
* @return the URL
*/
public String getUrl(){
return m_DataBaseConnection.getDatabaseURL();
}
/**
* the tip text for this property
* @return the tip text
*/
public String urlTipText(){
return "The URL of the database";
}
/**
* Sets the database user
* @param the database user name
*/
public void setUser(String user){
m_User = user;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -