📄 databaseloader.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * DatabaseLoader.java * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand * */package weka.core.converters;import weka.core.Instance;import weka.core.Instances;import weka.core.FastVector;import weka.core.Attribute;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.Option;import java.io.IOException;import java.sql.*;import java.util.Hashtable;import java.util.Properties;import java.util.StringTokenizer;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Reads Instances from a Database. Can read a database in batch or incremental mode.<br/> * In inremental mode MySQL and HSQLDB are supported.<br/> * For all other DBMS set a pseudoincremental mode is used:<br/> * In pseudo incremental mode the instances are read into main memory all at once and then incrementally provided to the user.<br/> * For incremental loading the rows in the database table have to be ordered uniquely.<br/> * The reason for this is that every time only a single row is fetched by extending the user query by a LIMIT clause.<br/> * If this extension is impossible instances will be loaded pseudoincrementally. To ensure that every row is fetched exaclty once, they have to ordered.<br/> * Therefore a (primary) key is necessary.This approach is chosen, instead of using JDBC driver facilities, because the latter one differ betweeen different drivers.<br/> * If you use the DatabaseSaver and save instances by generating automatically a primary key (its name is defined in DtabaseUtils), this primary key will be used for ordering but will not be part of the output. The user defined SQL query to extract the instances should not contain LIMIT and ORDER BY clauses (see -Q option).<br/> * In addition, for incremental loading, you can define in the DatabaseUtils file how many distinct values a nominal attribute is allowed to have. If this number is exceeded, the column will become a string attribute.<br/> * In batch mode no string attributes will be created. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -url <JDBC URL> * The JDBC URL to connect to. * (default: from DatabaseUtils.props file)</pre> * * <pre> -user <name> * The user to connect with to the database. * (default: none)</pre> * * <pre> -password <password> * The password to connect with to the database. * (default: none)</pre> * * <pre> -Q <query> * SQL query of the form * SELECT <list of columns>|* FROM <table> [WHERE] * to execute. * (default: Select * From Results0)</pre> * * <pre> -P <list of column names> * List of column names uniquely defining a DB row * (separated by ', '). * Used for incremental loading. * If not specified, the key will be determined automatically, * if possible with the used JDBC driver. * The auto ID column created by the DatabaseSaver won't be loaded.</pre> * * <pre> -I * Sets incremental loading</pre> * <!-- options-end --> * * @author Stefan Mutter (mutter@cs.waikato.ac.nz) * @version $Revision: 1.12 $ * @see Loader */public class DatabaseLoader extends AbstractLoader implements BatchConverter, IncrementalConverter, DatabaseConverter, OptionHandler { /** for serialization */ static final long serialVersionUID = -7936159015338318659L; /** The header information that is retrieved in the beginning of incremental loading */ protected Instances m_structure; /** Used in pseudoincremental mode. The whole dataset from which instances will be read incrementally.*/ private Instances m_datasetPseudoInc; /** Set of instances that equals m_structure except that the auto_generated_id column is not included as an attribute*/ private Instances m_oldStructure; /** The database connection */ private DatabaseConnection m_DataBaseConnection; /** The user defined query to load instances. (form: SELECT *|<column-list> FROM <table> [WHERE <condition>]) */ private String m_query = "Select * from Results0"; /** Flag indicating that pseudo incremental mode is used (all instances load at once into main memeory and then incrementally from main memory instead of the database) */ private boolean m_pseudoIncremental; /** If true it checks whether or not the table exists in the database before loading depending on jdbc metadata information. * Set flag to false if no check is required or if jdbc metadata is not complete. */ private boolean m_checkForTable; /** Limit when an attribute is treated as string attribute and not as a nominal one because it has to many values. */ private int m_nominalToStringLimit; /** The number of rows obtained by m_query, eg the size of the ResultSet to load*/ private int m_rowCount; /** Indicates how many rows has already been loaded incrementally */ private int m_counter; /** Decides which SQL statement to limit the number of rows should be used. DBMS dependent. Algorithm just tries several possibilities. */ private int m_choice; /** Flag indicating that incremental process wants to read first instance*/ private boolean m_firstTime; /** Flag indicating that incremental mode is chosen (for command line use only)*/ private boolean m_inc; /** Contains the name of the columns that uniquely define a row in the ResultSet. Ensures a unique ordering of instances for indremental loading.*/ private FastVector m_orderBy; /** Stores the index of a nominal value */ private Hashtable [] m_nominalIndexes; /** Stores the nominal value*/ private FastVector [] m_nominalStrings; /** Name of the primary key column that will allow unique ordering necessary for incremental loading. The name is specified in the DatabaseUtils file.*/ private String m_idColumn; /** The property file for the database connection */ protected static String PROPERTY_FILE = DatabaseConnection.PROPERTY_FILE; /** Properties associated with the database connection */ protected static Properties PROPERTIES; /** the JDBC URL to use */ protected String m_URL = null; /** the database user to use */ protected String m_User = null; /** the database password to use */ protected String m_Password = null; /** the keys for unique ordering */ protected String m_Keys = null; /** reads the property file */ static { try { PROPERTIES = Utils.readProperties(PROPERTY_FILE); } catch (Exception ex) { System.err.println("Problem reading properties. Fix before continuing."); System.err.println(ex); } } /** * Constructor * * @throws Exception if initialization fails */ public DatabaseLoader() throws Exception{ reset(); m_pseudoIncremental=false; m_checkForTable=true; String props=PROPERTIES.getProperty("nominalToStringLimit"); m_nominalToStringLimit = Integer.parseInt(props); m_idColumn=PROPERTIES.getProperty("idColumn"); if (PROPERTIES.getProperty("checkForTable", "").equalsIgnoreCase("FALSE")) m_checkForTable=false; } /** * Returns a string describing this Loader * * @return a description of the Loader suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Reads Instances from a Database. " + "Can read a database in batch or incremental mode.\n" + "In inremental mode MySQL and HSQLDB are supported.\n" + "For all other DBMS set a pseudoincremental mode is used:\n" + "In pseudo incremental mode the instances are read into main memory all at once and then incrementally provided to the user.\n" + "For incremental loading the rows in the database table have to be ordered uniquely.\n" + "The reason for this is that every time only a single row is fetched by extending the user query by a LIMIT clause.\n" + "If this extension is impossible instances will be loaded pseudoincrementally. To ensure that every row is fetched exaclty once, they have to ordered.\n" + "Therefore a (primary) key is necessary.This approach is chosen, instead of using JDBC driver facilities, because the latter one differ betweeen different drivers.\n" + "If you use the DatabaseSaver and save instances by generating automatically a primary key (its name is defined in DtabaseUtils), this primary key will " + "be used for ordering but will not be part of the output. The user defined SQL query to extract the instances should not contain LIMIT and ORDER BY clauses (see -Q option).\n" + "In addition, for incremental loading, you can define in the DatabaseUtils file how many distinct values a nominal attribute is allowed to have. If this number is exceeded, the column will become a string attribute.\n" + "In batch mode no string attributes will be created."; } /** Resets the Loader ready to read a new data set * @throws Exception if an error occurs while disconnecting from the database */ public void reset() throws Exception{ resetStructure(); if(m_DataBaseConnection != null && m_DataBaseConnection.isConnected()) m_DataBaseConnection.disconnectFromDatabase(); m_DataBaseConnection = new DatabaseConnection(); // don't lose previously set connection data! if (m_URL != null) m_DataBaseConnection.setDatabaseURL(m_URL); if (m_User != null) m_DataBaseConnection.setUsername(m_User); if (m_Password != null) m_DataBaseConnection.setPassword(m_Password); m_orderBy = new FastVector(); // don't lose previously set key columns! if (m_Keys != null) setKeys(m_Keys); m_inc = false; } /** * Resets the structure of instances */ public void resetStructure(){ m_structure = null; m_datasetPseudoInc = null; m_oldStructure = null; m_rowCount = 0; m_counter = 0; m_choice = 0; m_firstTime = true; setRetrieval(NONE); } /** * Sets the query to execute against the database * * @param q the query to execute */ public void setQuery(String q) { q = q.replaceAll("[fF][rR][oO][mM]","FROM"); q = q.replaceFirst("[sS][eE][lL][eE][cC][tT]","SELECT"); m_query = q; } /** * Gets the query to execute against the database * * @return the query */ public String getQuery() { return m_query; } /** * the tip text for this property * * @return the tip text */ public String queryTipText(){ return "The query that should load the instances." +"\n The query has to be of the form SELECT <column-list>|* FROM <table> [WHERE <conditions>]"; } /** * Sets the key columns of a database table * * @param keys a String containing the key columns in a comma separated list. */ public void setKeys(String keys){ m_Keys = keys; m_orderBy.removeAllElements(); StringTokenizer st = new StringTokenizer(keys, ","); while (st.hasMoreTokens()) { String column = st.nextToken(); column = column.replaceAll(" ",""); m_orderBy.addElement(column); } } /** * Gets the key columns' name * * @return name of the key columns' */ public String getKeys(){ StringBuffer key = new StringBuffer(); for(int i = 0;i < m_orderBy.size(); i++){ key.append((String)m_orderBy.elementAt(i)); if(i != m_orderBy.size()-1) key.append(", "); } return key.toString(); } /** * the tip text for this property * * @return the tip text */ public String keysTipText(){ return "For incremental loading a unique identiefer has to be specified." +"\nIf the query includes all columns of a table (SELECT *...) a primary key" +"\ncan be detected automatically depending on the JDBC driver. If that is not possible" +"\nspecify the key columns here in a comma separated list."; } /** * Sets the database URL * * @param url string with the database URL */ public void setUrl(String url){ m_URL = url; m_DataBaseConnection.setDatabaseURL(url); } /** * Gets the URL * * @return the URL */ public String getUrl(){ return m_DataBaseConnection.getDatabaseURL(); } /** * the tip text for this property * * @return the tip text */ public String urlTipText(){ return "The URL of the database"; } /** * Sets the database user * * @param user the database user name */ public void setUser(String user){ m_User = user; m_DataBaseConnection.setUsername(user); } /** * Gets the user name * * @return name of database user */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -