⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 databaseloader.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    DatabaseLoader.java
 *    Copyright (C) 2004 Stefan Mutter
 *
 */

package weka.core.converters;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Attribute;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.Option;
import java.io.IOException;
import java.sql.*;
import java.util.Hashtable;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Enumeration;
import java.util.Vector;


/**
 * Reads from a database.
 * Can read a database in batch or incremental mode.
 * In inremental mode MySQL and HSQLDB are supported.
 * For all other DBMS set a pseudoincremental mode is used:
 * In pseudo incremental mode the instances are read into main memory all at once and then incrementally provided to the user.
 * For incremental loading the rows in the database table have to be ordered uniquely.
 * The reason for this is that every time only a single row is fetched by extending the user" query by a LIMIT clause.
 * If this extension is impossible instances will be loaded pseudoincrementally. To ensure that every row is fetched exaclty once, they have to ordered.
 * Therefore a (primary) key is necessary.This approach is chosen, instead of using JDBC driver facilities, because the latter one differ betweeen different drivers.
 * If you use the DatabaseSaver and save instances by generating automatically a primary key (its name is defined in DtabaseUtils), this primary key will
 * be used for ordering but will not be part of the output. The user defined SQL query to extract the instances should not contain LIMIT and ORDER BY clauses (see -Q option). 
 * In addition, for incremental loading,  you can define in the DatabaseUtils file how many distinct values a nominal attribute is allowed to have. If this number is exceeded, the column will become a string attribute.  
 * In batch mode no string attributes will be created.
 *
 * Available options are: 
 * -Q the query to specify which tuples to load<br>
 * The query must have the form:
 * SELECT *|<column-list> FROM <table> [WHERE}
 * (default: SELECT * FROM Results0).<p>
 *
 * -P comma separted list of columns that are a unqiue key <br>
 * Only needed for incremental loading, if it cannot be detected automatically<p>
 *
 * -I <br>
 * Sets incremental loading
 *
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $
 * @see Loader
 */
public class DatabaseLoader extends AbstractLoader implements BatchConverter, IncrementalConverter, DatabaseConverter, OptionHandler {

  /** The header information that is retrieved in the beginning of incremental loading */
  protected Instances m_structure;
  
  /** Used in pseudoincremental mode. The whole dataset from which instances will be read incrementally.*/
  private Instances m_datasetPseudoInc;
  
  /** Set of instances that equals m_structure except that the auto_generated_id column is not included as an attribute*/
  private Instances m_oldStructure;
  
  /** The database connection */
  private DatabaseConnection m_DataBaseConnection;
  
  /** The user defined query to load instances. (form: SELECT *|<column-list> FROM <table> [WHERE <condition>]) */
  private String m_query = "Select * from Results0";;
  
  /** Flag indicating that pseudo incremental mode is used (all instances load at once into main memeory and then incrementally from main memory instead of the database) */
  private boolean m_pseudoIncremental;
  
  /** Limit when an attribute is treated as string attribute and not as a nominal one because it has to many values. */
  private int m_nominalToStringLimit;
  
  /** The number of rows obtained by m_query, eg the size of the ResultSet to load*/
  private int m_rowCount;
  
  /** Indicates how many rows has already been loaded incrementally */
  private int m_counter;
  
  /** Decides which SQL statement to limit the number of rows should be used. DBMS dependent. Algorithm just tries several possibilities. */
  private int m_choice;
  
  /** Flag indicating that incremental process wants to read first instance*/
  private boolean m_firstTime;
  
  /** Flag indicating that incremental mode is chosen (for command line use only)*/
  private boolean m_inc;
  
  /** Contains the name of the columns that uniquely define a row in the ResultSet. Ensures a unique ordering of instances for indremental loading.*/
  private FastVector m_orderBy;
  
  /** Stores the index of a nominal value */
  private Hashtable [] m_nominalIndexes;
  
  /**  Stores the nominal value*/
  private FastVector [] m_nominalStrings;
  
  /** Name of the primary key column that will allow unique ordering necessary for incremental loading. The name is specified in the DatabaseUtils file.*/
  private String m_idColumn;
  
  /* Type mapping used for reading*/
  public static final int STRING = 0;
  public static final int BOOL = 1;
  public static final int DOUBLE = 2;
  public static final int BYTE = 3;
  public static final int SHORT = 4;
  public static final int INTEGER = 5;
  public static final int LONG = 6;
  public static final int FLOAT = 7;
  public static final int DATE = 8; 
  
  /** The property file for the database connection */
  protected static String PROPERTY_FILE
    = "weka/experiment/DatabaseUtils.props";
  
  /** Properties associated with the database connection */
  protected static Properties PROPERTIES;

  /** the JDBC URL to use */
  protected String m_URL = null;

  /** the database user to use */
  protected String m_User = null;

  /** the database password to use */
  protected String m_Password = null;
  
  /** the keys for unique ordering */
  protected String m_Keys = null;

  /** reads the property file */
  static {

    try {
      PROPERTIES = Utils.readProperties(PROPERTY_FILE);
   
    } catch (Exception ex) {
      System.err.println("Problem reading properties. Fix before continuing.");
      System.err.println(ex);
    }
  }
  
  /**
   * Constructor
   */
  public DatabaseLoader() throws Exception{
  
      reset();
      m_pseudoIncremental=false;
      String props=PROPERTIES.getProperty("nominalToStringLimit");
      m_nominalToStringLimit = Integer.parseInt(props);
      m_idColumn=PROPERTIES.getProperty("idColumn");
  }

  /**
   * Returns a string describing this Loader
   * @return a description of the Loader suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Reads Instances from a Database";
  }

  

  /** Resets the Loader ready to read a new data set
   * @throws Exception if an error occurs while disconnecting from the database
   */
  public void reset() throws Exception{

    resetStructure();
    if(m_DataBaseConnection != null && m_DataBaseConnection.isConnected())
        m_DataBaseConnection.disconnectFromDatabase();
    m_DataBaseConnection = new DatabaseConnection();

    // don't lose previously set connection data!
    if (m_URL != null)
      m_DataBaseConnection.setDatabaseURL(m_URL);
    if (m_User != null)
      m_DataBaseConnection.setUsername(m_User);
    if (m_Password != null)
      m_DataBaseConnection.setPassword(m_Password);

    m_orderBy = new FastVector();
    // don't lose previously set key columns!
    if (m_Keys != null)
      setKeys(m_Keys);
      
    m_inc = false;
    
  }
  
  
  /** Resets the structure of instances*/
  public void resetStructure(){
  
      m_structure = null;
      m_datasetPseudoInc = null;
      m_oldStructure = null;
      m_rowCount = 0;
      m_counter = 0;
      m_choice = 0;
      m_firstTime = true;
      setRetrieval(NONE);
  }
  
  
  /**
   * Sets the query to execute against the database
   * @param q the query to execute
   */
  public void setQuery(String q) {
    q = q.replaceAll("[fF][rR][oO][mM]","FROM");
    q = q.replaceFirst("[sS][eE][lL][eE][cC][tT]","SELECT");  
    m_query = q;
  }

  /**
   * Gets the query to execute against the database
   * @return the query
   */
  public String getQuery() {
    return m_query;
  }
  
  /**
   * the tip text for this property
   * @return the tip text
   */
  public String queryTipText(){
  
      return "The query that should load the instances."
        +"\n The query has to be of the form SELECT <column-list>|* FROM <table> [WHERE <conditions>]";
  }
  
  /**
   * Sets the key columns of a database table
   * @param keys a String containing the key columns in a comma separated list.
   */
  public void setKeys(String keys){
  
    m_Keys = keys;
    m_orderBy.removeAllElements();
    StringTokenizer st = new StringTokenizer(keys, ",");
    while (st.hasMoreTokens()) {
        String column = st.nextToken();
        column = column.replaceAll(" ","");
        m_orderBy.addElement(column);
    }
  }
  
   /**
   * Gets the key columns' name
   * @return name of the key columns'
   */
  public String getKeys(){
  
      StringBuffer key = new StringBuffer();
      for(int i = 0;i < m_orderBy.size(); i++){
        key.append((String)m_orderBy.elementAt(i));
        if(i != m_orderBy.size()-1)
          key.append(", ");
      }
      return key.toString();
  }
  
  /**
   * the tip text for this property
   * @return the tip text
   */
  public String keysTipText(){
  
      return "For incremental loading a unique identiefer has to be specified."
        +"\nIf the query includes all columns of a table (SELECT *...) a primary key"
        +"\ncan be detected automatically depending on the JDBC driver. If that is not possible"
        +"\nspecify the key columns here in a comma separated list.";
  }
  
  /**
   * Sets the database URL
   * @param string with the database URL
   */
  public void setUrl(String url){
      
      m_URL = url;
      m_DataBaseConnection.setDatabaseURL(url);
    
  }
  
  /**
   * Gets the URL
   * @return the URL
   */
  public String getUrl(){
  
      return m_DataBaseConnection.getDatabaseURL();
  }
  
  /**
   * the tip text for this property
   * @return the tip text
   */
  public String urlTipText(){
  
      return "The URL of the database";
  }
  
  /**
   * Sets the database user
   * @param the database user name
   */
  public void setUser(String user){
   
      m_User = user;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -