⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancequery.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    InstanceQuery.java *    Copyright (C) 1999 Len Trigg * */package weka.experiment;import java.io.*;import java.math.*;import java.net.InetAddress;import java.sql.*;import java.util.*;import java.util.Date;import weka.core.*;/** * Convert the results of a database query into instances. The jdbc * driver and database to be used default to "jdbc.idbDriver" and * "jdbc:idb=experiments.prp". These may be changed by creating * a java properties file called DatabaseUtils.props in user.home or * the current directory. eg:<p> * * <code><pre> * jdbcDriver=jdbc.idbDriver * jdbcURL=jdbc:idb=experiments.prp * </pre></code><p> * * Command line use just outputs the instances to System.out. * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class InstanceQuery extends DatabaseUtils implements OptionHandler {  /** Determines whether sparse data is created */  boolean m_CreateSparseData = false;    /** Query to execute */  String m_Query = "SELECT * from ?";  /**   * Sets up the database drivers   *   * @exception Exception if an error occurs   */  public InstanceQuery() throws Exception {    super();  }  /**   * Returns an enumeration describing the available options <p>   *   */   public Enumeration listOptions () {     Vector newVector = new Vector(2);     newVector.addElement(new Option("\tSQL query to execute.",				     "Q",1,"-Q <query>"));     newVector.addElement(new Option("\tReturn sparse rather than normal "				    +"instances." 				    , "S", 0, "-S"));     return  newVector.elements();   }  /**   * Parses a given list of options.   *   * Valid options are:<p>   *    * -S <br>   * Return a set of sparse instances rather than normal instances.<p>   *   * @param options the list of options as an array of strings   * @exception Exception if an option is not supported   */  public void setOptions (String[] options)    throws Exception {    setSparseData(Utils.getFlag('S',options));    String optionString;        optionString = Utils.getOption('Q',options);    if (optionString.length() != 0) {      setQuery(optionString);    }   }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String queryTipText() {    return "The SQL query to execute against the database.";  }    /**   * Set the query to execute against the database   * @param q the query to execute   */  public void setQuery(String q) {    m_Query = q;  }  /**   * Get the query to execute against the database   * @return the query   */  public String getQuery() {    return m_Query;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String sparseDataTipText() {    return "Encode data as sparse instances.";  }  /**   * Sets whether data should be encoded as sparse instances   * @param s true if data should be encoded as a set of sparse instances   */  public void setSparseData(boolean s) {    m_CreateSparseData = s;  }  /**   * Gets whether data is to be returned as a set of sparse instances   * @return true if data is to be encoded as sparse instances   */  public boolean getSparseData() {    return m_CreateSparseData;  }  /**   * Gets the current settings of InstanceQuery   *   * @return an array of strings suitable for passing to setOptions()   */  public String[] getOptions () {    String[] options = new String[3];    int current = 0;    options[current] = "-Q"; options[current++] = getQuery();     if (getSparseData()) {      options[current++] = "-S";    }    while (current < options.length) {      options[current++] = "";    }        return  options;  }  /**   * Makes a database query using the query set through the -Q option    * to convert a table into a set of instances   *   * @return the instances contained in the result of the query   * @exception Exception if an error occurs   */  public Instances retrieveInstances() throws Exception {    return retrieveInstances(m_Query);  }  /**   * Makes a database query to convert a table into a set of instances   *   * @param query the query to convert to instances   * @return the instances contained in the result of the query   * @exception Exception if an error occurs   */  public Instances retrieveInstances(String query) throws Exception {    System.err.println("Executing query: " + query);    connectToDatabase();    if (execute(query) == false) {      throw new Exception("Query didn't produce results");    }    ResultSet rs = getResultSet();    System.err.println("Getting metadata...");    ResultSetMetaData md = rs.getMetaData();    // Determine structure of the instances    int numAttributes = md.getColumnCount();    int [] attributeTypes = new int [numAttributes];    Hashtable [] nominalIndexes = new Hashtable [numAttributes];    FastVector [] nominalStrings = new FastVector [numAttributes];    for (int i = 1; i <= numAttributes; i++) {      switch (md.getColumnType(i)) {      case Types.CHAR:      case Types.VARCHAR:      case Types.LONGVARCHAR:      case Types.BINARY:      case Types.VARBINARY:      case Types.LONGVARBINARY:	//System.err.println("String --> nominal");	attributeTypes[i - 1] = Attribute.NOMINAL;	nominalIndexes[i - 1] = new Hashtable();	nominalStrings[i - 1] = new FastVector();	break;      case Types.BIT:	////System.err.println("boolean --> nominal");	attributeTypes[i - 1] = Attribute.NOMINAL;	nominalIndexes[i - 1] = new Hashtable();	nominalIndexes[i - 1].put("false", new Double(0));	nominalIndexes[i - 1].put("true", new Double(1));	nominalStrings[i - 1] = new FastVector();	nominalStrings[i - 1].addElement("false");	nominalStrings[i - 1].addElement("true");	break;      case Types.NUMERIC:      case Types.DECIMAL:	//System.err.println("BigDecimal --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.TINYINT:	//System.err.println("byte --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.SMALLINT:	//System.err.println("short --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.INTEGER:	//System.err.println("int --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.BIGINT:	//System.err.println("long --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.REAL:	//System.err.println("float --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;      case Types.FLOAT:      case Types.DOUBLE:	//System.err.println("double --> numeric");	attributeTypes[i - 1] = Attribute.NUMERIC;	break;	/*case Types.BINARY:      case Types.VARBINARY:      case Types.LONGVARBINARY:	//System.err.println("byte[] --> unsupported");	attributeTypes[i - 1] = Attribute.STRING;	break; */      case Types.DATE:      case Types.TIME:      case Types.TIMESTAMP:	attributeTypes[i - 1] = Attribute.DATE;	break;      default:	//System.err.println("Unknown column type");	attributeTypes[i - 1] = Attribute.STRING;      }    }    // Step through the tuples    System.err.println("Creating instances...");    FastVector instances = new FastVector();    int rowCount = 0;    while(rs.next()) {      if (rowCount % 100 == 0) {	System.err.print("read " + rowCount + " instances \r");	System.err.flush();      }      double[] vals = new double[numAttributes];      for(int i = 1; i <= numAttributes; i++) {	switch (md.getColumnType(i)) {	case Types.CHAR:	case Types.VARCHAR:	case Types.LONGVARCHAR:	case Types.BINARY:	case Types.VARBINARY:	case Types.LONGVARBINARY:	  String str = rs.getString(i);	  	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    Double index = (Double)nominalIndexes[i - 1].get(str);	    if (index == null) {	      index = new Double(nominalStrings[i - 1].size());	      nominalIndexes[i - 1].put(str, index);	      nominalStrings[i - 1].addElement(str);	    }	    vals[i - 1] = index.doubleValue();	  }	  break;	case Types.BIT:	  boolean boo = rs.getBoolean(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (boo ? 1.0 : 0.0);	  }	  break;	case Types.NUMERIC:	case Types.DECIMAL:	  //	  BigDecimal bd = rs.getBigDecimal(i, 4); 	  double dd = rs.getDouble(i);	  // Use the column precision instead of 4?	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    //	    newInst.setValue(i - 1, bd.doubleValue());	    vals[i - 1] =  dd;	  }	  break;	case Types.TINYINT:	  byte by = rs.getByte(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)by;	  }	  break;	case Types.SMALLINT:	  short sh = rs.getByte(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)sh;	  }	  break;	case Types.INTEGER:	  int in = rs.getInt(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)in;	  }	  break;	case Types.BIGINT:	  long lo = rs.getLong(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)lo;	  }	  break;	case Types.REAL:	  float fl = rs.getFloat(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)fl;	  }	  break;	case Types.FLOAT:	case Types.DOUBLE:	  double dou = rs.getDouble(i);	  if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {	    vals[i - 1] = (double)dou;	  }	  break;	  /*case Types.BINARY:	case Types.VARBINARY:	case Types.LONGVARBINARY: */	case Types.DATE:	case Types.TIME:	case Types.TIMESTAMP:          Date date = rs.getDate(i);          if (rs.wasNull()) {	    vals[i - 1] = Instance.missingValue();	  } else {            // TODO: Do a value check here.            vals[i - 1] = (double)date.getTime();          }          break;	default:	  vals[i - 1] = Instance.missingValue();	}      }      Instance newInst;      if (m_CreateSparseData) {	newInst = new SparseInstance(1.0, vals);      } else {	newInst = new Instance(1.0, vals);      }      instances.addElement(newInst);      rowCount++;    }    //disconnectFromDatabase();  (perhaps other queries might be made)        // Create the header and add the instances to the dataset    System.err.println("Creating header...");    FastVector attribInfo = new FastVector();    for (int i = 0; i < numAttributes; i++) {      String attribName = md.getColumnName(i + 1);      switch (attributeTypes[i]) {      case Attribute.NOMINAL:	attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));	break;      case Attribute.NUMERIC:	attribInfo.addElement(new Attribute(attribName));	break;      case Attribute.STRING:	attribInfo.addElement(new Attribute(attribName, (FastVector)null));	break;      case Attribute.DATE:	attribInfo.addElement(new Attribute(attribName, (String)null));	break;      default:	throw new Exception("Unknown attribute type");      }    }    Instances result = new Instances("QueryResult", attribInfo, 				     instances.size());    for (int i = 0; i < instances.size(); i++) {      result.add((Instance)instances.elementAt(i));    }    rs.close();    return result;  }  /**   * Test the class from the command line. The instance   * query should be specified with -Q sql_query   *   * @param args contains options for the instance query   */  public static void main(String args[]) {    try {      InstanceQuery iq = new InstanceQuery();      String query = Utils.getOption('Q', args);      if (query.length() == 0) {	iq.setQuery("select * from Experiment_index");      } else {	iq.setQuery(query);      }      iq.setOptions(args);      try {	Utils.checkForRemainingOptions(args);      } catch (Exception e) {	System.err.println("Options for weka.experiment.InstanceQuery:\n");	Enumeration en = iq.listOptions();	while (en.hasMoreElements()) {	  Option o = (Option)en.nextElement();	  System.err.println(o.synopsis()+"\n"+o.description());	}	System.exit(1);      }           Instances aha = iq.retrieveInstances();      iq.disconnectFromDatabase();      // The dataset may be large, so to make things easier we'll      // output an instance at a time (rather than having to convert      // the entire dataset to one large string)      System.out.println(new Instances(aha, 0));      for (int i = 0; i < aha.numInstances(); i++) {	System.out.println(aha.instance(i));      }    } catch(Exception e) {      e.printStackTrace();      System.err.println(e.getMessage());    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -