📄 instancequery.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.misc;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Types;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import org.agentacademy.modules.dataminer.core.Attribute;
import org.agentacademy.modules.dataminer.core.FastVector;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.SparseInstance;
import org.agentacademy.modules.dataminer.core.Utils;
public class InstanceQuery extends DatabaseUtilities implements OptionHandler {
/** Determines whether sparse data is created */
boolean m_CreateSparseData = false;
/** Query to execute */
String m_Query = "SELECT * from attributes";
/**
* Sets up the database drivers
*
* @exception Exception if an error occurs
*/
public InstanceQuery() throws Exception {
super();
}
/**
* Returns an enumeration describing the available options <p>
*
*/
public Enumeration listOptions () {
Vector newVector = new Vector(2);
newVector.addElement(new Option("\tSQL query to execute.",
"Q",1,"-Q <query>"));
newVector.addElement(new Option("\tReturn sparse rather than normal "
+"instances."
, "S", 0, "-S"));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:<p>
*
* -S <br>
* Return a set of sparse instances rather than normal instances.<p>
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
public void setOptions (String[] options)
throws Exception
{
setSparseData(Utils.getFlag('S',options));
String optionString;
optionString = Utils.getOption('Q',options);
if (optionString.length() != 0) {
setQuery(optionString);
}
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String queryTipText() {
return "The SQL query to execute against the database.";
}
/**
* Set the query to execute against the database
* @param q the query to execute
*/
public void setQuery(String q) {
m_Query = q;
}
/**
* Get the query to execute against the database
* @return the query
*/
public String getQuery() {
return m_Query;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String sparseDataTipText() {
return "Encode data as sparse instances.";
}
/**
* Sets whether data should be encoded as sparse instances
* @param s true if data should be encoded as a set of sparse instances
*/
public void setSparseData(boolean s) {
m_CreateSparseData = s;
}
/**
* Gets whether data is to be returned as a set of sparse instances
* @return true if data is to be encoded as sparse instances
*/
public boolean getSparseData() {
return m_CreateSparseData;
}
/**
* Gets the current settings of InstanceQuery
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
String[] options = new String[3];
int current = 0;
options[current] = "-Q"; options[current++] = getQuery();
if (getSparseData()) {
options[current++] = "-S";
}
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Makes a database query using the query set through the -Q option
* to convert a table into a set of instances
*
* @return the instances contained in the result of the query
* @exception Exception if an error occurs
*/
public Instances retrieveInstances() throws Exception {
return retrieveInstances(m_Query);
}
/**
* Makes a database query to convert a table into a set of instances
*
* @param query the query to convert to instances
* @return the instances contained in the result of the query
* @exception Exception if an error occurs
*/
public Instances retrieveInstances(String query) throws Exception {
System.err.println("Executing query: " + query);
connectToDatabase();
if (execute(query) == false) {
throw new Exception("Query didn't produce results");
}
ResultSet rs = getResultSet();
// System.out.println(rs.getWarnings());
System.err.println("Getting metadata...");
ResultSetMetaData md = rs.getMetaData();
// System.out.print("Did get metadata");
// Determine structure of the instances
int numAttributes = md.getColumnCount();
// System.out.println("number of columns" + numAttributes);
int [] attributeTypes = new int [numAttributes];
Hashtable [] nominalIndexes = new Hashtable [numAttributes];
FastVector [] nominalStrings = new FastVector [numAttributes];
for (int i = 1; i <= numAttributes; i++) {
switch (md.getColumnType(i)) {
case Types.CHAR:
case Types.VARCHAR:
case Types.LONGVARCHAR:
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
//System.err.println("String --> nominal");
attributeTypes[i - 1] = Attribute.NOMINAL;
nominalIndexes[i - 1] = new Hashtable();
nominalStrings[i - 1] = new FastVector();
break;
case Types.BIT:
////System.err.println("boolean --> nominal");
attributeTypes[i - 1] = Attribute.NOMINAL;
nominalIndexes[i - 1] = new Hashtable();
nominalIndexes[i - 1].put("false", new Double(0));
nominalIndexes[i - 1].put("true", new Double(1));
nominalStrings[i - 1] = new FastVector();
nominalStrings[i - 1].addElement("false");
nominalStrings[i - 1].addElement("true");
break;
case Types.NUMERIC:
case Types.DECIMAL:
//System.err.println("BigDecimal --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.TINYINT:
//System.err.println("byte --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.SMALLINT:
//System.err.println("short --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.INTEGER:
//System.err.println("int --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.BIGINT:
//System.err.println("long --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.REAL:
//System.err.println("float --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case Types.FLOAT:
case Types.DOUBLE:
//System.err.println("double --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
/*case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
//System.err.println("byte[] --> unsupported");
attributeTypes[i - 1] = Attribute.STRING;
break; */
case Types.DATE:
case Types.TIME:
case Types.TIMESTAMP:
attributeTypes[i - 1] = Attribute.DATE;
break;
default:
//System.err.println("Unknown column type");
attributeTypes[i - 1] = Attribute.STRING;
}
}
// Step through the tuples
System.err.println("Creating instances...");
FastVector instances = new FastVector();
int rowCount = 0;
while(rs.next()) {
if (rowCount % 100 == 0) {
System.err.print("read " + rowCount + " instances \r");
System.err.flush();
}
double[] vals = new double[numAttributes];
for(int i = 1; i <= numAttributes; i++) {
switch (md.getColumnType(i)) {
case Types.CHAR:
case Types.VARCHAR:
case Types.LONGVARCHAR:
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
String str = rs.getString(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
Double index = (Double)nominalIndexes[i - 1].get(str);
if (index == null) {
index = new Double(nominalStrings[i - 1].size());
nominalIndexes[i - 1].put(str, index);
nominalStrings[i - 1].addElement(str);
}
vals[i - 1] = index.doubleValue();
}
break;
case Types.BIT:
boolean boo = rs.getBoolean(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (boo ? 1.0 : 0.0);
}
break;
case Types.NUMERIC:
case Types.DECIMAL:
// BigDecimal bd = rs.getBigDecimal(i, 4);
double dd = rs.getDouble(i);
// Use the column precision instead of 4?
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
// newInst.setValue(i - 1, bd.doubleValue());
vals[i - 1] = dd;
}
break;
case Types.TINYINT:
byte by = rs.getByte(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)by;
}
break;
case Types.SMALLINT:
short sh = rs.getByte(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)sh;
}
break;
case Types.INTEGER:
int in = rs.getInt(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)in;
}
break;
case Types.BIGINT:
long lo = rs.getLong(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)lo;
}
break;
case Types.REAL:
float fl = rs.getFloat(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)fl;
}
break;
case Types.FLOAT:
case Types.DOUBLE:
double dou = rs.getDouble(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
vals[i - 1] = (double)dou;
}
break;
/*case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY: */
case Types.DATE:
case Types.TIME:
case Types.TIMESTAMP:
Date date = rs.getDate(i);
if (rs.wasNull()) {
vals[i - 1] = Instance.missingValue();
} else {
// TODO: Do a value check here.
vals[i - 1] = (double)date.getTime();
}
break;
default:
vals[i - 1] = Instance.missingValue();
}
}
Instance newInst;
if (m_CreateSparseData) {
newInst = new SparseInstance(1.0, vals);
} else {
newInst = new Instance(1.0, vals);
}
instances.addElement(newInst);
rowCount++;
}
//disconnectFromDatabase(); (perhaps other queries might be made)
// Create the header and add the instances to the dataset
System.err.println("Creating header...");
FastVector attribInfo = new FastVector();
for (int i = 0; i < numAttributes; i++) {
String attribName = md.getColumnName(i + 1);
switch (attributeTypes[i]) {
case Attribute.NOMINAL:
attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
break;
case Attribute.NUMERIC:
attribInfo.addElement(new Attribute(attribName));
break;
case Attribute.STRING:
attribInfo.addElement(new Attribute(attribName, (FastVector)null));
break;
case Attribute.DATE:
attribInfo.addElement(new Attribute(attribName, (String)null));
break;
default:
throw new Exception("Unknown attribute type");
}
}
Instances result = new Instances("QueryResult", attribInfo,
instances.size());
for (int i = 0; i < instances.size(); i++) {
result.add((Instance)instances.elementAt(i));
}
rs.close();
return result;
}
/**
* Test the class from the command line. The instance
* query should be specified with -Q sql_query
*
* @param args contains options for the instance query
*/
/* public static void main(String args[]) {
try {
InstanceQuery iq = new InstanceQuery();
String query = Utils.getOption('Q', args);
System.out.println(args);
if (query.length() == 0) {
System.out.println("Edw paw kateu8eian");
iq.setQuery("select * from fieldname");
} else {
iq.setQuery(query);
}
iq.setOptions(args);
try {
Utils.checkForRemainingOptions(args);
} catch (Exception e) {
System.err.println("Options for wekajade.InstanceQuery:\n");
Enumeration en = iq.listOptions();
while (en.hasMoreElements()) {
Option o = (Option)en.nextElement();
System.err.println(o.synopsis()+"\n"+o.description());
}
System.exit(1);
}
Instances aha = iq.retrieveInstances();
iq.disconnectFromDatabase();
// The dataset may be large, so to make things easier we'll
// output an instance at a time (rather than having to convert
// the entire dataset to one large string)
System.out.println(new Instances(aha, 0));
for (int i = 0; i < aha.numInstances(); i++) {
System.out.println(aha.instance(i));
}
} catch(Exception e) {
e.printStackTrace();
System.err.println(e.getMessage());
}
}
*/
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -