📄 pairedttester.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* PairedTTester.java
* Copyright (C) 1999 Len Trigg
*
*/
package weka.experiment;
import java.io.BufferedReader;
import java.io.FileReader;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.Utils;
/**
* Calculates T-Test statistics on data stored in a set of instances.<p>
*
* Valid options from the command-line are:<p>
*
* -D num,num2... <br>
* The column numbers that uniquely specify a dataset.
* (default last) <p>
*
* -R num <br>
* The column number containing the run number.
* (default last) <p>
*
* -F num <br>
* The column number containing the fold number.
* (default none) <p>
*
* -S num <br>
* The significance level for T-Tests.
* (default 0.05) <p>
*
* -G num,num2... <br>
* The column numbers that uniquely specify one result generator (eg:
* scheme name plus options).
* (default last) <p>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision$
*/
public class PairedTTester implements OptionHandler {
/** The set of instances we will analyse */
protected Instances m_Instances;
/** The index of the column containing the run number (initially 0) */
protected int m_RunColumn = 0;
/** The option setting for the run number column (-1 means last) */
protected int m_RunColumnSet = -1;
/** The option setting for the fold number column (-1 means none) */
protected int m_FoldColumn = -1;
/** The significance level for comparisons (initially 0.05) */
protected double m_SignificanceLevel = 0.05;
/**
* The range of columns that specify a unique "dataset"
* (eg: scheme plus configuration).
* NOTE(review): the example in parentheses is identical to the one given
* for the resultset key range below and may have been copied from it;
* confirm what a typical dataset key looks like.
*/
protected Range m_DatasetKeyColumnsRange = new Range();
/**
* An array containing the indexes of just the selected dataset key columns.
* Read by DatasetSpecifiers.add, Dataset.matchesTemplate and
* templateString(Instance) to compare and describe instances.
*/
protected int [] m_DatasetKeyColumns;
/** The list of dataset specifiers */
protected DatasetSpecifiers m_DatasetSpecifiers =
new DatasetSpecifiers();
/**
* The range of columns that specify a unique result set
* (eg: scheme plus configuration)
*/
protected Range m_ResultsetKeyColumnsRange = new Range();
/**
* An array containing the indexes of just the selected resultset key
* columns. Read by Resultset.matchesTemplate and Resultset.templateString.
*/
protected int [] m_ResultsetKeyColumns;
/** Stores a vector for each resultset holding all instances in each set */
protected FastVector m_Resultsets = new FastVector();
/** Indicates whether the instances have been partitioned */
protected boolean m_ResultsetsValid;
/**
* Indicates whether standard deviations should be displayed
* (see setShowStdDevs / getShowStdDevs)
*/
protected boolean m_ShowStdDevs = false;
/** Produce tables in latex format (see setProduceLatex / getProduceLatex) */
protected boolean m_latexOutput = false;
/**
 * A list of the unique "dataset" specifiers that have been observed. A
 * specifier is an instance; two instances denote the same dataset when they
 * hold equal values in every dataset key column.
 */
protected class DatasetSpecifiers {

  /** The stored specifier instances, one per distinct dataset key. */
  FastVector m_Specifiers = new FastVector();

  /**
   * Discards every stored specifier.
   */
  protected void removeAllSpecifiers() {
    m_Specifiers.removeAllElements();
  }

  /**
   * Stores the instance as a new specifier unless a specifier with the same
   * values in all dataset key columns is already present.
   *
   * @param inst the instance to record
   */
  protected void add(Instance inst) {
    final int known = m_Specifiers.size();
    for (int s = 0; s < known; s++) {
      Instance existing = (Instance) m_Specifiers.elementAt(s);
      if (sameDatasetKey(inst, existing)) {
        // Already represented, nothing to add.
        return;
      }
    }
    m_Specifiers.addElement(inst);
  }

  /**
   * Tells whether two instances agree on every dataset key column.
   *
   * @param a the first instance
   * @param b the second instance
   * @return true if no dataset key column holds differing values
   */
  private boolean sameDatasetKey(Instance a, Instance b) {
    for (int k = 0; k < m_DatasetKeyColumns.length; k++) {
      int column = m_DatasetKeyColumns[k];
      if (a.value(column) != b.value(column)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Gets the specifier stored at the given position.
   *
   * @param i the position of the wanted specifier
   * @return the specifier instance at that position
   */
  protected Instance specifier(int i) {
    Object stored = m_Specifiers.elementAt(i);
    return (Instance) stored;
  }

  /**
   * Gets the number of specifiers currently stored.
   *
   * @return the specifier count
   */
  protected int numSpecifiers() {
    return m_Specifiers.size();
  }
}
/**
 * Utility class to store the instances pertaining to a dataset. The dataset
 * is identified by a template instance, against which other instances can be
 * compared on the dataset key columns.
 */
protected class Dataset {

  /** The instance whose dataset key column values identify this dataset. */
  Instance m_Template;

  /** The instances stored in this dataset. */
  FastVector m_Dataset;

  /**
   * Creates a dataset identified by the given instance and stores that
   * instance as its first member.
   *
   * @param template the instance providing the dataset key values
   */
  public Dataset(Instance template) {
    m_Template = template;
    m_Dataset = new FastVector();
    add(template);
  }

  /**
   * Returns true if the given instance matches the template on those
   * attributes that have been designated dataset key columns.
   *
   * @param first the instance to compare with the template
   * @return true if the instance and the template hold equal values in all
   * currently set dataset key columns
   */
  protected boolean matchesTemplate(Instance first) {
    boolean matches = true;
    // Stop at the first key column on which the values differ.
    for (int k = 0; matches && k < m_DatasetKeyColumns.length; k++) {
      int column = m_DatasetKeyColumns[k];
      matches = (first.value(column) == m_Template.value(column));
    }
    return matches;
  }

  /**
   * Appends the given instance to the dataset. No check against the
   * template is made here.
   *
   * @param inst the instance to store
   */
  protected void add(Instance inst) {
    m_Dataset.addElement(inst);
  }

  /**
   * Returns the vector holding the instances in the dataset.
   *
   * @return the vector of stored instances (not a copy)
   */
  protected FastVector contents() {
    return m_Dataset;
  }

  /**
   * Reorders the stored instances by their value in the given column, using
   * the index order produced by Utils.stableSort.
   *
   * @param runColumn the index of the column holding the run number
   */
  public void sort(int runColumn) {
    final int size = m_Dataset.size();

    // Collect the sort key of every stored instance.
    double[] runNumbers = new double[size];
    for (int pos = 0; pos < size; pos++) {
      Instance member = (Instance) m_Dataset.elementAt(pos);
      runNumbers[pos] = member.value(runColumn);
    }

    // Rebuild the vector following the sorted index order.
    int[] order = Utils.stableSort(runNumbers);
    FastVector sorted = new FastVector(size);
    for (int rank = 0; rank < order.length; rank++) {
      sorted.addElement(m_Dataset.elementAt(order[rank]));
    }
    m_Dataset = sorted;
  }
}
/**
 * Utility class to store the instances in a resultset. The resultset is
 * identified by a template instance and keeps its instances grouped into
 * Dataset objects.
 */
protected class Resultset {

  /** The instance whose resultset key column values identify this resultset. */
  Instance m_Template;

  /** The Dataset objects holding the instances of this resultset. */
  FastVector m_Datasets;

  /**
   * Creates a resultset identified by the given instance and adds that
   * instance to it.
   *
   * @param template the instance providing the resultset key values
   */
  public Resultset(Instance template) {
    m_Template = template;
    m_Datasets = new FastVector();
    add(template);
  }

  /**
   * Returns true if the given instance matches the template on those
   * attributes that have been designated resultset key columns (eg: scheme
   * name and scheme options).
   *
   * @param first the instance to compare with the template
   * @return true if the instance and the template hold equal values in all
   * currently set resultset key columns
   */
  protected boolean matchesTemplate(Instance first) {
    boolean matches = true;
    // Stop at the first key column on which the values differ.
    for (int k = 0; matches && k < m_ResultsetKeyColumns.length; k++) {
      int column = m_ResultsetKeyColumns[k];
      matches = (first.value(column) == m_Template.value(column));
    }
    return matches;
  }

  /**
   * Returns a string descriptive of the resultset key column values for
   * this resultset. Each key value is followed by a space, has some common
   * weka package prefixes removed, and the concatenation is trimmed.
   *
   * @return the compacted description of the template's key values
   */
  protected String templateString() {
    // Package prefixes stripped from each value, in this order.
    final String[] prefixes = {
      "weka.classifiers.",
      "weka.filters.",
      "weka.attributeSelection."
    };
    StringBuffer description = new StringBuffer();
    for (int k = 0; k < m_ResultsetKeyColumns.length; k++) {
      String part = m_Template.toString(m_ResultsetKeyColumns[k]) + ' ';
      // compact the string
      for (int p = 0; p < prefixes.length; p++) {
        part = Utils.removeSubstring(part, prefixes[p]);
      }
      description.append(part);
    }
    return description.toString().trim();
  }

  /**
   * Returns a vector containing all instances belonging to one dataset.
   *
   * @param inst an instance identifying the wanted dataset
   * @return the contents of the first stored dataset whose template the
   * instance matches, or null if there is no such dataset
   */
  public FastVector dataset(Instance inst) {
    for (int d = 0; d < m_Datasets.size(); d++) {
      Dataset candidate = (Dataset) m_Datasets.elementAt(d);
      if (candidate.matchesTemplate(inst)) {
        return candidate.contents();
      }
    }
    return null;
  }

  /**
   * Adds an instance to this resultset. The instance goes into the first
   * stored dataset whose template it matches; if there is none, a new
   * dataset is created from it.
   *
   * @param newInst the instance to add
   */
  public void add(Instance newInst) {
    final int known = m_Datasets.size();
    for (int d = 0; d < known; d++) {
      Dataset candidate = (Dataset) m_Datasets.elementAt(d);
      if (candidate.matchesTemplate(newInst)) {
        candidate.add(newInst);
        return;
      }
    }
    // No existing dataset matched: start a new one with this instance.
    m_Datasets.addElement(new Dataset(newInst));
  }

  /**
   * Sorts the instances in each dataset by the run number.
   *
   * @param runColumn the index of the column holding the run number
   */
  public void sort(int runColumn) {
    final int count = m_Datasets.size();
    for (int d = 0; d < count; d++) {
      Dataset current = (Dataset) m_Datasets.elementAt(d);
      current.sort(runColumn);
    }
  }
} // Resultset
/**
 * Returns a string descriptive of the key column values for
 * the "datasets". The template's values in the dataset key columns are
 * joined with spaces; a leading "weka.classifiers." is dropped and the
 * result is trimmed.
 *
 * @param template the instance whose dataset key values are described
 * @return the description of the template's dataset key values
 */
protected String templateString(Instance template) {
  final String classifierPrefix = "weka.classifiers.";

  StringBuffer joined = new StringBuffer();
  for (int k = 0; k < m_DatasetKeyColumns.length; k++) {
    joined.append(template.toString(m_DatasetKeyColumns[k])).append(' ');
  }

  String description = joined.toString();
  // Only a prefix at the very start of the description is removed.
  if (description.startsWith(classifierPrefix)) {
    description = description.substring(classifierPrefix.length());
  }
  return description.trim();
}
/**
 * Sets whether tables are produced in Latex format.
 *
 * @param l true if tables are to be produced in Latex format
 */
public void setProduceLatex(boolean l) {
  this.m_latexOutput = l;
}
/**
 * Gets whether tables are produced in Latex format.
 *
 * @return true if Latex is to be output
 */
public boolean getProduceLatex() {
  return this.m_latexOutput;
}
/**
 * Sets whether standard deviations are displayed or not.
 *
 * @param s true if standard deviations are to be displayed
 */
public void setShowStdDevs(boolean s) {
  this.m_ShowStdDevs = s;
}
/**
 * Gets whether standard deviations have been requested.
 *
 * @return true if standard deviations are to be displayed
 */
public boolean getShowStdDevs() {
  return this.m_ShowStdDevs;
}
/**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -