📄 pairedttester.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PairedTTester.java * Copyright (C) 1999 Len Trigg * */package weka.experiment;import weka.core.Instances;import weka.core.Instance;import weka.core.Range;import weka.core.Attribute;import weka.core.Utils;import weka.core.FastVector;import weka.core.Statistics;import weka.core.OptionHandler;import java.io.BufferedReader;import java.io.FileReader;import java.util.Date;import java.text.SimpleDateFormat;import java.util.Enumeration;import java.util.Vector;import weka.core.Option;/** * Calculates T-Test statistics on data stored in a set of instances.<p> * * Valid options from the command-line are:<p> * * -D num,num2... <br> * The column numbers that uniquely specify a dataset. * (default last) <p> * * -R num <br> * The column number containing the run number. * (default last) <p> * * -S num <br> * The significance level for T-Tests. * (default 0.05) <p> * * -R num,num2... <br> * The column numbers that uniquely specify one result generator (eg: * scheme name plus options). * (default last) <p> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.14 $ */public class PairedTTester implements OptionHandler { /** The set of instances we will analyse */ protected Instances m_Instances; /** The index of the column containing the run number */ protected int m_RunColumn = 0; /** The option setting for the run number column (-1 means last) */ protected int m_RunColumnSet = -1; /** The significance level for comparisons */ protected double m_SignificanceLevel = 0.05; /** * The range of columns that specify a unique "dataset" * (eg: scheme plus configuration) */ protected Range m_DatasetKeyColumnsRange = new Range(); /** An array containing the indexes of just the selected columns */ protected int [] m_DatasetKeyColumns; /** The list of dataset specifiers */ protected DatasetSpecifiers m_DatasetSpecifiers = new DatasetSpecifiers(); /** * The range of columns that specify a unique result set * (eg: scheme plus configuration) */ protected Range m_ResultsetKeyColumnsRange = new Range(); /** An array containing the indexes of just the selected columns */ protected int [] m_ResultsetKeyColumns; /** Stores a vector for each resultset holding all instances in each set */ protected FastVector m_Resultsets = new FastVector(); /** Indicates whether the instances have been partitioned */ protected boolean m_ResultsetsValid; /** Indicates whether standard deviations should be displayed */ protected boolean m_ShowStdDevs = false; /** Produce tables in latex format */ protected boolean m_latexOutput = false; /* A list of unique "dataset" specifiers that have been observed */ private class DatasetSpecifiers { FastVector m_Specifiers = new FastVector(); /** * Removes all specifiers. */ protected void removeAllSpecifiers() { m_Specifiers.removeAllElements(); } /** * Add an instance to the list of specifiers (if necessary) */ protected void add(Instance inst) { for (int i = 0; i < m_Specifiers.size(); i++) { Instance specifier = (Instance)m_Specifiers.elementAt(i); boolean found = true; for (int j = 0; j < m_DatasetKeyColumns.length; j++) { if (inst.value(m_DatasetKeyColumns[j]) != specifier.value(m_DatasetKeyColumns[j])) { found = false; } } if (found) { return; } } m_Specifiers.addElement(inst); } /** * Get the template at the given position. */ protected Instance specifier(int i) { return (Instance)m_Specifiers.elementAt(i); } /** * Gets the number of specifiers. */ protected int numSpecifiers() { return m_Specifiers.size(); } } /* Utility class to store the instances pertaining to a dataset */ private class Dataset { Instance m_Template; FastVector m_Dataset; public Dataset(Instance template) { m_Template = template; m_Dataset = new FastVector(); add(template); } /** * Returns true if the two instances match on those attributes that have * been designated key columns (eg: scheme name and scheme options) * * @param first the first instance * @param second the second instance * @return true if first and second match on the currently set key columns */ protected boolean matchesTemplate(Instance first) { for (int i = 0; i < m_DatasetKeyColumns.length; i++) { if (first.value(m_DatasetKeyColumns[i]) != m_Template.value(m_DatasetKeyColumns[i])) { return false; } } return true; } /** * Adds the given instance to the dataset */ protected void add(Instance inst) { m_Dataset.addElement(inst); } /** * Returns a vector containing the instances in the dataset */ protected FastVector contents() { return m_Dataset; } /** * Sorts the instances in the dataset by the run number. * * @param runColumn a value of type 'int' */ public void sort(int runColumn) { double [] runNums = new double [m_Dataset.size()]; for (int j = 0; j < runNums.length; j++) { runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn); } int [] index = Utils.sort(runNums); FastVector newDataset = new FastVector(runNums.length); for (int j = 0; j < index.length; j++) { newDataset.addElement(m_Dataset.elementAt(index[j])); } m_Dataset = newDataset; } } /* Utility class to store the instances in a resultset */ private class Resultset { Instance m_Template; FastVector m_Datasets; public Resultset(Instance template) { m_Template = template; m_Datasets = new FastVector(); add(template); } /** * Returns true if the two instances match on those attributes that have * been designated key columns (eg: scheme name and scheme options) * * @param first the first instance * @param second the second instance * @return true if first and second match on the currently set key columns */ protected boolean matchesTemplate(Instance first) { for (int i = 0; i < m_ResultsetKeyColumns.length; i++) { if (first.value(m_ResultsetKeyColumns[i]) != m_Template.value(m_ResultsetKeyColumns[i])) { return false; } } return true; } /** * Returns a string descriptive of the resultset key column values * for this resultset * * @return a value of type 'String' */ protected String templateString() { String result = ""; String tempResult = ""; for (int i = 0; i < m_ResultsetKeyColumns.length; i++) { tempResult = m_Template.toString(m_ResultsetKeyColumns[i]) + ' '; // compact the string tempResult = Utils.removeSubstring(tempResult, "weka.classifiers."); tempResult = Utils.removeSubstring(tempResult, "weka.filters."); tempResult = Utils.removeSubstring(tempResult, "weka.attributeSelection."); result += tempResult; } return result.trim(); } /** * Returns a vector containing all instances belonging to one dataset. * * @param index a template instance * @return a value of type 'FastVector' */ public FastVector dataset(Instance inst) { for (int i = 0; i < m_Datasets.size(); i++) { if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(inst)) { return ((Dataset)m_Datasets.elementAt(i)).contents(); } } return null; } /** * Adds an instance to this resultset * * @param newInst a value of type 'Instance' */ public void add(Instance newInst) { for (int i = 0; i < m_Datasets.size(); i++) { if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(newInst)) { ((Dataset)m_Datasets.elementAt(i)).add(newInst); return; } } Dataset newDataset = new Dataset(newInst); m_Datasets.addElement(newDataset); } /** * Sorts the instances in each dataset by the run number. * * @param runColumn a value of type 'int' */ public void sort(int runColumn) { for (int i = 0; i < m_Datasets.size(); i++) { ((Dataset)m_Datasets.elementAt(i)).sort(runColumn); } } } // Resultset /** * Returns a string descriptive of the key column values for * the "datasets * * @param template the template * @return a value of type 'String' */ private String templateString(Instance template) { String result = ""; for (int i = 0; i < m_DatasetKeyColumns.length; i++) { result += template.toString(m_DatasetKeyColumns[i]) + ' '; } if (result.startsWith("weka.classifiers.")) { result = result.substring("weka.classifiers.".length()); } return result.trim(); } /** * Set whether latex is output * @param l true if tables are to be produced in Latex format */ public void setProduceLatex(boolean l) { m_latexOutput = l; } /** * Get whether latex is output * @return true if Latex is to be output */ public boolean getProduceLatex() { return m_latexOutput; } /** * Set whether standard deviations are displayed or not. * @param s true if standard deviations are to be displayed */ public void setShowStdDevs(boolean s) { m_ShowStdDevs = s; } /** * Returns true if standard deviations have been requested. * @return true if standard deviations are to be displayed. */ public boolean getShowStdDevs() { return m_ShowStdDevs; } /** * Separates the instances into resultsets and by dataset/run. * * @exception Exception if the TTest parameters have not been set. */ protected void prepareData() throws Exception { if (m_Instances == null) { throw new Exception("No instances have been set"); } if (m_RunColumnSet == -1) { m_RunColumn = m_Instances.numAttributes() - 1; } else { m_RunColumn = m_RunColumnSet; } if (m_ResultsetKeyColumnsRange == null) { throw new Exception("No result specifier columns have been set"); } m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1); m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection(); if (m_DatasetKeyColumnsRange == null) { throw new Exception("No dataset specifier columns have been set"); } m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1); m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection(); // Split the data up into result sets m_Resultsets.removeAllElements(); m_DatasetSpecifiers.removeAllSpecifiers(); for (int i = 0; i < m_Instances.numInstances(); i++) { Instance current = m_Instances.instance(i); if (current.isMissing(m_RunColumn)) { throw new Exception("Instance has missing value in run " + "column!\n" + current); } for (int j = 0; j < m_ResultsetKeyColumns.length; j++) { if (current.isMissing(m_ResultsetKeyColumns[j])) { throw new Exception("Instance has missing value in resultset key " + "column " + (m_ResultsetKeyColumns[j] + 1) + "!\n" + current); } } for (int j = 0; j < m_DatasetKeyColumns.length; j++) { if (current.isMissing(m_DatasetKeyColumns[j])) { throw new Exception("Instance has missing value in dataset key " + "column " + (m_DatasetKeyColumns[j] + 1) + "!\n" + current); } } boolean found = false; for (int j = 0; j < m_Resultsets.size(); j++) { Resultset resultset = (Resultset) m_Resultsets.elementAt(j); if (resultset.matchesTemplate(current)) { resultset.add(current); found = true; break; } } if (!found) { Resultset resultset = new Resultset(current); m_Resultsets.addElement(resultset); } m_DatasetSpecifiers.add(current); } // Tell each resultset to sort on the run column for (int j = 0; j < m_Resultsets.size(); j++) { Resultset resultset = (Resultset) m_Resultsets.elementAt(j); resultset.sort(m_RunColumn); } m_ResultsetsValid = true; } /** * Gets the number of datasets in the resultsets * * @return the number of datasets in the resultsets */ public int getNumDatasets() { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return 0; } } return m_DatasetSpecifiers.numSpecifiers(); } /** * Gets the number of resultsets in the data. * * @return the number of resultsets in the data */ public int getNumResultsets() { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return 0; } } return m_Resultsets.size(); } /** * Gets a string descriptive of the specified resultset. * * @param index the index of the resultset
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -