pairedttester.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 1,482 行 · 第 1/3 页
JAVA
1,482 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    PairedTTester.java *    Copyright (C) 1999 Len Trigg * */package weka.experiment;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Range;import weka.core.Utils;import java.io.BufferedReader;import java.io.FileReader;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Enumeration;import java.util.Vector;/** * Calculates T-Test statistics on data stored in a set of instances. <p/> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D &lt;index,index2-index4,...&gt; *  Specify list of columns that specify a unique *  dataset. *  First and last are valid indexes. (default none)</pre> *  * <pre> -R &lt;index&gt; *  Set the index of the column containing the run number</pre> *  * <pre> -F &lt;index&gt; *  Set the index of the column containing the fold number</pre> *  * <pre> -G &lt;index1,index2-index4,...&gt; *  Specify list of columns that specify a unique *  'result generator' (eg: classifier name and options). *  First and last are valid indexes. (default none)</pre> *  * <pre> -S &lt;significance level&gt; *  Set the significance level for comparisons (default 0.05)</pre> *  * <pre> -V *  Show standard deviations</pre> *  * <pre> -L *  Produce table comparisons in Latex table format</pre> *  * <pre> -csv *  Produce table comparisons in CSV table format</pre> *  * <pre> -html *  Produce table comparisons in HTML table format</pre> *  * <pre> -significance *  Produce table comparisons with only the significance values</pre> *  * <pre> -gnuplot *  Produce table comparisons output suitable for GNUPlot</pre> *  <!-- options-end --> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.33 $ */public class PairedTTester   implements OptionHandler, Tester {    /** for serialization */  static final long serialVersionUID = 8370014624008728610L;  /** The set of instances we will analyse */  protected Instances m_Instances;  /** The index of the column containing the run number */  protected int m_RunColumn = 0;  /** The option setting for the run number column (-1 means last) */  protected int m_RunColumnSet = -1;  /** The option setting for the fold number column (-1 means none) */  protected int m_FoldColumn = -1;  /** The column to sort on (-1 means default sorting) */  protected int m_SortColumn = -1;  /** The sorting of the datasets (according to the sort column) */  protected int[] m_SortOrder = null;  /** The sorting of the columns (test base is always first) */  protected int[] m_ColOrder = null;  /** The significance level for comparisons */  protected double m_SignificanceLevel = 0.05;  /**   * The range of columns that specify a unique "dataset"   * (eg: scheme plus configuration)   */  protected Range m_DatasetKeyColumnsRange = new Range();  /** An array containing the indexes of just the selected columns */   protected int [] m_DatasetKeyColumns;  /** The list of dataset specifiers */  protected DatasetSpecifiers m_DatasetSpecifiers =     new DatasetSpecifiers();  /**   * The range of columns that specify a unique result set   * (eg: scheme plus configuration)   */  protected Range m_ResultsetKeyColumnsRange = new Range();  /** An array containing the indexes of just the selected columns */   protected int [] m_ResultsetKeyColumns;  /** An array containing the indexes of the datasets to display */  protected int[] m_DisplayedResultsets = null;  /** Stores a vector for each resultset holding all instances in each set */  protected FastVector m_Resultsets = new FastVector();  /** Indicates whether the instances have been partitioned */  protected boolean m_ResultsetsValid;  /** Indicates whether standard deviations should be displayed */  protected boolean m_ShowStdDevs = false;    /** the instance of the class to produce the output. */  protected ResultMatrix m_ResultMatrix = new ResultMatrixPlainText();    /** A list of unique "dataset" specifiers that have been observed */  protected class DatasetSpecifiers {    /** the specifiers that have been observed */    FastVector m_Specifiers = new FastVector();    /**     * Removes all specifiers.     */    protected void removeAllSpecifiers() {      m_Specifiers.removeAllElements();    }    /**      * Add an instance to the list of specifiers (if necessary)     *      * @param inst	the instance to add     */    protected void add(Instance inst) {            for (int i = 0; i < m_Specifiers.size(); i++) {	Instance specifier = (Instance)m_Specifiers.elementAt(i);	boolean found = true;	for (int j = 0; j < m_DatasetKeyColumns.length; j++) {	  if (inst.value(m_DatasetKeyColumns[j]) !=	      specifier.value(m_DatasetKeyColumns[j])) {	    found = false;	  }	}	if (found) {	  return;	}      }      m_Specifiers.addElement(inst);    }    /**     * Get the template at the given position.     *      * @param i		the index     * @return		the template     */    protected Instance specifier(int i) {      return (Instance)m_Specifiers.elementAt(i);    }    /**     * Gets the number of specifiers.     *      * @return		the current number of specifiers     */    protected int numSpecifiers() {      return m_Specifiers.size();    }  }  /** Utility class to store the instances pertaining to a dataset */  protected class Dataset {    /** the template */    Instance m_Template;    /** the dataset */    FastVector m_Dataset;    /**     * Constructor     *      * @param template	the template     */    public Dataset(Instance template) {      m_Template = template;      m_Dataset = new FastVector();      add(template);    }        /**     * Returns true if the two instances match on those attributes that have     * been designated key columns (eg: scheme name and scheme options)     *     * @param first the first instance     * @return true if first and second match on the currently set key columns     */    protected boolean matchesTemplate(Instance first) {            for (int i = 0; i < m_DatasetKeyColumns.length; i++) {	if (first.value(m_DatasetKeyColumns[i]) !=	    m_Template.value(m_DatasetKeyColumns[i])) {	  return false;	}      }      return true;    }    /**     * Adds the given instance to the dataset     *      * @param inst	the instance to add     */    protected void add(Instance inst) {            m_Dataset.addElement(inst);    }    /**     * Returns a vector containing the instances in the dataset     *      * @return 		the current contents     */    protected FastVector contents() {      return m_Dataset;    }    /**     * Sorts the instances in the dataset by the run number.     *     * @param runColumn a value of type 'int'     */    public void sort(int runColumn) {      double [] runNums = new double [m_Dataset.size()];      for (int j = 0; j < runNums.length; j++) {	runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn);      }      int [] index = Utils.stableSort(runNums);      FastVector newDataset = new FastVector(runNums.length);      for (int j = 0; j < index.length; j++) {	newDataset.addElement(m_Dataset.elementAt(index[j]));      }      m_Dataset = newDataset;    }  }   /** Utility class to store the instances in a resultset */  protected class Resultset {    /** the template */    Instance m_Template;        /** the dataset */    FastVector m_Datasets;    /**     * Constructir     *      * @param template		the template     */    public Resultset(Instance template) {      m_Template = template;      m_Datasets = new FastVector();      add(template);    }        /**     * Returns true if the two instances match on those attributes that have     * been designated key columns (eg: scheme name and scheme options)     *     * @param first the first instance     * @return true if first and second match on the currently set key columns     */    protected boolean matchesTemplate(Instance first) {            for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {	if (first.value(m_ResultsetKeyColumns[i]) !=	    m_Template.value(m_ResultsetKeyColumns[i])) {	  return false;	}      }      return true;    }    /**     * Returns a string descriptive of the resultset key column values     * for this resultset     *     * @return a value of type 'String'     */    protected String templateString() {      String result = "";      String tempResult = "";      for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {	tempResult = m_Template.toString(m_ResultsetKeyColumns[i]) + ' ';	// compact the string        tempResult = Utils.removeSubstring(tempResult, "weka.classifiers.");        tempResult = Utils.removeSubstring(tempResult, "weka.filters.");        tempResult = Utils.removeSubstring(tempResult, "weka.attributeSelection.");	result += tempResult;      }      return result.trim();    }        /**     * Returns a vector containing all instances belonging to one dataset.     *     * @param inst a template instance     * @return a value of type 'FastVector'     */    public FastVector dataset(Instance inst) {      for (int i = 0; i < m_Datasets.size(); i++) {	if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(inst)) {	  return ((Dataset)m_Datasets.elementAt(i)).contents();	}       }      return null;    }        /**     * Adds an instance to this resultset     *     * @param newInst a value of type 'Instance'     */    public void add(Instance newInst) {            for (int i = 0; i < m_Datasets.size(); i++) {	if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(newInst)) {	  ((Dataset)m_Datasets.elementAt(i)).add(newInst);	  return;	}      }      Dataset newDataset = new Dataset(newInst);      m_Datasets.addElement(newDataset);    }    /**     * Sorts the instances in each dataset by the run number.     *     * @param runColumn a value of type 'int'     */    public void sort(int runColumn) {      for (int i = 0; i < m_Datasets.size(); i++) {	((Dataset)m_Datasets.elementAt(i)).sort(runColumn);      }    }  } // Resultset  /**   * Returns a string descriptive of the key column values for   * the "datasets   *   * @param template the template   * @return a value of type 'String'   */  protected String templateString(Instance template) {        String result = "";    for (int i = 0; i < m_DatasetKeyColumns.length; i++) {      result += template.toString(m_DatasetKeyColumns[i]) + ' ';    }    if (result.startsWith("weka.classifiers.")) {      result = result.substring("weka.classifiers.".length());    }    return result.trim();  }  /**   * Sets the matrix to use to produce the output.   * @param matrix the instance to use to produce the output   * @see ResultMatrix   */  public void setResultMatrix(ResultMatrix matrix) {    m_ResultMatrix = matrix;  }  /**   * Gets the instance that produces the output.   * @return the instance to produce the output   */  public ResultMatrix getResultMatrix() {    return m_ResultMatrix;  }  /**   * Set whether standard deviations are displayed or not.   * @param s true if standard deviations are to be displayed   */  public void setShowStdDevs(boolean s) {    m_ShowStdDevs = s;  }  /**   * Returns true if standard deviations have been requested.   * @return true if standard deviations are to be displayed.   */  public boolean getShowStdDevs() {    return m_ShowStdDevs;  }    /**   * Separates the instances into resultsets and by dataset/run.   *   * @throws Exception if the TTest parameters have not been set.   */  protected void prepareData() throws Exception {    if (m_Instances == null) {      throw new Exception("No instances have been set");    }    if (m_RunColumnSet == -1) {      m_RunColumn = m_Instances.numAttributes() - 1;    } else {      m_RunColumn = m_RunColumnSet;    }    if (m_ResultsetKeyColumnsRange == null) {      throw new Exception("No result specifier columns have been set");    }    m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);    m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection();    if (m_DatasetKeyColumnsRange == null) {      throw new Exception("No dataset specifier columns have been set");    }    m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);    m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection();        //  Split the data up into result sets    m_Resultsets.removeAllElements();      m_DatasetSpecifiers.removeAllSpecifiers();    for (int i = 0; i < m_Instances.numInstances(); i++) {      Instance current = m_Instances.instance(i);      if (current.isMissing(m_RunColumn)) {	throw new Exception("Instance has missing value in run "			    + "column!\n" + current);      }       for (int j = 0; j < m_ResultsetKeyColumns.length; j++) {	if (current.isMissing(m_ResultsetKeyColumns[j])) {	  throw new Exception("Instance has missing value in resultset key "
pairedttester.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 pairedttester.java 源码文件，采用 Java 编程语言编写，共 1,482 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?