📄 pairedttester.java

📁 《数据挖掘--实用机器学习技术及Java实现》一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
      if (j != baseResultset) {        label = new StringBuffer(Utils.padLeft("(" + (j + 1) + ") "                                               + getResultsetName(j), resultsetLength));        titles.append(label).append(' ');        for (int i = 0; i < label.length(); i++) {          separator.append('-');        }        separator.append('-');      }    }    result.append(titles).append('\n').append(separator).append('\n');        // Iterate over datasets    int [] win = new int [getNumResultsets()];    int [] loss = new int [getNumResultsets()];    int [] tie = new int [getNumResultsets()];    StringBuffer skipped = new StringBuffer("");    for (int i = 0; i < getNumDatasets(); i++) {      // Print the name of the dataset      String datasetName =         templateString(m_DatasetSpecifiers.specifier(i));      try {        PairedStats pairedStats =           calculateStatistics(m_DatasetSpecifiers.specifier(i),                               baseResultset, baseResultset,                              comparisonColumn);        datasetName = Utils.padRight(datasetName, datasetLength);        result.append(datasetName);        result.append(Utils.padLeft('('                                    + Utils.doubleToString(pairedStats.count,                                                           0)                                    + ')', 5)).append(' ');        if (!m_ShowStdDevs) {          result.append(Utils.doubleToString(pairedStats.xStats.mean,                                             resultsetLength - 2, 2)).            
append(" | ");        } else {          result.append(Utils.doubleToString(pairedStats.xStats.mean,                                             (maxWidthMean+5), 2));          if (Double.isInfinite(pairedStats.xStats.stdDev)) {            result.append('(' + Utils.padRight("Inf", maxWidthStdDev + 3)                          +')').append(" | ");          } else {            result.append('('+Utils.doubleToString(pairedStats.xStats.stdDev,                                                   (maxWidthStdDev+3),2)                          +')').append(" | ");          }        }        // Iterate over the resultsets        for (int j = 0; j < getNumResultsets(); j++) {          if (j != baseResultset) {            try {              pairedStats =                 calculateStatistics(m_DatasetSpecifiers.specifier(i),                                     baseResultset, j, comparisonColumn);              char sigChar = ' ';              if (pairedStats.differencesSignificance < 0) {                sigChar = 'v';                win[j]++;              } else if (pairedStats.differencesSignificance > 0) {                sigChar = '*';                loss[j]++;              } else {                tie[j]++;              }              if (!m_ShowStdDevs) {                result.append(Utils.doubleToString(pairedStats.yStats.mean,                                                   resultsetLength - 2,                                                   2)).append(' ')                  .append(sigChar).append(' ');              } else {                result.append(Utils.doubleToString(pairedStats.yStats.mean,                                                   (maxWidthMean+5),                                                   2));                if (Double.isInfinite(pairedStats.yStats.stdDev)) {                  result.append('('                                 + Utils.padRight("Inf", maxWidthStdDev + 3)                                +')');                } else {                  
result.append('('+Utils.doubleToString(pairedStats.                                                         yStats.stdDev,                                                          (maxWidthStdDev+3),                                                         2)+')');                }                result.append(' ').append(sigChar).append(' ');              }            } catch (Exception ex) {              ex.printStackTrace();              result.append(Utils.padLeft("", resultsetLength + 1));            }          }        }        result.append('\n');      } catch (Exception ex) {        ex.printStackTrace();        skipped.append(datasetName).append(' ');      }    }    result.append(separator).append('\n');    result.append(Utils.padLeft("(v/ /*)", datasetLength + 4 +                                resultsetLength)).append(" | ");    for (int j = 0; j < getNumResultsets(); j++) {      if (j != baseResultset) {        result.append(Utils.padLeft("(" + win[j] + '/' + tie[j]                                    + '/' + loss[j] + ')',                                    resultsetLength)).append(' ');      }    }    result.append('\n');    if (!skipped.equals("")) {      result.append("Skipped: ").append(skipped).append('\n');    }    return result.toString();  }				      /**   * Creates a comparison table where a base resultset is compared to the   * other resultsets. Results are presented for every dataset.   
*   * @param baseResultset the index of the base resultset   * @param comparisonColumn the index of the column to compare over   * @return the comparison table string   * @exception Exception if an error occurs   */  public String multiResultsetFull(int baseResultset,				   int comparisonColumn) throws Exception {    int maxWidthMean = 2;    int maxWidthStdDev = 2;     // determine max field width    for (int i = 0; i < getNumDatasets(); i++) {      for (int j = 0; j < getNumResultsets(); j++) {	try {	  PairedStats pairedStats = 	    calculateStatistics(m_DatasetSpecifiers.specifier(i), 				baseResultset, j, comparisonColumn);          if (!Double.isInfinite(pairedStats.yStats.mean) &&              !Double.isNaN(pairedStats.yStats.mean)) {            double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) /                              Math.log(10))+1);            if (width > maxWidthMean) {              maxWidthMean = (int)width;            }          }	  	  if (m_ShowStdDevs &&              !Double.isInfinite(pairedStats.yStats.stdDev) &&              !Double.isNaN(pairedStats.yStats.stdDev)) {	    double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) /                              Math.log(10))+1);	    if (width > maxWidthStdDev) {	      maxWidthStdDev = (int)width;	    }	  }	}  catch (Exception ex) {	  ex.printStackTrace();	}      }    }    StringBuffer result = new StringBuffer(1000);    if (m_latexOutput) {      result = new StringBuffer(multiResultsetFullLatex(baseResultset, 							comparisonColumn, 							maxWidthMean,							maxWidthStdDev));    } else {      result = new StringBuffer(multiResultsetFullPlainText(baseResultset,                                                             comparisonColumn,                                                             maxWidthMean,                                                            maxWidthStdDev));    }    // append a key so that we can tell the difference between long    // scheme+option 
names    result.append("\nKey:\n\n");    for (int j = 0; j < getNumResultsets(); j++) {      result.append("("+(j+1)+") ");      result.append(getResultsetName(j)+"\n");    }    return result.toString();  }  /**   * Lists options understood by this object.   *   * @return an enumeration of Options.   */  public Enumeration listOptions() {        Vector newVector = new Vector(5);    newVector.addElement(new Option(             "\tSpecify list of columns that specify a unique\n"	      + "\tdataset.\n"	      + "\tFirst and last are valid indexes. (default none)",              "D", 1, "-D <index,index2-index4,...>"));    newVector.addElement(new Option(	      "\tSet the index of the column containing the run number",              "R", 1, "-R <index>"));    newVector.addElement(new Option(              "\tSpecify list of columns that specify a unique\n"	      + "\t'result generator' (eg: classifier name and options).\n"	      + "\tFirst and last are valid indexes. (default none)",              "G", 1, "-G <index1,index2-index4,...>"));    newVector.addElement(new Option(	      "\tSet the significance level for comparisons (default 0.05)",              "S", 1, "-S <significance level>"));    newVector.addElement(new Option(	      "\tShow standard deviations",              "V", 0, "-V"));    newVector.addElement(new Option(	      "\tProduce table comparisons in Latex table format",              "L", 0, "-L"));    return newVector.elements();  }  /**   * Parses a given list of options. Valid options are:<p>   *   * -D num,num2... <br>   * The column numbers that uniquely specify a dataset.   * (default last) <p>   *   * -R num <br>   * The column number containing the run number.   * (default last) <p>   *   * -S num <br>   * The significance level for T-Tests.   * (default 0.05) <p>   *   * -R num,num2... <br>   * The column numbers that uniquely specify one result generator (eg:   * scheme name plus options).   
* (default last) <p>   *   * -V <br>   * Show standard deviations <p>   *   * -L <br>   * Produce comparison tables in Latex table format <p>   *   * @param options an array containing options to set.   * @exception Exception if invalid options are given   */  public void setOptions(String[] options) throws Exception {    setShowStdDevs(Utils.getFlag('V', options));    setProduceLatex(Utils.getFlag('L', options));    String datasetList = Utils.getOption('D', options);    Range datasetRange = new Range();    if (datasetList.length() != 0) {      datasetRange.setRanges(datasetList);    }    setDatasetKeyColumns(datasetRange);    String indexStr = Utils.getOption('R', options);    if (indexStr.length() != 0) {      if (indexStr.equals("first")) {	setRunColumn(0);      } else if (indexStr.equals("last")) {	setRunColumn(-1);      } else {	setRunColumn(Integer.parseInt(indexStr) - 1);      }        } else {      setRunColumn(-1);    }    String sigStr = Utils.getOption('S', options);    if (sigStr.length() != 0) {      setSignificanceLevel((new Double(sigStr)).doubleValue());    } else {      setSignificanceLevel(0.05);    }        String resultsetList = Utils.getOption('G', options);    Range generatorRange = new Range();    if (resultsetList.length() != 0) {      generatorRange.setRanges(resultsetList);    }    setResultsetKeyColumns(generatorRange);  }    /**   * Gets current settings of the PairedTTester.   *   * @return an array of strings containing current options.   
*/  public String[] getOptions() {    String [] options = new String [10];    int current = 0;    if (!getResultsetKeyColumns().getRanges().equals("")) {      options[current++] = "-G";      options[current++] = getResultsetKeyColumns().getRanges();    }    if (!getDatasetKeyColumns().getRanges().equals("")) {      options[current++] = "-D";      options[current++] = getDatasetKeyColumns().getRanges();    }    options[current++] = "-R";    options[current++] = "" + (getRunColumn() + 1);    options[current++] = "-S";    options[current++] = "" + getSignificanceLevel();        if (getShowStdDevs()) {      options[current++] = "-V";    }    if (getProduceLatex()) {      options[current++] = "-L";    }    while (current < options.length) {      options[current++] = "";    }    return options;  }  /**   * Get the value of ResultsetKeyColumns.   *   * @return Value of ResultsetKeyColumns.   */  public Range getResultsetKeyColumns() {        return m_ResultsetKeyColumnsRange;  }    /**   * Set the value of ResultsetKeyColumns.   *   * @param newResultsetKeyColumns Value to assign to ResultsetKeyColumns.   */  public void setResultsetKeyColumns(Range newResultsetKeyColumns) {        m_ResultsetKeyColumnsRange = newResultsetKeyColumns;    m_ResultsetsValid = false;  }    /**   * Get the value of SignificanceLevel.   *   * @return Value of SignificanceLevel.   */  public double getSignificanceLevel() {        return m_SignificanceLevel;  }    /**   * Set the value of SignificanceLevel.   *   * @param newSignificanceLevel Value to assign to SignificanceLevel.   */  public void setSignificanceLevel(double newSignificanceLevel) {        m_SignificanceLevel = newSignificanceLevel;  }  /**   * Get the value of DatasetKeyColumns.   *   * @return Value of DatasetKeyColumns.   */  public Range getDatasetKeyColumns() {        return m_DatasetKeyColumnsRange;  }    /**   * Set the value of DatasetKeyColumns.   *   * @param newDatasetKeyColumns Value to assign to DatasetKeyColumns.   
*/
  public void setDatasetKeyColumns(Range newDatasetKeyColumns) {

    m_DatasetKeyColumnsRange = newDatasetKeyColumns;
    // The key columns changed, so flag the resultsets as not valid.
    m_ResultsetsValid = false;
  }

  /**
   * Get the value of RunColumn.
   *
   * @return Value of RunColumn.
   */
  public int getRunColumn() {

    return m_RunColumnSet;
  }

  /**
   * Set the value of RunColumn.
   *
   * @param newRunColumn Value to assign to RunColumn.
   */
  public void setRunColumn(int newRunColumn) {

    m_RunColumnSet = newRunColumn;
  }

  /**
   * Get the value of Instances.
   *
   * @return Value of Instances.
   */
  public Instances getInstances() {

    return m_Instances;
  }

  /**
   * Set the value of Instances.
   *
   * @param newInstances Value to assign to Instances.
   */
  public void setInstances(Instances newInstances) {

    m_Instances = newInstances;
    // New data was supplied, so flag the resultsets as not valid.
    m_ResultsetsValid = false;
  }

  /**
   * Test the class from the command line. Besides the options handled by
   * setOptions, the following are read here: -t (dataset file, required),
   * -c (1-based column to compare on, required), -b (1-based base
   * resultset, optional), -s (summary only) and -r (ranking only).
   * Any failure is reported on standard error.
   *
   * @param args contains options for the instance ttests
   */
  public static void main(String args[]) {

    try {
      PairedTTester tt = new PairedTTester();
      String datasetName = Utils.getOption('t', args);
      String compareColStr = Utils.getOption('c', args);
      String baseColStr = Utils.getOption('b', args);
      boolean summaryOnly = Utils.getFlag('s', args);
      boolean rankingOnly = Utils.getFlag('r', args);
      try {
        if ((datasetName.length() == 0)
            || (compareColStr.length() == 0)) {
          throw new Exception("-t and -c options are required");
        }
        tt.setOptions(args);
        Utils.checkForRemainingOptions(args);
      } catch (Exception ex) {
        // Collect the synopsis and description of every supported option.
        // ("enum" is a reserved word since Java 5, hence the variable name.)
        StringBuffer optionText = new StringBuffer();
        Enumeration optionEnum = tt.listOptions();
        while (optionEnum.hasMoreElements()) {
          Option option = (Option) optionEnum.nextElement();
          optionText.append(option.synopsis()).append('\n')
            .append(option.description()).append('\n');
        }
        // Keep the original failure as the cause so that the reason the
        // options were rejected is not lost behind the usage text.
        throw new Exception(
              "Usage:\n\n"
              + "-t <file>\n"
              + "\tSet the dataset containing data to evaluate\n"
              + "-b <index>\n"
              + "\tSet the resultset to base comparisons against (optional)\n"
              + "-c <index>\n"
              + "\tSet the column to perform a comparison on\n"
              + "-s\n"
              + "\tSummarize wins over all resultset pairs\n\n"
              + "-r\n"
              + "\tGenerate a resultset ranking\n\n"
              + optionText.toString(), ex);
      }

      BufferedReader reader =
        new BufferedReader(new FileReader(datasetName));
      try {
        Instances data = new Instances(reader);
        tt.setInstances(data);
        //      tt.prepareData();

        // Column and resultset indices are 1-based on the command line.
        int compareCol = Integer.parseInt(compareColStr) - 1;
        System.out.println(tt.header(compareCol));
        if (rankingOnly) {
          System.out.println(tt.multiResultsetRanking(compareCol));
        } else if (summaryOnly) {
          System.out.println(tt.multiResultsetSummary(compareCol));
        } else {
          System.out.println(tt.resultsetKey());
          if (baseColStr.length() == 0) {
            // No base given: use each resultset as the base in turn.
            for (int i = 0; i < tt.getNumResultsets(); i++) {
              System.out.println(tt.multiResultsetFull(i, compareCol));
            }
          } else {
            int baseCol = Integer.parseInt(baseColStr) - 1;
            System.out.println(tt.multiResultsetFull(baseCol, compareCol));
          }
        }
      } finally {
        // Release the file handle once all output has been produced.
        reader.close();
      }
    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }
}
上一页 1 2 3
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -