⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pairedttester.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
   * @return a descriptive string for the resultset   */  public String getResultsetName(int index) {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return null;      }    }    return ((Resultset) m_Resultsets.elementAt(index)).templateString();  }    /**   * Computes a paired t-test comparison for a specified dataset between   * two resultsets.   *   * @param datasetSpecifier the dataset specifier   * @param resultset1Index the index of the first resultset   * @param resultset2Index the index of the second resultset   * @param comparisonColumn the column containing values to compare   * @return the results of the paired comparison   * @exception Exception if an error occurs   */  public PairedStats calculateStatistics(Instance datasetSpecifier,				     int resultset1Index,				     int resultset2Index,				     int comparisonColumn) throws Exception {    if (m_Instances.attribute(comparisonColumn).type()	!= Attribute.NUMERIC) {      throw new Exception("Comparison column " + (comparisonColumn + 1)			  + " ("			  + m_Instances.attribute(comparisonColumn).name()			  + ") is not numeric");    }    if (!m_ResultsetsValid) {      prepareData();    }    Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index);    Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index);    FastVector dataset1 = resultset1.dataset(datasetSpecifier);    FastVector dataset2 = resultset2.dataset(datasetSpecifier);    String datasetName = templateString(datasetSpecifier);    if (dataset1 == null) {      throw new Exception("No results for dataset=" + datasetName			 + " for resultset=" + resultset1.templateString());    } else if (dataset2 == null) {      throw new Exception("No results for dataset=" + datasetName			 + " for resultset=" + resultset2.templateString());    } else if (dataset1.size() != dataset2.size()) {      throw new Exception("Results for dataset=" + datasetName			  + " differ in size for resultset="			  + resultset1.templateString()			  + " and resultset="			  + resultset2.templateString()			  );    }        PairedStats pairedStats = new PairedStats(m_SignificanceLevel);    for (int k = 0; k < dataset1.size(); k ++) {      Instance current1 = (Instance) dataset1.elementAt(k);      Instance current2 = (Instance) dataset2.elementAt(k);      if (current1.isMissing(comparisonColumn)) {	throw new Exception("Instance has missing value in comparison "			    + "column!\n" + current1);      }      if (current2.isMissing(comparisonColumn)) {	throw new Exception("Instance has missing value in comparison "			    + "column!\n" + current2);      }      if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) {	System.err.println("Run numbers do not match!\n"			    + current1 + current2);      }      double value1 = current1.value(comparisonColumn);      double value2 = current2.value(comparisonColumn);      pairedStats.add(value1, value2);    }    pairedStats.calculateDerived();    return pairedStats;  }    /**   * Creates a key that maps resultset numbers to their descriptions.   *   * @return a value of type 'String'   */  public String resultsetKey() {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return ex.getMessage();      }    }    String result = "";    for (int j = 0; j < getNumResultsets(); j++) {      result += "(" + (j + 1) + ") " + getResultsetName(j) + '\n';    }    return result + '\n';  }    /**   * Creates a "header" string describing the current resultsets.   *   * @param comparisonColumn a value of type 'int'   * @return a value of type 'String'   */  public String header(int comparisonColumn) {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return ex.getMessage();      }    }    return "Analysing:  "      + m_Instances.attribute(comparisonColumn).name() + '\n'      + "Datasets:   " + getNumDatasets() + '\n'      + "Resultsets: " + getNumResultsets() + '\n'      + "Confidence: " + getSignificanceLevel() + " (two tailed)\n"      + "Date:       " + (new SimpleDateFormat()).format(new Date()) + "\n\n";  }  /**   * Carries out a comparison between all resultsets, counting the number   * of datsets where one resultset outperforms the other.   *   * @param comparisonColumn the index of the comparison column   * @return a 2d array where element [i][j] is the number of times resultset   * j performed significantly better than resultset i.   * @exception Exception if an error occurs   */  public int [][] multiResultsetWins(int comparisonColumn)    throws Exception {    int numResultsets = getNumResultsets();    int [][] win = new int [numResultsets][numResultsets];    for (int i = 0; i < numResultsets; i++) {      for (int j = i + 1; j < numResultsets; j++) {	System.err.print("Comparing (" + (i + 1) + ") with ("			 + (j + 1) + ")\r");	System.err.flush();	for (int k = 0; k < getNumDatasets(); k++) {	  try {	    PairedStats pairedStats = 	      calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j,				  comparisonColumn);	    if (pairedStats.differencesSignificance < 0) {	      win[i][j]++;	    } else if (pairedStats.differencesSignificance > 0) {	      win[j][i]++;	    }	  } catch (Exception ex) {	    ex.printStackTrace();	    System.err.println(ex.getMessage());	  }	}      }    }    return win;  }    /**   * Carries out a comparison between all resultsets, counting the number   * of datsets where one resultset outperforms the other. The results   * are summarized in a table.   *   * @param comparisonColumn the index of the comparison column   * @return the results in a string   * @exception Exception if an error occurs   */  public String multiResultsetSummary(int comparisonColumn)    throws Exception {        int [][] win = multiResultsetWins(comparisonColumn);    int numResultsets = getNumResultsets();    int resultsetLength = 1 + Math.max((int)(Math.log(numResultsets)					     / Math.log(10)),				       (int)(Math.log(getNumDatasets()) / 					     Math.log(10)));    String result = "";    String titles = "";    if (m_latexOutput) {      result += "\\begin{table}[thb]\n\\caption{\\label{labelname}"		  +"Table Caption}\n";      result += "\\footnotesize\n";      result += "{\\centering \\begin{tabular}{l";    }    for (int i = 0; i < numResultsets; i++) {      if (m_latexOutput) {	titles += " &";	result += "c";      }      titles += ' ' + Utils.padLeft("" + (char)((int)'a' + i % 26),				    resultsetLength);    }    if (m_latexOutput) {      result += "}}\\\\\n\\hline\n";      result += titles + " \\\\\n\\hline\n";    } else {      result += titles + "  (No. of datasets where [col] >> [row])\n";    }    for (int i = 0; i < numResultsets; i++) {      for (int j = 0; j < numResultsets; j++) {	if (m_latexOutput && j == 0) {	  result +=  (char)((int)'a' + i % 26);	}	if (j == i) {	  if (m_latexOutput) {	    result += " & - ";	  } else {	    result += ' ' + Utils.padLeft("-", resultsetLength);	  }	} else {	  if (m_latexOutput) {	    result += "& " + win[i][j] + ' ';	  } else {	    result += ' ' + Utils.padLeft("" + win[i][j], resultsetLength);	  }	}      }      if (!m_latexOutput) {	result += " | " + (char)((int)'a' + i % 26)	  + " = " + getResultsetName(i) + '\n';      } else {	result += "\\\\\n";      }    }    if (m_latexOutput) {      result += "\\hline\n\\end{tabular} \\footnotesize \\par}\n\\end{table}";    }    return result;  }  public String multiResultsetRanking(int comparisonColumn)    throws Exception {    int [][] win = multiResultsetWins(comparisonColumn);    int numResultsets = getNumResultsets();    int [] wins = new int [numResultsets];    int [] losses = new int [numResultsets];    int [] diff = new int [numResultsets];    for (int i = 0; i < win.length; i++) {      for (int j = 0; j < win[i].length; j++) {	wins[j] += win[i][j];	diff[j] += win[i][j];	losses[i] += win[i][j];	diff[i] -= win[i][j];      }    }    int biggest = Math.max(wins[Utils.maxIndex(wins)],			   losses[Utils.maxIndex(losses)]);    int width = Math.max(2 + (int)(Math.log(biggest) / Math.log(10)),			 ">-<".length());    String result;    if (m_latexOutput) {      result = "\\begin{table}[thb]\n\\caption{\\label{labelname}Table Caption"	+"}\n\\footnotesize\n{\\centering \\begin{tabular}{rlll}\\\\\n\\hline\n";      result += "Resultset & Wins$-$ & Wins & Losses \\\\\n& Losses & & "	+"\\\\\n\\hline\n";    } else {      result = Utils.padLeft(">-<", width) + ' '	+ Utils.padLeft(">", width) + ' '	+ Utils.padLeft("<", width) + " Resultset\n";    }    int [] ranking = Utils.sort(diff);    for (int i = numResultsets - 1; i >= 0; i--) {      int curr = ranking[i];      if (m_latexOutput) {	result += "(" + (curr+1) + ") & " 	  + Utils.padLeft("" + diff[curr], width) 	  +" & " + Utils.padLeft("" + wins[curr], width)	  +" & " + Utils.padLeft("" + losses[curr], width)	  +"\\\\\n";      } else {	result += Utils.padLeft("" + diff[curr], width) + ' '	  + Utils.padLeft("" + wins[curr], width) + ' '	  + Utils.padLeft("" + losses[curr], width) + ' '	  + getResultsetName(curr) + '\n';      }    }    if (m_latexOutput) {      result += "\\hline\n\\end{tabular} \\footnotesize \\par}\n\\end{table}";    }    return result;  }  /**   * Generates a comparison table in latex table format   *   * @param baseResultset the index of the base resultset   * @param comparisonColumn the index of the column to compare over   * @param maxWidthMean width for the mean   * @param maxWidthStdDev width for the standard deviation   * @return the comparison table string   */  private String multiResultsetFullLatex(int baseResultset,				     int comparisonColumn,				     int maxWidthMean,				     int maxWidthStdDev) {    StringBuffer result = new StringBuffer(1000);    int numcols = getNumResultsets() * 2;    if (m_ShowStdDevs) {      numcols += getNumResultsets();    }    result.append("\\begin{table}[thb]\n\\caption{\\label{labelname}"		  +"Table Caption}\n");    if (!m_ShowStdDevs) {      result.append("\\footnotesize\n");    } else {      result.append("\\scriptsize\n");    }    // output the column alignment characters    // one for the dataset name and one for the comparison column    if (!m_ShowStdDevs) {      result.append("{\\centering \\begin{tabular}{ll");    } else {      // dataset, mean, std dev      result.append("{\\centering \\begin{tabular}{lr@{\\hspace{0cm}}l");    }    for (int j = 0; j < getNumResultsets(); j++) {      if (j != baseResultset) {	if (!m_ShowStdDevs) {	  result.append("l@{\\hspace{0.1cm}}l");	} else {	  result.append("r@{\\hspace{0cm}}l@{\\hspace{0cm}}r");	}      }    }    result.append("}\n\\\\\n\\hline\n");    if (!m_ShowStdDevs) {      result.append("Data Set & ("+(baseResultset+1)+")");    } else {      result.append("Data Set & \\multicolumn{2}{c}{("+(baseResultset+1)+")}");    }    // now do the column names (numbers)    for (int j = 0; j < getNumResultsets(); j++) {      if (j != baseResultset) {	if (!m_ShowStdDevs) {	  result.append("& (" + (j + 1) + ") & ");	} else {	  result.append("& \\multicolumn{3}{c}{(" + (j + 1) + ")} ");	}      }    }    result.append("\\\\\n\\hline\n");        int datasetLength = 25;    int resultsetLength = maxWidthMean + 7;    if (m_ShowStdDevs) {      resultsetLength += (maxWidthStdDev + 5);    }    for (int i = 0; i < getNumDatasets(); i++) {      // Print the name of the dataset      String datasetName = 	templateString(m_DatasetSpecifiers.specifier(i)).replace('_','-');      try {	PairedStats pairedStats = 	  calculateStatistics(m_DatasetSpecifiers.specifier(i), 			      baseResultset, baseResultset,			      comparisonColumn);	datasetName = Utils.padRight(datasetName, datasetLength);	result.append(datasetName);		if (!m_ShowStdDevs) {	  result.append("& "+Utils.doubleToString(pairedStats.xStats.mean,			       resultsetLength - 2, 2));	} else {	  result.append("& "+Utils.doubleToString(pairedStats.xStats.mean,					     (maxWidthMean+5), 2)+"$\\pm$");	  if (Double.isNaN(pairedStats.xStats.stdDev)) {	    result.append("&"+Utils.doubleToString(0.0,						  (maxWidthStdDev+3),2)+" ");	  } else {	    result.append("&"+Utils.doubleToString(pairedStats.xStats.stdDev,						   (maxWidthStdDev+3),2)+" ");	  }	}	// Iterate over the resultsets	for (int j = 0; j < getNumResultsets(); j++) {	  if (j != baseResultset) {	    try {	      pairedStats = 		calculateStatistics(m_DatasetSpecifiers.specifier(i), 				    baseResultset, j, comparisonColumn);	      String sigString = "";	      if (pairedStats.differencesSignificance < 0) {		sigString = "$\\circ$";	      } else if (pairedStats.differencesSignificance > 0) {		sigString = "$\\bullet$";	      } 	      if (!m_ShowStdDevs) {		result.append(" & "+Utils.doubleToString(pairedStats.yStats.mean,						   resultsetLength - 2,						   2)).append(" & "+sigString);	      } else {		result.append(" & "			      +Utils.doubleToString(pairedStats.yStats.mean,						   (maxWidthMean+5),						   2)+"$\\pm$");		if (Double.isNaN(pairedStats.yStats.stdDev)) {		  result.append("&"+Utils.doubleToString(0.0, 				(maxWidthStdDev+3),2)+" ");		} else {		  result.append("&"+Utils.doubleToString(pairedStats.				  yStats.stdDev, (maxWidthStdDev+3),2)+" ");		}		result.append(" & ").append(sigString);	      }	    } catch (Exception ex) {	      ex.printStackTrace();	      result.append(Utils.padLeft("", resultsetLength + 1));	    }	  }	}	result.append("\\\\\n");      } catch (Exception ex) {	ex.printStackTrace();      }    }    result.append("\\hline\n\\multicolumn{"+numcols+"}{c}{$\\circ$, $\\bullet$"		  +" statistically significant improvement or degradation}"		  +"\\\\\n\\end{tabular} ");    if (!m_ShowStdDevs) {      result.append("\\footnotesize ");      } else {	result.append("\\scriptsize ");      }        result.append("\\par}\n\\end{table}"		  +"\n");    System.out.println(result.toString()+"\n\n");    return result.toString();  }  /**   * Generates a comparison table in latex table format   *   * @param baseResultset the index of the base resultset   * @param comparisonColumn the index of the column to compare over   * @param maxWidthMean width for the mean   * @param maxWidthStdDev width for the standard deviation   * @return the comparison table string   */  private String multiResultsetFullPlainText(int baseResultset,                                             int comparisonColumn,                                             int maxWidthMean,                                             int maxWidthStdDev) {    StringBuffer result = new StringBuffer(1000);    int datasetLength = 25;    //    int resultsetLength = 9;    //    int resultsetLength = 16;    int resultsetLength = maxWidthMean + 7;    if (m_ShowStdDevs) {      resultsetLength += (maxWidthStdDev + 5);    }    // Set up the titles    StringBuffer titles = new StringBuffer(Utils.padRight("Dataset",                                                          datasetLength));    titles.append(' ');    StringBuffer label       = new StringBuffer(Utils.padLeft("(" + (baseResultset + 1)                                       + ") "                                       + getResultsetName(baseResultset),                                       resultsetLength + 3));    titles.append(label);    StringBuffer separator = new StringBuffer(Utils.padRight("",                                                             datasetLength));    while (separator.length() < titles.length()) {      separator.append('-');    }    separator.append("---");    titles.append(" | ");    for (int j = 0; j < getNumResultsets(); j++) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -