⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pairedttester.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
			      + "column " + (m_ResultsetKeyColumns[j] + 1)			      + "!\n" + current);	}      }      for (int j = 0; j < m_DatasetKeyColumns.length; j++) {	if (current.isMissing(m_DatasetKeyColumns[j])) {	  throw new Exception("Instance has missing value in dataset key "			      + "column " + (m_DatasetKeyColumns[j] + 1)			      + "!\n" + current);	}      }      boolean found = false;      for (int j = 0; j < m_Resultsets.size(); j++) {	Resultset resultset = (Resultset) m_Resultsets.elementAt(j);	if (resultset.matchesTemplate(current)) {	  resultset.add(current);	  found = true;	  break;	}      }      if (!found) {	Resultset resultset = new Resultset(current);	m_Resultsets.addElement(resultset);      }      m_DatasetSpecifiers.add(current);    }    // Tell each resultset to sort on the run column    for (int j = 0; j < m_Resultsets.size(); j++) {      Resultset resultset = (Resultset) m_Resultsets.elementAt(j);      if (m_FoldColumn >= 0) {        // sort on folds first in case they are out of order        resultset.sort(m_FoldColumn);      }      resultset.sort(m_RunColumn);    }    m_ResultsetsValid = true;  }  /**   * Gets the number of datasets in the resultsets   *   * @return the number of datasets in the resultsets   */  public int getNumDatasets() {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return 0;      }    }    return m_DatasetSpecifiers.numSpecifiers();  }  /**   * Gets the number of resultsets in the data.   *   * @return the number of resultsets in the data   */  public int getNumResultsets() {    if (!m_ResultsetsValid) {      try {  prepareData();      } catch (Exception ex) {  ex.printStackTrace();  return 0;      }    }    return m_Resultsets.size();  }  /**   * Gets a string descriptive of the specified resultset.   
*
   * @param index the index of the resultset
   * @return the template string of the resultset at that index, or null if
   * preparing the data failed (the stack trace is printed in that case)
   */
  public String getResultsetName(int index) {
    try {
      if (!m_ResultsetsValid) {
        prepareData();
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      return null;
    }
    Resultset selected = (Resultset) m_Resultsets.elementAt(index);
    return selected.templateString();
  }

  /**
   * Tells whether the resultset with the given index is to be displayed.
   * When no explicit list of displayed resultsets is set, every resultset
   * counts as displayed; otherwise only indices contained in that list do.
   *
   * @param index the index of the resultset to look up
   * @return true if the resultset is displayed, false otherwise
   */
  public boolean displayResultset(int index) {
    // no explicit selection: everything is shown
    if (m_DisplayedResultsets == null) {
      return true;
    }

    for (int pos = 0; pos < m_DisplayedResultsets.length; pos++) {
      if (m_DisplayedResultsets[pos] == index) {
        return true;
      }
    }
    return false;
  }

  /**
   * Computes a paired t-test comparison for a specified dataset between
   * two resultsets.
*   * @param datasetSpecifier the dataset specifier   * @param resultset1Index the index of the first resultset   * @param resultset2Index the index of the second resultset   * @param comparisonColumn the column containing values to compare   * @return the results of the paired comparison   * @throws Exception if an error occurs   */  public PairedStats calculateStatistics(Instance datasetSpecifier,					 int resultset1Index,					 int resultset2Index,					 int comparisonColumn) throws Exception {    if (m_Instances.attribute(comparisonColumn).type()	!= Attribute.NUMERIC) {      throw new Exception("Comparison column " + (comparisonColumn + 1)			  + " ("			  + m_Instances.attribute(comparisonColumn).name()			  + ") is not numeric");    }    if (!m_ResultsetsValid) {      prepareData();    }    Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index);    Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index);    FastVector dataset1 = resultset1.dataset(datasetSpecifier);    FastVector dataset2 = resultset2.dataset(datasetSpecifier);    String datasetName = templateString(datasetSpecifier);    if (dataset1 == null) {      throw new Exception("No results for dataset=" + datasetName			 + " for resultset=" + resultset1.templateString());    } else if (dataset2 == null) {      throw new Exception("No results for dataset=" + datasetName			 + " for resultset=" + resultset2.templateString());    } else if (dataset1.size() != dataset2.size()) {      throw new Exception("Results for dataset=" + datasetName			  + " differ in size for resultset="			  + resultset1.templateString()			  + " and resultset="			  + resultset2.templateString()			  );    }        PairedStats pairedStats = new PairedStats(m_SignificanceLevel);    for (int k = 0; k < dataset1.size(); k ++) {      Instance current1 = (Instance) dataset1.elementAt(k);      Instance current2 = (Instance) dataset2.elementAt(k);      if (current1.isMissing(comparisonColumn)) {	
System.err.println("Instance has missing value in comparison "			   + "column!\n" + current1);	continue;      }      if (current2.isMissing(comparisonColumn)) {	System.err.println("Instance has missing value in comparison "			   + "column!\n" + current2);	continue;      }      if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) {	System.err.println("Run numbers do not match!\n"			    + current1 + current2);      }      if (m_FoldColumn != -1) {	if (current1.value(m_FoldColumn) != current2.value(m_FoldColumn)) {	  System.err.println("Fold numbers do not match!\n"			     + current1 + current2);	}      }      double value1 = current1.value(comparisonColumn);      double value2 = current2.value(comparisonColumn);      pairedStats.add(value1, value2);    }    pairedStats.calculateDerived();    //System.err.println("Differences stats:\n" + pairedStats.differencesStats);    return pairedStats;  }    /**   * Creates a key that maps resultset numbers to their descriptions.   *   * @return a value of type 'String'   */  public String resultsetKey() {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return ex.getMessage();      }    }    String result = "";    for (int j = 0; j < getNumResultsets(); j++) {      result += "(" + (j + 1) + ") " + getResultsetName(j) + '\n';    }    return result + '\n';  }    /**   * Creates a "header" string describing the current resultsets.   
*   * @param comparisonColumn a value of type 'int'   * @return a value of type 'String'   */  public String header(int comparisonColumn) {    if (!m_ResultsetsValid) {      try {	prepareData();      } catch (Exception ex) {	ex.printStackTrace();	return ex.getMessage();      }    }        initResultMatrix();    m_ResultMatrix.addHeader("Tester", getClass().getName());    m_ResultMatrix.addHeader("Analysing", m_Instances.attribute(comparisonColumn).name());    m_ResultMatrix.addHeader("Datasets", Integer.toString(getNumDatasets()));    m_ResultMatrix.addHeader("Resultsets", Integer.toString(getNumResultsets()));    m_ResultMatrix.addHeader("Confidence", getSignificanceLevel() + " (two tailed)");    m_ResultMatrix.addHeader("Sorted by", getSortColumnName());    m_ResultMatrix.addHeader("Date", (new SimpleDateFormat()).format(new Date()));    return m_ResultMatrix.toStringHeader() + "\n";  }  /**   * Carries out a comparison between all resultsets, counting the number   * of datsets where one resultset outperforms the other.   *   * @param comparisonColumn the index of the comparison column   * @param nonSigWin for storing the non-significant wins   * @return a 2d array where element [i][j] is the number of times resultset   * j performed significantly better than resultset i.   
* @throws Exception if an error occurs   */  public int [][] multiResultsetWins(int comparisonColumn, int [][] nonSigWin)    throws Exception {    int numResultsets = getNumResultsets();    int [][] win = new int [numResultsets][numResultsets];    //    int [][] nonSigWin = new int [numResultsets][numResultsets];    for (int i = 0; i < numResultsets; i++) {      for (int j = i + 1; j < numResultsets; j++) {	System.err.print("Comparing (" + (i + 1) + ") with ("			 + (j + 1) + ")\r");	System.err.flush();	for (int k = 0; k < getNumDatasets(); k++) {	  try {	    PairedStats pairedStats = 	      calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j,				  comparisonColumn);	    if (pairedStats.differencesSignificance < 0) {	      win[i][j]++;	    } else if (pairedStats.differencesSignificance > 0) {	      win[j][i]++;	    }	    if (pairedStats.differencesStats.mean < 0) {	      nonSigWin[i][j]++;	    } else if (pairedStats.differencesStats.mean > 0) {	      nonSigWin[j][i]++;	    }	  } catch (Exception ex) {	    //ex.printStackTrace();	    System.err.println(ex.getMessage());	  }	}      }    }    return win;  }  /**   * clears the content and fills the column and row names according to the   * given sorting   */  protected void initResultMatrix() {    m_ResultMatrix.setSize(getNumResultsets(), getNumDatasets());    m_ResultMatrix.setShowStdDev(m_ShowStdDevs);    for (int i = 0; i < getNumDatasets(); i++)      m_ResultMatrix.setRowName(i,           templateString(m_DatasetSpecifiers.specifier(i)));    for (int j = 0; j < getNumResultsets(); j++) {      m_ResultMatrix.setColName(j, getResultsetName(j));      m_ResultMatrix.setColHidden(j, !displayResultset(j));    }  }    /**   * Carries out a comparison between all resultsets, counting the number   * of datsets where one resultset outperforms the other. The results   * are summarized in a table.   
*   * @param comparisonColumn the index of the comparison column   * @return the results in a string   * @throws Exception if an error occurs   */  public String multiResultsetSummary(int comparisonColumn)    throws Exception {        int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()];    int[][] win = multiResultsetWins(comparisonColumn, nonSigWin);        initResultMatrix();        m_ResultMatrix.setSummary(nonSigWin, win);        return m_ResultMatrix.toStringSummary();  }  /**   * returns a ranking of the resultsets   *    * @param comparisonColumn	the column to compare with   * @return			the ranking   * @throws Exception		if something goes wrong   */  public String multiResultsetRanking(int comparisonColumn)    throws Exception {        int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()];    int[][] win       = multiResultsetWins(comparisonColumn, nonSigWin);        initResultMatrix();        m_ResultMatrix.setRanking(win);    return m_ResultMatrix.toStringRanking();  }				      /**   * Creates a comparison table where a base resultset is compared to the   * other resultsets. Results are presented for every dataset.   
*   * @param baseResultset the index of the base resultset   * @param comparisonColumn the index of the column to compare over   * @return the comparison table string   * @throws Exception if an error occurs   */  public String multiResultsetFull(int baseResultset,				   int comparisonColumn) throws Exception {    int maxWidthMean = 2;    int maxWidthStdDev = 2;        double[] sortValues = new double[getNumDatasets()];          // determine max field width    for (int i = 0; i < getNumDatasets(); i++) {      sortValues[i] = Double.POSITIVE_INFINITY;  // sorts skipped cols to end            for (int j = 0; j < getNumResultsets(); j++) {        if (!displayResultset(j))          continue;	try {	  PairedStats pairedStats = 	    calculateStatistics(m_DatasetSpecifiers.specifier(i), 				baseResultset, j, comparisonColumn);          if (!Double.isInfinite(pairedStats.yStats.mean) &&              !Double.isNaN(pairedStats.yStats.mean)) {            double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) /                              Math.log(10))+1);            if (width > maxWidthMean) {              maxWidthMean = (int)width;            }          }          if (j == baseResultset) {            if (getSortColumn() != -1)              sortValues[i] = calculateStatistics(                                m_DatasetSpecifiers.specifier(i),                                 baseResultset, j, getSortColumn()).xStats.mean;            else              sortValues[i] = i;          }	  	  if (m_ShowStdDevs &&              !Double.isInfinite(pairedStats.yStats.stdDev) &&              !Double.isNaN(pairedStats.yStats.stdDev)) {	    double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) /                              Math.log(10))+1);	    if (width > maxWidthStdDev) {	      maxWidthStdDev = (int)width;	    }	  }	}  catch (Exception ex) {	  //ex.printStackTrace();          System.err.println(ex);	}      }    }    // sort rows according to sort column    m_SortOrder = 
Utils.sort(sortValues);    // determine column order    m_ColOrder = new int[getNumResultsets()];    m_ColOrder[0] = baseResultset;    int index = 1;    for (int i = 0; i < getNumResultsets(); i++) {      if (i == baseResultset)        continue;      m_ColOrder[index] = i;      index++;    }    // setup matrix    initResultMatrix();        m_ResultMatrix.setRowOrder(m_SortOrder);    m_ResultMatrix.setColOrder(m_ColOrder);    m_ResultMatrix.setMeanWidth(maxWidthMean);    m_ResultMatrix.setStdDevWidth(maxWidthStdDev);    m_ResultMatrix.setSignificanceWidth(1);    // make sure that test base is displayed, even though it might not be    // selected    for (int i = 0; i < m_ResultMatrix.getColCount(); i++) {      if (    (i == baseResultset)           && (m_ResultMatrix.getColHidden(i)) ) {        m_ResultMatrix.setColHidden(i, false);        System.err.println("Note: test base was hidden - set visible!");      }    }        // the data    for (int i = 0; i < getNumDatasets(); i++) {      m_ResultMatrix.setRowName(i,           templateString(m_DatasetSpecifiers.specifier(i)));      for (int j = 0; j < getNumResultsets(); j++) {        try {          // calc stats          PairedStats pairedStats =             calculateStatistics(m_DatasetSpecifiers.specifier(i),                 baseResultset, j, comparisonColumn);          // count          m_ResultMatrix.setCount(i, pairedStats.count);          // mean          m_ResultMatrix.setMean(j, i, pairedStats.yStats.mean);                    // std dev          m_ResultMatrix.setStdDev(j, i, pairedStats.yStats.stdDev);          // significance          if (pairedStats.differencesSignificance < 0)            m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_WIN);          else if (pairedStats.differencesSignificance > 0)            m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_LOSS);          else            m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_TIE);        }        catch 
(Exception e) {          //e.printStackTrace();          System.err.println(e);        }      }    }    // generate output    StringBuffer result = new StringBuffer(1000);    try {      result.append(m_ResultMatrix.toStringMatrix());    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -