📄 pairedttester.java
字号:
+ "column " + (m_ResultsetKeyColumns[j] + 1) + "!\n" + current); } } for (int j = 0; j < m_DatasetKeyColumns.length; j++) { if (current.isMissing(m_DatasetKeyColumns[j])) { throw new Exception("Instance has missing value in dataset key " + "column " + (m_DatasetKeyColumns[j] + 1) + "!\n" + current); } } boolean found = false; for (int j = 0; j < m_Resultsets.size(); j++) { Resultset resultset = (Resultset) m_Resultsets.elementAt(j); if (resultset.matchesTemplate(current)) { resultset.add(current); found = true; break; } } if (!found) { Resultset resultset = new Resultset(current); m_Resultsets.addElement(resultset); } m_DatasetSpecifiers.add(current); } // Tell each resultset to sort on the run column for (int j = 0; j < m_Resultsets.size(); j++) { Resultset resultset = (Resultset) m_Resultsets.elementAt(j); if (m_FoldColumn >= 0) { // sort on folds first in case they are out of order resultset.sort(m_FoldColumn); } resultset.sort(m_RunColumn); } m_ResultsetsValid = true; } /** * Gets the number of datasets in the resultsets * * @return the number of datasets in the resultsets */ public int getNumDatasets() { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return 0; } } return m_DatasetSpecifiers.numSpecifiers(); } /** * Gets the number of resultsets in the data. * * @return the number of resultsets in the data */ public int getNumResultsets() { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return 0; } } return m_Resultsets.size(); } /** * Gets a string descriptive of the specified resultset. * * @param index the index of the resultset * @return a descriptive string for the resultset */ public String getResultsetName(int index) { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return null; } } return ((Resultset) m_Resultsets.elementAt(index)).templateString(); } /** * Checks whether the resultset with the given index shall be displayed. * * @param index the index of the resultset to check whether it shall be displayed * @return whether the specified resultset is displayed */ public boolean displayResultset(int index) { boolean result; int i; result = true; if (m_DisplayedResultsets != null) { result = false; for (i = 0; i < m_DisplayedResultsets.length; i++) { if (m_DisplayedResultsets[i] == index) { result = true; break; } } } return result; } /** * Computes a paired t-test comparison for a specified dataset between * two resultsets. * * @param datasetSpecifier the dataset specifier * @param resultset1Index the index of the first resultset * @param resultset2Index the index of the second resultset * @param comparisonColumn the column containing values to compare * @return the results of the paired comparison * @throws Exception if an error occurs */ public PairedStats calculateStatistics(Instance datasetSpecifier, int resultset1Index, int resultset2Index, int comparisonColumn) throws Exception { if (m_Instances.attribute(comparisonColumn).type() != Attribute.NUMERIC) { throw new Exception("Comparison column " + (comparisonColumn + 1) + " (" + m_Instances.attribute(comparisonColumn).name() + ") is not numeric"); } if (!m_ResultsetsValid) { prepareData(); } Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index); Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index); FastVector dataset1 = resultset1.dataset(datasetSpecifier); FastVector dataset2 = resultset2.dataset(datasetSpecifier); String datasetName = templateString(datasetSpecifier); if (dataset1 == null) { throw new Exception("No results for dataset=" + datasetName + " for resultset=" + resultset1.templateString()); } else if (dataset2 == null) { throw new Exception("No results for dataset=" + datasetName + " for resultset=" + resultset2.templateString()); } else if (dataset1.size() != dataset2.size()) { throw new Exception("Results for dataset=" + datasetName + " differ in size for resultset=" + resultset1.templateString() + " and resultset=" + resultset2.templateString() ); } PairedStats pairedStats = new PairedStats(m_SignificanceLevel); for (int k = 0; k < dataset1.size(); k ++) { Instance current1 = (Instance) dataset1.elementAt(k); Instance current2 = (Instance) dataset2.elementAt(k); if (current1.isMissing(comparisonColumn)) { System.err.println("Instance has missing value in comparison " + "column!\n" + current1); continue; } if (current2.isMissing(comparisonColumn)) { System.err.println("Instance has missing value in comparison " + "column!\n" + current2); continue; } if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) { System.err.println("Run numbers do not match!\n" + current1 + current2); } if (m_FoldColumn != -1) { if (current1.value(m_FoldColumn) != current2.value(m_FoldColumn)) { System.err.println("Fold numbers do not match!\n" + current1 + current2); } } double value1 = current1.value(comparisonColumn); double value2 = current2.value(comparisonColumn); pairedStats.add(value1, value2); } pairedStats.calculateDerived(); //System.err.println("Differences stats:\n" + pairedStats.differencesStats); return pairedStats; } /** * Creates a key that maps resultset numbers to their descriptions. * * @return a value of type 'String' */ public String resultsetKey() { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return ex.getMessage(); } } String result = ""; for (int j = 0; j < getNumResultsets(); j++) { result += "(" + (j + 1) + ") " + getResultsetName(j) + '\n'; } return result + '\n'; } /** * Creates a "header" string describing the current resultsets. * * @param comparisonColumn a value of type 'int' * @return a value of type 'String' */ public String header(int comparisonColumn) { if (!m_ResultsetsValid) { try { prepareData(); } catch (Exception ex) { ex.printStackTrace(); return ex.getMessage(); } } initResultMatrix(); m_ResultMatrix.addHeader("Tester", getClass().getName()); m_ResultMatrix.addHeader("Analysing", m_Instances.attribute(comparisonColumn).name()); m_ResultMatrix.addHeader("Datasets", Integer.toString(getNumDatasets())); m_ResultMatrix.addHeader("Resultsets", Integer.toString(getNumResultsets())); m_ResultMatrix.addHeader("Confidence", getSignificanceLevel() + " (two tailed)"); m_ResultMatrix.addHeader("Sorted by", getSortColumnName()); m_ResultMatrix.addHeader("Date", (new SimpleDateFormat()).format(new Date())); return m_ResultMatrix.toStringHeader() + "\n"; } /** * Carries out a comparison between all resultsets, counting the number * of datsets where one resultset outperforms the other. * * @param comparisonColumn the index of the comparison column * @param nonSigWin for storing the non-significant wins * @return a 2d array where element [i][j] is the number of times resultset * j performed significantly better than resultset i. * @throws Exception if an error occurs */ public int [][] multiResultsetWins(int comparisonColumn, int [][] nonSigWin) throws Exception { int numResultsets = getNumResultsets(); int [][] win = new int [numResultsets][numResultsets]; // int [][] nonSigWin = new int [numResultsets][numResultsets]; for (int i = 0; i < numResultsets; i++) { for (int j = i + 1; j < numResultsets; j++) { System.err.print("Comparing (" + (i + 1) + ") with (" + (j + 1) + ")\r"); System.err.flush(); for (int k = 0; k < getNumDatasets(); k++) { try { PairedStats pairedStats = calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j, comparisonColumn); if (pairedStats.differencesSignificance < 0) { win[i][j]++; } else if (pairedStats.differencesSignificance > 0) { win[j][i]++; } if (pairedStats.differencesStats.mean < 0) { nonSigWin[i][j]++; } else if (pairedStats.differencesStats.mean > 0) { nonSigWin[j][i]++; } } catch (Exception ex) { //ex.printStackTrace(); System.err.println(ex.getMessage()); } } } } return win; } /** * clears the content and fills the column and row names according to the * given sorting */ protected void initResultMatrix() { m_ResultMatrix.setSize(getNumResultsets(), getNumDatasets()); m_ResultMatrix.setShowStdDev(m_ShowStdDevs); for (int i = 0; i < getNumDatasets(); i++) m_ResultMatrix.setRowName(i, templateString(m_DatasetSpecifiers.specifier(i))); for (int j = 0; j < getNumResultsets(); j++) { m_ResultMatrix.setColName(j, getResultsetName(j)); m_ResultMatrix.setColHidden(j, !displayResultset(j)); } } /** * Carries out a comparison between all resultsets, counting the number * of datsets where one resultset outperforms the other. The results * are summarized in a table. * * @param comparisonColumn the index of the comparison column * @return the results in a string * @throws Exception if an error occurs */ public String multiResultsetSummary(int comparisonColumn) throws Exception { int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()]; int[][] win = multiResultsetWins(comparisonColumn, nonSigWin); initResultMatrix(); m_ResultMatrix.setSummary(nonSigWin, win); return m_ResultMatrix.toStringSummary(); } /** * returns a ranking of the resultsets * * @param comparisonColumn the column to compare with * @return the ranking * @throws Exception if something goes wrong */ public String multiResultsetRanking(int comparisonColumn) throws Exception { int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()]; int[][] win = multiResultsetWins(comparisonColumn, nonSigWin); initResultMatrix(); m_ResultMatrix.setRanking(win); return m_ResultMatrix.toStringRanking(); } /** * Creates a comparison table where a base resultset is compared to the * other resultsets. Results are presented for every dataset. * * @param baseResultset the index of the base resultset * @param comparisonColumn the index of the column to compare over * @return the comparison table string * @throws Exception if an error occurs */ public String multiResultsetFull(int baseResultset, int comparisonColumn) throws Exception { int maxWidthMean = 2; int maxWidthStdDev = 2; double[] sortValues = new double[getNumDatasets()]; // determine max field width for (int i = 0; i < getNumDatasets(); i++) { sortValues[i] = Double.POSITIVE_INFINITY; // sorts skipped cols to end for (int j = 0; j < getNumResultsets(); j++) { if (!displayResultset(j)) continue; try { PairedStats pairedStats = calculateStatistics(m_DatasetSpecifiers.specifier(i), baseResultset, j, comparisonColumn); if (!Double.isInfinite(pairedStats.yStats.mean) && !Double.isNaN(pairedStats.yStats.mean)) { double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) / Math.log(10))+1); if (width > maxWidthMean) { maxWidthMean = (int)width; } } if (j == baseResultset) { if (getSortColumn() != -1) sortValues[i] = calculateStatistics( m_DatasetSpecifiers.specifier(i), baseResultset, j, getSortColumn()).xStats.mean; else sortValues[i] = i; } if (m_ShowStdDevs && !Double.isInfinite(pairedStats.yStats.stdDev) && !Double.isNaN(pairedStats.yStats.stdDev)) { double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) / Math.log(10))+1); if (width > maxWidthStdDev) { maxWidthStdDev = (int)width; } } } catch (Exception ex) { //ex.printStackTrace(); System.err.println(ex); } } } // sort rows according to sort column m_SortOrder = Utils.sort(sortValues); // determine column order m_ColOrder = new int[getNumResultsets()]; m_ColOrder[0] = baseResultset; int index = 1; for (int i = 0; i < getNumResultsets(); i++) { if (i == baseResultset) continue; m_ColOrder[index] = i; index++; } // setup matrix initResultMatrix(); m_ResultMatrix.setRowOrder(m_SortOrder); m_ResultMatrix.setColOrder(m_ColOrder); m_ResultMatrix.setMeanWidth(maxWidthMean); m_ResultMatrix.setStdDevWidth(maxWidthStdDev); m_ResultMatrix.setSignificanceWidth(1); // make sure that test base is displayed, even though it might not be // selected for (int i = 0; i < m_ResultMatrix.getColCount(); i++) { if ( (i == baseResultset) && (m_ResultMatrix.getColHidden(i)) ) { m_ResultMatrix.setColHidden(i, false); System.err.println("Note: test base was hidden - set visible!"); } } // the data for (int i = 0; i < getNumDatasets(); i++) { m_ResultMatrix.setRowName(i, templateString(m_DatasetSpecifiers.specifier(i))); for (int j = 0; j < getNumResultsets(); j++) { try { // calc stats PairedStats pairedStats = calculateStatistics(m_DatasetSpecifiers.specifier(i), baseResultset, j, comparisonColumn); // count m_ResultMatrix.setCount(i, pairedStats.count); // mean m_ResultMatrix.setMean(j, i, pairedStats.yStats.mean); // std dev m_ResultMatrix.setStdDev(j, i, pairedStats.yStats.stdDev); // significance if (pairedStats.differencesSignificance < 0) m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_WIN); else if (pairedStats.differencesSignificance > 0) m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_LOSS); else m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_TIE); } catch (Exception e) { //e.printStackTrace(); System.err.println(e); } } } // generate output StringBuffer result = new StringBuffer(1000); try { result.append(m_ResultMatrix.toStringMatrix()); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -