📄 pairedttester.java
字号:
* Separates the instances into resultsets and by dataset/run.
*
* @exception Exception if the TTest parameters have not been set.
*/
protected void prepareData() throws Exception {
  if (m_Instances == null) {
    throw new Exception("No instances have been set");
  }

  // A run column of -1 means "not chosen": fall back to the last attribute.
  m_RunColumn = (m_RunColumnSet == -1)
    ? m_Instances.numAttributes() - 1
    : m_RunColumnSet;

  // Resolve the resultset key range against the current attribute count.
  if (m_ResultsetKeyColumnsRange == null) {
    throw new Exception("No result specifier columns have been set");
  }
  m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
  m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection();

  // Same for the dataset key range.
  if (m_DatasetKeyColumnsRange == null) {
    throw new Exception("No dataset specifier columns have been set");
  }
  m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
  m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection();

  // Discard any earlier partitioning, then file every instance under the
  // resultset whose template it matches (creating one when none does).
  m_Resultsets.removeAllElements();
  m_DatasetSpecifiers.removeAllSpecifiers();
  for (int row = 0; row < m_Instances.numInstances(); row++) {
    Instance instance = m_Instances.instance(row);
    if (instance.isMissing(m_RunColumn)) {
      throw new Exception("Instance has missing value in run "
                          + "column!\n" + instance);
    }
    checkKeyColumnsPresent(instance, m_ResultsetKeyColumns, "resultset");
    checkKeyColumnsPresent(instance, m_DatasetKeyColumns, "dataset");

    Resultset owner = null;
    for (int r = 0; r < m_Resultsets.size() && owner == null; r++) {
      Resultset candidate = (Resultset) m_Resultsets.elementAt(r);
      if (candidate.matchesTemplate(instance)) {
        owner = candidate;
      }
    }
    if (owner == null) {
      m_Resultsets.addElement(new Resultset(instance));
    } else {
      owner.add(instance);
    }
    m_DatasetSpecifiers.add(instance);
  }

  // Order every resultset by run; when a fold column is configured the
  // folds are sorted first in case they are out of order.
  // NOTE(review): presumably relies on Resultset.sort preserving the fold
  // order among equal runs -- confirm.
  final boolean sortFoldsFirst = m_FoldColumn >= 0;
  for (int r = 0; r < m_Resultsets.size(); r++) {
    Resultset resultset = (Resultset) m_Resultsets.elementAt(r);
    if (sortFoldsFirst) {
      resultset.sort(m_FoldColumn);
    }
    resultset.sort(m_RunColumn);
  }
  m_ResultsetsValid = true;
}

/**
 * Verifies that an instance has a value in every one of the given key
 * columns.
 *
 * @param instance the instance to inspect
 * @param keyColumns the attribute indices that must not be missing
 * @param keyKind the word naming the key type in the error message
 * ("resultset" or "dataset")
 * @exception Exception if any of the key columns is missing in the instance
 */
private void checkKeyColumnsPresent(Instance instance, int [] keyColumns,
                                    String keyKind) throws Exception {
  for (int k = 0; k < keyColumns.length; k++) {
    if (instance.isMissing(keyColumns[k])) {
      throw new Exception("Instance has missing value in " + keyKind
                          + " key column " + (keyColumns[k] + 1)
                          + "!\n" + instance);
    }
  }
}
/**
 * Reports how many dataset specifiers are currently known, preparing the
 * data first if that has not been done yet.
 *
 * @return the number of datasets in the resultsets, or 0 if the data
 * could not be prepared (the failure's stack trace is printed)
 */
public int getNumDatasets() {
  boolean ready = m_ResultsetsValid;
  if (!ready) {
    try {
      prepareData();
      ready = true;
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
  return ready ? m_DatasetSpecifiers.numSpecifiers() : 0;
}
/**
 * Reports how many resultsets the data has been split into, preparing the
 * data first if that has not been done yet.
 *
 * @return the number of resultsets in the data, or 0 if the data could
 * not be prepared (the failure's stack trace is printed)
 */
public int getNumResultsets() {
  try {
    if (!m_ResultsetsValid) {
      prepareData();
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    return 0;
  }
  return m_Resultsets.size();
}
/**
 * Returns the template string describing one resultset, preparing the
 * data first if that has not been done yet.
 *
 * @param index the index of the resultset
 * @return a descriptive string for the resultset, or null if the data
 * could not be prepared (the failure's stack trace is printed)
 */
public String getResultsetName(int index) {
  if (!m_ResultsetsValid) {
    try {
      prepareData();
    } catch (Exception ex) {
      ex.printStackTrace();
      return null;
    }
  }
  Resultset selected = (Resultset) m_Resultsets.elementAt(index);
  return selected.templateString();
}
/**
 * Computes a paired t-test comparison for a specified dataset between
 * two resultsets.
 *
 * The instances of both resultsets for the dataset are paired by position.
 * Differing run (or fold) numbers within a pair are only reported on
 * System.err; they do not abort the comparison.
 *
 * @param datasetSpecifier the dataset specifier
 * @param resultset1Index the index of the first resultset
 * @param resultset2Index the index of the second resultset
 * @param comparisonColumn the column containing values to compare
 * @return the results of the paired comparison
 * @exception Exception if no instances have been set, the comparison
 * column is not numeric, the data cannot be prepared, either resultset has
 * no results for the dataset, the two result lists differ in size, or a
 * paired instance is missing its comparison value
 */
public PairedStats calculateStatistics(Instance datasetSpecifier,
                                       int resultset1Index,
                                       int resultset2Index,
                                       int comparisonColumn) throws Exception {
  // The attribute lookup below dereferences m_Instances before
  // prepareData() gets a chance to complain, so report unset instances
  // here with the same message instead of a bare NullPointerException.
  if (m_Instances == null) {
    throw new Exception("No instances have been set");
  }
  if (m_Instances.attribute(comparisonColumn).type()
      != Attribute.NUMERIC) {
    throw new Exception("Comparison column " + (comparisonColumn + 1)
                        + " ("
                        + m_Instances.attribute(comparisonColumn).name()
                        + ") is not numeric");
  }
  if (!m_ResultsetsValid) {
    prepareData();
  }

  Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index);
  Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index);
  FastVector dataset1 = resultset1.dataset(datasetSpecifier);
  FastVector dataset2 = resultset2.dataset(datasetSpecifier);
  String datasetName = templateString(datasetSpecifier);
  if (dataset1 == null) {
    throw new Exception("No results for dataset=" + datasetName
                        + " for resultset=" + resultset1.templateString());
  } else if (dataset2 == null) {
    throw new Exception("No results for dataset=" + datasetName
                        + " for resultset=" + resultset2.templateString());
  } else if (dataset1.size() != dataset2.size()) {
    // Pairing is positional, so both lists must have the same length.
    throw new Exception("Results for dataset=" + datasetName
                        + " differ in size for resultset="
                        + resultset1.templateString()
                        + " and resultset="
                        + resultset2.templateString()
                        );
  }

  PairedStats pairedStats = new PairedStats(m_SignificanceLevel);
  for (int k = 0; k < dataset1.size(); k ++) {
    Instance current1 = (Instance) dataset1.elementAt(k);
    Instance current2 = (Instance) dataset2.elementAt(k);
    if (current1.isMissing(comparisonColumn)) {
      throw new Exception("Instance has missing value in comparison "
                          + "column!\n" + current1);
    }
    if (current2.isMissing(comparisonColumn)) {
      throw new Exception("Instance has missing value in comparison "
                          + "column!\n" + current2);
    }
    // Mismatched run/fold numbers are warnings only: the pair is still
    // added to the statistics.
    if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) {
      System.err.println("Run numbers do not match!\n"
                         + current1 + current2);
    }
    if (m_FoldColumn != -1) {
      if (current1.value(m_FoldColumn) != current2.value(m_FoldColumn)) {
        System.err.println("Fold numbers do not match!\n"
                           + current1 + current2);
      }
    }
    double value1 = current1.value(comparisonColumn);
    double value2 = current2.value(comparisonColumn);
    pairedStats.add(value1, value2);
  }
  pairedStats.calculateDerived();
  System.err.println("Differences stats:\n" + pairedStats.differencesStats);
  return pairedStats;
}
/**
 * Creates a key that maps resultset numbers to their descriptions.
 *
 * Each resultset contributes one line of the form "(n) description", with
 * n starting at 1; the key ends with an additional blank line.
 *
 * @return the key, or the message of the exception if the data could not
 * be prepared (its stack trace is printed as well)
 */
public String resultsetKey() {
  if (!m_ResultsetsValid) {
    try {
      prepareData();
    } catch (Exception ex) {
      ex.printStackTrace();
      return ex.getMessage();
    }
  }
  // Accumulate in a buffer rather than re-copying a String on every line.
  StringBuffer key = new StringBuffer();
  int numResultsets = getNumResultsets();
  for (int j = 0; j < numResultsets; j++) {
    key.append('(').append(j + 1).append(") ")
       .append(getResultsetName(j)).append('\n');
  }
  return key.append('\n').toString();
}
/**
 * Creates a "header" string describing the current resultsets: the name
 * of the analysed attribute, the number of datasets and resultsets, the
 * significance level and the current date.
 *
 * @param comparisonColumn the index of the attribute being analysed
 * @return the header text, or the message of the exception if the data
 * could not be prepared (its stack trace is printed as well)
 */
public String header(int comparisonColumn) {
  try {
    if (!m_ResultsetsValid) {
      prepareData();
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    return ex.getMessage();
  }

  StringBuffer text = new StringBuffer();
  text.append("Analysing:  ")
      .append(m_Instances.attribute(comparisonColumn).name()).append('\n');
  text.append("Datasets:   ").append(getNumDatasets()).append('\n');
  text.append("Resultsets: ").append(getNumResultsets()).append('\n');
  text.append("Confidence: ").append(getSignificanceLevel())
      .append(" (two tailed)\n");
  text.append("Date:       ")
      .append((new SimpleDateFormat()).format(new Date())).append("\n\n");
  return text.toString();
}
/**
 * Carries out a comparison between all resultsets, counting the number
 * of datasets where one resultset outperforms the other.
 *
 * Every unordered pair of resultsets is compared once per dataset. A
 * comparison that throws is reported on System.err and contributes to
 * neither count. Progress is also written to System.err.
 *
 * @param comparisonColumn the index of the comparison column
 * @return a 2d array where element [i][j] is the number of times resultset
 * j performed significantly better than resultset i.
 * @exception Exception if an error occurs
 */
public int [][] multiResultsetWins(int comparisonColumn)
  throws Exception {
  final int total = getNumResultsets();
  final int [][] win = new int [total][total];
  for (int first = 0; first < total; first++) {
    for (int second = first + 1; second < total; second++) {
      System.err.print("Comparing (" + (first + 1) + ") with ("
                       + (second + 1) + ")\r");
      System.err.flush();
      int dataset = 0;
      while (dataset < getNumDatasets()) {
        try {
          PairedStats outcome =
            calculateStatistics(m_DatasetSpecifiers.specifier(dataset),
                                first, second, comparisonColumn);
          // A positive significance credits the first resultset, a
          // negative one the second; zero credits neither.
          if (outcome.differencesSignificance > 0) {
            win[second][first]++;
          } else if (outcome.differencesSignificance < 0) {
            win[first][second]++;
          }
        } catch (Exception ex) {
          ex.printStackTrace();
          System.err.println(ex.getMessage());
        }
        dataset++;
      }
    }
  }
  return win;
}
/**
 * Carries out a comparison between all resultsets, counting the number
 * of datasets where one resultset outperforms the other. The results
 * are summarized in a table.
 *
 * Resultsets are labelled with letters starting at 'a' (wrapping after 26).
 * In plain-text mode each row ends with a legend mapping its letter to the
 * resultset name; in LaTeX mode a complete table environment is produced.
 *
 * @param comparisonColumn the index of the comparison column
 * @return the results in a string
 * @exception Exception if an error occurs
 */
public String multiResultsetSummary(int comparisonColumn)
  throws Exception {
  int [][] win = multiResultsetWins(comparisonColumn);
  int numResultsets = getNumResultsets();
  // Column width = number of decimal digits of the larger count. Counting
  // characters avoids the rounding error of log(n)/log(10), which comes
  // out just below the true value for some powers of ten (e.g. 1000) and
  // made the columns one character too narrow.
  int resultsetLength =
    String.valueOf(Math.max(numResultsets, getNumDatasets())).length();

  StringBuffer result = new StringBuffer();
  StringBuffer titles = new StringBuffer();
  if (m_latexOutput) {
    result.append("\\begin{table}[thb]\n\\caption{\\label{labelname}"
                  + "Table Caption}\n");
    result.append("\\footnotesize\n");
    result.append("{\\centering \\begin{tabular}{l");
  }

  // Title row: one letter per resultset (plus one "c" column spec each
  // when emitting LaTeX).
  for (int i = 0; i < numResultsets; i++) {
    if (m_latexOutput) {
      titles.append(" &");
      result.append("c");
    }
    titles.append(' ')
          .append(Utils.padLeft("" + (char)((int)'a' + i % 26),
                                resultsetLength));
  }
  if (m_latexOutput) {
    // NOTE(review): the second '}' looks like it closes the {\centering
    // group before \end{tabular} -- confirm the emitted LaTeX compiles.
    result.append("}}\\\\\n\\hline\n");
    result.append(titles.toString()).append(" \\\\\n\\hline\n");
  } else {
    result.append(titles.toString())
          .append(" (No. of datasets where [col] >> [row])\n");
  }

  // Body: one row per resultset, with "-" on the diagonal.
  for (int i = 0; i < numResultsets; i++) {
    char rowLetter = (char)((int)'a' + i % 26);
    for (int j = 0; j < numResultsets; j++) {
      if (m_latexOutput && j == 0) {
        result.append(rowLetter);
      }
      if (j == i) {
        if (m_latexOutput) {
          result.append(" & - ");
        } else {
          result.append(' ').append(Utils.padLeft("-", resultsetLength));
        }
      } else {
        if (m_latexOutput) {
          result.append("& ").append(win[i][j]).append(' ');
        } else {
          result.append(' ')
                .append(Utils.padLeft("" + win[i][j], resultsetLength));
        }
      }
    }
    if (!m_latexOutput) {
      result.append(" | ").append(rowLetter)
            .append(" = ").append(getResultsetName(i)).append('\n');
    } else {
      result.append("\\\\\n");
    }
  }
  if (m_latexOutput) {
    result.append("\\hline\n\\end{tabular} \\footnotesize \\par}\n\\end{table}");
  }
  return result.toString();
}
public String multiResultsetRanking(int comparisonColumn)
throws Exception {
int [][] win = multiResultsetWins(comparisonColumn);
int numResultsets = getNumResultsets();
int [] wins = new int [numResultsets];
int [] losses = new int [numResultsets];
int [] diff = new int [numResultsets];
for (int i = 0; i < win.length; i++) {
for (int j = 0; j < win[i].length; j++) {
wins[j] += win[i][j];
diff[j] += win[i][j];
losses[i] += win[i][j];
diff[i] -= win[i][j];
}
}
int biggest = Math.max(wins[Utils.maxIndex(wins)],
losses[Utils.maxIndex(losses)]);
int width = Math.max(2 + (int)(Math.log(biggest) / Math.log(10)),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -