📄 .#semisupclustererevaluation.java.1.9
字号:
* @exception Exception if the class of the instance is not set */ protected void updateStatsForClusterer(double [] distrib, int classValue) { for (int i=0; i<distrib.length; i++) { // System.out.println("Adding value to distrib: " + i + " with classValue: " + classValue); m_ConfusionMatrix[i][classValue] += distrib[i]; } } public final double objectiveFunction() { return m_Objective; } public final double purity() { return m_Purity; } public final double entropy() { return m_Entropy; } public final double klDivergence() { return m_KLDivergence; } public final double mutualInformation() { if (m_ClassIsNominal) { double [] clusterTotals = new double[m_NumClusters]; double [] classTotals = new double[m_NumClasses]; for (int i=0; i<m_NumClusters; i++) { for (int j=0; j<m_NumClasses; j++) { clusterTotals[i] += m_ConfusionMatrix[i][j]; classTotals[j] += m_ConfusionMatrix[i][j]; } } try { System.out.println(toMatrixString("\nConfusion matrix:")); } catch(Exception e) { e.printStackTrace(); } // calculate MI from counts m_MIMetric = 0.0; int numInstances = m_Test.numInstances(); double MI = 0; for (int i=0; i<m_NumClusters; i++) { for (int j=0; j<m_NumClasses; j++) { if(m_ConfusionMatrix[i][j] !=0 && clusterTotals[i] != 0 && classTotals[i] != 0) { MI += (1.0 * m_ConfusionMatrix[i][j]/numInstances) * Math.log((1.0 * m_ConfusionMatrix[i][j] * numInstances) / (clusterTotals[i] * classTotals[j])); } } } double classEntropy = 0, clusterEntropy = 0; for (int i=0; i<m_NumClusters; i++) { clusterEntropy -= (1.0 * clusterTotals[i])/numInstances * Math.log(1.0 * clusterTotals[i]/numInstances); } for (int j=0; j<m_NumClasses; j++) { classEntropy -= (1.0 * classTotals[j])/numInstances * Math.log(1.0 * classTotals[j]/numInstances); } m_MIMetric = 2*MI / (classEntropy + clusterEntropy); System.out.println("Final MI is: " + m_MIMetric); } return m_MIMetric; } /** * Outputs the performance statistics as a classification confusion * matrix. For each class value, shows the distribution of * predicted class values. * * @param title the title for the confusion matrix * @return the confusion matrix as a String * @exception Exception if the class is numeric */ public String toMatrixString(String title) throws Exception { StringBuffer text = new StringBuffer(); char [] IDChars = {'a','b','c','d','e','f','g','h','i','j', 'k','l','m','n','o','p','q','r','s','t', 'u','v','w','x','y','z'}; int IDWidth; boolean fractional = false; // Find the maximum value in the matrix // and check for fractional display requirement double maxval = 0; for(int i = 0; i < m_NumClusters; i++) { for(int j = 0; j < m_NumClasses; j++) { double current = m_ConfusionMatrix[i][j]; if (current < 0) { current *= -10; } if (current > maxval) { maxval = current; } double fract = current - Math.rint(current); if (!fractional && ((Math.log(fract) / Math.log(10)) >= -2)) { fractional = true; } } } IDWidth = 1 + Math.max((int)(Math.log(maxval) / Math.log(10) + (fractional ? 3 : 0)), (int)(Math.log(m_NumClasses) / Math.log(IDChars.length))); text.append(title).append("\n"); for(int i = 0; i < m_NumClasses; i++) { if (fractional) { text.append(" ").append(num2ShortID(i,IDChars,IDWidth - 3)) .append(" "); } else { text.append(" ").append(num2ShortID(i,IDChars,IDWidth)); } } text.append(" <-- classes; rows=clusters\n"); for(int i = 0; i< m_NumClusters; i++) { for(int j = 0; j < m_NumClasses; j++) { text.append(" ").append( Utils.doubleToString(m_ConfusionMatrix[i][j], IDWidth, (fractional ? 2 : 0))); } text.append(" | ").append(num2ShortID(i,IDChars,IDWidth)) .append(" = ").append(m_ClassNames[i]).append("\n"); } return text.toString(); } /** * Method for generating indices for the confusion matrix. * * @param num integer to format * @return the formatted integer as a string */ private String num2ShortID(int num,char [] IDChars,int IDWidth) { char ID [] = new char [IDWidth]; int i; for(i = IDWidth - 1; i >=0; i--) { ID[i] = IDChars[num % IDChars.length]; num = num / IDChars.length - 1; if (num < 0) { break; } } for(i--; i >= 0; i--) { ID[i] = ' '; } return new String(ID); } public final double pairwisePrecision() { if (m_ClassIsNominal) { int [] clusterTotals = new int[m_NumClusters]; int [] goodPairTotals = new int[m_NumClusters]; m_totalPairs = 0; m_goodPairs = 0; for (int i = 0; i < m_NumClusters; i++) { for (int j = 0; j < m_NumClasses; j++) { goodPairTotals[i] += m_ConfusionMatrix[i][j] * (m_ConfusionMatrix[i][j] - 1) / 2; clusterTotals[i] += m_ConfusionMatrix[i][j]; } } for (int i = 0; i < m_NumClusters; i++) { m_totalPairs += clusterTotals[i] * (clusterTotals[i] - 1) / 2; m_goodPairs += goodPairTotals[i]; } } return (m_goodPairs+0.0)/m_totalPairs; } public final double pairwiseRecall() { if (m_ClassIsNominal) { int [] classTotals = new int[m_NumClasses]; int [] goodPairTotals = new int[m_NumClasses]; m_trueGoodPairs = 0; m_goodPairs = 0; for (int i = 0; i < m_NumClasses; i++) { for (int j = 0; j < m_NumClusters; j++) { goodPairTotals[i] += m_ConfusionMatrix[j][i] * (m_ConfusionMatrix[j][i] - 1) / 2; classTotals[i] += m_ConfusionMatrix[j][i]; } } for (int i = 0; i < m_NumClasses; i++) { m_trueGoodPairs += classTotals[i] * (classTotals[i] - 1) / 2; m_goodPairs += goodPairTotals[i]; } } return (m_goodPairs+0.0)/m_trueGoodPairs; } public final double pairwiseFMeasure() { double fmeasure = 0; if (m_ClassIsNominal) { int [] clusterTotals = new int[m_NumClusters]; int [] classTotals = new int[m_NumClasses]; int [] goodPairTotals = new int[m_NumClusters]; int totalClassPairs = 0; int totalClusterPairs = 0; int goodPairs = 0; for (int i = 0; i < m_NumClusters; i++) { for (int j = 0; j < m_NumClasses; j++) { goodPairTotals[i] += m_ConfusionMatrix[i][j] * (m_ConfusionMatrix[i][j] - 1) / 2; clusterTotals[i] += m_ConfusionMatrix[i][j]; classTotals[j] += m_ConfusionMatrix[i][j]; } } for (int i = 0; i < m_NumClusters; i++) { totalClusterPairs += clusterTotals[i] * (clusterTotals[i] - 1) / 2; goodPairs += goodPairTotals[i]; } for (int i = 0; i < m_NumClasses; i++) { totalClassPairs += classTotals[i] * (classTotals[i] - 1) / 2; } double precision = (goodPairs+0.0)/totalClusterPairs; double recall = (goodPairs+0.0)/totalClassPairs; if (precision > 0) { // avoid divide by zero in the p=0&r=0 case fmeasure = 2 * (precision * recall) / (precision + recall); } System.out.println("Final F-Measure is: " + fmeasure + "; Precision=" + precision + " Recall=" + recall + "\n"); } else { // the class is not nominal fmeasure = 2.0 * m_goodPairs / (m_totalPairs + m_trueGoodPairs); } return fmeasure; } public final double numSameClassPairs() { int numSameClassPairs = 0; for (int i = 0; i < m_labeledTrainPairs.size(); i++) { InstancePair pair = (InstancePair) m_labeledTrainPairs.get(i); if (pair.linkType == InstancePair.MUST_LINK) { numSameClassPairs++; } } return numSameClassPairs; } public final double numDiffClassPairs() { int numDiffClassPairs = 0; for (int i = 0; i < m_labeledTrainPairs.size(); i++) { InstancePair pair = (InstancePair) m_labeledTrainPairs.get(i); if (pair.linkType == InstancePair.CANNOT_LINK) { numDiffClassPairs++; } } return numDiffClassPairs; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -