📄 simplekmeans.java
字号:
public String toString() { if (m_ClusterCentroids == null) { return "No clusterer built yet!"; } int maxWidth = 0; int maxAttWidth = 0; boolean containsNumeric = false; for (int i = 0; i < m_NumClusters; i++) { for (int j = 0 ;j < m_ClusterCentroids.numAttributes(); j++) { if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) { maxAttWidth = m_ClusterCentroids.attribute(j).name().length(); } if (m_ClusterCentroids.attribute(j).isNumeric()) { containsNumeric = true; double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0); // System.err.println(m_ClusterCentroids.instance(i).value(j)+" "+width); if (width < 0) { width = 1; } // decimal + # decimal places + 1 width += 6.0; if ((int)width > maxWidth) { maxWidth = (int)width; } } } } for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { if (m_ClusterCentroids.attribute(i).isNominal()) { Attribute a = m_ClusterCentroids.attribute(i); for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) { String val = a.value((int)m_ClusterCentroids.instance(j).value(i)); if (val.length() > maxWidth) { maxWidth = val.length(); } } for (int j = 0; j < a.numValues(); j++) { String val = a.value(j) + " "; if (val.length() > maxAttWidth) { maxAttWidth = val.length(); } } } } if (m_displayStdDevs) { // check for maximum width of maximum frequency count for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { if (m_ClusterCentroids.attribute(i).isNominal()) { int maxV = Utils.maxIndex(m_FullNominalCounts[i]); /* int percent = (int)((double)m_FullNominalCounts[i][maxV] / Utils.sum(m_ClusterSizes) * 100.0); */ int percent = 6; // max percent width (100%) String nomV = "" + m_FullNominalCounts[i][maxV]; // + " (" + percent + "%)"; if (nomV.length() + percent > maxWidth) { maxWidth = nomV.length() + 1; } } } } // check for size of cluster sizes for (int i = 0; i < m_ClusterSizes.length; i++) { String size = "(" + m_ClusterSizes[i] + ")"; if (size.length() > maxWidth) { maxWidth = size.length(); } } if (m_displayStdDevs && maxAttWidth < "missing".length()) { maxAttWidth = "missing".length(); } String plusMinus = "+/-"; maxAttWidth += 2; if (m_displayStdDevs && containsNumeric) { maxWidth += plusMinus.length(); } if (maxAttWidth < "Attribute".length() + 2) { maxAttWidth = "Attribute".length() + 2; } if (maxWidth < "Full Data".length()) { maxWidth = "Full Data".length() + 1; } if (maxWidth < "missing".length()) { maxWidth = "missing".length() + 1; } StringBuffer temp = new StringBuffer(); // String naString = "N/A"; /* for (int i = 0; i < maxWidth+2; i++) { naString += " "; } */ temp.append("\nkMeans\n======\n"); temp.append("\nNumber of iterations: " + m_Iterations+"\n"); temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors)); if (!m_dontReplaceMissing) { temp.append("\nMissing values globally replaced with mean/mode"); } temp.append("\n\nCluster centroids:\n"); temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true)); temp.append("\n"); temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false)); temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true)); // cluster numbers for (int i = 0; i < m_NumClusters; i++) { String clustNum = "" + i; temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true)); } temp.append("\n"); // cluster sizes String cSize = "(" + Utils.sum(m_ClusterSizes) + ")"; temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true)); for (int i = 0; i < m_NumClusters; i++) { cSize = "(" + m_ClusterSizes[i] + ")"; temp.append(pad(cSize, " ",maxWidth + 1 - cSize.length(), true)); } temp.append("\n"); temp.append(pad("", "=", maxAttWidth + (maxWidth * (m_ClusterCentroids.numInstances()+1) + m_ClusterCentroids.numInstances() + 1), true)); temp.append("\n"); for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { String attName = m_ClusterCentroids.attribute(i).name(); temp.append(attName); for (int j = 0; j < maxAttWidth - attName.length(); j++) { temp.append(" "); } String strVal; String valMeanMode; // full data if (m_ClusterCentroids.attribute(i).isNominal()) { if (m_FullMeansOrModes[i] == -1) { // missing valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad((strVal = m_ClusterCentroids.attribute(i).value((int)m_FullMeansOrModes[i])), " ", maxWidth + 1 - strVal.length(), true); } } else { if (Double.isNaN(m_FullMeansOrModes[i])) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad((strVal = Utils.doubleToString(m_FullMeansOrModes[i], maxWidth,4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); for (int j = 0; j < m_NumClusters; j++) { if (m_ClusterCentroids.attribute(i).isNominal()) { if (m_ClusterCentroids.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad((strVal = m_ClusterCentroids.attribute(i).value((int)m_ClusterCentroids.instance(j).value(i))), " ", maxWidth + 1 - strVal.length(), true); } } else { if (m_ClusterCentroids.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad((strVal = Utils.doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth,4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); } temp.append("\n"); if (m_displayStdDevs) { // Std devs/max nominal String stdDevVal = ""; if (m_ClusterCentroids.attribute(i).isNominal()) { // Do the values of the nominal attribute Attribute a = m_ClusterCentroids.attribute(i); for (int j = 0; j < a.numValues(); j++) { // full data String val = " " + a.value(j); temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false)); int count = m_FullNominalCounts[i][j]; int percent = (int)((double)m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes) * 100.0); String percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); // Clusters for (int k = 0; k < m_NumClusters; k++) { count = m_ClusterNominalCounts[k][i][j]; percent = (int)((double)m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0); percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); } temp.append("\n"); } // missing (if any) if (m_FullMissingCounts[i] > 0) { // Full data temp.append(pad(" missing", " ", maxAttWidth + 1 - " missing".length(), false)); int count = m_FullMissingCounts[i]; int percent = (int)((double)m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0); String percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); // Clusters for (int k = 0; k < m_NumClusters; k++) { count = m_ClusterMissingCounts[k][i]; percent = (int)((double)m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0); percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); } temp.append("\n"); } temp.append("\n"); } else { // Full data if (Double.isNaN(m_FullMeansOrModes[i])) { stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true); } else { stdDevVal = pad((strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth,4).trim()), " ", maxWidth + maxAttWidth + 1 - strVal.length(), true); } temp.append(stdDevVal); // Clusters for (int j = 0; j < m_NumClusters; j++) { if (m_ClusterCentroids.instance(j).isMissing(i)) { stdDevVal = pad("--", " ", maxWidth + 1 - 2, true); } else { stdDevVal = pad((strVal = plusMinus + Utils.doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth,4).trim()), " ", maxWidth + 1 - strVal.length(), true); } temp.append(stdDevVal); } temp.append("\n\n"); } } } temp.append("\n\n"); return temp.toString(); } private String pad(String source, String padChar, int length, boolean leftPad) { StringBuffer temp = new StringBuffer(); if (leftPad) { for (int i = 0; i< length; i++) { temp.append(padChar); } temp.append(source); } else { temp.append(source); for (int i = 0; i< length; i++) { temp.append(padChar); } } return temp.toString(); } /** * Gets the the cluster centroids * * @return the cluster centroids */ public Instances getClusterCentroids() { return m_ClusterCentroids; } /** * Gets the standard deviations of the numeric attributes in each cluster * * @return the standard deviations of the numeric attributes * in each cluster */ public Instances getClusterStandardDevs() { return m_ClusterStdDevs; } /** * Returns for each cluster the frequency counts for the values of each * nominal attribute * * @return the counts */ public int [][][] getClusterNominalCounts() { return m_ClusterNominalCounts; } /** * Gets the squared error for all clusters * * @return the squared error */ public double getSquaredError() { return Utils.sum(m_squaredErrors); } /** * Gets the number of instances in each cluster * * @return The number of instances in each cluster */ public int [] getClusterSizes() { return m_ClusterSizes; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.39 $"); } /** * Main method for testing this class. * * @param argv should contain the following arguments: <p> * -t training file [-N number of clusters] */ public static void main (String[] argv) { runClusterer(new SimpleKMeans(), argv); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -