⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simplekmeans.java

📁 数据挖掘聚类算法:SimpleKMeans源代码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
  public String toString() {    if (m_ClusterCentroids == null) {      return "No clusterer built yet!";    }    int maxWidth = 0;    int maxAttWidth = 0;    boolean containsNumeric = false;    for (int i = 0; i < m_NumClusters; i++) {      for (int j = 0 ;j < m_ClusterCentroids.numAttributes(); j++) {        if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) {          maxAttWidth = m_ClusterCentroids.attribute(j).name().length();        }	if (m_ClusterCentroids.attribute(j).isNumeric()) {          containsNumeric = true;	  double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) /	    Math.log(10.0);          //          System.err.println(m_ClusterCentroids.instance(i).value(j)+" "+width);          if (width < 0) {            width = 1;          }          // decimal + # decimal places + 1	  width += 6.0;	  if ((int)width > maxWidth) {	    maxWidth = (int)width;	  }	}      }    }    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {      if (m_ClusterCentroids.attribute(i).isNominal()) {        Attribute a = m_ClusterCentroids.attribute(i);        for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) {          String val = a.value((int)m_ClusterCentroids.instance(j).value(i));          if (val.length() > maxWidth) {            maxWidth = val.length();          }        }        for (int j = 0; j < a.numValues(); j++) {          String val = a.value(j) + " ";          if (val.length() > maxAttWidth) {            maxAttWidth = val.length();          }        }      }    }    if (m_displayStdDevs) {      // check for maximum width of maximum frequency count      for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {        if (m_ClusterCentroids.attribute(i).isNominal()) {          int maxV = Utils.maxIndex(m_FullNominalCounts[i]);          /*          int percent = (int)((double)m_FullNominalCounts[i][maxV] /                      Utils.sum(m_ClusterSizes) * 100.0); */          int percent = 6; // max percent width (100%)          String nomV = "" + m_FullNominalCounts[i][maxV];            //            + " (" + percent + "%)";          if (nomV.length() + percent > maxWidth) {            maxWidth = nomV.length() + 1;          }        }      }    }    // check for size of cluster sizes    for (int i = 0; i < m_ClusterSizes.length; i++) {      String size = "(" + m_ClusterSizes[i] + ")";      if (size.length() > maxWidth) {        maxWidth = size.length();      }    }        if (m_displayStdDevs && maxAttWidth < "missing".length()) {      maxAttWidth = "missing".length();    }        String plusMinus = "+/-";    maxAttWidth += 2;    if (m_displayStdDevs && containsNumeric) {      maxWidth += plusMinus.length();    }    if (maxAttWidth < "Attribute".length() + 2) {      maxAttWidth = "Attribute".length() + 2;    }    if (maxWidth < "Full Data".length()) {      maxWidth = "Full Data".length() + 1;    }    if (maxWidth < "missing".length()) {      maxWidth = "missing".length() + 1;    }        StringBuffer temp = new StringBuffer();    //    String naString = "N/A";        /*    for (int i = 0; i < maxWidth+2; i++) {      naString += " ";      } */    temp.append("\nkMeans\n======\n");    temp.append("\nNumber of iterations: " + m_Iterations+"\n");    temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));    if (!m_dontReplaceMissing) {      temp.append("\nMissing values globally replaced with mean/mode");    }    temp.append("\n\nCluster centroids:\n");    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));    temp.append("\n");    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));        temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true));    // cluster numbers    for (int i = 0; i < m_NumClusters; i++) {      String clustNum = "" + i;      temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));    }    temp.append("\n");    // cluster sizes    String cSize = "(" + Utils.sum(m_ClusterSizes) + ")";    temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true));    for (int i = 0; i < m_NumClusters; i++) {      cSize = "(" + m_ClusterSizes[i] + ")";      temp.append(pad(cSize, " ",maxWidth + 1 - cSize.length(), true));    }    temp.append("\n");    temp.append(pad("", "=", maxAttWidth +                     (maxWidth * (m_ClusterCentroids.numInstances()+1)                      + m_ClusterCentroids.numInstances() + 1), true));    temp.append("\n");    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {      String attName = m_ClusterCentroids.attribute(i).name();      temp.append(attName);      for (int j = 0; j < maxAttWidth - attName.length(); j++) {        temp.append(" ");      }      String strVal;      String valMeanMode;      // full data      if (m_ClusterCentroids.attribute(i).isNominal()) {        if (m_FullMeansOrModes[i] == -1) { // missing          valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);        } else {          valMeanMode =             pad((strVal = m_ClusterCentroids.attribute(i).value((int)m_FullMeansOrModes[i])),                " ", maxWidth + 1 - strVal.length(), true);        }      } else {        if (Double.isNaN(m_FullMeansOrModes[i])) {          valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);        } else {          valMeanMode =  pad((strVal = Utils.doubleToString(m_FullMeansOrModes[i],                                                            maxWidth,4).trim()),                              " ", maxWidth + 1 - strVal.length(), true);        }      }      temp.append(valMeanMode);      for (int j = 0; j < m_NumClusters; j++) {        if (m_ClusterCentroids.attribute(i).isNominal()) {          if (m_ClusterCentroids.instance(j).isMissing(i)) {            valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);          } else {            valMeanMode =               pad((strVal = m_ClusterCentroids.attribute(i).value((int)m_ClusterCentroids.instance(j).value(i))),                  " ", maxWidth + 1 - strVal.length(), true);          }        } else {          if (m_ClusterCentroids.instance(j).isMissing(i)) {            valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);          } else {            valMeanMode = pad((strVal = Utils.doubleToString(m_ClusterCentroids.instance(j).value(i),                                                               maxWidth,4).trim()),                                 " ", maxWidth + 1 - strVal.length(), true);          }        }        temp.append(valMeanMode);      }      temp.append("\n");      if (m_displayStdDevs) {        // Std devs/max nominal        String stdDevVal = "";        if (m_ClusterCentroids.attribute(i).isNominal()) {          // Do the values of the nominal attribute          Attribute a = m_ClusterCentroids.attribute(i);          for (int j = 0; j < a.numValues(); j++) {            // full data            String val = "  " + a.value(j);            temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));            int count = m_FullNominalCounts[i][j];            int percent = (int)((double)m_FullNominalCounts[i][j] /                              Utils.sum(m_ClusterSizes) * 100.0);            String percentS = "" + percent + "%)";            percentS = pad(percentS, " ", 5 - percentS.length(), true);            stdDevVal = "" + count + " (" + percentS;            stdDevVal =               pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);            temp.append(stdDevVal);            // Clusters            for (int k = 0; k < m_NumClusters; k++) {              count = m_ClusterNominalCounts[k][i][j];              percent = (int)((double)m_ClusterNominalCounts[k][i][j] /                              m_ClusterSizes[k] * 100.0);              percentS = "" + percent + "%)";              percentS = pad(percentS, " ", 5 - percentS.length(), true);              stdDevVal = "" + count + " (" + percentS;              stdDevVal =                 pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);              temp.append(stdDevVal);            }            temp.append("\n");          }          // missing (if any)          if (m_FullMissingCounts[i] > 0) {            // Full data            temp.append(pad("  missing", " ", maxAttWidth + 1 - "  missing".length(), false));            int count = m_FullMissingCounts[i];            int percent = (int)((double)m_FullMissingCounts[i] /                              Utils.sum(m_ClusterSizes) * 100.0);            String percentS = "" + percent + "%)";            percentS = pad(percentS, " ", 5 - percentS.length(), true);            stdDevVal = "" + count + " (" + percentS;            stdDevVal =               pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);            temp.append(stdDevVal);                       // Clusters            for (int k = 0; k < m_NumClusters; k++) {              count = m_ClusterMissingCounts[k][i];              percent = (int)((double)m_ClusterMissingCounts[k][i] /                              m_ClusterSizes[k] * 100.0);              percentS = "" + percent + "%)";              percentS = pad(percentS, " ", 5 - percentS.length(), true);              stdDevVal = "" + count + " (" + percentS;              stdDevVal =                 pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);              temp.append(stdDevVal);            }            temp.append("\n");          }          temp.append("\n");        } else {          // Full data          if (Double.isNaN(m_FullMeansOrModes[i])) {            stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true);          } else {            stdDevVal = pad((strVal = plusMinus                              + Utils.doubleToString(m_FullStdDevs[i],                                                    maxWidth,4).trim()),                             " ", maxWidth + maxAttWidth + 1 - strVal.length(), true);          }          temp.append(stdDevVal);          // Clusters          for (int j = 0; j < m_NumClusters; j++) {            if (m_ClusterCentroids.instance(j).isMissing(i)) {              stdDevVal = pad("--", " ", maxWidth + 1 - 2, true);            } else {              stdDevVal =                 pad((strVal = plusMinus                      + Utils.doubleToString(m_ClusterStdDevs.instance(j).value(i),                                            maxWidth,4).trim()),                     " ", maxWidth + 1 - strVal.length(), true);            }            temp.append(stdDevVal);          }          temp.append("\n\n");        }      }    }    temp.append("\n\n");    return temp.toString();  }  private String pad(String source, String padChar,                      int length, boolean leftPad) {    StringBuffer temp = new StringBuffer();    if (leftPad) {      for (int i = 0; i< length; i++) {        temp.append(padChar);      }      temp.append(source);    } else {      temp.append(source);      for (int i = 0; i< length; i++) {        temp.append(padChar);      }    }    return temp.toString();  }  /**   * Gets the the cluster centroids   *    * @return		the cluster centroids   */  public Instances getClusterCentroids() {    return m_ClusterCentroids;  }  /**   * Gets the standard deviations of the numeric attributes in each cluster   *    * @return		the standard deviations of the numeric attributes    * 			in each cluster   */  public Instances getClusterStandardDevs() {    return m_ClusterStdDevs;  }  /**   * Returns for each cluster the frequency counts for the values of each    * nominal attribute   *    * @return		the counts   */  public int [][][] getClusterNominalCounts() {    return m_ClusterNominalCounts;  }  /**   * Gets the squared error for all clusters   *    * @return		the squared error   */  public double getSquaredError() {    return Utils.sum(m_squaredErrors);  }  /**   * Gets the number of instances in each cluster   *    * @return		The number of instances in each cluster   */  public int [] getClusterSizes() {    return m_ClusterSizes;  }    /**   * Returns the revision string.   *    * @return		the revision   */  public String getRevision() {    return RevisionUtils.extract("$Revision: 1.39 $");  }  /**   * Main method for testing this class.   *   * @param argv should contain the following arguments: <p>   * -t training file [-N number of clusters]   */  public static void main (String[] argv) {    runClusterer(new SimpleKMeans(), argv);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -