⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmeans.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
   * Same as recomputeCenters, but does not check if center stays the same.   *    * @param centers the input center and output centers   * @param instOfCentIndexes the indexes of the instances to the centers    * @param model data model information   */  protected void recomputeCentersFast(Instances centers,          				    int[][] instOfCentIndexes, 				    Instances model   				    ) {    for (int i = 0; i < centers.numInstances(); i++) {      double val;      for (int j = 0; j < model.numAttributes(); j++) {	val = meanOrMode(m_Instances, instOfCentIndexes[i], j);	centers.instance(i).setValue(j, val);      }    }  }  /**   * Computes Mean Or Mode of one attribute on a subset of m_Instances.    * The subset is defined by an index list.   * @param instances all instances   * @param instList the indexes of the instances the mean is computed from   * @param attIndex the index of the attribute   * @return mean value   */  protected double meanOrMode(Instances instances, 			    int[] instList, 			    int attIndex) {    double result, found;    int[] counts;    int numInst = instList.length;    if (instances.attribute(attIndex).isNumeric()) {      result = found = 0;      for (int j = 0; j < numInst; j++) {	Instance currInst = instances.instance(instList[j]);	if (!currInst.isMissing(attIndex)) {	  found += currInst.weight();	  result += currInst.weight() * 	    currInst.value(attIndex);	}      }      if (Utils.eq(found, 0)) {	return 0;      } else {	return result / found;      }    } else if (instances.attribute(attIndex).isNominal()) {      counts = new int[instances.attribute(attIndex).numValues()];      for (int j = 0; j < numInst; j++) {	Instance currInst = instances.instance(instList[j]);	if (!currInst.isMissing(attIndex)) {	  counts[(int) currInst.value(attIndex)] += currInst.weight();	}      }      return (double)Utils.maxIndex(counts);    } else {      return 0;    }  }   /**   * Assigns instances to centers.   *   * @param tree KDTree on all instances   * @param centers all the input centers   * @param instOfCent the instances to each center   * @param allInstList list of all instances   * @param assignments assignments of instances to centers   * @param iterationCount the number of iteration    * @return true if converged   * @throws Exception is something goes wrong   */  protected boolean assignToCenters(KDTree tree,                                  Instances centers, 				  int[][] instOfCent,                                   int[] allInstList,				  int[] assignments,                                  int iterationCount) throws Exception {        boolean converged = true;    if (tree != null) {      // using KDTree structure for assigning      converged = assignToCenters(tree,				  centers, 				  instOfCent,				  assignments,				  iterationCount);    } else {      converged = assignToCenters(centers, 				  instOfCent,				  allInstList, 				  assignments);    }    return converged;  }  /**   * Assign instances to centers using KDtree.   * First part of conventionell K-Means, returns true if new assignment   * is the same as the last one.   *   * @param kdtree KDTree on all instances   * @param centers all the input centers   * @param instOfCent the instances to each center   * @param assignments assignments of instances to centers   * @param iterationCount the number of iteration    * @return true if converged   * @throws Exception in case instances are not assigned to cluster   */  protected boolean assignToCenters(KDTree kdtree,				  Instances centers, 				  int[][] instOfCent, 				  int[] assignments,                                  int iterationCount) throws Exception {    int numCent = centers.numInstances();    int numInst = m_Instances.numInstances();     int[] oldAssignments = new int[numInst];        // WARNING:  assignments is "input/output-parameter"    // should not be null    if (assignments == null) {      assignments = new int[numInst];      for (int i = 0; i < numInst; i++) {	assignments[0] = -1;      }    }        // WARNING:  instOfCent is "input/output-parameter"    // should not be null    if (instOfCent == null) {      instOfCent = new int [numCent][];    }        // save old assignments    for (int i = 0; i < assignments.length; i++) {      oldAssignments[i] = assignments[i];    }        // use tree to get new assignments    kdtree.centerInstances(centers, assignments,			   Math.pow(.8, iterationCount));	    boolean converged = true;      // compare with previous assignment    for (int i = 0; converged && (i < assignments.length); i++) {      converged = (oldAssignments[i] == assignments[i]);      if (assignments[i] == -1) 	throw new Exception("Instance " + i + 			    " has not been assigned to cluster.");    }    if (!converged) {      int[] numInstOfCent = new int[numCent];      for (int i = 0; i < numCent; i++)	numInstOfCent[i] = 0;      // count num of assignments per center      for (int i = 0; i < numInst; i++)	numInstOfCent[assignments[i]]++;            // prepare instancelists per center      for (int i = 0; i < numCent; i++){	instOfCent[i] = new int[numInstOfCent[i]];      }      // write instance lists per center      for (int i = 0; i < numCent; i++) {	int index = -1;   	for (int j = 0; j < numInstOfCent[i]; j++) {	  index = nextAssignedOne(i, index, assignments);	  instOfCent[i][j] = index;	}      }    }      return converged;  }  /**   * Assign instances to centers.   * Part of conventionell K-Means, returns true if new assignment   * is the same as the last one.   *   * @param centers all the input centers   * @param instOfCent the instances to each center   * @param allInstList list of all indexes   * @param assignments assignments of instances to centers   * @return true if converged   * @throws Exception if something goes wrong   */  protected boolean assignToCenters(Instances centers, 				  int[][] instOfCent,				  int[] allInstList,				  int[] assignments)     throws Exception {        // todo: undecided situations    boolean converged = true; // true if new assignment is the same                               // as the old one    int numInst = allInstList.length;     int numCent = centers.numInstances();    int[] numInstOfCent = new int [numCent];    for (int i = 0; i < numCent; i++) numInstOfCent[i] = 0;    // WARNING:  assignments is "input/output-parameter"    // should not be null    if (assignments == null) {      assignments = new int[numInst];      for (int i = 0; i < numInst; i++) {	assignments[i] = -1;      }    }    // WARNING: instOfCent is "input/output-parameter"    // should not be null    if (instOfCent == null) {      instOfCent = new int [numCent][];    }    // set assignments    for (int i = 0; i < numInst; i++) {      Instance inst = m_Instances.instance(allInstList[i]);      int newC = clusterProcessedInstance(inst, centers);            if (converged && newC != assignments[i]) {	converged = false;      }      numInstOfCent[newC]++;       if (!converged)	assignments[i] = newC;    }    // the following is only done    // if assignments are not the same, because too much effort    if (!converged) {      PFD(D_FOLLOWSPLIT, "assignToCenters -> it has NOT converged");      for (int i = 0; i < numCent; i++) {	instOfCent[i] = new int [numInstOfCent[i]];      }      for (int i = 0; i < numCent; i++) {	int index = -1;   	for (int j = 0; j < numInstOfCent[i]; j++) {	  index = nextAssignedOne(i, index, assignments);	  instOfCent[i][j] = allInstList[index];	}      }    }    else      PFD(D_FOLLOWSPLIT, "assignToCenters -> it has converged");    return converged;  }  /**   * Searches along the assignment array for the next entry of the center    * in question.   * @param cent index of the center    * @param lastIndex index to start searching   * @param assignments assignments   * @return index of the instance the center cent is assigned to   */  protected int nextAssignedOne(int cent, int lastIndex, 			      int[] assignments) {    int len = assignments.length;    int index = lastIndex + 1;    while (index < len) {      if (assignments[index] == cent) {	return (index);      }      index++;    }    return (-1);  }  /**   * Split centers in their region. Generates random vector of    * length = variance and   * adds and substractsx to cluster vector to get two new clusters.   *    * @param random random function   * @param center the center that is split here   * @param variance variance of the cluster    * @param model data model valid   * @return a pair of new centers   * @throws Exception something in AlgVector goes wrong   */  protected Instances splitCenter(Random random,			        Instance center,			        double variance,			        Instances model) throws Exception {    m_NumSplits++;    AlgVector r = null;    Instances children = new Instances(model, 2);    if (m_DebugVectorsFile.exists() && m_DebugVectorsFile.isFile()) {      Instance nextVector = getNextDebugVectorsInstance(model);      PFD(D_RANDOMVECTOR, "Random Vector from File " + nextVector);      r = new AlgVector(nextVector);    }    else {      // random vector of length = variance      r = new AlgVector(model, random);    }    r.changeLength(Math.pow(variance, 0.5));    PFD(D_RANDOMVECTOR, "random vector *variance "+ r);        // add random vector to center    AlgVector c = new AlgVector(center);    AlgVector c2 = (AlgVector) c.clone();    c = c.add(r);    Instance newCenter = c.getAsInstance(model, random);    children.add(newCenter);    PFD(D_FOLLOWSPLIT, "first child "+ newCenter);        // substract random vector to center    c2 = c2.substract(r);    newCenter = c2.getAsInstance(model, random);    children.add(newCenter);    PFD(D_FOLLOWSPLIT, "second child "+ newCenter);    return children;  }  /**   * Split centers in their region.   * (*Alternative version of splitCenter()*)    *    * @param random the random number generator   * @param instances of the region   * @param model the model for the centers    * (should be the same as that of instances)   * @return a pair of new centers   */  protected Instances splitCenters(Random random,				 Instances instances,				 Instances model) {    Instances children = new Instances(model, 2);    int instIndex = Math.abs(random.nextInt()) % instances.numInstances();    children.add(instances.instance(instIndex));    int instIndex2 = instIndex;    int count = 0;    while ((instIndex2 == instIndex) && count < 10) {      count++;      instIndex2 = Math.abs(random.nextInt()) % instances.numInstances();    }    children.add(instances.instance(instIndex2));        return children;  }  /**   * Generates new centers randomly. Used for starting centers.   *   * @param random0 random number generator   * @param model data model of the instances   * @param numClusters number of clusters   * @return new centers   */  protected Instances makeCentersRandomly(Random random0,					Instances model,					int numClusters) {    Instances clusterCenters = new Instances(model, numClusters);    m_NumClusters = numClusters;    // makes the new centers randomly    for (int i = 0; i < numClusters; i++) {      int instIndex = Math.abs(random0.nextInt()) % m_Instances.numInstances();      clusterCenters.add(m_Instances.instance(instIndex));    }    return clusterCenters;  }  /**   * Returns the BIC-value for the given center and instances.   * @param instList The indices of the instances that belong to the center   * @param center the center.   * @param mle maximum likelihood   * @param model the data model   * @return the BIC value    */     protected double calculateBIC(int[] instList, Instance center,			      double mle, Instances model) {    int[][] w1 = new int[1][instList.length];    for (int i = 0; i < instList.length; i++) {      w1[0][i] = instList[i];    }    double[] m = {mle};    Instances w2 = new Instances(model, 1);    w2.add(center);    return calculateBIC(w1, w2, m);    }    /**   * Calculates the BIC for the given set of centers and instances.   * @param instOfCent The instances that belong to their respective centers   * @param centers the centers   * @param mle maximum likelihood   * @return The BIC for the input.   */  protected double calculateBIC(int[][] instOfCent, Instances centers,			      double[] mle) {    double loglike = 0.0;    int numInstTotal = 0;    int numCenters = centers.numInstances();    int numDimensions = centers.numAttributes();    int numParameters = (numCenters - 1) + //probabilities      numCenters * numDimensions + //means      numCenters; // variance params    for (int i = 0; i < centers.numInstances(); i++) {      loglike += logLikelihoodEstimate(instOfCent[i].length, centers.instance(i),				       mle[i], centers.numInstances() * 2);      numInstTotal += instOfCent[i].length;    }    /* diff       thats how we did it    loglike -= ((centers.numAttributes() + 1.0) * centers.numInstances() * 1)      * Math.log(count);      */    loglike -= numInstTotal * Math.log(numInstTotal);    //System.out.println ("numInstTotal " + numInstTotal +    //                    "calculateBIC res " + loglike);    loglike -= (numParameters / 2.0) * Math.log(numInstTotal);    //System.out.println ("numParam " +    //                     + numParameters +    //			" calculateBIC res " + loglike);    return loglike;  }    /**   * Calculates the log-likelihood of the data for the given model, taken   * at the maximum likelihood point.   *   * @param numInst number of instances that belong to the center   * @param center the center   * @param distortion distortion    * @param numCent number of centers 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -