📄 xmeans.java
字号:
} /** * Recompute the new centers. New cluster center is center of mass of its * instances. Returns true if cluster stays the same. * @param centers the input and output centers * @param instancesOfCent the instances to the centers * @param model data model information * @return true if converged. */ private boolean recomputeCenters(Instances centers, int [][] instOfCent, Instances model) { boolean converged = true; for (int i = 0; i < centers.numInstances(); i++) { double val; for (int j = 0; j < model.numAttributes(); j++) { val = meanOrMode(m_Instances, instOfCent[i], j); for (int k = 0; k < instOfCent[i].length; k++) if (converged && m_ClusterCenters.instance(i).value(j) != val) converged = false; if (!converged) m_ClusterCenters.instance(i).setValue(j, val); } } return converged; } /** * Recompute the new centers - 2nd version * Same as recomputeCenters, but does not check if center stays the same. * * @param centers the input center and output centers * @param instOfCentIndexes the indexes of the instances to the centers * @param model data model information */ private void recomputeCentersFast(Instances centers, int [][] instOfCentIndexes, Instances model ) { for (int i = 0; i < centers.numInstances(); i++) { double val; for (int j = 0; j < model.numAttributes(); j++) { val = meanOrMode(m_Instances, instOfCentIndexes[i], j); centers.instance(i).setValue(j, val); } } } /** * Computes Mean Or Mode of one attribute on a subset of m_Instances. * The subset is defined by an index list. * @param instances all instances * @param instList the indexes of the instances the mean is computed from * @param attIndex the index of the attribute * @return mean value */ private double meanOrMode(Instances instances, int [] instList, int attIndex) { double result, found; int [] counts; int numInst = instList.length; if (instances.attribute(attIndex).isNumeric()) { result = found = 0; for (int j = 0; j < numInst; j++) { Instance currInst = instances.instance(instList[j]); if (!currInst.isMissing(attIndex)) { found += currInst.weight(); result += currInst.weight() * currInst.value(attIndex); } } if (Utils.eq(found, 0)) { return 0; } else { return result / found; } } else if (instances.attribute(attIndex).isNominal()) { counts = new int[instances.attribute(attIndex).numValues()]; for (int j = 0; j < numInst; j++) { Instance currInst = instances.instance(instList[j]); if (!currInst.isMissing(attIndex)) { counts[(int) currInst.value(attIndex)] += currInst.weight(); } } return (double)Utils.maxIndex(counts); } else { return 0; } } /** * Assigns instances to centers. * * @param tree KDTree on all instances * @param centers all the input centers * @param instOfCent the instances to each center * @param allInstList list of all instances * @param assignments assignments of instances to centers * @param iterationCount the number of iteration * @return true if converged */ private boolean assignToCenters(KDTree tree, Instances centers, int [][] instOfCent, int [] allInstList, int [] assignments, int iterationCount) throws Exception { boolean converged = true; if (tree != null) { // using KDTree structure for assigning converged = assignToCenters(tree, centers, instOfCent, assignments, iterationCount); } else { converged = assignToCenters(centers, instOfCent, allInstList, assignments); } return converged; } /** * Assign instances to centers using KDtree. * First part of conventionell K-Means, returns true if new assignment * is the same as the last one. * * @param tree KDTree on all instances * @param centers all the input centers * @param instOfCent the instances to each center * @param assignments assignments of instances to centers * @param iterationCount the number of iteration * @return true if converged */ private boolean assignToCenters(KDTree kdtree, Instances centers, int [][] instOfCent, int [] assignments, int iterationCount) throws Exception { int numCent = centers.numInstances(); int numInst = m_Instances.numInstances(); int [] oldAssignments = new int[numInst]; // WARNING: assignments is "input/output-parameter" // should not be null if (assignments == null) { OOPS(D_METH_MISUSE, "assignment was null"); assignments = new int[numInst]; for (int i = 0; i < numInst; i++) { assignments[0] = -1; } } // WARNING: instOfCent is "input/output-parameter" // should not be null if (instOfCent == null) { OOPS(D_METH_MISUSE, "inst of cent was null"); instOfCent = new int [numCent][]; } // save old assignments for (int i = 0; i < assignments.length; i++) { oldAssignments[i] = assignments[i]; } // use tree to get new assignments kdtree.centerInstances(centers, assignments, Math.pow(.8, iterationCount)); boolean converged = true; //PFD_CURR("assignments"); //for (int d = 0; d < assignments.length; d++) { // System.out.print(" "+assignments[d]+", "); //} //PFD(D_CURR, " "); // compare with previous assignment for (int i = 0; converged && (i < assignments.length); i++) { converged = (oldAssignments[i] == assignments[i]); if (assignments[i] == -1) throw new Exception("Instance " + i + " has not been assigned to cluster."); } if (!converged) { int [] numInstOfCent = new int[numCent]; for (int i = 0; i < numCent; i++) numInstOfCent[i] = 0; // count num of assignments per center for (int i = 0; i < numInst; i++) numInstOfCent[assignments[i]]++; // prepare instancelists per center for (int i = 0; i < numCent; i++){ instOfCent[i] = new int[numInstOfCent[i]]; } // write instance lists per center for (int i = 0; i < numCent; i++) { int index = -1; for (int j = 0; j < numInstOfCent[i]; j++) { index = nextAssignedOne(i, index, assignments); instOfCent[i][j] = index; } } /* for (int i = 0; i < numInst; i++) { int center = assignments[i]; instOfCent[center][numInstOfCent[center]++] = i; }*/ } return converged; } /** * Assign instances to centers. * Part of conventionell K-Means, returns true if new assignment * is the same as the last one. * * @param centers all the input centers * @param instOfCent the instances to each center * @param allInstList list of all indexes * @param assignments assignments of instances to centers * @return true if converged */ private boolean assignToCenters(Instances centers, int [][] instOfCent, int [] allInstList, int [] assignments) throws Exception { // todo: undecided situations boolean converged = true; // true if new assignment is the same // as the old one int numInst = allInstList.length; int numCent = centers.numInstances(); int [] numInstOfCent = new int [numCent]; for (int i = 0; i < numCent; i++) numInstOfCent[i] = 0; // WARNING: assignments is "input/output-parameter" // should not be null if (assignments == null) { OOPS(D_METH_MISUSE, "assignment was null"); assignments = new int[numInst]; for (int i = 0; i < numInst; i++) { assignments[i] = -1; } } // WARNING: instOfCent is "input/output-parameter" // should not be null if (instOfCent == null) { OOPS(D_METH_MISUSE, "inst of cent was null"); instOfCent = new int [numCent][]; } // set assignments for (int i = 0; i < numInst; i++) { Instance inst = m_Instances.instance(allInstList[i]); int newC = clusterProcessedInstance(inst, centers); if (converged && newC != assignments[i]) { converged = false; } numInstOfCent[newC]++; if (!converged) assignments[i] = newC; } // the following is only done // if assignments are not the same, because too much effort if (!converged) { PFD(D_FOLLOWSPLIT, "assignToCenters -> it has NOT converged"); for (int i = 0; i < numCent; i++) { instOfCent[i] = new int [numInstOfCent[i]]; } for (int i = 0; i < numCent; i++) { int index = -1; for (int j = 0; j < numInstOfCent[i]; j++) { index = nextAssignedOne(i, index, assignments); instOfCent[i][j] = allInstList[index]; } } } else PFD(D_FOLLOWSPLIT, "assignToCenters -> it has converged"); return converged; } /** * Searches along the assignment array for the next entry of the center * in question. * @param cent index of the center * @param lasIndex index to start searching * @param assignments assignments * @return index of the instance the center cent is assigned to */ private int nextAssignedOne(int cent, int lastIndex, int [] assignments) { int len = assignments.length; int index = lastIndex + 1; while (index < len) { if (assignments[index] == cent) { return (index); } index++; } return (-1); } /** * Split centers in their region. Generates random vektor of * length = variance and * adds and substractsx to cluster vektor to get two new clusters. * * @param random random function * @param center the center that is split here * @param variance variance of the cluster * @param model data model valid * @return a pair of new centers * @exception something in AlgVector goes wrong */ private Instances splitCenter(Random random, Instance center, double variance, Instances model) throws Exception { m_NumSplits++; AlgVector r = null; Instances children = new Instances(model, 2); if (m_DebugVektorsFile != null) { Instance nextVektor = getNextDebugVektorsInstance(model); PFD(D_RANDOMVEKTOR, "Random Vector from File " + nextVektor); r = new AlgVector(nextVektor); } else { //OOPS("before split variance "+ variance); //OOPS("center to be split "+ center); // random vector of length = variance r = new AlgVector(model, random); } r.changeLength(Math.pow(variance, 0.5)); //OOPS(D_FOLLOWSPLIT, "variance " + variance + // " sqrt-variance " + Math.pow(variance, 0.5)); PFD(D_RANDOMVEKTOR, "random vector *variance "+ r); // add random vector to center AlgVector c = new AlgVector(center); AlgVector c2 = (AlgVector) c.clone(); c.add(r); Instance newCenter = c.getAsInstance(model, random); children.add(newCenter); PFD(D_FOLLOWSPLIT, "first child "+ newCenter); // substract random vector to center c2.substract(r); newCenter = c.getAsInstance(model, random); children.add(newCenter); PFD(D_FOLLOWSPLIT, "second child "+ newCenter); return children; } /** * Split centers in their region. * (*Alternative version of splitCenter()*) * @param instances of the region * @return a pair of new centers */ private Instances splitCenters(Random random, Instances instances, Instances model) { Instances children = new Instances(model, 2); int instIndex = Math.abs(random.nextInt()) % instances.numInstances(); children.add(instances.instance(instIndex)); int instIndex2 = instIndex; int count = 0; while ((instIndex2 == instIndex) && count < 10) { count++; instIndex2 = Math.abs(random.nextInt()) % instances.numInstances(); } children.add(instances.instance(instIndex2)); return children; } /** * Generates new centers randomly. Used for starting centers. * * @param random0 random number generator * @param model data model of the instances * @param numClusters number of clusters * @return new centers */ private Instances makeCentersRandomly(Random random0, Instances model, int numClusters) { Instances clusterCenters = new Instances(model, numClusters); m_NumClusters = numClusters; // makes the new centers randomly for (int i = 0; i < numClusters; i++) { int instIndex = Math.abs(random0.nextInt()) % m_Instances.numInstances(); clusterCenters.add(m_Instances.instance(instIndex)); } return clusterCenters; } /** * Returns the BIC-value for the given center and instances. * @param instList The indices of the instances that belong to the center * @param center the center. * @param mle maximum likelihood * @param model the data model * @return the BIC value */ private double calculateBIC(int [] instList, Instance center, double mle, Instances model) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -