📄 xmeans.java
字号:
Instances model ) { for (int i = 0; i < centers.numInstances(); i++) { double val; for (int j = 0; j < model.numAttributes(); j++) { val = meanOrMode(m_Instances, instOfCentIndexes[i], j); centers.instance(i).setValue(j, val); } } } /** * Computes Mean Or Mode of one attribute on a subset of m_Instances. * The subset is defined by an index list. * @param instances all instances * @param instList the indexes of the instances the mean is computed from * @param attIndex the index of the attribute * @return mean value */ private double meanOrMode(Instances instances, int [] instList, int attIndex) { double result, found; int [] counts; int numInst = instList.length; if (instances.attribute(attIndex).isNumeric()) { result = found = 0; for (int j = 0; j < numInst; j++) { Instance currInst = instances.instance(instList[j]); if (!currInst.isMissing(attIndex)) { found += currInst.weight(); result += currInst.weight() * currInst.value(attIndex); } } if (Utils.eq(found, 0)) { return 0; } else { return result / found; } } else if (instances.attribute(attIndex).isNominal()) { counts = new int[instances.attribute(attIndex).numValues()]; for (int j = 0; j < numInst; j++) { Instance currInst = instances.instance(instList[j]); if (!currInst.isMissing(attIndex)) { counts[(int) currInst.value(attIndex)] += currInst.weight(); } } return (double)Utils.maxIndex(counts); } else { return 0; } } /** * Assigns instances to centers. * * @param tree KDTree on all instances * @param centers all the input centers * @param instOfCent the instances to each center * @param allInstList list of all instances * @param assignments assignments of instances to centers * @param iterationCount the number of iteration * @return true if converged * @throws Exception is something goes wrong */ private boolean assignToCenters(KDTree tree, Instances centers, int [][] instOfCent, int [] allInstList, int [] assignments, int iterationCount) throws Exception { boolean converged = true; if (tree != null) { // using KDTree structure for assigning converged = assignToCenters(tree, centers, instOfCent, assignments, iterationCount); } else { converged = assignToCenters(centers, instOfCent, allInstList, assignments); } return converged; } /** * Assign instances to centers using KDtree. * First part of conventionell K-Means, returns true if new assignment * is the same as the last one. * * @param kdtree KDTree on all instances * @param centers all the input centers * @param instOfCent the instances to each center * @param assignments assignments of instances to centers * @param iterationCount the number of iteration * @return true if converged * @throws Exception in case instances are not assigned to cluster */ private boolean assignToCenters(KDTree kdtree, Instances centers, int [][] instOfCent, int [] assignments, int iterationCount) throws Exception { int numCent = centers.numInstances(); int numInst = m_Instances.numInstances(); int [] oldAssignments = new int[numInst]; // WARNING: assignments is "input/output-parameter" // should not be null if (assignments == null) { assignments = new int[numInst]; for (int i = 0; i < numInst; i++) { assignments[0] = -1; } } // WARNING: instOfCent is "input/output-parameter" // should not be null if (instOfCent == null) { instOfCent = new int [numCent][]; } // save old assignments for (int i = 0; i < assignments.length; i++) { oldAssignments[i] = assignments[i]; } // use tree to get new assignments kdtree.centerInstances(centers, assignments, Math.pow(.8, iterationCount)); boolean converged = true; // compare with previous assignment for (int i = 0; converged && (i < assignments.length); i++) { converged = (oldAssignments[i] == assignments[i]); if (assignments[i] == -1) throw new Exception("Instance " + i + " has not been assigned to cluster."); } if (!converged) { int [] numInstOfCent = new int[numCent]; for (int i = 0; i < numCent; i++) numInstOfCent[i] = 0; // count num of assignments per center for (int i = 0; i < numInst; i++) numInstOfCent[assignments[i]]++; // prepare instancelists per center for (int i = 0; i < numCent; i++){ instOfCent[i] = new int[numInstOfCent[i]]; } // write instance lists per center for (int i = 0; i < numCent; i++) { int index = -1; for (int j = 0; j < numInstOfCent[i]; j++) { index = nextAssignedOne(i, index, assignments); instOfCent[i][j] = index; } } } return converged; } /** * Assign instances to centers. * Part of conventionell K-Means, returns true if new assignment * is the same as the last one. * * @param centers all the input centers * @param instOfCent the instances to each center * @param allInstList list of all indexes * @param assignments assignments of instances to centers * @return true if converged * @throws Exception if something goes wrong */ private boolean assignToCenters(Instances centers, int [][] instOfCent, int [] allInstList, int [] assignments) throws Exception { // todo: undecided situations boolean converged = true; // true if new assignment is the same // as the old one int numInst = allInstList.length; int numCent = centers.numInstances(); int [] numInstOfCent = new int [numCent]; for (int i = 0; i < numCent; i++) numInstOfCent[i] = 0; // WARNING: assignments is "input/output-parameter" // should not be null if (assignments == null) { assignments = new int[numInst]; for (int i = 0; i < numInst; i++) { assignments[i] = -1; } } // WARNING: instOfCent is "input/output-parameter" // should not be null if (instOfCent == null) { instOfCent = new int [numCent][]; } // set assignments for (int i = 0; i < numInst; i++) { Instance inst = m_Instances.instance(allInstList[i]); int newC = clusterProcessedInstance(inst, centers); if (converged && newC != assignments[i]) { converged = false; } numInstOfCent[newC]++; if (!converged) assignments[i] = newC; } // the following is only done // if assignments are not the same, because too much effort if (!converged) { PFD(D_FOLLOWSPLIT, "assignToCenters -> it has NOT converged"); for (int i = 0; i < numCent; i++) { instOfCent[i] = new int [numInstOfCent[i]]; } for (int i = 0; i < numCent; i++) { int index = -1; for (int j = 0; j < numInstOfCent[i]; j++) { index = nextAssignedOne(i, index, assignments); instOfCent[i][j] = allInstList[index]; } } } else PFD(D_FOLLOWSPLIT, "assignToCenters -> it has converged"); return converged; } /** * Searches along the assignment array for the next entry of the center * in question. * @param cent index of the center * @param lastIndex index to start searching * @param assignments assignments * @return index of the instance the center cent is assigned to */ private int nextAssignedOne(int cent, int lastIndex, int [] assignments) { int len = assignments.length; int index = lastIndex + 1; while (index < len) { if (assignments[index] == cent) { return (index); } index++; } return (-1); } /** * Split centers in their region. Generates random vektor of * length = variance and * adds and substractsx to cluster vektor to get two new clusters. * * @param random random function * @param center the center that is split here * @param variance variance of the cluster * @param model data model valid * @return a pair of new centers * @throws something in AlgVector goes wrong */ private Instances splitCenter(Random random, Instance center, double variance, Instances model) throws Exception { m_NumSplits++; AlgVector r = null; Instances children = new Instances(model, 2); if (m_DebugVektorsFile != null) { Instance nextVektor = getNextDebugVektorsInstance(model); PFD(D_RANDOMVEKTOR, "Random Vector from File " + nextVektor); r = new AlgVector(nextVektor); } else { // random vector of length = variance r = new AlgVector(model, random); } r.changeLength(Math.pow(variance, 0.5)); PFD(D_RANDOMVEKTOR, "random vector *variance "+ r); // add random vector to center AlgVector c = new AlgVector(center); AlgVector c2 = (AlgVector) c.clone(); c = c.add(r); Instance newCenter = c.getAsInstance(model, random); children.add(newCenter); PFD(D_FOLLOWSPLIT, "first child "+ newCenter); // substract random vector to center c2 = c2.substract(r); newCenter = c2.getAsInstance(model, random); children.add(newCenter); PFD(D_FOLLOWSPLIT, "second child "+ newCenter); return children; } /** * Split centers in their region. * (*Alternative version of splitCenter()*) * * @param random the random number generator * @param instances of the region * @param model * @return a pair of new centers */ private Instances splitCenters(Random random, Instances instances, Instances model) { Instances children = new Instances(model, 2); int instIndex = Math.abs(random.nextInt()) % instances.numInstances(); children.add(instances.instance(instIndex)); int instIndex2 = instIndex; int count = 0; while ((instIndex2 == instIndex) && count < 10) { count++; instIndex2 = Math.abs(random.nextInt()) % instances.numInstances(); } children.add(instances.instance(instIndex2)); return children; } /** * Generates new centers randomly. Used for starting centers. * * @param random0 random number generator * @param model data model of the instances * @param numClusters number of clusters * @return new centers */ private Instances makeCentersRandomly(Random random0, Instances model, int numClusters) { Instances clusterCenters = new Instances(model, numClusters); m_NumClusters = numClusters; // makes the new centers randomly for (int i = 0; i < numClusters; i++) { int instIndex = Math.abs(random0.nextInt()) % m_Instances.numInstances(); clusterCenters.add(m_Instances.instance(instIndex)); } return clusterCenters; } /** * Returns the BIC-value for the given center and instances. * @param instList The indices of the instances that belong to the center * @param center the center. * @param mle maximum likelihood * @param model the data model * @return the BIC value */ private double calculateBIC(int [] instList, Instance center, double mle, Instances model) { int [][] w1 = new int[1][instList.length]; for (int i = 0; i < instList.length; i++) { w1[0][i] = instList[i]; } double [] m = {mle}; Instances w2 = new Instances(model, 1); w2.add(center); return calculateBIC(w1, w2, m); } /** * Calculates the BIC for the given set of centers and instances. * @param instOfCent The instances that belong to their respective centers * @param centers the centers * @param mle maximum likelihood * @return The BIC for the input. */ private double calculateBIC(int [][] instOfCent, Instances centers, double [] mle) { double loglike = 0.0; int numInstTotal = 0; int numCenters = centers.numInstances(); int numDimensions = centers.numAttributes(); int numParameters = (numCenters - 1) + //probabilities numCenters * numDimensions + //means numCenters; // variance params for (int i = 0; i < centers.numInstances(); i++) { loglike += logLikelihoodEstimate(instOfCent[i].length, centers.instance(i), mle[i], centers.numInstances() * 2); numInstTotal += instOfCent[i].length; } /* diff thats how we did it loglike -= ((centers.numAttributes() + 1.0) * centers.numInstances() * 1) * Math.log(count); */ loglike -= numInstTotal * Math.log(numInstTotal); //System.out.println ("numInstTotal " + numInstTotal + // "calculateBIC res " + loglike); loglike -= (numParameters / 2.0) * Math.log(numInstTotal); //System.out.println ("numParam " + // + numParameters + // " calculateBIC res " + loglike); return loglike; } /** * Calculates the log-likelihood of the data for the given model, taken * at the maximum likelihood point. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -