📄 xmeans.java
字号:
int [][] w1 = new int[1][instList.length];
    // Copy the instance indices into the single row of a one-center table.
    for (int i = 0; i < instList.length; i++) {
      w1[0][i] = instList[i];
    }
    double [] m = {mle};
    Instances w2 = new Instances(model, 1);
    w2.add(center);
    // Reuse the multi-center overload with the one-element containers.
    return calculateBIC(w1, w2, m);
  }

  /**
   * Calculates the BIC for the given set of centers and instances.
   *
   * @param instOfCent for each center, the instances that belong to it;
   *          only the number of instances per center is used here
   * @param centers the centers
   * @param mle for each center, the value handed to
   *          {@code logLikelihoodEstimate} as its distortion
   * @return the BIC for the input
   */
  private double calculateBIC(int [][] instOfCent, Instances centers,
                              double [] mle) {
    int numCenters = centers.numInstances();
    int numDimensions = centers.numAttributes();
    int numParameters = (numCenters - 1)   // probabilities
      + numCenters * numDimensions         // means
      + numCenters;                        // variance params

    double loglike = 0.0;
    int numInstTotal = 0;
    for (int i = 0; i < numCenters; i++) {
      loglike += logLikelihoodEstimate(instOfCent[i].length,
                                       centers.instance(i),
                                       mle[i],
                                       numCenters * 2);
      numInstTotal += instOfCent[i].length;
    }

    // log(R) is needed twice, so compute it once.
    double logNumInstTotal = Math.log(numInstTotal);

    // The per-center estimates leave out the "- R(n) * log(R)" term;
    // it is subtracted here once for all centers together.
    loglike -= numInstTotal * logNumInstTotal;

    // Penalty for the number of free parameters.
    loglike -= (numParameters / 2.0) * logNumInstTotal;
    return loglike;
  }

  /**
   * Calculates the log-likelihood of the data for the given model, taken
   * at the maximum likelihood point.
   *
   * With R(n) = numInst and M = center.numAttributes() the result is
   * <pre>
   *   -R(n)/2 * log(2*pi) - (R(n)*M)/2 * log(variance) - (R(n)-1)/2
   *     + R(n) * log(R(n))
   * </pre>
   * where variance = distortion / (R(n) - 1). According to the notes of the
   * original author this differs from the X-means paper, which divides by
   * R(n) - K and uses a sum of squared distances.
   *
   * @param numInst number of instances that belong to the center
   * @param center the center
   * @param distortion distortion of the center
   * @param numCent number of centers (not used)
   * @return the likelihood estimate; 0 if the center has fewer than two
   *         instances
   */
  private double logLikelihoodEstimate(int numInst,
                                       Instance center,
                                       double distortion,
                                       int numCent) {
    // With fewer than two instances the variance below is undefined.
    if (numInst <= 1) {
      return 0;
    }

    double variance = distortion / (numInst - 1.0);

    // -R(n)/2 * log(2*pi)
    double p1 = -(numInst / 2.0) * Math.log(Math.PI * 2.0);

    // -(R(n)*M)/2 * log(variance)
    // Divide by 2.0, not 2: integer division would silently truncate the
    // factor whenever R(n)*M is odd.
    double p2 = -(numInst * center.numAttributes()) / 2.0 * Math.log(variance);

    // -(R(n)-1)/2
    double p3 = -(numInst - 1.0) / 2.0;

    // R(n)*log(R(n))
    double p4 = numInst * Math.log(numInst);

    return p1 + p2 + p3 + p4;
  }

  /**
   * Sums, for every center, the distances between the center and the
   * instances assigned to it.
   *
   * @param instOfCent for each center, the indices (into m_Instances) of
   *          the instances that belong to it
   * @param centers the centers
   * @return one distance sum per center
   * @throws Exception declared for the distance computation
   */
  private double [] distortion(int[][] instOfCent, Instances centers)
    throws Exception {
    // Java zero-initialises the array, so no explicit reset is needed.
    double [] sums = new double [centers.numInstances()];
    for (int i = 0; i < centers.numInstances(); i++) {
      for (int j = 0; j < instOfCent[i].length; j++) {
        sums[i] += m_DistanceF.distance(
                     m_Instances.instance(instOfCent[i][j]),
                     centers.instance(i));
      }
    }
    return sums;
  }

  /**
   * Clusters an instance.
   *
   * @param instance the instance to assign a cluster to
   * @param centers the centers to cluster the instance to
   * @return the index of the closest center; 0 if no center is closer
   *         than Double.MAX_VALUE
   * @throws Exception declared for the distance computation
   */
  private int clusterProcessedInstance(Instance instance, Instances centers)
    throws Exception {
    // Distances are doubles, so the sentinel has to be the largest double;
    // Integer.MAX_VALUE would make every larger distance lose against it.
    double minDist = Double.MAX_VALUE;
    int bestCluster = 0;
    for (int i = 0; i < centers.numInstances(); i++) {
      double dist = m_DistanceF.distance(instance, centers.instance(i));
      if (dist < minDist) {
        minDist = dist;
        bestCluster = i;
      }
    }
    return bestCluster;
  }

  /**
   * Clusters an instance that has been through the filters, using the
   * current cluster centers.
   *
   * @param instance the instance to assign a cluster to
   * @return the index of the closest cluster center; 0 if no center is
   *         closer than Double.MAX_VALUE
   * @throws Exception declared for the distance computation
   */
  private int clusterProcessedInstance(Instance instance) throws Exception {
    // See the two-argument overload: the sentinel must be a double maximum.
    double minDist = Double.MAX_VALUE;
    int bestCluster = 0;
    for (int i = 0; i < m_NumClusters; i++) {
      double dist = m_DistanceF.distance(instance,
                                         m_ClusterCenters.instance(i));
      if (dist < minDist) {
        minDist = dist;
        bestCluster = i;
      }
    }
    return bestCluster;
  }

  /**
   * Classifies a given instance: passes it through the replace-missing
   * filter and assigns the filtered instance to the closest cluster center.
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * @throws Exception if instance could not be classified successfully
   */
  public int clusterInstance(Instance instance) throws Exception {
    m_ReplaceMissingFilter.input(instance);
    Instance inst = m_ReplaceMissingFilter.output();
    return clusterProcessedInstance(inst);
  }

  /**
   * Returns the number of clusters.
   *
   * @return the number of clusters generated for a training dataset
   */
  public int numberOfClusters() {
    return m_NumClusters;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * NOTE(review): the flag "K" is registered twice below, once for the
   * KDTree class and once for the distance function class. The option
   * parsing code is not visible from here - confirm which flag it expects
   * for the distance function before changing either entry.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(12);

    newVector.addElement(new Option(
      "\tmaximum number of overall iterations"
      + " (default = 1).",
      "I", 1, "-I <num>"));
    newVector.addElement(new Option(
      "\tmaximum number of iterations in the kMeans loop in"
      + " the Improve-Parameter part "
      + " (default = 1000).",
      "M", 1, "-M <num>"));
    newVector.addElement(new Option(
      "\tmaximum number of iterations in the kMeans loop"
      + " for the splitted centroids in the Improve-Structure part "
      + " (default = 1000).",
      "J", 1, "-J <num>"));
    newVector.addElement(new Option(
      "\tminimum number of clusters"
      + " (default = 2).",
      "L", 1, "-L <num>"));
    newVector.addElement(new Option(
      "\tmaximum number of clusters"
      + " (default = 4).",
      "H", 1, "-H <num>"));
    newVector.addElement(new Option(
      "\tdistance value for binary attributes"
      + " (default = 1.0).",
      "V", 1, "-V <value>"));
    newVector.addElement(new Option(
      "\tFull class name of KDTree class to use, followed\n"
      + "\tby scheme options.\n"
      + "\teg: \"weka.core.KDTree -P\"\n"
      + "(default = no KDTree class used).",
      "K", 1, "-K <KDTree class specification>"));
    newVector.addElement(new Option(
      "\tcutoff factor, takes the given percentage of the splitted \n"
      + "\tcentroids if none of the children win\n"
      + "\t(default = 0.0).",
      "C", 1, "-C <value>"));
    newVector.addElement(new Option(
      "\tFull class name of Distance function class to use, followed\n"
      + "\tby scheme options.\n"
      + "\teg: \"weka.core.MahalanobisDistance\"\n"
      + "\t(default = weka.core.EuclideanDistance).",
      "K", 1, "-K <distance function class specification>"));
    newVector.addElement(new Option(
      "\tfile to read starting centers from (ARFF format).",
      "N", 1, "-N <file name>"));
    newVector.addElement(new Option(
      "\tfile to write centers to (ARFF format).",
      "O", 1, "-O <file name>"));
    newVector.addElement(new Option(
      "\trandom number seed (default 10).",
      "S", 1, "-S <num>"));

    return newVector.elements();
  }

  /**
   * Returns the tip text for the minimum number of clusters property.
   *
   * @return tip text for this property
   */
  public String minNumClustersTipText() {
    return "set minimum number of clusters";
  }

  /**
   * Returns the tip text for the maximum number of clusters property.
   *
   * @return tip text for this property
   */
  public String maxNumClustersTipText() {
    return "set maximum number of clusters";
  }

  /**
   * Sets the maximum number of iterations to perform.
   *
   * Only negative values are rejected; zero is accepted even though the
   * error message speaks of positive values.
   *
   * @param i the number of iterations
   * @throws Exception if i is negative
   */
  public void setMaxIterations(int i) throws Exception {
    if (i < 0) {
      throw new Exception("Only positive values for iteration number"
                          + " allowed (Option I).");
    }
    m_MaxIterations = i;
  }

  /**
   * Gets the maximum number of iterations.
   *
   * @return the number of iterations
   */
  public int getMaxIterations() {
    return m_MaxIterations;
  }

  /**
   * Sets the maximum number of iterations to perform in KMeans.
   * The limit for KMeans on the child centers is set to the same value;
   * call setMaxKMeansForChildren afterwards to give it a different one.
   *
   * @param i the number of iterations
   */
  public void setMaxKMeans(int i) {
    m_MaxKMeans = i;
    m_MaxKMeansForChildren = i;
  }

  /**
   * Gets the maximum number of iterations in KMeans.
   *
   * @return the number of iterations
   */
  public int getMaxKMeans() {
    return m_MaxKMeans;
  }

  /**
   * Sets the maximum number of iterations KMeans that is performed
   * on the child centers.
   *
   * @param i the number of iterations
   * @throws Exception declared in the signature; this implementation
   *           does not throw
   */
  public void setMaxKMeansForChildren(int i) throws Exception {
    m_MaxKMeansForChildren = i;
  }

  /**
   * Gets the maximum number of iterations in KMeans that is performed
   * on the child centers.
   *
   * @return the number of iterations
   */
  public int getMaxKMeansForChildren() {
    return m_MaxKMeansForChildren;
  }

  /**
   * Sets a new cutoff factor.
   *
   * @param i the new cutoff factor
   * @throws Exception declared in the signature; this implementation
   *           does not throw
   */
  public void setCutOffFactor(double i) throws Exception {
    m_CutOffFactor = i;
  }

  /**
   * Gets the cutoff factor.
   *
   * @return the cutoff factor
   */
  public double getCutOffFactor() {
    return m_CutOffFactor;
  }

  /**
   * Sets the minimum number of clusters to generate.
   * A value larger than the current maximum is silently ignored.
   *
   * @param n the minimum number of clusters to generate
   */
  public void setMinNumClusters(int n) {
    if (n <= m_MaxNumClusters) {
      m_MinNumClusters = n;
    }
  }

  /**
   * Sets the maximum number of clusters to generate.
   * A value smaller than the current minimum is silently ignored.
   *
   * @param n the maximum number of clusters to generate
   */
  public void setMaxNumClusters(int n) {
    if (n >= m_MinNumClusters) {
      m_MaxNumClusters = n;
    }
  }

  /**
   * Returns the tip text for the bin value property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String binValueTipText() {
    return "Set the value that represents true in the new attributes.";
  }

  /**
   * Gets value that represents true in a new numeric attribute.
   * (False is always represented by 0.0.)
   *
   * @return the value that represents true in a new numeric attribute
   */
  public double getBinValue() {
    return m_BinValue;
  }

  /**
   * Sets the distance value between true and false of binary attributes
   * and "same" and "different" of nominal attributes.
   *
   * @param value the distance value to use
   */
  public void setBinValue(double value) {
    m_BinValue = value;
  }

  /**
   * Sets the distance function.
   *
   * @param distanceF the distance function with all options set
   */
  public void setDistanceF(DistanceFunction distanceF) {
    m_DistanceF = distanceF;
  }

  /**
   * Gets the distance function.
   *
   * @return the distance function
   */
  public DistanceFunction getDistanceF() {
    return m_DistanceF;
  }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -