📄 xmeans.java
字号:
* @return the likelihood estimate */ protected double logLikelihoodEstimate(int numInst, Instance center, double distortion, int numCent) { // R(n) num of instances of the center -> numInst // K num of centers -> not used // //todo take the diff comments away double loglike = 0; /* if is new */ if (numInst > 1) { /* diff variance is new */ // // distortion = Sum over instances x of the center(x-center) // different to paper; sum should be squared // // (Sum of distances to center) / R(n) - 1.0 // different to paper; should be R(n)-K double variance = distortion / (numInst - 1.0); // // -R(n)/2 * log(pi*2) // double p1 = - (numInst / 2.0) * Math.log(Math.PI * 2.0); /* diff thats how we had it double p2 = -((ni * center.numAttributes()) / 2) * distortion; */ // // -(R(n)*M)/2 * log(variance) // double p2 = - (numInst * center.numAttributes()) / 2 * Math.log(variance); /* diff thats how we had it, the difference is a bug in x-means double p3 = - (numInst - numCent) / 2; */ // // -(R(n)-1)/2 // double p3 = - (numInst - 1.0) / 2.0; // // R(n)*log(R(n)) // double p4 = numInst * Math.log(numInst); /* diff x-means doesn't have this part double p5 = - numInst * Math.log(numInstTotal); */ /* loglike = -(ni / 2) * Math.log(Math.PI * 2) - (ni * center.numAttributes()) / 2.0) * logdistortion - (ni - k) / 2.0 + ni * Math.log(ni) - ni * Math.log(r); */ loglike = p1 + p2 + p3 + p4; // diff + p5; //the log(r) is something that can be reused. //as is the log(2 PI), these could provide extra speed up later on. //since distortion is so expensive to compute, I only do that once. } return loglike; } /** * Calculates the maximum likelihood estimate for the variance. * @param instOfCent indices of instances to each center * @param centers the centers * @return the list of distortions distortion. 
*/ protected double[] distortion(int[][] instOfCent, Instances centers) { double[] distortion = new double[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { distortion[i] = 0.0; for (int j = 0; j < instOfCent[i].length; j++) { distortion[i] += m_DistanceF.distance(m_Instances .instance(instOfCent[i][j]), centers.instance(i)); } } /* * diff not done in x-means res *= 1.0 / (count - centers.numInstances()); */ return distortion; } /** * Clusters an instance. * * @param instance * the instance to assign a cluster to. * @param centers * the centers to cluster the instance to. * @return a cluster index. */ protected int clusterProcessedInstance(Instance instance, Instances centers) { double minDist = Integer.MAX_VALUE; int bestCluster = 0; for (int i = 0; i < centers.numInstances(); i++) { double dist = m_DistanceF.distance(instance, centers.instance(i)); if (dist < minDist) { minDist = dist; bestCluster = i; } } ; return bestCluster; } /** * Clusters an instance that has been through the filters. * * @param instance * the instance to assign a cluster to * @return a cluster number */ protected int clusterProcessedInstance(Instance instance) { double minDist = Integer.MAX_VALUE; int bestCluster = 0; for (int i = 0; i < m_NumClusters; i++) { double dist = m_DistanceF .distance(instance, m_ClusterCenters.instance(i)); if (dist < minDist) { minDist = dist; bestCluster = i; } } return bestCluster; } /** * Classifies a given instance. * * @param instance the instance to be assigned to a cluster * @return the number of the assigned cluster as an integer * if the class is enumerated, otherwise the predicted value * @throws Exception if instance could not be classified * successfully */ public int clusterInstance(Instance instance) throws Exception { m_ReplaceMissingFilter.input(instance); Instance inst = m_ReplaceMissingFilter.output(); return clusterProcessedInstance(inst); } /** * Returns the number of clusters. 
* * @return the number of clusters generated for a training dataset. */ public int numberOfClusters() { return m_NumClusters; } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options **/ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tmaximum number of overall iterations\n" + "\t(default 1).", "I", 1, "-I <num>")); result.addElement(new Option( "\tmaximum number of iterations in the kMeans loop in\n" + "\tthe Improve-Parameter part \n" + "\t(default 1000).", "M", 1, "-M <num>")); result.addElement(new Option( "\tmaximum number of iterations in the kMeans loop\n" + "\tfor the splitted centroids in the Improve-Structure part \n" + "\t(default 1000).", "J", 1, "-J <num>")); result.addElement(new Option( "\tminimum number of clusters\n" + "\t(default 2).", "L", 1, "-L <num>")); result.addElement(new Option( "\tmaximum number of clusters\n" + "\t(default 4).", "H", 1, "-H <num>")); result.addElement(new Option( "\tdistance value for binary attributes\n" + "\t(default 1.0).", "B", 1, "-B <value>")); result.addElement(new Option( "\tUses the KDTree internally\n" + "\t(default no).", "use-kdtree", 0, "-use-kdtree")); result.addElement(new Option( "\tFull class name of KDTree class to use, followed\n" + "\tby scheme options.\n" + "\teg: \"weka.core.neighboursearch.kdtrees.KDTree -P\"\n" + "\t(default no KDTree class used).", "K", 1, "-K <KDTree class specification>")); result.addElement(new Option( "\tcutoff factor, takes the given percentage of the splitted \n" + "\tcentroids if none of the children win\n" + "\t(default 0.0).", "C", 1, "-C <value>")); result.addElement(new Option( "\tFull class name of Distance function class to use, followed\n" + "\tby scheme options.\n" + "\t(default weka.core.EuclideanDistance).", "D", 1, "-D <distance function class specification>")); result.addElement(new Option( "\tfile to read starting centers from (ARFF format).", "N", 1, 
"-N <file name>")); result.addElement(new Option( "\tfile to write centers to (ARFF format).", "O", 1, "-O <file name>")); result.addElement(new Option( "\tThe debug level.\n" + "\t(default 0)", "U", 1, "-U <int>")); result.addElement(new Option( "\tThe debug vectors file.", "Y", 1, "-Y <file name>")); Enumeration en = super.listOptions(); while (en.hasMoreElements()) result.addElement(en.nextElement()); return result.elements(); } /** * Returns the tip text for this property. * @return tip text for this property */ public String minNumClustersTipText() { return "set minimum number of clusters"; } /** * Sets the minimum number of clusters to generate. * * @param n the minimum number of clusters to generate */ public void setMinNumClusters(int n) { if (n <= m_MaxNumClusters) { m_MinNumClusters = n; } } /** * Gets the minimum number of clusters to generate. * @return the minimum number of clusters to generate */ public int getMinNumClusters() { return m_MinNumClusters; } /** * Returns the tip text for this property. * @return tip text for this property */ public String maxNumClustersTipText() { return "set maximum number of clusters"; } /** * Sets the maximum number of clusters to generate. * @param n the maximum number of clusters to generate */ public void setMaxNumClusters(int n) { if (n >= m_MinNumClusters) { m_MaxNumClusters = n; } } /** * Gets the maximum number of clusters to generate. * @return the maximum number of clusters to generate */ public int getMaxNumClusters() { return m_MaxNumClusters; } /** * Returns the tip text for this property. * @return tip text for this property */ public String maxIterationsTipText() { return "the maximum number of iterations to perform"; } /** * Sets the maximum number of iterations to perform. 
* @param i the number of iterations * @throws Exception if i is less than 1 */ public void setMaxIterations(int i) throws Exception { if (i < 0) throw new Exception("Only positive values for iteration number" + " allowed (Option I)."); m_MaxIterations = i; } /** * Gets the maximum number of iterations. * @return the number of iterations */ public int getMaxIterations() { return m_MaxIterations; } /** * Returns the tip text for this property. * @return tip text for this property */ public String maxKMeansTipText() { return "the maximum number of iterations to perform in KMeans"; } /** * Set the maximum number of iterations to perform in KMeans. * @param i the number of iterations */ public void setMaxKMeans(int i) { m_MaxKMeans = i; m_MaxKMeansForChildren = i; } /** * Gets the maximum number of iterations in KMeans. * @return the number of iterations */ public int getMaxKMeans() { return m_MaxKMeans; } /** * Returns the tip text for this property. * @return tip text for this property */ public String maxKMeansForChildrenTipText() { return "the maximum number of iterations KMeans that is performed on the child centers"; } /** * Sets the maximum number of iterations KMeans that is performed * on the child centers. * @param i the number of iterations */ public void setMaxKMeansForChildren(int i) { m_MaxKMeansForChildren = i; } /** * Gets the maximum number of iterations in KMeans. * @return the number of iterations */ public int getMaxKMeansForChildren() { return m_MaxKMeansForChildren; } /** * Returns the tip text for this property. * @return tip text for this property */ public String cutOffFactorTipText() { return "the cut-off factor to use"; } /** * Sets a new cutoff factor. * @param i the new cutoff factor */ public void setCutOffFactor(double i) { m_CutOffFactor = i; } /** * Gets the cutoff factor. * @return the cutoff factor */ public double getCutOffFactor() { return m_CutOffFactor; } /** * Returns the tip text for this property. 
* @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String binValueTipText() {
    final String tip = "Set the value that represents true in the new attributes.";
    return tip;
  }

  /**
   * Gets the stored bin value, described by the original author as the
   * value that represents true in a new numeric attribute (false always
   * being represented by 0.0).
   *
   * @return the current bin value
   */
  public double getBinValue() {
    return this.m_BinValue;
  }

  /**
   * Sets the bin value, described by the original author as the distance
   * between true and false of binary attributes and between "same" and
   * "different" of nominal attributes.
   *
   * @param value the distance
   */
  public void setBinValue(double value) {
    this.m_BinValue = value;
  }

  /**
   * Tip text for the distanceF property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String distanceFTipText() {
    final String tip = "The distance function to use.";
    return tip;
  }

  /**
   * gets the "binary" distance value.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -