📄 xmeans.java
字号:
* @param numInst number of instances that belong to the center * @param center the center * @param distortion distortion * @param numCent number of centers * @return the likelihood estimate */ private double logLikelihoodEstimate(int numInst, Instance center, double distortion, int numCent) { // R(n) num of instances of the center -> numInst // K num of centers -> not used // //todo take the diff comments away double loglike = 0; /* if is new */ if (numInst > 1) { /* diff variance is new */ // // distortion = Sum over instances x of the center(x-center) // different to paper; sum should be squared // // (Sum of distances to center) / R(n) - 1.0 // different to paper; should be R(n)-K double variance = distortion / (numInst - 1.0); // // -R(n)/2 * log(pi*2) // double p1 = - (numInst / 2.0) * Math.log(Math.PI * 2.0); /* diff thats how we had it double p2 = -((ni * center.numAttributes()) / 2) * distortion; */ // // -(R(n)*M)/2 * log(variance) // double p2 = - (numInst * center.numAttributes()) / 2 * Math.log(variance); /* diff thats how we had it, the difference is a bug in x-means double p3 = - (numInst - numCent) / 2; */ // // -(R(n)-1)/2 // double p3 = - (numInst - 1.0) / 2.0; // // R(n)*log(R(n)) // double p4 = numInst * Math.log(numInst); /* diff x-means doesn't have this part double p5 = - numInst * Math.log(numInstTotal); */ /* loglike = -(ni / 2) * Math.log(Math.PI * 2) - (ni * center.numAttributes()) / 2.0) * logdistortion - (ni - k) / 2.0 + ni * Math.log(ni) - ni * Math.log(r); */ loglike = p1 + p2 + p3 + p4; // diff + p5; //the log(r) is something that can be reused. //as is the log(2 PI), these could provide extra speed up later on. //since distortion is so expensive to compute, I only do that once. } return loglike; } /** * Calculates the maximum likelihood estimate for the variance. * @param instOfCent indices of instances to each center * @param centers the centers * @return the list of distortions distortion. 
*/ private double [] distortion(int[][] instOfCent, Instances centers) throws Exception { double [] distortion = new double [centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { distortion[i] = 0.0; for (int j = 0; j < instOfCent[i].length; j++) { distortion[i] += m_DistanceF.distance( m_Instances.instance(instOfCent[i][j]), centers.instance(i)); } } /* diff not done in x-means res *= 1.0 / (count - centers.numInstances()); */ return distortion; } /** * Clusters an instance. * @param instance the instance to assign a cluster to. * @param centers the centers to cluster the instance to. * @return a cluster index. */ private int clusterProcessedInstance(Instance instance, Instances centers)throws Exception{ double minDist = Integer.MAX_VALUE; int bestCluster = 0; for (int i = 0; i < centers.numInstances(); i++) { double dist = m_DistanceF.distance(instance, centers.instance(i)); if (dist < minDist) { minDist = dist; bestCluster = i; } }; return bestCluster; } /** * Clusters an instance that has been through the filters. * * @param instance the instance to assign a cluster to * @return a cluster number */ private int clusterProcessedInstance(Instance instance) throws Exception { double minDist = Integer.MAX_VALUE; int bestCluster = 0; for (int i = 0; i < m_NumClusters; i++) { double dist = m_DistanceF.distance(instance, m_ClusterCenters.instance(i)); if (dist < minDist) { minDist = dist; bestCluster = i; } } return bestCluster; } /** * Classifies a given instance. * * @param instance the instance to be assigned to a cluster * @return the number of the assigned cluster as an integer * if the class is enumerated, otherwise the predicted value * @throws if instance could not be classified * successfully */ public int clusterInstance(Instance instance) throws Exception { m_ReplaceMissingFilter.input(instance); Instance inst = m_ReplaceMissingFilter.output(); return clusterProcessedInstance(inst); } /** * Returns the number of clusters. 
* * @return the number of clusters generated for a training dataset. */ public int numberOfClusters() { return m_NumClusters; } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options **/ public Enumeration listOptions() { Vector newVector = new Vector(4); newVector.addElement(new Option( "\tmaximum number of overall iterations\n" + "\t(default = 1).", "I", 1, "-I <num>")); newVector.addElement(new Option( "\tmaximum number of iterations in the kMeans loop in\n" + "\tthe Improve-Parameter part \n"+ "\t(default = 1000).", "M", 1, "-M <num>")); newVector.addElement(new Option( "\tmaximum number of iterations in the kMeans loop\n" + "\tfor the splitted centroids in the Improve-Structure part \n"+ "\t(default = 1000).", "J", 1, "-J <num>")); newVector.addElement(new Option( "\tminimum number of clusters\n" + "\t(default = 2).", "L", 1, "-L <num>")); newVector.addElement(new Option( "\tmaximum number of clusters\n" + "\t(default = 4).", "H", 1, "-H <num>")); newVector.addElement(new Option( "\tdistance value for binary attributes\n" + "\t(default = 1.0).", "V", 1, "-V <value>")); newVector.addElement(new Option( "\tFull class name of KDTree class to use, followed\n" + "\tby scheme options.\n" + "\teg: \"weka.core.KDTree -P\"\n" + "\t(default = no KDTree class used).", "K", 1, "-K <KDTree class specification>")); newVector.addElement(new Option( "\tcutoff factor, takes the given percentage of the splitted \n" + "\tcentroids if none of the children win\n" + "\t(default = 0.0).", "C", 1, "-C <value>")); newVector.addElement(new Option( "\tFull class name of Distance function class to use, followed\n" + "\tby scheme options.\n" + "\teg: \"weka.core.MahalanobisDistance\"\n" + "\t(default = weka.core.EuclideanDistance).", "K", 1, "-K <distance function class specification>")); newVector.addElement(new Option( "\tfile to read starting centers from (ARFF format).", "N", 1, "-N <file name>")); newVector.addElement(new 
Option( "\tfile to write centers to (ARFF format).", "O", 1, "-O <file name>")); newVector.addElement(new Option( "\trandom number seed (default 10).", "S", 1, "-S <num>")); return newVector.elements(); } /** * Returns the tip text for this property * @return tip text for this property */ public String minNumClustersTipText() { return "set minimum number of clusters"; } /** * Returns the tip text for this property * @return tip text for this property */ public String maxNumClustersTipText() { return "set maximum number of clusters"; } /** * Sets the maximum number of iterations to perform. * @param i the number of iterations * @throws Exception if i is less than 1 */ public void setMaxIterations(int i) throws Exception { if (i < 0) throw new Exception("Only positive values for iteration number" + " allowed (Option I)."); m_MaxIterations = i; } /** * Gets the maximum number of iterations. * @return the number of iterations */ public int getMaxIterations() { return m_MaxIterations; } /** * Set the maximum number of iterations to perform in KMeans * @param i the number of iterations */ public void setMaxKMeans(int i) { m_MaxKMeans = i; m_MaxKMeansForChildren = i; } /** * Gets the maximum number of iterations in KMeans. * @return the number of iterations */ public int getMaxKMeans() { return m_MaxKMeans; } /** * Sets the maximum number of iterations KMeans that is performed * on the child centers. * @param i the number of iterations */ public void setMaxKMeansForChildren(int i) throws Exception { m_MaxKMeansForChildren = i; } /** * Gets the maximum number of iterations in KMeans. * @return the number of iterations */ public int getMaxKMeansForChildren() { return m_MaxKMeansForChildren; } /** * Sets a new cutoff factor. * @param i the new cutoff factor */ public void setCutOffFactor(double i) throws Exception { m_CutOffFactor = i; } /** * Gets the cutoff factor. 
* @return the cutoff factor
 */
public double getCutOffFactor() {
  return m_CutOffFactor;
}

/**
 * Sets the minimum number of clusters to generate.
 * A value larger than the current maximum is silently ignored.
 *
 * @param n the minimum number of clusters to generate
 */
public void setMinNumClusters(int n) {
  if (n > m_MaxNumClusters) {
    return;
  }
  m_MinNumClusters = n;
}

/**
 * Sets the maximum number of clusters to generate.
 * A value smaller than the current minimum is silently ignored.
 *
 * @param n the maximum number of clusters to generate
 */
public void setMaxNumClusters(int n) {
  if (n < m_MinNumClusters) {
    return;
  }
  m_MaxNumClusters = n;
}

/**
 * Returns the tip text for the binValue property.
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String binValueTipText() {
  return "Set the value that represents true in the new attributes.";
}

/**
 * Gets value that represents true in a new numeric attribute.
 * (False is always represented by 0.0.)
 * @return the value that represents true in a new numeric attribute
 */
public double getBinValue() {
  return m_BinValue;
}

/**
 * Sets the distance value between true and false of binary attributes
 * and "same" and "different" of nominal attributes.
 * @param value the new distance value
 */
public void setBinValue(double value) {
  m_BinValue = value;
}

/**
 * Sets the distance function.
 * @param distanceF the distance function with all options set
 */
public void setDistanceF(EuclideanDistance distanceF) {
  m_DistanceF = distanceF;
}

/**
 * Gets the distance function.
* @return the distance function
 */
public EuclideanDistance getDistanceF() {
  return m_DistanceF;
}

/**
 * Gets the distance function specification string, which contains the
 * class name of the distance function class and any options to it.
 * When the distance function is not an OptionHandler only the class name
 * is returned.
 *
 * @return the distance function specification string
 */
protected String getDistanceFSpec() {
  EuclideanDistance distFunc = getDistanceF();
  String className = distFunc.getClass().getName();
  if (!(distFunc instanceof OptionHandler)) {
    return className;
  }
  return className + " "
    + Utils.joinOptions(((OptionHandler) distFunc).getOptions());
}

/**
 * Sets a file name for a file that has the random vektors stored.
 * Just used for debugging reasons.
 * @param fileName file name for the file to read the random vektors from
 */
public void setDebugVektorsFile(String fileName) {
  m_DebugVektorsFile = fileName;
}

/**
 * Initialises the debug vektor input: opens the file named by
 * m_DebugVektorsFile, builds m_DebugVektors from it and resets the read
 * position to the first vektor.
 *
 * @throws Exception if the file cannot be opened or the instances cannot
 * be built from it
 */
public void initDebugVektorsInput() throws Exception {
  // NOTE(review): the reader is kept in m_DebugVektorsInput and is not
  // closed here -- confirm that it is closed elsewhere.
  FileReader source = new FileReader(m_DebugVektorsFile);
  m_DebugVektorsInput = new BufferedReader(source);
  m_DebugVektors = new Instances(m_DebugVektorsInput);
  m_DebugVektorsIndex = 0;
}

/**
 * Read an instance from debug vektors file.
 * The returned instance is attached to the given model and the read
 * position advances by one.
 *
 * @param model the data model for the instance
 * @return the next prefabricated instance
 * @throws Exception if all prefabricated vektors have been handed out
 */
public Instance getNextDebugVektorsInstance(Instances model)
  throws Exception {

  if (m_DebugVektorsIndex >= m_DebugVektors.numInstances()) {
    throw new Exception("no more prefabricated Vektors");
  }
  Instance next = m_DebugVektors.instance(m_DebugVektorsIndex);
  m_DebugVektorsIndex++;
  next.setDataset(model);
  return next;
}

/**
 * Sets the name of the file to read the list of centers from.
 *
 * @param fileName file name of file to read centers from
 */
public void setInputCenterFile(String fileName) {
  m_InputCenterFile = fileName;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -