pcsoftkmeans.java
来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,952 行 · 第 1/5 页
JAVA
1,952 行
/** Dummy: not implemented for PCSoftKMeans */ public int[] bestInstancesForActiveLearning(int numActive) throws Exception{ throw new Exception("Not implemented for PCSoftKMeans"); } /** Dummy: not implemented for PCSoftKMeans */ public InstancePair[] bestPairsForActiveLearning(int numActive) throws Exception{ throw new Exception("Not implemented for PCSoftKMeans"); } /** * Checks if instance has to be normalized and returns the * distribution of the instance using the current clustering * * @param instance the instance under consideration * @return an array containing the estimated membership * probabilities of the test instance in each cluster (this * should sum to at most 1) * @exception Exception if distribution could not be * computed successfully */ public double[] distributionForInstance(Instance instance) throws Exception { if (m_Algorithm == ALGORITHM_SPHERICAL) { // check here, since evaluateModel calls this function on test data normalize(instance); } return null; } /** * Computes the density for a given instance. * * @param inst the instance to compute the density for * @return the density. * @exception Exception if the density could not be computed * successfully */ public double densityForInstance(Instance inst) throws Exception { throw new Exception("Not implemented for PCSoftKMeans, since posterior probs directly computed and density weights not stored!!"); } /** lookup the instance in the checksum hash * @param instance instance to be looked up * @return the index of the cluster to which the instance was assigned, -1 if the instance has not bee clustered */ protected int lookupInstanceCluster(Instance instance) { int classIdx = instance.classIndex(); double[] values1 = instance.toDoubleArray(); double checksum = 0; for (int i = 0; i < values1.length; i++) { if (i != classIdx) { checksum += m_checksumCoeffs[i] * values1[i]; } } Object list = m_checksumHash.get(new Double(checksum)); if (list != null) { // go through the list of instances with the same checksum and find the one that is equivalent ArrayList checksumList = (ArrayList) list; for (int i = 0; i < checksumList.size(); i++) { int instanceIdx = ((Integer) checksumList.get(i)).intValue(); Instance listInstance = m_Instances.instance(instanceIdx); double[] values2 = listInstance.toDoubleArray(); boolean equal = true; for (int j = 0; j < values1.length && equal == true; j++) { if (j != classIdx) { if (values1[j] != values2[j]) { equal = false; } } } if (equal == true) { return m_ClusterAssignments[instanceIdx]; } } } return -1; } /** * Classifies the instance using the current clustering, without considering constraints * * @param instance the instance to be assigned to a cluster * @return the number of the assigned cluster as an integer * if the class is enumerated, otherwise the predicted value * @exception Exception if instance could not be classified * successfully */ public int assignInstanceToCluster(Instance instance) throws Exception { int bestCluster = 0; double bestDistance = Double.POSITIVE_INFINITY; double bestSimilarity = Double.NEGATIVE_INFINITY; // lookup the cluster assignment of the instance int lookupCluster = lookupInstanceCluster(instance); if (lookupCluster >= 0) { return lookupCluster; } System.err.println("\n\nWARNING!!! Couldn't lookup instance - it wasn't in the clustering!!!\n\n"); System.err.println("\n\n Assuming user wants prediction of new test instance based on clustering model ...\nDoing instance assignment without constraints!!!\n\n"); for (int i = 0; i < m_NumClusters; i++) { double distance = 0, similarity = 0; if (!m_objFunDecreasing) { similarity = m_metric.similarity(instance, m_ClusterCentroids.instance(i)); if (similarity > bestSimilarity) { bestSimilarity = similarity; bestCluster = i; } } else { distance = m_metric.distance(instance, m_ClusterCentroids.instance(i)); if (distance < bestDistance) { bestDistance = distance; bestCluster = i; } } } return bestCluster; } /** Set the cannot link constraint weight */ public void setCannotLinkWeight(double w) { m_CannotLinkWeight = w; } /** Return the cannot link constraint weight */ public double getCannotLinkWeight() { return m_CannotLinkWeight; } /** Set the must link constraint weight */ public void setMustLinkWeight(double w) { m_MustLinkWeight = w; } /** Return the must link constraint weight */ public double getMustLinkWeight() { return m_MustLinkWeight; } /** Return the number of clusters */ public int getNumClusters() { return m_NumClusters; } /** A duplicate function to conform to Clusterer abstract class. * @returns the number of clusters */ public int numberOfClusters() { return getNumClusters(); } /** Set the m_SeedHash */ public void setSeedHash(HashMap seedhash) { System.err.println("Not implemented here"); } /** * Set the random number seed * @param s the seed */ public void setRandomSeed (int s) { m_RandomSeed = s; } /** Return the random number seed */ public int getRandomSeed () { return m_RandomSeed; } /** * Set the minimum value of the objective function difference required for convergence * @param objFunConvergenceDifference the minimum value of the objective function difference required for convergence */ public void setObjFunConvergenceDifference(double objFunConvergenceDifference) { m_ObjFunConvergenceDifference = objFunConvergenceDifference; } /** * Get the minimum value of the objective function difference required for convergence * @returns the minimum value of the objective function difference required for convergence */ public double getObjFunConvergenceDifference() { return m_ObjFunConvergenceDifference; } /** Sets training instances */ public void setInstances(Instances instances) { m_Instances = instances; // create the checksum coefficients m_checksumCoeffs = new double[instances.numAttributes()]; Random r = new Random(instances.numInstances()); for (int i = 0; i < m_checksumCoeffs.length; i++) { m_checksumCoeffs[i] = r.nextDouble(); } // hash the instance checksums m_checksumHash = new HashMap(instances.numInstances()); int classIdx = instances.classIndex(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); double[] values = instance.toDoubleArray(); double checksum = 0; for (int j = 0; j < values.length; j++) { if (j != classIdx) { checksum += m_checksumCoeffs[j] * values[j]; } } // take care of chaining Object list = m_checksumHash.get(new Double(checksum)); ArrayList idxList = null; if (list == null) { idxList = new ArrayList(); m_checksumHash.put(new Double(checksum), idxList); } else { // chaining idxList = (ArrayList) list; } idxList.add(new Integer(i)); } } /** Return training instances */ public Instances getInstances() { return m_Instances; } /** * Set the number of clusters to generate * * @param n the number of clusters to generate */ public void setNumClusters(int n) { m_NumClusters = n; if (m_verbose) { System.out.println("Number of clusters: " + n); } } /** * Set the distance metric * * @param s the metric */ public void setMetric (LearnableMetric m) { m_metric = m; String metricName = m_metric.getClass().getName(); System.out.println("Setting m_metric to " + metricName); m_objFunDecreasing = m.isDistanceBased(); } /** * Get the distance metric * * @returns the distance metric used */ public Metric getMetric () { return m_metric; } /** * Set the KMeans algorithm. Values other than * ALGORITHM_SIMPLE or ALGORITHM_SPHERICAL will be ignored * * @param algo algorithm type */ public void setAlgorithm (SelectedTag algo) { if (algo.getTags() == TAGS_ALGORITHM) { if (m_verbose) { System.out.println("Algorithm: " + algo.getSelectedTag().getReadable()); } m_Algorithm = algo.getSelectedTag().getID(); } } /** * Get the KMeans algorithm type. Will be one of * ALGORITHM_SIMPLE or ALGORITHM_SPHERICAL * * @returns algorithm type */ public SelectedTag getAlgorithm () { return new SelectedTag(m_Algorithm, TAGS_ALGORITHM); } /** Read the seeds from a hastable, where every key is an instance and every value is: * the cluster assignment of that instance * seedVector vector containing seeds */ public void seedClusterer(HashMap seedHash) { System.err.println("Not implemented here"); } /** Prints clusters */ public void printClusters () throws Exception{ ArrayList clusters = getClusters(); for (int i=0; i<clusters.size(); i++) { Cluster currentCluster = (Cluster) clusters.get(i); System.out.println("\nCluster " + i + ": " + currentCluster.size() + " instances"); if (currentCluster == null) { System.out.println("(empty)"); } else { for (int j=0; j<currentCluster.size(); j++) { Instance instance = (Instance) currentCluster.get(j); System.out.println("Instance: " + instance); } } } } /** * Computes the clusters from the cluster assignments, for external access * * @exception Exception if clusters could not be computed successfully */ public ArrayList getClusters() throws Exception { m_Clusters = new ArrayList(); Cluster [] clusterArray = new Cluster[m_NumClusters]; for (int i=0; i < m_Instances.numInstances(); i++) { Instance inst = m_Instances.instance(i); if(clusterArray[m_ClusterAssignments[i]] == null) clusterArray[m_ClusterAssignments[i]] = new Cluster(); clusterArray[m_ClusterAssignments[i]].add(inst, 1); } for (int j =0; j< m_NumClusters; j++) m_Clusters.add(clusterArray[j]); return m_Clusters; } /** * Computes the clusters from the cluster assignments, for external access * * @exception Exception if clusters could not be computed successfully */ public HashMap[] getIndexClusters() throws Exception {// m_IndexClusters = new HashMap[m_NumClusters];// for (int j=0; j < m_NumClusters; j++) { // m_IndexClusters[j] = new HashMap();// for (int i=0; i < m_Instances.numInstances(); i++) {// m_IndexClusters[j].put(new Integer(i),new Double(m_ClusterDistribution[i][j]));// }// }// return m_IndexClusters; return null; } public Enumeration listOptions () { Vector newVector = new Vector(10); newVector.addElement(new Option("\tnumber of clusters (default = 3)." , "N", 1, "-N <num>")); newVector.addElement(new Option("\trandom number seed (default 1)" , "R", 1, "-R <num>")); newVector.addElement(new Option("\tperform no seeding (default false)" , "NS", 1, "-NS")); newVector.addElement(new Option("\tperform active learning (default false)" , "A", 1, "-A")); newVector.addElement(new Option("\tphase two of active learning is random (default false)" , "P2", 1, "-P2")); newVector.addElement(new Option("\tdo only Explore phase in active learning (default false)" , "E", 1, "-E")); newVector.addElement(new Option("\tmetric type (default WeightedEuclidean)" , "M", 1, "-M <string> (WeightedEuclidean or WeightedDotP)")); newVector.addElement(new Option("\tconstraints file" , "C", 1, "-C <string> (each line is of the form \"firstID\\tsecondID\\t<+1/-1>\", where +1=>must-link, -1=>cannot-link)")); newVector.addElement(new Option("\tmust link weight (default 1)" , "ML", 1, "-ML <double>")); newVector.addElement(new Option("\tcannot link weight (default 1)" , "CL", 1, "-CL <double>")); newVector.addElement(new Option("\talgorithm type (default Simple)" , "A", 1, "-A <string> (Simple => Simple-KMeans, Spherical => Spherical-KMeans)"));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?