📄 pckmeans.java
字号:
} /** Updates the clusterAssignments for all points after clustering. * Map assignments from [0,numInstances-1] to [0,numClusters-1] * i.e. from [0 2 2 0 6 6 2] -> [0 1 1 0 2 2 0] * **** NOTE: THIS FUNCTION IS NO LONGER USED!!! **** */ protected void updateClusterAssignments() throws Exception { // **** DEPRECATED: THIS FUNCTION IS NO LONGER USED!!! **** int numInstances = m_Instances.numInstances(); HashMap clusterNumberHash = new HashMap((int) (m_NumClusters/0.75+10)); int clusterNumber = 0; if (m_verbose) { System.out.println("Mapping cluster assignments. Initial cluster assignments:"); for (int i=0; i<numInstances; i++) { System.out.print(m_ClusterAssignments[i] + " "); } System.out.println(); } for (int i=0; i<numInstances; i++) { if (m_ClusterAssignments[i]!=-1) { Integer clusterNum = new Integer(m_ClusterAssignments[i]); if (!clusterNumberHash.containsKey(clusterNum)) { clusterNumberHash.put(clusterNum, new Integer(clusterNumber)); clusterNumber++; } } } if (clusterNumber != m_NumClusters) { throw new Exception("Number of clusters do not match"); } for (int i=0; i<numInstances; i++) { if (m_ClusterAssignments[i]!=-1) { int newCluster = ((Integer) clusterNumberHash.get(new Integer(m_ClusterAssignments[i]))).intValue(); m_ClusterAssignments[i] = newCluster; } } if (m_verbose) { System.out.println("Done updating cluster assignments. New cluster assignments:"); for (int i=0; i<numInstances; i++) { System.out.print(m_ClusterAssignments[i] + " "); } System.out.println(); } clusterNumberHash.clear(); clusterNumberHash = null; //free memory } /** Outputs the current clustering * * @exception Exception if something goes wrong */ public void printIndexClusters() throws Exception { if (m_IndexClusters == null) throw new Exception ("Clusters were not created"); for (int i = 0; i < m_NumClusters; i++) { HashSet cluster = m_IndexClusters[i]; if (cluster == null) { System.out.println("Cluster " + i + " is null"); } else { System.out.println ("Cluster " + i + " consists of " + cluster.size() + " elements"); Iterator iter = cluster.iterator(); while(iter.hasNext()) { int idx = ((Integer) iter.next()).intValue(); System.out.println("\t\t" + idx); } } } } /** E-step of the KMeans clustering algorithm -- find best cluster assignments */ protected void findBestAssignments() throws Exception{ int moved = 0; int numInstances = m_Instances.numInstances(); int [] indices = new int[numInstances]; for (int i=0; i<numInstances; i++) { indices[i] = i; // initialize } if (m_InstanceOrdering == ORDERING_DEFAULT) { for (int i=0; i<numInstances; i++) { try { // Update number of points moved moved += assignInstanceToClusterWithConstraints(i); } catch (Exception e) { System.out.println("Could not find distance. Exception: " + e); e.printStackTrace(); } } if (m_MovePointsTillAssignmentStabilizes) { int newMoved = -1; for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes newMoved = 0; for (int i=0; i<numInstances; i++) { newMoved += assignInstanceToClusterWithConstraints(i); } if (newMoved > 0) { System.out.println(newMoved + " points moved on changing order in t=" + t); } } } } else if (m_InstanceOrdering == ORDERING_RANDOM) { // randomize instance ordering m_RandomNumberGenerator = new Random(m_RandomSeed); // initialize random number generator again for (int i = numInstances - 1; i > 0; i--) { int indexToSwap = m_RandomNumberGenerator.nextInt(i+1); int temp = indices[i]; // swap indices[i] = indices[indexToSwap]; indices[indexToSwap] = temp; } for (int i=0; i<numInstances; i++) { try { // Update number of points moved moved += assignInstanceToClusterWithConstraints(indices[i]); } catch (Exception e) { System.out.println("Could not find distance. Exception: " + e); e.printStackTrace(); } } if (m_MovePointsTillAssignmentStabilizes) { int newMoved = -1; for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes newMoved = 0; for (int i=0; i<numInstances; i++) { newMoved += assignInstanceToClusterWithConstraints(indices[i]); } if (newMoved > 0) { System.out.println(newMoved + " points moved on changing order in t=" + t); } } } } else if (m_InstanceOrdering == ORDERING_SORTED) { int [] sortedIndices = null; double bestSquareDistance = Integer.MAX_VALUE; double bestSimilarity = Integer.MIN_VALUE; double [] distances = new double[numInstances]; // find closest cluster centroid for each instance for (int i = 0; i < numInstances; i++) { for (int j = 0; j < m_NumClusters; j++) { double squareDistance = 0, similarity = 0; if (!m_objFunDecreasing) { similarity = similarityInPottsModel(i,j); if (similarity > bestSimilarity) { bestSimilarity = similarity; distances[i] = -similarity; // hacked distance conversion for sorting } } else { squareDistance = squareDistanceInPottsModel(i,j); if (squareDistance < bestSquareDistance) { bestSquareDistance = squareDistance; distances[i] = squareDistance; } } } } sortedIndices = Utils.sort(distances); // sort in ascending order for (int i=0; i<numInstances; i++) { try { // Update number of points moved moved += assignInstanceToClusterWithConstraints(sortedIndices[i]); } catch (Exception e) { System.out.println("Could not find distance. Exception: " + e); e.printStackTrace(); } } if (m_MovePointsTillAssignmentStabilizes) { int newMoved = -1; for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes newMoved = 0; for (int i=0; i<numInstances; i++) { newMoved += assignInstanceToClusterWithConstraints(sortedIndices[i]); } if (newMoved > 0) { System.out.println(newMoved + " points moved on changing order in t=" + t); } } } } else { throw new Exception ("Unknown instance ordering!!"); } System.out.println("\t" + moved + " points moved in this E-step"); } /** * Classifies the instance using the current clustering considering * constraints, updates cluster assignments * * @param instance the instance to be assigned to a cluster * @return 1 if the point is moved, 0 otherwise * @exception Exception if instance could not be classified * successfully */ public int assignInstanceToClusterWithConstraints(int instIdx) throws Exception { int bestCluster = 0; double bestSquareDistance = Integer.MAX_VALUE; double bestSimilarity = Integer.MIN_VALUE; int moved = 0; for (int i = 0; i < m_NumClusters; i++) { double squareDistance = 0, similarity = 0; if (!m_objFunDecreasing) { similarity = similarityInPottsModel(instIdx, i); // System.out.println("Sim between instance " + instIdx + " and cluster " + i + " = " + similarity); if (similarity > bestSimilarity) { bestSimilarity = similarity; bestCluster = i; } } else { squareDistance = squareDistanceInPottsModel(instIdx, i); if (squareDistance < bestSquareDistance) { bestSquareDistance = squareDistance; bestCluster = i; } } } if (m_ClusterAssignments[instIdx] != bestCluster) { if (m_verbose) { System.out.println("Moving instance " + instIdx + " from cluster " + m_ClusterAssignments[instIdx] + " to cluster " + bestCluster); } moved = 1;// // remove instIdx from old cluster// if (m_ClusterAssignments[instIdx] < m_NumClusters && m_ClusterAssignments[instIdx] != -1 && m_IndexClusters[m_ClusterAssignments[instIdx]] != null ) {// m_IndexClusters[m_ClusterAssignments[instIdx]].remove(new Integer(instIdx));// }// // add instIdx to new cluster// if (m_IndexClusters[bestCluster] == null) {// m_IndexClusters[bestCluster] = new HashSet();// }// m_IndexClusters[bestCluster].add(new Integer (instIdx)); // updates cluster Assignments m_ClusterAssignments[instIdx] = bestCluster; } if (m_verbose) { System.out.println("Assigning instance " + instIdx + " to cluster " + bestCluster); } return moved; } /** finds similarity between instance and centroid in Potts Model */ double similarityInPottsModel(int instIdx, int centroidIdx) throws Exception{ double sim = m_metric.similarity(m_Instances.instance(instIdx), m_ClusterCentroids.instance(centroidIdx)); Object list = m_instanceConstraintHash.get(new Integer(instIdx)); if (list != null) { // there are constraints associated with this instance ArrayList constraintList = (ArrayList) list; for (int i = 0; i < constraintList.size(); i++) { InstancePair pair = (InstancePair) constraintList.get(i); int firstIdx = pair.first; int secondIdx = pair.second; Instance instance1 = m_Instances.instance(firstIdx); Instance instance2 = m_Instances.instance(secondIdx); int otherIdx = (firstIdx == instIdx) ? m_ClusterAssignments[secondIdx] : m_ClusterAssignments[firstIdx]; // check whether the constraint is violated if (otherIdx != -1) { if (otherIdx != centroidIdx && pair.linkType == InstancePair.MUST_LINK) { sim -= m_MustLinkWeight; } else if (otherIdx == centroidIdx && pair.linkType == InstancePair.CANNOT_LINK) { sim -= m_CannotLinkWeight; } } } } if(m_verbose) { System.out.println("Final similarity between instance " + instIdx + " and centroid " + centroidIdx + " is: " + sim); } return sim; } /** finds squaredistance between instance and centroid in Potts Model */ double squareDistanceInPottsModel(int instIdx, int centroidIdx) throws Exception{ double dist = m_metric.distance(m_Instances.instance(instIdx), m_ClusterCentroids.instance(centroidIdx)); dist *= dist; // doing the squaring here itself if(m_verbose) { System.out.println("Unconstrained distance between instance " + instIdx + " and centroid " + centroidIdx + " is: " + dist); } Object list = m_instanceConstraintHash.get(new Integer(instIdx)); if (list != null) { // there are constraints associated with this instance ArrayList constraintList = (ArrayList) list; for (int i = 0; i < constraintList.size(); i++) { InstancePair pair = (InstancePair) constraintList.get(i); int firstIdx = pair.first; int secondIdx = pair.second; Instance instance1 = m_Instances.instance(firstIdx); Instance instance2 = m_Instances.instance(secondIdx); int otherIdx = (firstIdx == instIdx) ? m_ClusterAssignments[secondIdx] : m_ClusterAssignments[firstIdx]; // check whether the constraint is violated if (otherIdx != -1) { if (otherIdx != centroidIdx && pair.linkType == InstancePair.MUST_LINK) { dist += m_MustLinkWeight; } else if (otherIdx == centroidIdx && pair.linkType == InstancePair.CANNOT_LINK) { dist += m_CannotLinkWeight; } } } } if(m_verbose) { System.out.println("Final distance between instance " + instIdx + " and centroid " + centroidIdx + " is: " + dist); } return dist; } /** M-step of the KMeans clustering algorithm -- updates cluster centroids */ protected void updateClusterCentroids() throws Exception { // M-step: update cluster centroids Instances [] tempI = new Instances[m_NumClusters]; m_ClusterCentroids = new Instances(m_Instances, m_NumClusters); for (int i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(m_Instances, 0); // tempI[i] stores the cluster instances for cluster i } for (int i = 0; i < m_Instances.numInstances(); i++) { tempI[m_ClusterAssignments[i]].add(m_Instances.instance(i)); } // Calculates cluster centroids for (int i = 0; i < m_NumClusters; i++) { double [] values = new double[m_Instances.numAttributes()]; if (m_isSparseInstance) { values = meanOrMode(tempI[i]); // uses fast meanOrMode } else { for (int j = 0; j < m_Instances.numAttributes(); j++) { values[j] = tempI[i].meanOrMode(j); // uses usual meanOrMode } } // cluster centroids are dense in SPKMeans m_ClusterCentroids.add(new Instance(1.0, values)); if (m_Algorithm == ALGORITHM_SPHERICAL) { try { normalize(m_ClusterCentroids.instance(i)); } catch (Exception e) { e.printStackTrace(); } } } for (int i = 0; i < m_NumClusters; i++) { tempI[i] = null; // free memory for garbage collector to pick up } } /** calculates objective function */ protected void calculateObjectiveFunction() throws Exception { if (m_verbose) { System.out.println("Calculating objective function ..."); } m_Objective = 0; double tempML = m_MustLinkWeight; double tempCL = m_CannotLinkWeight; m_MustLinkWeight = tempML/2; m_CannotLinkWeight = tempCL/2; // adjust weights to take care of double counting of constraints if (m_verbose) { System.out.println("Must link weight: " + m_MustLinkWeight); System.out.println("Cannot link weight: " + m_CannotLinkWeight); } for (int i=0; i<m_Instances.num
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -