📄 pckmeans.java
    // (fragment: the listing opens mid-method, inside phase 2 of the active
    // constraint-acquisition loop that queries neighborhoods for point X)
      if (!m_objFunDecreasing) {
        normalize(clusterCentroids[i]);
      } else {
        normalizeByWeight(clusterCentroids[i]);
      }
    }

    // similarity of point X to each current cluster centroid
    double[] similaritiesToCentroids = new double[m_NumClusters];
    for (int i = 0; i < m_NumClusters; i++) {
      // m_metric handles both Euclidean and WeightedDotP
      similaritiesToCentroids[i] = m_metric.similarity(clusterCentroids[i],
                                                       m_Instances.instance(X));
    }

    if (m_verbose) {
      System.out.println("Before sort");
      for (int i = 0; i < m_NumClusters; i++) {
        System.out.println(similaritiesToCentroids[i]);
      }
    }
    int[] indices = Utils.sort(similaritiesToCentroids);
    if (m_verbose) {
      System.out.println("After sort");
      for (int i = 0; i < m_NumClusters; i++) {
        System.out.println(indices[i]);
      }
    }

    // Utils.sort is ascending, so walk the indices backwards to query the
    // most similar neighborhood first
    for (int h = m_NumClusters - 1; h >= 0; h--) {
      int index = indices[h];
      Iterator nbrIt = m_NeighborSets[index].iterator();
      Y = ((Integer) nbrIt.next()).intValue(); // get any point from the neighborhood
      Label = askOracle(X, Y);
      query++;
      System.out.println("Making query:" + query);
      if (m_verbose)
        System.out.println("Number of queries: " + query);
      if (Label == InstancePair.MUST_LINK) {
        // update data structures: X joins this neighborhood
        m_NeighborSets[index].add(new Integer(X));
        m_SumOfClusterInstances[index] =
          sumWithInstance(m_SumOfClusterInstances[index], m_Instances.instance(X));
        m_ClusterAssignments[X] = index;
        if (m_verbose)
          System.out.println("Adding " + X + " to cluster: " + index);
        m_AssignedSet.add(new Integer(X));
        if (m_verbose)
          System.out.println("Exiting phase 2 for loop");
        break; // exit from for
      } else {
        if (m_verbose)
          System.out.println(X + " is CANNOT-LINKed to cluster " + index);
      }
      if (query >= numQueries) {
        if (m_verbose)
          System.out.println("Ran out of queries");
        createGlobalCentroids();
        addMLAndCLTransitiveClosure(null);
        return;
      }
    } // end reverse for

    createGlobalCentroids();
    addMLAndCLTransitiveClosure(null);
    return;
  }

  /** Creates the cluster centroids; clusters beyond the current neighborhoods
   *  are seeded with random perturbations of the global centroid. */
  protected void createGlobalCentroids() throws Exception {
    // initialize using m_NumCurrentClusters neighborhoods (< m_NumClusters),
    // make random centroids for the rest
    System.out.println("Creating centroids");
    if (m_verbose)
      System.out.println("Current number of clusters: " + m_NumCurrentClusters);

    // compute centroids of all current clusters
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
    for (int i = 0; i < m_NumCurrentClusters; i++) {
      if (m_SumOfClusterInstances[i] != null) {
        if (m_verbose) {
          System.out.println("Normalizing cluster center " + i);
        }
        if (!m_objFunDecreasing) {
          normalize(m_SumOfClusterInstances[i]);
        } else {
          normalizeByWeight(m_SumOfClusterInstances[i]);
        }
      }
      m_SumOfClusterInstances[i].setDataset(m_Instances);
      m_ClusterCentroids.add(m_SumOfClusterInstances[i]);
    }

    // fill up the remaining clusters by random perturbation of the global centroid
    if (m_NumCurrentClusters < m_NumClusters) {
      // find the global centroid
      System.out.println("Creating global centroid");
      double[] globalValues = new double[m_Instances.numAttributes()];
      if (m_isSparseInstance) {
        globalValues = meanOrMode(m_Instances); // uses fast meanOrMode
      } else {
        for (int j = 0; j < m_Instances.numAttributes(); j++) {
          globalValues[j] = m_Instances.meanOrMode(j); // uses usual meanOrMode
        }
      }
      // global centroid is dense in SPKMeans
      m_GlobalCentroid = new Instance(1.0, globalValues);
      m_GlobalCentroid.setDataset(m_Instances);
      // normalize before random perturbation
      if (!m_objFunDecreasing) {
        normalizeInstance(m_GlobalCentroid);
      }

      System.out.println("Creating " + (m_NumClusters - m_NumCurrentClusters)
                         + " random centroids");
      for (int i = m_NumCurrentClusters; i < m_NumClusters; i++) {
        double[] values = new double[m_Instances.numAttributes()];
        double normalizer = 0;
        for (int j = 0; j < m_Instances.numAttributes(); j++) {
          values[j] = m_GlobalCentroid.value(j)
            * (1 + m_DefaultPerturb * (m_RandomNumberGenerator.nextFloat() - 0.5));
          normalizer += values[j] * values[j];
        }
        if (!m_objFunDecreasing) {
          normalizer = Math.sqrt(normalizer);
          for (int j = 0; j < m_Instances.numAttributes(); j++) {
            values[j] /= normalizer;
          }
        }
        // values are suitably normalized at this point if required
        if (m_isSparseInstance) {
          // sparse for consistency with the other cluster centroids
          m_ClusterCentroids.add(new SparseInstance(1.0, values));
        } else {
          m_ClusterCentroids.add(new Instance(1.0, values));
        }
      }
    }
    System.out.println("Finished creating centroids");
    m_NumCurrentClusters = m_NumClusters;
  }

  /** Adds the inferred ML and CL links to m_ConstraintsHash, computed as the
   *  transitive closure of m_NeighborSets. */
  protected void addMLAndCLTransitiveClosure(int[] indices) throws Exception {
    if (m_verbose) {
      for (int j = 0; j < m_NumCurrentClusters; j++) {
        int i = j;
        if (indices != null) {
          i = indices[j];
        }
        System.out.println("Neighborhood list " + j + " is:");
        System.out.println(m_NeighborSets[i]);
      }
    }

    // add all inferred ML links within each neighborhood
    for (int j = 0; j < m_NumCurrentClusters; j++) {
      int i = j;
      if (indices != null) {
        i = indices[j];
      }
      if (m_NeighborSets[i] != null) {
        Iterator iter1 = m_NeighborSets[i].iterator();
        while (iter1.hasNext()) {
          int first = ((Integer) iter1.next()).intValue();
          Iterator iter2 = m_NeighborSets[i].iterator();
          while (iter2.hasNext()) {
            int second = ((Integer) iter2.next()).intValue();
            if (first < second) {
              InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
              if (!m_ConstraintsHash.containsKey(pair)) {
                m_ConstraintsHash.put(pair, new Integer(InstancePair.MUST_LINK));
                if (m_verbose) {
                  System.out.println("Adding inferred ML (" + pair.first + "," + pair.second + ")");
                }
                // hash the constraint for both instances involved
                Integer firstInt = new Integer(first);
                Integer secondInt = new Integer(second);
                InstancePair pairML = new InstancePair(first, second, InstancePair.MUST_LINK);
                Object constraintList1 = m_instanceConstraintHash.get(firstInt);
                if (constraintList1 == null) {
                  ArrayList constraintList = new ArrayList();
                  constraintList.add(pairML);
                  m_instanceConstraintHash.put(firstInt, constraintList);
                } else {
                  ((ArrayList) constraintList1).add(pairML);
                }
                Object constraintList2 = m_instanceConstraintHash.get(secondInt);
                if (constraintList2 == null) {
                  ArrayList constraintList = new ArrayList();
                  constraintList.add(pairML);
                  m_instanceConstraintHash.put(secondInt, constraintList);
                } else {
                  ((ArrayList) constraintList2).add(pairML);
                }
                if (m_verbose) {
                  System.out.println("Adding inferred ML link: " + pair);
                }
                if (!m_SeedHash.contains(new Integer(first))) {
                  m_SeedHash.add(new Integer(first));
                }
                if (!m_SeedHash.contains(new Integer(second))) {
                  m_SeedHash.add(new Integer(second));
                }
              }
            }
          }
        }
      }
    }

    // add all inferred CL links between neighborhoods
    for (int ii = 0; ii < m_NumCurrentClusters; ii++) {
      int i = ii;
      if (indices != null) {
        i = indices[ii];
      }
      if (m_NeighborSets[i] != null) {
        for (int jj = ii + 1; jj < m_NumCurrentClusters; jj++) {
          int j = jj;
          if (indices != null) {
            j = indices[jj];
          }
          // check whether there is at least one CL between neighborhoods ii & jj;
          // any existing constraint across two distinct neighborhoods must be a CL,
          // since an ML would have merged them during the transitive closure
          boolean existsCL = false;
          Iterator iter1 = m_NeighborSets[i].iterator();
          while (iter1.hasNext()) {
            int index1 = ((Integer) iter1.next()).intValue();
            if (m_NeighborSets[j] != null) {
              Iterator iter2 = m_NeighborSets[j].iterator();
              while (iter2.hasNext()) {
                int index2 = ((Integer) iter2.next()).intValue();
                int first = (index1 < index2) ? index1 : index2;
                int second = (index1 >= index2) ? index1 : index2;
                if (first == second) {
                  throw new Exception("Same instance " + first + " cannot be in cluster: "
                                      + i + " and cluster " + j);
                } else if (first < second) {
                  InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
                  if (m_ConstraintsHash.containsKey(pair)) {
                    // found one CL between the neighborhoods
                    existsCL = true;
                    break; // out of inner while
                  }
                }
              }
            }
            if (existsCL) {
              break; // out of outer while
            }
          }

          // now add the inferred CLs between every cross-neighborhood pair
          if (existsCL) {
            iter1 = m_NeighborSets[i].iterator();
            while (iter1.hasNext()) {
              int index1 = ((Integer) iter1.next()).intValue();
              if (m_NeighborSets[j] != null) {
                Iterator iter2 = m_NeighborSets[j].iterator();
                while (iter2.hasNext()) {
                  int index2 = ((Integer) iter2.next()).intValue();
                  int first = (index1 < index2) ? index1 : index2;
                  int second = (index1 >= index2) ? index1 : index2;
                  if (first == second) {
                    throw new Exception("Same instance " + first + " cannot be in cluster: "
                                        + i + " and cluster " + j);
                  } else if (first < second) {
                    InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
                    // add the new constraint
                    if (!m_ConstraintsHash.containsKey(pair)) {
                      m_ConstraintsHash.put(pair, new Integer(InstancePair.CANNOT_LINK));
                      if (m_verbose) {
                        System.out.println("Adding inferred CL (" + pair.first + "," + pair.second + ")");
                      }
                      // hash the constraint for both instances involved
                      Integer firstInt = new Integer(first);
                      Integer secondInt = new Integer(second);
                      InstancePair pairCL = new InstancePair(first, second, InstancePair.CANNOT_LINK);
                      Object constraintList1 = m_instanceConstraintHash.get(firstInt);
                      if (constraintList1 == null) {
                        ArrayList constraintList = new ArrayList();
                        constraintList.add(pairCL);
                        m_instanceConstraintHash.put(firstInt, constraintList);
                      } else {
                        ((ArrayList) constraintList1).add(pairCL);
                      }
                      Object constraintList2 = m_instanceConstraintHash.get(secondInt);
                      if (constraintList2 == null) {
                        ArrayList constraintList = new ArrayList();
                        constraintList.add(pairCL);
                        m_instanceConstraintHash.put(secondInt, constraintList);
                      } else {
                        ((ArrayList) constraintList2).add(pairCL);
                      }
                      if (m_verbose) {
                        System.out.println("Adding inferred CL link: " + pair);
                      }
                      if (!m_SeedHash.contains(new Integer(first))) {
                        m_SeedHash.add(new Integer(first));
                      }
                      if (!m_SeedHash.contains(new Integer(second))) {
                        m_SeedHash.add(new Integer(second));
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  /** Main depth-first search routine: each connected component of the
   *  constraint graph becomes one neighborhood. */
  protected void DFS() throws Exception {
    int[] vertexColor = new int[m_Instances.numInstances()];
    m_NumCurrentClusters = 0;
    for (int u = 0; u < m_Instances.numInstances(); u++) {
      vertexColor[u] = WHITE;
    }
    for (int u = 0; u < m_Instances.numInstances(); u++) {
      if (m_AdjacencyList[u] != null && vertexColor[u] == WHITE) {
        m_NeighborSets[m_NumCurrentClusters] = new HashSet();
        DFS_VISIT(u, vertexColor); // finds the whole neighborhood of u
        m_NumCurrentClusters++;
      }
    }
  }

  /** Recursive subroutine for DFS */
  protected void DFS_VISIT(int u, int[] vertexColor) throws Exception {
    vertexColor[u] = GRAY;
    if (m_AdjacencyList[u] != null) {
      Iterator iter = m_AdjacencyList[u].iterator();
      while (iter.hasNext()) {
        int j = ((Integer) iter.next()).intValue();
        if (vertexColor[j] == WHITE) { // vertex j is still undiscovered
          DFS_VISIT(j, vertexColor);
        }
      }
    }
    // update the stats for u
    m_ClusterAssignments[u] = m_NumCurrentClusters;
    m_NeighborSets[m_NumCurrentClusters].add(new Integer(u));
    m_SumOfClusterInstances[m_NumCurrentClusters] =
      sumWithInstance(m_SumOfClusterInstances[m_NumCurrentClusters], m_Instances.instance(u));
    vertexColor[u] = BLACK;
  }

  /** Initialization routine for the non-active algorithm */
  protected void nonActivePairwiseInit() throws Exception {
    m_NeighborSets = new HashSet[m_Instances.numInstances()];
    m_SumOfClusterInstances = new Instance[m_Instances.numInstances()];
    m_AdjacencyList = new HashSet[m_Instances.numInstances()];
    for (int i = 0; i < m_Instances.numInstances(); i++) {
      m_ClusterAssignments[i] = -1;
    }
    if (m_ConstraintsHash != null) {
      Set pointPairs = (Set) m_ConstraintsHash.keySet();
      Iterator pairItr = pointPairs.iterator();
      System.out.println("In non-active init");
      // iterate over the pairs in m_ConstraintsHash to create the adjacency list
      while (pairItr.hasNext()) {
        InstancePair pair = (InstancePair) pairItr.next();
        int linkType = ((Integer) m_ConstraintsHash.get(pair)).intValue();
        if (m_verbose)
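
/*
 * The two passes in addMLAndCLTransitiveClosure above implement the standard
 * PCKMeans constraint inference: every pair inside one neighborhood becomes an
 * inferred must-link, and a single observed cannot-link between two
 * neighborhoods propagates to every cross-neighborhood pair. The class below
 * is a minimal, self-contained sketch of that idea in plain Java (16+ for
 * records), using java.util collections instead of this file's InstancePair
 * and hash machinery; every name in it (ConstraintClosureSketch, Pair, Link,
 * closure) is illustrative and not part of the original source.
 */
import java.util.*;

public class ConstraintClosureSketch {

  /** An unordered pair of instance indices, stored with first < second. */
  record Pair(int first, int second) {
    static Pair of(int a, int b) {
      return a < b ? new Pair(a, b) : new Pair(b, a);
    }
  }

  enum Link { MUST, CANNOT }

  /** Returns the observed constraints augmented with all inferred ML/CL links. */
  static Map<Pair, Link> closure(List<Set<Integer>> neighborhoods,
                                 Map<Pair, Link> observed) {
    Map<Pair, Link> inferred = new HashMap<>(observed);
    // ML pass: all pairs within one neighborhood are must-linked.
    for (Set<Integer> nbr : neighborhoods) {
      for (int a : nbr) {
        for (int b : nbr) {
          if (a < b) {
            inferred.putIfAbsent(new Pair(a, b), Link.MUST);
          }
        }
      }
    }
    // CL pass: one observed CL between two neighborhoods implies a CL
    // for every cross-neighborhood pair.
    for (int i = 0; i < neighborhoods.size(); i++) {
      for (int j = i + 1; j < neighborhoods.size(); j++) {
        if (existsCannotLink(neighborhoods.get(i), neighborhoods.get(j), observed)) {
          for (int a : neighborhoods.get(i)) {
            for (int b : neighborhoods.get(j)) {
              inferred.putIfAbsent(Pair.of(a, b), Link.CANNOT);
            }
          }
        }
      }
    }
    return inferred;
  }

  static boolean existsCannotLink(Set<Integer> n1, Set<Integer> n2,
                                  Map<Pair, Link> observed) {
    for (int a : n1) {
      for (int b : n2) {
        if (observed.get(Pair.of(a, b)) == Link.CANNOT) {
          return true;
        }
      }
    }
    return false;
  }

  public static void main(String[] args) {
    // Two neighborhoods {0,1,2} and {3,4} with one observed CL between 0 and 3.
    List<Set<Integer>> nbrs = List.of(Set.of(0, 1, 2), Set.of(3, 4));
    Map<Pair, Link> observed = new HashMap<>();
    observed.put(Pair.of(0, 3), Link.CANNOT);
    // Prints 4 MLs ((0,1),(0,2),(1,2),(3,4)) and 6 CLs (all cross pairs),
    // the same closure the WEKA code stores in m_ConstraintsHash.
    closure(nbrs, observed).forEach((p, l) -> System.out.println(p + " -> " + l));
  }
}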