📄 weightedffneighborhoodinit.java
字号:
if (!m_objFunDecreasing) { ClusterUtils.normalizeInstance(m_GlobalCentroid); } // System.out.println("Global centroid is: " + m_GlobalCentroid); for (int i=m_NumCurrentClusters; i<m_numClusters; i++) { double [] values = new double[m_Instances.numAttributes()]; double normalizer = 0; for (int j = 0; j < m_Instances.numAttributes(); j++) { values[j] = m_GlobalCentroid.value(j) * (1 + m_DefaultPerturb * (m_RandomNumberGenerator.nextFloat() - 0.5)); normalizer += values[j] * values[j]; } if (!m_objFunDecreasing) { normalizer = Math.sqrt(normalizer); for (int j = 0; j < m_Instances.numAttributes(); j++) { values[j] /= normalizer; } } if (m_isSparseInstance) { m_ClusterCentroids.add(new SparseInstance(1.0, values)); // sparse for consistency with other cluster centroids } else { m_ClusterCentroids.add(new Instance(1.0, values)); } } System.out.println("Done calculating random centroids by perturbation"); m_NumCurrentClusters = m_numClusters; } m_clusterer.setClusterAssignments(m_ClusterAssignments); return m_ClusterCentroids; } /** Main Depth First Search routine */ protected void DFS() throws Exception { int [] vertexColor = new int[m_Instances.numInstances()]; m_NumCurrentClusters = 0; for(int u=0; u<m_Instances.numInstances(); u++){ vertexColor[u] = WHITE; } for(int u=0; u<m_Instances.numInstances(); u++){ // NOTE: Have to uncomment check for m_AdjacencyList != null to enable farthestFirst // as default initialization, instead of randomPerturbInit if (m_AdjacencyList[u] != null && vertexColor[u] == WHITE) { m_NeighborSets[m_NumCurrentClusters] = new HashSet(); // m_NeighborSets[m_NumCurrentClusters].add(new Integer(u)); // m_SumOfClusterInstances[m_NumCurrentClusters] = sumWithInstance(m_SumOfClusterInstances[m_NumCurrentClusters], m_Instances.instance(u)); // m_ClusterAssignments[u] = m_NumCurrentClusters; DFS_VISIT(u, vertexColor); // found whole neighbourhood of u m_NumCurrentClusters++; } } } /** Recursive subroutine for DFS */ protected void DFS_VISIT(int u, int[] vertexColor) throws Exception { vertexColor[u] = GRAY; Iterator iter = null; if (m_AdjacencyList[u] != null) { iter = m_AdjacencyList[u].iterator(); while (iter.hasNext()) { int j = ((Integer) iter.next()).intValue(); if(vertexColor[j] == WHITE){ // if the vertex is still undiscovered DFS_VISIT(j, vertexColor); } } } // update stats for u m_ClusterAssignments[u] = m_NumCurrentClusters; m_NeighborSets[m_NumCurrentClusters].add(new Integer(u)); m_SumOfClusterInstances[m_NumCurrentClusters] = ClusterUtils.sumWithInstance(m_SumOfClusterInstances[m_NumCurrentClusters], m_Instances.instance(u),m_Instances); vertexColor[u] = BLACK; } /** Finds point in setOfPoints which has weighted max min-distance from set visitedPoints */ int weightedFarthestFromSetOfPoints(Instance[] setOfPoints, HashSet visitedPoints, HashSet eliminationSet) throws Exception { // implements weighted farthest-first search algorithm in the given setOfPoints: /* for (each datapoint x from setOfPoints not in visitedPoints) { distance of x to visitedPoints = min{weighted d(x,f):f \in visitedPoints} } select the point x with maximum distance as new center; */ if (visitedPoints.size() == 0) { int point; point = m_RandomNumberGenerator.nextInt(setOfPoints.length); // Note - no need to check for labeled data now, since we have // no visitedPoints => no labeled data if (m_verbose) System.out.println("First point selected: " + point); return point; } double minSimilaritySoFar = Double.POSITIVE_INFINITY; double maxDistanceSoFar = Double.NEGATIVE_INFINITY; ArrayList bestPoints = new ArrayList(); for (int i=0; i<setOfPoints.length; i++) { if (!visitedPoints.contains(new Integer(i))) { if (eliminationSet == null || !eliminationSet.contains(new Integer(i))) { // point should not belong to visitedPoints or eliminationSet Instance inst = setOfPoints[i]; Iterator iter = visitedPoints.iterator(); double minDistanceFromSet = Double.POSITIVE_INFINITY; double maxSimilarityFromSet = Double.NEGATIVE_INFINITY; while (iter.hasNext()) { Instance pointInSet = setOfPoints[((Integer) iter.next()).intValue()]; if (!m_objFunDecreasing) { double sim = m_metric.similarity(inst, pointInSet) / Math.sqrt(pointInSet.weight() * inst.weight()); if (sim > maxSimilarityFromSet) { maxSimilarityFromSet = sim; } } else { double dist = 0; if (m_metric instanceof KL) { dist = ((KL)m_metric).distanceJS(inst, pointInSet) * Math.sqrt(pointInSet.weight() * inst.weight()); } else { dist = m_metric.distance(inst, pointInSet) * Math.sqrt(pointInSet.weight() * inst.weight()); } if (dist < minDistanceFromSet) { minDistanceFromSet = dist; } } } if (!m_objFunDecreasing) { if (maxSimilarityFromSet == minSimilaritySoFar) { minSimilaritySoFar = maxSimilarityFromSet; bestPoints.add(new Integer(i)); } else if (maxSimilarityFromSet < minSimilaritySoFar) { minSimilaritySoFar = maxSimilarityFromSet; bestPoints.clear(); bestPoints.add(new Integer(i)); } } else { if (minDistanceFromSet == maxDistanceSoFar) { minDistanceFromSet = maxDistanceSoFar; bestPoints.add(new Integer(i)); if (m_verbose) { System.out.println("Additional point added: " + i + " with distance: " + maxDistanceSoFar); } } else if (minDistanceFromSet > maxDistanceSoFar) { maxDistanceSoFar = minDistanceFromSet; bestPoints.clear(); bestPoints.add(new Integer(i)); if (m_verbose) { System.out.println("Farthest point from set is: " + i + " with distance: " + maxDistanceSoFar); } } } } } } int bestPoint = -1; if (bestPoints.size() > 1) { // multiple points, get random from whole set bestPoint = m_RandomNumberGenerator.nextInt(setOfPoints.length); while ((visitedPoints != null && visitedPoints.contains(new Integer(bestPoint))) || (eliminationSet != null && eliminationSet.contains(new Integer(bestPoint)))) { bestPoint = m_RandomNumberGenerator.nextInt(setOfPoints.length); } } else { // only 1 point, fine bestPoint = ((Integer)bestPoints.get(0)).intValue(); } if (m_verbose) { if (!m_objFunDecreasing) { System.out.println("Selected " + bestPoint + " with similarity: " + minSimilaritySoFar); } else { System.out.println("Selected " + bestPoint + " with distance: " + maxDistanceSoFar); } } return bestPoint; } /** adding other inferred ML and CL links to m_ConstraintsHash, from * m_NeighborSets */ protected void addMLAndCLTransitiveClosure(int[] indices) throws Exception { // add all ML links within clusters if (m_verbose) { for (int j=0; j<m_NumCurrentClusters; j++) { int i = j; if (indices != null) { i = indices[j]; } System.out.println("Neighborhood list " + j + " is:"); System.out.println(m_NeighborSets[i]); } } for (int j=0; j<m_NumCurrentClusters; j++) { int i = j; if (indices != null) { i = indices[j]; } if (m_NeighborSets[i] != null) { Iterator iter1 = m_NeighborSets[i].iterator(); while (iter1.hasNext()) { int first = ((Integer) iter1.next()).intValue(); Iterator iter2 = m_NeighborSets[i].iterator(); while (iter2.hasNext()) { int second = ((Integer) iter2.next()).intValue(); if (first < second) { InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK); if (!m_ConstraintsHash.containsKey(pair)) { m_ConstraintsHash.put(pair, new Integer(InstancePair.MUST_LINK)); if (m_verbose) { System.out.println("Adding inferred ML (" + pair.first +","+pair.second+")"); } // hash the constraints for the instances involved Integer firstInt = new Integer(first); Integer secondInt = new Integer(second); InstancePair pairML = new InstancePair(first, second, InstancePair.MUST_LINK); Object constraintList1 = m_instanceConstraintHash.get(firstInt); if (constraintList1 == null) { ArrayList constraintList = new ArrayList(); constraintList.add(pairML); m_instanceConstraintHash.put(firstInt, constraintList); } else { ((ArrayList)constraintList1).add(pairML); } Object constraintList2 = m_instanceConstraintHash.get(secondInt); if (constraintList2 == null) { ArrayList constraintList = new ArrayList(); constraintList.add(pairML); m_instanceConstraintHash.put(secondInt, constraintList); } else { ((ArrayList)constraintList2).add(pairML); } if (m_verbose) { System.out.println("Adding inferred ML link: " + pair); } if (!m_SeedHash.contains(new Integer(first))) { m_SeedHash.add(new Integer(first)); } if (!m_SeedHash.contains(new Integer(second))) { m_SeedHash.add(new Integer(second)); } } } } } } } // add all CL links between clusters for (int ii=0; ii<m_NumCurrentClusters; ii++) { int i = ii; if (indices != null) { i = indices[ii]; } if (m_NeighborSets[i] != null) { for (int jj=ii+1; jj<m_NumCurrentClusters; jj++) { int j = jj; if (indices != null) { j = indices[jj]; } // check if there is at least one CL between neighborhoods ii & jj boolean existsCL = false; Iterator iter1 = m_NeighborSets[i].iterator(); while (iter1.hasNext()) { int index1 = ((Integer) iter1.next()).intValue(); if (m_NeighborSets[j] != null) { Iterator iter2 = m_NeighborSets[j].iterator(); while (iter2.hasNext()) { int index2 = ((Integer) iter2.next()).intValue(); int first = (index1 < index2)? index1:index2; int second = (index1 >= index2)? index1:index2; if (first == second) { throw new Exception(" Same instance " + first + " cannot be in cluster: " + i + " and cluster " + j); } else if (first < second) { InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK); if (m_ConstraintsHash.containsKey(pair)) { // found one CL between the neighborhoods existsCL = true; break; // out of inner while } } } } if (existsCL) { break; // out of outer while } } // now add the inferred CLs if (existsCL) { iter1 = m_NeighborSets[i].iterator(); while (iter1.hasNext()) { int index1 = ((Integer) iter1.next()).intValue(); if (m_NeighborSets[j] != null) { Iterator iter2 = m_NeighborSets[j].iterator(); while (iter2.hasNext()) { int index2 = ((Integer) iter2.next()).intValue(); int first = (index1 < index2)? index1:index2; int second = (index1 >= index2)? index1:index2; if (first == second) { throw new Exception(" Same instance " + first + " cannot be in cluster: " + i + " and cluster " + j); } else if (first < second) { // add new constraint InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK); if (!m_ConstraintsHash.containsKey(pair)) { m_ConstraintsHash.put(pair, new Integer(InstancePair.CANNOT_LINK)); if (m_verbose) { System.out.println("Adding inferred CL (" + pair.first +","+pair.second+")"); } // hash the constraints for the instances involved Integer firstInt = new Integer(first); Integer secondInt = new Integer(second); InstancePair pairCL = new InstancePair(first, second, InstancePair.CANNOT_LINK); Object constraintList1 = m_instanceConstraintHash.get(firstInt); if (constraintList1 == null) { ArrayList constraintList = new ArrayList(); constraintList.add(pairCL); m_instanceConstraintHash.put(firstInt, constraintList); } else { ((ArrayList)constraintList1).add(pairCL); } Object constraintList2 = m_instanceConstraintHash.get(secondInt); if (constraintList2 == null) { ArrayList constraintList = new ArrayList(); constraintList.add(pairCL); m_instanceConstraintHash.put(secondInt, constraintList); } else { ((ArrayList)constraintList2).add(pairCL); } if (m_verbose) { System.out.println("Adding inferred CL link: " + pair); } if (!m_SeedHash.contains(new Integer(first))) { m_SeedHash.add(new Integer(first)); } if (!m_SeedHash.contains(new Integer(second))) { m_SeedHash.add(new Integer(second)); } } } } } } } } } } m_clusterer.setInstanceConstraintsHash(m_instanceConstraintHash); } public void setOptions (String[] options) throws Exception { // TODO } public Enumeration listOptions () { // TODO return null; } public String [] getOptions () { String[] options = new String[10]; int current = 0; options[current++] = "-N"; options[current++] = "" + m_numClusters; while (current < options.length) { options[current++] = ""; } return options; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -