📄 pckmeans.java
字号:
System.out.println(pair + ": type = " + linkType); if( linkType == InstancePair.MUST_LINK ){ // concerned with MUST-LINK in Adjacency List if (m_AdjacencyList[pair.first] == null) { m_AdjacencyList[pair.first] = new HashSet(); } if (!m_AdjacencyList[pair.first].contains(new Integer(pair.second))) { m_AdjacencyList[pair.first].add(new Integer(pair.second)); } if (m_AdjacencyList[pair.second] == null) { m_AdjacencyList[pair.second] = new HashSet(); } if (!m_AdjacencyList[pair.second].contains(new Integer(pair.first))) { m_AdjacencyList[pair.second].add(new Integer(pair.first)); } } } // DFS for finding connected components in Adjacency List, updates required stats DFS(); } if (!m_Seedable) { // don't perform any seeding, initialize from random m_NumCurrentClusters = 0; } // System.out.println("Need to make " + m_NumClusters + " clusters, already made " + m_NumCurrentClusters); // if the required number of clusters has been obtained, wrap-up if( m_NumCurrentClusters >= m_NumClusters ){ if (m_verbose) { System.out.println("Got the required number of clusters ..."); System.out.println("num clusters: " + m_NumClusters + ", num current clusters: " + m_NumCurrentClusters); } int clusterSizes[] = new int[m_NumCurrentClusters]; for (int i=0; i<m_NumCurrentClusters; i++) { if (m_verbose) { System.out.println("Neighbor set: " + i + " has size: " + m_NeighborSets[i].size()); } clusterSizes[i] = -m_NeighborSets[i].size(); // for reverse sort (decreasing order) } int[] indices = Utils.sort(clusterSizes); Instance[] clusterCentroids = new Instance[m_NumClusters]; // compute centroids of m_NumClusters clusters m_ClusterCentroids = new Instances(m_Instances, m_NumClusters); for (int j=0; j < m_NumClusters; j++) { int i = indices[j]; System.out.println("Neighborhood selected: " + m_NeighborSets[i].size() + "(" + m_TotalTrainWithLabels.instance(((Integer)(m_NeighborSets[i].iterator().next())).intValue()).classValue()+ ")\t"); if (m_SumOfClusterInstances[i] != null) { if (m_verbose) { System.out.println("Normalizing instance " + i); } if (!m_objFunDecreasing) { normalize(m_SumOfClusterInstances[i]); } else { normalizeByWeight(m_SumOfClusterInstances[i]); } } Iterator iter = m_NeighborSets[i].iterator(); while (iter.hasNext()) { // assign points of new cluster int instNumber = ((Integer) iter.next()).intValue(); if (m_verbose) { System.out.println("Assigning " + instNumber + " to cluster: " + j); } // have to re-assign after sorting m_ClusterAssignments[instNumber] = j; } m_SumOfClusterInstances[j].setDataset(m_Instances); // m_SumOfClusterInstances suitably normalized now m_ClusterCentroids.add(m_SumOfClusterInstances[i]); } for (int j=m_NumClusters; j < m_NumCurrentClusters; j++) { int i = indices[j]; Iterator iter = m_NeighborSets[i].iterator(); while (iter.hasNext()) { int instNumber = ((Integer) iter.next()).intValue(); if (m_verbose) { System.out.println("Assigning " + instNumber + " to cluster -1"); } m_ClusterAssignments[instNumber] = -1; } } m_NumCurrentClusters = m_NumClusters; // adding other inferred ML and CL links addMLAndCLTransitiveClosure(indices); return; } else if( m_NumCurrentClusters < m_NumClusters ){ createGlobalCentroids(); addMLAndCLTransitiveClosure(null); } } // Query: oracle replies on link, added to m_ConstraintsHash protected int askOracle(int X, int Y) { Instance first = m_TotalTrainWithLabels.instance(X); Instance second = m_TotalTrainWithLabels.instance(Y); int linkType; if (m_verbose) { System.out.print("["+X+","+Y); } if (first.classValue() == second.classValue()) { if (m_verbose) { System.out.println(",MUST]"); } linkType = InstancePair.MUST_LINK; } else if (first.classValue() != second.classValue()) { if (m_verbose) { System.out.println(",CANNOT]"); } linkType = InstancePair.CANNOT_LINK; } else { if (m_verbose) { System.out.println(",DONT_CARE]"); } linkType = InstancePair.DONT_CARE_LINK; } // add to constraintHash and seedHash int firstIndex = (X<Y)? X:Y; int secondIndex = (X>=Y)? X:Y; InstancePair newPair = new InstancePair(firstIndex, secondIndex, InstancePair.DONT_CARE_LINK); if (!m_ConstraintsHash.containsKey(newPair)) { m_ConstraintsHash.put(newPair, new Integer(linkType)); } Integer firstInt = new Integer(firstIndex); Integer secondInt = new Integer(secondIndex); // for first point if(!m_SeedHash.contains(firstInt)) { // add instances with constraints to seedHash m_SeedHash.add(firstInt); } // for second point if(!m_SeedHash.contains(secondInt)) { m_SeedHash.add(secondInt); } return linkType; } /** Finds point which has max min-distance from set visitedPoints, does not consider points from eliminationSet */ int farthestFromSet(HashSet visitedPoints, HashSet eliminationSet) throws Exception { // implements farthest-first search algorithm: /* for (each datapoint x not in visitedPoints) { distance of x to visitedPoints = min{d(x,f):f \in visitedPoints} } select the point x with maximum distance as new center; */ if (visitedPoints.size() == 0) { int point; if (m_StartingIndexOfTest < m_Instances.numInstances()) { point = m_RandomNumberGenerator.nextInt(m_StartingIndexOfTest); // takes care not to select test example } else { point = m_RandomNumberGenerator.nextInt(m_Instances.numInstances()); } // Note: no need to check for labeled data now, since we have no visitedPoints // => no labeled data if (m_verbose) System.out.println("First point selected: " + point); return point; } else { if (m_verbose) { Iterator iter = visitedPoints.iterator(); if (eliminationSet != null) { iter = eliminationSet.iterator(); while(iter.hasNext()) { System.out.println("In elimination set: " + ((Integer) iter.next()).intValue()); } } } } double minSimilaritySoFar = Double.POSITIVE_INFINITY; double maxDistanceSoFar = Double.NEGATIVE_INFINITY; ArrayList bestPointArray = null; int bestPoint = -1; for (int i=0; i<m_Instances.numInstances() && i<m_StartingIndexOfTest; i++) { // point should not belong to test set if (visitedPoints == null || !visitedPoints.contains(new Integer(i))) { // point should not belong to visitedPoints if (eliminationSet == null || !eliminationSet.contains(new Integer(i))) { // point should not belong to eliminationSet Instance inst = m_Instances.instance(i); Iterator iter = visitedPoints.iterator(); double minDistanceFromSet = Double.POSITIVE_INFINITY; double maxSimilarityFromSet = Double.NEGATIVE_INFINITY; while (iter.hasNext()) { Instance pointInSet = m_Instances.instance(((Integer) iter.next()).intValue()); if (!m_objFunDecreasing) { double sim = m_metric.similarity(inst, pointInSet); if (sim > maxSimilarityFromSet) { maxSimilarityFromSet = sim; } } else { double dist = m_metric.distance(inst, pointInSet); if (dist < minDistanceFromSet) { minDistanceFromSet = dist; } } } if (!m_objFunDecreasing) { if (maxSimilarityFromSet == minSimilaritySoFar) { minSimilaritySoFar = maxSimilarityFromSet; bestPointArray.add(new Integer(i)); } else if (maxSimilarityFromSet < minSimilaritySoFar) { minSimilaritySoFar = maxSimilarityFromSet; bestPointArray = new ArrayList(); bestPointArray.add(new Integer(i)); } } else { if (minDistanceFromSet == maxDistanceSoFar) { minDistanceFromSet = maxDistanceSoFar; bestPointArray.add(new Integer(i)); if (m_verbose) { System.out.println("Additional point added: " + i + " with similarity: " + minSimilaritySoFar); } } else if (minDistanceFromSet > maxDistanceSoFar) { maxDistanceSoFar = minDistanceFromSet; bestPointArray = new ArrayList(); bestPointArray.add(new Integer(i)); if (m_verbose) { System.out.println("Farthest point from set is: " + i + " with distance: " + maxDistanceSoFar); } } } } } } if (bestPointArray == null) { System.out.println("\n\nAttention!! No more points left, all assigned\n\n"); } else { if (m_verbose) System.out.println("Have " + bestPointArray.size() + " points in bestPointArray"); int index = m_RandomNumberGenerator.nextInt(bestPointArray.size()); // select one of the bestPoints at random bestPoint = ((Integer) bestPointArray.get(index)).intValue(); } if (m_verbose) { if (!m_objFunDecreasing) { System.out.println("Selected " + bestPoint + " with similarity: " + minSimilaritySoFar); } else { System.out.println("Selected " + bestPoint + " with distance: " + maxDistanceSoFar); } } return bestPoint; } /** Finds point which is nearest to center. This point should not be * a test point and should not belong to visitedPoints */ int nearestFromPoint(Instance center, HashSet visitedPoints) throws Exception { double maxSimilarity = Double.NEGATIVE_INFINITY; double minDistance = Double.POSITIVE_INFINITY; int bestPoint = -1; for (int i=0; i<m_Instances.numInstances() && i<m_StartingIndexOfTest; i++) { // bestPoint should not be a test point if (!visitedPoints.contains(new Integer(i))) { // bestPoint should not belong to visitedPoints Instance inst = m_Instances.instance(i); if (!m_objFunDecreasing) { double sim = m_metric.similarity(inst, center); if (sim > maxSimilarity) { bestPoint = i; maxSimilarity = sim; } } else { double dist = m_metric.distance(inst, center); if (dist < minDistance) { bestPoint = i; minDistance = dist; if (m_verbose) { System.out.println("Nearest point is: " + bestPoint + " with dist: " + minDistance); } } } } } return bestPoint; } /** This function divides every attribute value in an instance by * the instance weight -- useful to find the mean of a cluster in * Euclidean space * @param inst Instance passed in for normalization (destructive update) */ protected void normalizeByWeight(Instance inst) { double weight = inst.weight(); if (m_verbose) { System.out.println("Before weight normalization: " + inst); } if (inst instanceof SparseInstance) { for (int i=0; i<inst.numValues(); i++) { inst.setValueSparse(i, inst.valueSparse(i)/weight); } } else if (!(inst instanceof SparseInstance)) { for (int i=0; i<inst.numAttributes(); i++) { inst.setValue(i, inst.value(i)/weight); } } if (m_verbose) { System.out.println("After weight normalization: " + inst); } } /** Finds the sum of instance sum with instance inst */ Instance sumWithInstance(Instance sum, Instance inst) throws Exception { Instance newSum; if (sum == null) { if (m_isSparseInstance) { newSum = new SparseInstance(inst); newSum.setDataset(m_Instances); } else { newSum = new Instance(inst); newSum.setDataset(m_Instances); } } else { newSum = sumInstances(sum, inst); } return newSum; } /** Finds sum of 2 instances (handles sparse and non-sparse) */ protected Instance sumInstances(Instance inst1, Instance inst2) throws Exception { int numAttributes = inst1.numAttributes(); if (inst2.numAttributes() != numAttributes) { throw new Exception ("Error!! inst1 and inst2 should have same number of attributes."); } // if (m_verbose) { // System.out.println("Instance 1 is: " + inst1 + ", instance 2 is: " + inst2); // } double weight1 = inst1.weight(), weight2 = inst2.weight(); double [] values = new double[numAttributes]; Instance newInst; for (int i=0; i<numAttributes; i++) { values[i] = 0; } if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) { for (int i=0; i<inst1.numValues(); i++) { int indexOfIndex = inst1.index(i); values[indexOfIndex] = inst1.valueSparse(i); } for (int i=0; i<inst2.numValues(); i++) { int indexOfIndex = inst2.index(i); values[indexOfIndex] += inst2.valueSparse(i); } newInst = new SparseInstance(weight1+weight2, values); newInst.setDataset(m_Instances); } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)){ for (int i=0; i<numAttributes; i++) { values[i] = inst1.value(i) + inst2.value(i); } newInst = new Instance(weight1+weight2, values); newInst.setDataset(m_Instances); } else { throw new Exception ("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse"); } // if (m_verbose) { // System.out.println("Sum instance is: " + newInst); // } return newInst;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -