📄 conditionalclusterer.java
字号:
Collection cl = new LinkedHashSet ();
    cl.add (nodes.get(index));
    clustering.add (cl);
    nodes.remove (index);
    // Greedy loop: each pass moves exactly one remaining node into the
    // clustering, either into an existing cluster or into a new one.
    while (nodes.size() > 0) {
      Object closestNode = null;
      Collection closestCluster = null;
      // the maximum confidence that "closestNode" should join
      // "closestCluster"
      double maxAddToClusterConfidence = this.MIN;
      Object farthestNode = null;
      // the minimum confidence that "farthestNode" should be its own
      // cluster
      double minMakeNewClusterConfidence = this.MAX;
      for (int i=0; i < nodes.size(); i++) {
        double addToClusterConfidence = this.MIN;
        Collection clusterToAppend = null;
        double makeNewClusterConfidence = this.MAX;
        Iterator iter = clustering.iterator ();
        while (iter.hasNext()) {
          Collection cluster = (Collection) iter.next();
          double sim = getSimilarityToCluster (nodes.get(i), cluster, this.classifier);
          // Confidence is measured as the signed distance from "threshold".
          double posConfidence = sim - threshold;
          double negConfidence = -posConfidence;
          if (posConfidence >= 0 && posConfidence > addToClusterConfidence) {
            addToClusterConfidence = posConfidence;
            clusterToAppend = cluster;
            //System.err.println ("AddToClusterConfidence for node " + i + " is " + addToClusterConfidence);
          }
          if (negConfidence > 0 && negConfidence < makeNewClusterConfidence) {
            makeNewClusterConfidence = negConfidence;
          }
        }
        if (addToClusterConfidence > maxAddToClusterConfidence) {
          maxAddToClusterConfidence = addToClusterConfidence;
          closestNode = nodes.get(i);
          closestCluster = clusterToAppend;
        }
        if (makeNewClusterConfidence != this.MAX && makeNewClusterConfidence < minMakeNewClusterConfidence) {
          minMakeNewClusterConfidence = makeNewClusterConfidence;
          farthestNode = nodes.get(i);
        }
      }
      // decide whether to add the closestNode to closestCluster, or
      // fartestNode to a new cluster
      if (closestNode == null && minMakeNewClusterConfidence == this.MAX)
        throw new IllegalStateException ("We found neither a cluster to merge with nor evidence that this should be a separate cluster. Something is wrong.");
      // NOTE(review): if closestNode != null but farthestNode == null
      // (minMakeNewClusterConfidence still equals this.MAX), this condition
      // presumably still holds when MAX exceeds every add-confidence, which
      // would add a null node and then throw below -- confirm against the
      // values of MIN/MAX, which are declared outside this view.
      if (closestNode == null || minMakeNewClusterConfidence > maxAddToClusterConfidence) {
        // make new cluster
        Collection newC = new LinkedHashSet ();
        newC.add (farthestNode);
        System.err.println ("Adding new cluster with confidence " + minMakeNewClusterConfidence);
        clustering.add (newC);
        if (!nodes.remove (farthestNode))
          throw new IllegalArgumentException ("FarthestNode not in nodes list");
      } else {
        if (closestNode == null)
          throw new IllegalArgumentException ("ClosestNode is null!");
        closestCluster.add (closestNode);
        if (!nodes.remove (closestNode))
          throw new IllegalArgumentException ("ClosestNode not in nodes list");
        System.err.println ("Adding node to cluster with confidence " + maxAddToClusterConfidence);
      }
      System.err.println ("Predicting " + clustering.size() + " clusters, " + nodes.size() + " nodes remaining.");
      double cacheHitRatio = (double)cacheHits / cacheAccesses;
      System.err.println ("Cache Hit Ratio: " + cacheHitRatio);
      if (trueClustering != null) {
        evaluateClustering (clustering, trueClustering);
      }
    }
    return clustering;
  }

  /**
   * Returns the similarity of "node" to "cluster," as given by the
   * posterior of "classifier."
   *
   * <p>The score is the value of the "yes" label minus the value of the
   * other label, read from locations 0 and 1 of the labeling (presumably a
   * two-label yes/no classifier -- confirm). When the top-ranked label is
   * "yes" but the pair has a negative edge, {@code this.MIN} is returned
   * instead.
   *
   * <p>Results are memoized in {@code simCache} when it is non-null. The
   * key is built from the hash codes of the node and the cluster, so two
   * different pairs with colliding hash codes would share an entry.
   *
   * @param node the node being scored
   * @param cluster the candidate cluster
   * @param theclassifier classifier used to score the node/cluster pair
   * @return the similarity score described above
   */
  private double getSimilarityToCluster (Object node, Collection cluster, Classifier theclassifier) {
    this.cacheAccesses++;
    String key = String.valueOf (node.hashCode()) + "__" + String.valueOf (cluster.hashCode());
    // simCache == null means caching is disabled (see clusterRandom).
    Double value = this.simCache == null ? null : (Double) this.simCache.get (key);
    if (value == null) {
      NodeClusterPair pair = new NodeClusterPair (node, cluster);
      Instance inst = new Instance (pair, "unknown", null, pair, theclassifier.getInstancePipe());
      Classification classification = theclassifier.classify (inst);
      Labeling labeling = classification.getLabeling ();
      double val = 0.0;
      if (labeling.labelAtLocation(0).toString().equals("yes")) {
        if (hasNegativeEdge ((NodeClusterPair)inst.getSource())) {
          // A negative edge overrides a positive prediction.
          val = this.MIN;
        }
        else
          val = labeling.valueAtLocation(0) - labeling.valueAtLocation(1);
      }
      else
        val = labeling.valueAtLocation(1) - labeling.valueAtLocation(0);
      value = new Double (val);
      if (this.simCache != null)
        this.simCache.put (key, value);
    } else {
      cacheHits++;
      //System.err.println ("Cache size: " + simCache.size() +
      // " Cache hit rate: " + ((double)cacheHits / cacheAccesses));
    }
    return value.doubleValue();
  }

  /**
   * Reports whether the pair's "ClusterContainsAtLeast1NegativeNodes"
   * feature has a value greater than zero, logging to stderr when it does.
   *
   * @param p the node/cluster pair to inspect
   * @return true if the feature value is positive
   */
  private boolean hasNegativeEdge (NodeClusterPair p) {
    double val = p.getFeatureValue ("ClusterContainsAtLeast1NegativeNodes");
    if (val > 0)
      System.err.println ("HAS NEGATIVE EDGE");
    return (val > 0);
  }

  /**
   * Pick a random ordering to cluster nodes, using the learned
   * classifier to make yes/no decisions.
   *
   * @param _nodes nodes to cluster; the list is cloned and not modified
   * @param trueClustering reference clustering used for intermediate
   *        evaluation output, or null to skip evaluation
   * @param r source of randomness for the node ordering
   * @return the predicted clustering, a collection of clusters
   */
  public Collection clusterRandom (ArrayList _nodes, Collection trueClustering, Random r) {
    this.evalCache = new HashMap ();
    ArrayList nodes = (ArrayList)_nodes.clone();
    Collection clustering = new ArrayList ();
    // don't use cache since we never repeat comparisons
    simCache = null;
    while (nodes.size() > 0) {
      int index = r.nextInt (nodes.size());
      clustering = placeNodeInClosestCluster (nodes.get(index), clustering, this.classifier);
      nodes.remove (index);
      System.err.println ("Predicting " + clustering.size() + " clusters and " + nodes.size() + " nodes remaining.");
      if (trueClustering != null) {
        evaluateClustering (clustering, trueClustering);
      }
    }
    return clustering;
  }

  /**
   * Place "node" in the closest cluster in "clustering" that is
   * above "threshold." If the best similarity is not strictly above
   * "threshold" (or there are no clusters yet), place the node in a
   * new cluster.
   *
   * @param node the node to place
   * @param clustering the current clusters; modified in place
   * @param theclassifier classifier used to score node/cluster pairs
   * @return the same "clustering" collection that was passed in
   */
  private Collection placeNodeInClosestCluster (Object node, Collection clustering, Classifier theclassifier) {
    Iterator iter = clustering.iterator();
    Collection closestCluster = null;
    // Similarities at or below this sentinel never select a cluster.
    double closestValue = -9999999.9;
    while (iter.hasNext()) {
      Collection cluster = (Collection)iter.next();
      double val = getSimilarityToCluster (node, cluster, theclassifier);
      if (val > closestValue) {
        closestValue = val;
        closestCluster = cluster;
      }
    }
    if (closestCluster != null && closestValue > threshold) {
      // add to existing cluster
      System.err.println ("Adding node to preexisting cluster with value " + closestValue);
      closestCluster.add (node);
    } else {
      // create separate cluster
      Collection newC = new LinkedHashSet ();
      newC.add (node);
      clustering.add (newC);
    }
    return clustering;
  }

  /**
   * Prints, for each predicted cluster, the fraction of its node pairs
   * that are also together in "truth," followed by the same fraction
   * over all pairs of all predicted clusters.
   *
   * <p>Per-cluster accuracies are memoized in {@code evalCache}, keyed by
   * the cluster collection itself. Clusters served from the cache still
   * contribute their pair counts to the overall figure.
   *
   * @param predicted predicted clustering, a collection of clusters
   * @param truth reference clustering, a collection of clusters
   */
  private void evaluateClustering (Collection predicted, Collection truth) {
    // this is no good
    //CitationUtils.evaluateClustering (predicted, truth, "INTERMEDIATE RESULTS");
    Iterator clusterIter = predicted.iterator();
    int totalTP = 0;
    int totalFP = 0;
    int ci = 0;
    while (clusterIter.hasNext()) {
      Collection c = (Collection) clusterIter.next();
      Double cachedAccuracy = (Double) evalCache.get (c);
      if (cachedAccuracy != null) {
        System.err.println ("Cluster " + ci + " Accuracy: " + cachedAccuracy);
        // The cache stores only tp / (tp + fp). Recover the counts from the
        // number of pairs in the cluster so that cached clusters are not
        // dropped from the overall accuracy.
        int size = c.size();
        int pairs = size * (size - 1) / 2;
        int cachedTP = (int) Math.round (cachedAccuracy.doubleValue() * pairs);
        totalTP += cachedTP;
        totalFP += pairs - cachedTP;
        ci++;
        continue;
      }
      Object[] nodes = c.toArray ();
      if (nodes.length == 1) {
        // A singleton has no pairs, so it contributes nothing to the totals.
        System.err.println ("Cluster " + ci + " has one node\n");
        ci++;
        continue;
      }
      int tp = 0;
      int fp = 0;
      for (int i=0; i < nodes.length; i++) {
        for (int j=i+1; j < nodes.length; j++) {
          if (inSameCluster (nodes[i], nodes[j], truth))
            tp++;
          else {
            fp++;
            System.err.println ("FP:\nN1: " + nodes[i] + "\nN2:\n" + nodes[j]);
          }
        }
      }
      totalTP += tp;
      totalFP += fp;
      double accuracy = (tp + fp == 0) ? 0 : (double)tp / (tp + fp);
      System.err.println ("Cluster " + ci + " Accuracy: " + accuracy);
      evalCache.put (c, new Double (accuracy));
      ci++;
    }
    double accuracy = (totalTP + totalFP == 0) ? 0 : (double)totalTP / (totalTP + totalFP);
    System.err.println ("OVERALL PAIR ACCURACY: " + accuracy);
  }

  /**
   * Reports whether "n1" and "n2" belong to the same cluster of "c".
   *
   * <p>The answer is decided by the first cluster that contains either
   * node: true if it contains both, false otherwise. False is also
   * returned when no cluster contains either node.
   *
   * @param n1 first node
   * @param n2 second node
   * @param c clustering to search, a collection of clusters
   * @return true if a single cluster contains both nodes
   */
  private boolean inSameCluster (Object n1, Object n2, Collection c) {
    Iterator iter = c.iterator ();
    while (iter.hasNext()) {
      Collection cl = (Collection)iter.next();
      boolean n1here = cl.contains (n1);
      boolean n2here = cl.contains (n2);
      if (n1here && n2here)
        return true;
      else if (n1here || n2here)
        return false;
    }
    return false;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -