📄 conditionalclusterer.java

📁 mallet是自然语言处理、机器学习领域的一个开源项目。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
		Collection cl = new LinkedHashSet ();		cl.add (nodes.get(index));		clustering.add (cl);		nodes.remove (index);		while (nodes.size() > 0) {			Object closestNode = null;			Collection closestCluster = null;			// the maximum confidence that "closestNode" should join			// "closestCluster"			double maxAddToClusterConfidence = this.MIN;			Object farthestNode = null;			// the minimum confidence that "farthestNode" should be its own			// cluster			double minMakeNewClusterConfidence = this.MAX;						for (int i=0; i < nodes.size(); i++) {				double addToClusterConfidence = this.MIN;				Collection clusterToAppend = null;				double makeNewClusterConfidence = this.MAX;				Iterator iter = clustering.iterator ();				while (iter.hasNext()) {					Collection cluster = (Collection) iter.next();					double sim = getSimilarityToCluster (nodes.get(i), cluster, this.classifier);					double posConfidence = sim - threshold;					double negConfidence = -posConfidence;					if (posConfidence >= 0 && posConfidence > addToClusterConfidence) {						addToClusterConfidence = posConfidence;						clusterToAppend = cluster;						//System.err.println ("AddToClusterConfidence for node " + i + " is " + addToClusterConfidence);					}					if (negConfidence > 0 && negConfidence < makeNewClusterConfidence) {						makeNewClusterConfidence = negConfidence;					}				}				if (addToClusterConfidence > maxAddToClusterConfidence) {					maxAddToClusterConfidence = addToClusterConfidence;					closestNode = nodes.get(i);					closestCluster = clusterToAppend;				}				if (makeNewClusterConfidence != this.MAX && makeNewClusterConfidence < minMakeNewClusterConfidence) {					minMakeNewClusterConfidence = makeNewClusterConfidence;					farthestNode = nodes.get(i);				}							}						// decide whether to add the closestNode to closestCluster, or			// fartestNode to a new cluster			if (closestNode == null && minMakeNewClusterConfidence == this.MAX)				throw new IllegalStateException ("We found neither a cluster to merge with nor evidence that this should be a separate cluster. Something is wrong.");			if (closestNode == null || minMakeNewClusterConfidence > maxAddToClusterConfidence) {				// make new cluster				Collection newC = new LinkedHashSet ();				newC.add (farthestNode);				System.err.println ("Adding new cluster with confidence " + minMakeNewClusterConfidence);				clustering.add (newC);				if (!nodes.remove (farthestNode))					throw new IllegalArgumentException ("FarthestNode not in nodes list");			}			else {				if (closestNode == null)					throw new IllegalArgumentException ("ClosestNode is null!");				closestCluster.add (closestNode);				if (!nodes.remove (closestNode))					throw new IllegalArgumentException ("ClosestNode not in nodes list");				System.err.println ("Adding node to cluster with confidence " + maxAddToClusterConfidence);			}			System.err.println ("Predicting " + clustering.size() + " clusters, " +													nodes.size() + " nodes remaining.");			double cacheHitRatio = (double)cacheHits / cacheAccesses;			System.err.println ("Cache Hit Ratio: " + cacheHitRatio);			if (trueClustering != null) {				evaluateClustering (clustering, trueClustering);			}		}		return clustering;	}		/** Returns the similarity of "node" to "cluster," as given by the	 * posterior of "classifier."*/	private double getSimilarityToCluster (Object node, Collection cluster, Classifier theclassifier) {	  this.cacheAccesses++;		String key = String.valueOf (node.hashCode()) + "__" + String.valueOf (cluster.hashCode());		Double value = this.simCache == null ? null : (Double) this.simCache.get (key);		if (value == null) {			NodeClusterPair pair = new NodeClusterPair (node, cluster);			Instance inst = new Instance (pair, "unknown", null, pair, theclassifier.getInstancePipe());			Classification classification = theclassifier.classify (inst);			Labeling labeling = classification.getLabeling ();			double val = 0.0;			if (labeling.labelAtLocation(0).toString().equals("yes")) {				if (hasNegativeEdge ((NodeClusterPair)inst.getSource())) {					val = this.MIN;				}				else 					val = labeling.valueAtLocation(0) - labeling.valueAtLocation(1);			}			else				val = labeling.valueAtLocation(1) - labeling.valueAtLocation(0);			value = new Double (val);			if (this.simCache != null)				this.simCache.put (key, value);		}		else {			cacheHits++;			//System.err.println ("Cache size: " + simCache.size() +			//										" Cache hit rate: " + ((double)cacheHits / cacheAccesses));		}				return value.doubleValue();	}	private boolean hasNegativeEdge (NodeClusterPair p) {		double val = p.getFeatureValue ("ClusterContainsAtLeast1NegativeNodes");		if (val > 0)			System.err.println ("HAS NEGATIVE EDGE");		return (val > 0);	}		/** Pick a random ordering to cluster nodes, using the learned	 * classifier to make yes/no decisions. */	public Collection clusterRandom (ArrayList _nodes, Collection trueClustering, Random r) {		this.evalCache = new HashMap ();		ArrayList nodes = (ArrayList)_nodes.clone();		Collection clustering = new ArrayList ();		// don't use cache since we never repeat comparisons		simCache = null;		while (nodes.size() > 0) {			int index = r.nextInt (nodes.size());			clustering = placeNodeInClosestCluster (nodes.get(index), clustering, this.classifier);			nodes.remove (index);			System.err.println ("Predicting " + clustering.size() + " clusters and "													+ nodes.size() + " nodes remaining.");			if (trueClustering != null) {				evaluateClustering (clustering, trueClustering);			}					}		return clustering;	}	/** Place "node" in the closest cluster in "clustering" that is	 * above "threshold." If posterior is below "threshold," place in	 * new cluster. */	private Collection placeNodeInClosestCluster (Object node, Collection clustering, Classifier theclassifier) {		Iterator iter = clustering.iterator();		Collection closestCluster = null;		double closestValue = -9999999.9;				while (iter.hasNext()) {			Collection cluster = (Collection)iter.next();			double val = getSimilarityToCluster (node, cluster, theclassifier);			if (val > closestValue) {				closestValue = val;				closestCluster = cluster;			}		}		if (closestCluster != null && closestValue > threshold) { // add to existing cluster			System.err.println ("Adding node to preexisting cluster with value " + closestValue);			closestCluster.add (node);		}		else { // create separate cluster			Collection newC = new LinkedHashSet ();			newC.add (node);			clustering.add (newC);		}		return clustering;	}	private void evaluateClustering (Collection predicted, Collection truth) {		// this is no good		//CitationUtils.evaluateClustering (predicted, truth, "INTERMEDIATE RESULTS");		Iterator clusterIter = predicted.iterator();		int totalTP = 0;		int totalFP = 0;		int ci = 0;		while (clusterIter.hasNext()) {			Collection c = (Collection) clusterIter.next();			Double cachedAccuracy = (Double) evalCache.get (c);			if (cachedAccuracy != null) {				System.err.println ("Cluster " + ci + " Accuracy: " + cachedAccuracy);				ci++;				continue;			}			Object[] nodes = c.toArray ();			if (nodes.length == 1) {				System.err.println ("Cluster " + ci + " has one node\n");				ci++;				continue;			}							int tp = 0;			int fp = 0;			for (int i=0; i < nodes.length; i++) {				for (int j=i+1; j < nodes.length; j++) {					if (inSameCluster (nodes[i], nodes[j], truth)) 						tp++;					else {						fp++;						System.err.println ("FP:\nN1: " + nodes[i] + "\nN2:\n" + nodes[j]);					}				}			}			totalTP += tp;			totalFP += fp;			double accuracy = (tp + fp == 0) ? 0 : (double)tp / (tp + fp);			System.err.println ("Cluster " + ci + " Accuracy: " + accuracy);			evalCache.put (c, new Double (accuracy));			ci++;		}		double accuracy = (totalTP + totalFP == 0) ? 0 : (double)totalTP / (totalTP + totalFP);		System.err.println ("OVERALL PAIR ACCURACY: " + accuracy);	}	private boolean inSameCluster (Object n1, Object n2, Collection c) {		Iterator iter = c.iterator ();		while (iter.hasNext()) {			Collection cl = (Collection)iter.next();			boolean n1here = cl.contains (n1);			boolean n2here = cl.contains (n2);			if (n1here && n2here)				return true;			else if (n1here || n2here)				return false;					}		return false;	}}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -