// CorefCluster.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org. For further
   information, see the file `LICENSE' included with this distribution. */

/** @author Ben Wellner */

package edu.umass.cs.mallet.projects.seg_plus_coref.coreference;

import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.graphs.*;
import salvo.jesus.graph.*;
import salvo.jesus.graph.VertexImpl;
import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.base.classify.*;
import edu.umass.cs.mallet.base.pipe.*;
import edu.umass.cs.mallet.base.pipe.iterator.*;
import edu.umass.cs.mallet.base.util.*;
import java.util.*;
import java.util.Arrays;
import java.lang.*;
import java.io.*;

/*
An object of this class will allow an InstanceList as well as a List of
mentions to be passed to a method that will return a list of lists
representing the partitioning of the mentions into clusters/equiv. classes.
There should be exactly M choose 2 instances in the InstanceList where M is
the size of the List of mentions (assuming a complete graph).
*/
public class CorefCluster {

    // Sentinel edge weight standing in for "no edge" / minus infinity.
    private final double NegativeInfinite = -1000000000;

    // Pairwise "coreferent?" classifier; null until train(...) is called
    // or one is supplied through the constructor.
    MaxEnt meClassifier = null;

    // Minimum edge weight at which greedy agglomeration keeps merging.
    double threshold = 0.0;

    // Diagnostic counters printed by clusterMentions.
    // NOTE(review): not incremented anywhere in this view — presumably updated
    // by constructEdgesUsingTrainedClusterer; confirm against the full file.
    private static int falsePositives = 0;
    private static int falseNegatives = 0;

    /** Creates a clusterer with threshold 0.0 and no classifier. */
    public CorefCluster () {}

    /** Creates a clusterer with the given stopping threshold; the classifier must be trained or set later. */
    public CorefCluster (double threshold) { this.threshold = threshold; }

    /** Creates a clusterer with the given stopping threshold and a pre-trained pairwise classifier. */
    public CorefCluster (double threshold, MaxEnt classifier) { this.threshold = threshold; this.meClassifier = classifier; }

    /** Sets the agglomeration stopping threshold. */
    public void setThreshold (double t) { this.threshold = t; }

    /* Initialize a list of lists where each inner list is a list with a single element.
*/ public void train (InstanceList ilist) { // just to plain MaxEnt training for now MaxEnt me = (MaxEnt)(new MaxEntTrainer().train (ilist, null, null, null, null)); Trial t = new Trial(me, ilist); System.out.println("CorefCluster -> Training F1 on \"yes\" is: " + t.labelF1("yes")); this.meClassifier = me; } public MaxEnt getClassifier() {return meClassifier;} /* performance time method */ public Collection clusterMentions (InstanceList ilist, List mentions) { if (meClassifier != null) { // Change: mhay 1/10/04 // must add vertices explicitly, because the ilist may not contain // every vertex (i.e. singletons from canopies pre-process) WeightedGraph graph = new WeightedGraphImpl(); System.out.println("The graph has " + graph.getVertexSet().size() + " vertices"); System.out.println(graph); HashMap alreadyAddedVertices = new HashMap(); // keep track of addVerticesToGraph(graph, mentions, alreadyAddedVertices); for (int i=0; i<ilist.size(); i++) { constructEdgesUsingTrainedClusterer(graph, ilist.getInstance(i), alreadyAddedVertices, meClassifier); } System.out.println("False positives: " + falsePositives); System.out.println("False negatives: " + falseNegatives); //addVerticesToGraph(graph, mentions, alreadyAddedVertices); // mhay 1/30/04 System.out.println("The graph has " + graph.getVertexSet().size() + " vertices"); return typicalClusterPartition (graph); } else { return null; } } public void addVerticesToGraph(WeightedGraph graph, List mentions, HashMap alreadyAddedVertices) { for (int i=0; i < mentions.size(); i++) { Object o = mentions.get(i); if (alreadyAddedVertices.get(o) == null) { // add only if it hasn't been added VertexImpl v = new VertexImpl(o); alreadyAddedVertices.put(o,v); // mhay 1/30/04 try { graph.add(v); // add the vertex } catch (Exception e) {e.printStackTrace();} } } } /* This version is plain old single-link clustering and doesn't use the Graph infrastructure at all. 
*/ public Collection absoluteCluster (InstanceList ilist, List mentions) { List clustering = new ArrayList(); List mCopy = new ArrayList(); for (int i=0; i < mentions.size(); i++) { mCopy.add(mentions.get(i)); } for (int i=0; i < ilist.size(); i++) { Instance inst = (Instance)ilist.getInstance(i); Classification classification = (Classification)meClassifier.classify(inst); Labeling labeling = classification.getLabeling(); NodePair pair = (NodePair)inst.getSource(); Citation c1 = (Citation)pair.getObject1(); Citation c2 = (Citation)pair.getObject2(); if (labeling.getBestLabel().toString().equals("yes") && labeling.getBestValue() > 0.8) { boolean newCluster = true; for (int j=0; j<clustering.size(); j++) { List cl = (List)clustering.get(j); if (cl.contains(c1)) { cl.add(c2); newCluster = false; break; } else if (cl.contains(c2)) { cl.add(c1); newCluster = false; break; } } if (newCluster) { List nc = new ArrayList(); nc.add(c1); nc.add(c2); clustering.add(nc); } mCopy.remove(c1); mCopy.remove(c2); } } // add in singletons remaining for (int i=0; i < mCopy.size(); i++) { List newone = new ArrayList(); newone.add((Citation)mCopy.get(i)); clustering.add(newone); } return (Collection)clustering; } public Collection typicalClusterPartition (WeightedGraph graph) { /* Iterator i0 = ((Set)graph.getVertexSet()).iterator(); while (i0.hasNext()) { VertexImpl v = (VertexImpl)i0.next(); System.out.println("Map: " + v.getObject() + " -> " + ((Citation)((Node)v.getObject()).getObject()).getBaseString() ); }*/ //System.out.println("Top Graph: " + graph); while (true) { double bestEdgeVal = -100000000; WeightedEdge bestEdge = null; //System.out.println("Top Graph: " + graph); Set edgeSet = graph.getEdgeSet(); Iterator i1 = edgeSet.iterator(); // get highest edge value in this loop while (i1.hasNext()) { WeightedEdge e1 = (WeightedEdge)i1.next(); if (e1.getWeight() > bestEdgeVal) { bestEdgeVal = e1.getWeight(); bestEdge = e1; } } if (bestEdgeVal < threshold) break; else { if (bestEdge 
!= null) { VertexImpl v1 = (VertexImpl)bestEdge.getVertexA(); VertexImpl v2 = (VertexImpl)bestEdge.getVertexB(); /* System.out.println("Best edge val: " + bestEdgeVal); System.out.println("Merging vertices: " + v1.getObject() + " and " + v2.getObject()); */ mergeVertices(graph, v1, v2); } } } System.out.println("Final graph now has " + graph.getVertexSet().size() + " nodes"); return getCollectionOfOriginalObjects ((Collection)graph.getVertexSet()); } /** * Construct a Collection of Collections where the objects in the * collections are the original objects (i.e. the object of the vertices) */ private Collection getCollectionOfOriginalObjects (Collection vertices) { Collection collection = new LinkedHashSet(); Iterator i1 = vertices.iterator(); while (i1.hasNext()) { VertexImpl v = (VertexImpl)i1.next(); Object o = v.getObject(); // underlying object if (o != null) { Collection c1 = new LinkedHashSet(); if (o instanceof Collection) { Iterator i2 = ((Collection)o).iterator(); while (i2.hasNext()) { c1.add(i2.next()); // add the underlying object } } else { // in this case, the vertex is a singleton, but we wrap it in a // Collection so that we always have a collection of collections c1.add(o); } collection.add(c1); // add the cluster to the collection of clusters } } return collection; } /** * The graph may not be fully connected. If an edge does not exist, * that corresponds to an edge weight of negative inifinite. So, * before merging two vertices, one must add in any negative weights. * For example, if v1 has an edge to v3 but v2 does not, then the * merged v1-v2 should have a negative weighted edge to v3. * @param g * @param v1
// NOTE(review): the remainder of this file (the rest of the mergeVertices
// javadoc, the mergeVertices method body, and the closing brace of
// CorefCluster) was replaced by code-viewer UI residue when this copy was
// scraped; recover the missing tail from the original MALLET source tree.