// CorefCluster.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org. For further
   information, see the file `LICENSE' included with this distribution. */

/** @author Ben Wellner */

package edu.umass.cs.mallet.projects.seg_plus_coref.coreference;

import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.graphs.*;
import salvo.jesus.graph.*;
import salvo.jesus.graph.VertexImpl;
import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.base.classify.*;
import edu.umass.cs.mallet.base.pipe.*;
import edu.umass.cs.mallet.base.pipe.iterator.*;
import edu.umass.cs.mallet.base.util.*;
import java.util.*;
import java.util.Arrays;
import java.lang.*;
import java.io.*;

/*
An object of this class will allow an InstanceList as well as a List of
mentions to be passed to a method that will return a list of lists
representing the partitioning of the mentions into clusters/equiv. classes.
There should be exactly M choose 2 instances in the InstanceList where M is
the size of the List of mentions (assuming a complete graph).
*/
public class CorefCluster {

    // Sentinel edge weight standing in for "no edge" / minus infinity.
    private final double NegativeInfinite = -1000000000;

    // Pairwise "coreferent?" classifier; null until train(...) is called
    // or one is supplied through the constructor.
    MaxEnt meClassifier = null;

    // Minimum edge weight at which greedy agglomeration keeps merging.
    double threshold = 0.0;

    // Diagnostic counters printed by clusterMentions.
    // NOTE(review): not incremented anywhere in this view — presumably updated
    // by constructEdgesUsingTrainedClusterer; confirm against the full file.
    private static int falsePositives = 0;
    private static int falseNegatives = 0;

    /** Creates a clusterer with threshold 0.0 and no classifier. */
    public CorefCluster () {}

    /** Creates a clusterer with the given stopping threshold; the classifier must be trained or set later. */
    public CorefCluster (double threshold) { this.threshold = threshold; }

    /** Creates a clusterer with the given stopping threshold and a pre-trained pairwise classifier. */
    public CorefCluster (double threshold, MaxEnt classifier) { this.threshold = threshold; this.meClassifier = classifier; }

    /** Sets the agglomeration stopping threshold. */
    public void setThreshold (double t) { this.threshold = t; }

    /* Initialize a list of lists where each inner list is a list with a single element.
*/ public void train (InstanceList ilist) { // just to plain MaxEnt training for now MaxEnt me = (MaxEnt)(new MaxEntTrainer().train (ilist, null, null, null, null)); Trial t = new Trial(me, ilist); System.out.println("CorefCluster -> Training F1 on \"yes\" is: " + t.labelF1("yes")); this.meClassifier = me; } public MaxEnt getClassifier() {return meClassifier;} /* performance time method */ public Collection clusterMentions (InstanceList ilist, List mentions) { if (meClassifier != null) { // Change: mhay 1/10/04 // must add vertices explicitly, because the ilist may not contain // every vertex (i.e. singletons from canopies pre-process) WeightedGraph graph = new WeightedGraphImpl(); System.out.println("The graph has " + graph.getVertexSet().size() + " vertices"); System.out.println(graph); HashMap alreadyAddedVertices = new HashMap(); // keep track of addVerticesToGraph(graph, mentions, alreadyAddedVertices); for (int i=0; i<ilist.size(); i++) { constructEdgesUsingTrainedClusterer(graph, ilist.getInstance(i), alreadyAddedVertices, meClassifier); } System.out.println("False positives: " + falsePositives); System.out.println("False negatives: " + falseNegatives); //addVerticesToGraph(graph, mentions, alreadyAddedVertices); // mhay 1/30/04 System.out.println("The graph has " + graph.getVertexSet().size() + " vertices"); return typicalClusterPartition (graph); } else { return null; } } public void addVerticesToGraph(WeightedGraph graph, List mentions, HashMap alreadyAddedVertices) { for (int i=0; i < mentions.size(); i++) { Object o = mentions.get(i); if (alreadyAddedVertices.get(o) == null) { // add only if it hasn't been added VertexImpl v = new VertexImpl(o); alreadyAddedVertices.put(o,v); // mhay 1/30/04 try { graph.add(v); // add the vertex } catch (Exception e) {e.printStackTrace();} } } } /* This version is plain old single-link clustering and doesn't use the Graph infrastructure at all. 
*/ public Collection absoluteCluster (InstanceList ilist, List mentions) { List clustering = new ArrayList(); List mCopy = new ArrayList(); for (int i=0; i < mentions.size(); i++) { mCopy.add(mentions.get(i)); } for (int i=0; i < ilist.size(); i++) { Instance inst = (Instance)ilist.getInstance(i); Classification classification = (Classification)meClassifier.classify(inst); Labeling labeling = classification.getLabeling(); NodePair pair = (NodePair)inst.getSource(); Citation c1 = (Citation)pair.getObject1(); Citation c2 = (Citation)pair.getObject2(); if (labeling.getBestLabel().toString().equals("yes") && labeling.getBestValue() > 0.8) { boolean newCluster = true; for (int j=0; j<clustering.size(); j++) { List cl = (List)clustering.get(j); if (cl.contains(c1)) { cl.add(c2); newCluster = false; break; } else if (cl.contains(c2)) { cl.add(c1); newCluster = false; break; } } if (newCluster) { List nc = new ArrayList(); nc.add(c1); nc.add(c2); clustering.add(nc); } mCopy.remove(c1); mCopy.remove(c2); } } // add in singletons remaining for (int i=0; i < mCopy.size(); i++) { List newone = new ArrayList(); newone.add((Citation)mCopy.get(i)); clustering.add(newone); } return (Collection)clustering; } public Collection typicalClusterPartition (WeightedGraph graph) { /* Iterator i0 = ((Set)graph.getVertexSet()).iterator(); while (i0.hasNext()) { VertexImpl v = (VertexImpl)i0.next(); System.out.println("Map: " + v.getObject() + " -> " + ((Citation)((Node)v.getObject()).getObject()).getBaseString() ); }*/ //System.out.println("Top Graph: " + graph); while (true) { double bestEdgeVal = -100000000; WeightedEdge bestEdge = null; //System.out.println("Top Graph: " + graph); Set edgeSet = graph.getEdgeSet(); Iterator i1 = edgeSet.iterator(); // get highest edge value in this loop while (i1.hasNext()) { WeightedEdge e1 = (WeightedEdge)i1.next(); if (e1.getWeight() > bestEdgeVal) { bestEdgeVal = e1.getWeight(); bestEdge = e1; } } if (bestEdgeVal < threshold) break; else { if (bestEdge 
!= null) { VertexImpl v1 = (VertexImpl)bestEdge.getVertexA(); VertexImpl v2 = (VertexImpl)bestEdge.getVertexB(); /* System.out.println("Best edge val: " + bestEdgeVal); System.out.println("Merging vertices: " + v1.getObject() + " and " + v2.getObject()); */ mergeVertices(graph, v1, v2); } } } System.out.println("Final graph now has " + graph.getVertexSet().size() + " nodes"); return getCollectionOfOriginalObjects ((Collection)graph.getVertexSet()); } /** * Construct a Collection of Collections where the objects in the * collections are the original objects (i.e. the object of the vertices) */ private Collection getCollectionOfOriginalObjects (Collection vertices) { Collection collection = new LinkedHashSet(); Iterator i1 = vertices.iterator(); while (i1.hasNext()) { VertexImpl v = (VertexImpl)i1.next(); Object o = v.getObject(); // underlying object if (o != null) { Collection c1 = new LinkedHashSet(); if (o instanceof Collection) { Iterator i2 = ((Collection)o).iterator(); while (i2.hasNext()) { c1.add(i2.next()); // add the underlying object } } else { // in this case, the vertex is a singleton, but we wrap it in a // Collection so that we always have a collection of collections c1.add(o); } collection.add(c1); // add the cluster to the collection of clusters } } return collection; } /** * The graph may not be fully connected. If an edge does not exist, * that corresponds to an edge weight of negative inifinite. So, * before merging two vertices, one must add in any negative weights. * For example, if v1 has an edge to v3 but v2 does not, then the * merged v1-v2 should have a negative weighted edge to v3. * @param g * @param v1
// NOTE(review): the remainder of this file (the rest of the mergeVertices
// javadoc, the mergeVertices method body, and the closing brace of
// CorefCluster) was replaced by code-viewer UI residue when this copy was
// scraped; recover the missing tail from the original MALLET source tree.