corefclusteradv.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 1,911 行 · 第 1/5 页

JAVA
1,911
字号
	    WeightedEdge ne = new WeightedEdgeImpl(n1, n2, e1.getWeight());	    try {				copy.addEdge(ne);	    } catch (Exception e) {e.printStackTrace();}					}		return copy;	}		public void addVerticesToGraph(WeightedGraph graph,																 List mentions, HashMap alreadyAddedVertices) {		for (int i=0; i < mentions.size(); i++) {	    Object o = mentions.get(i);	    if (alreadyAddedVertices.get(o) == null) { // add only if it hasn't been				// added				List l = new ArrayList();				l.add(o);				VertexImpl v = new VertexImpl(l);				try {					graph.add(v); // add the vertex				} catch (Exception e) {e.printStackTrace();}	    }		}	}	public WeightedEdge chooseEdge3 (List edges, double minVal, double total, java.util.Random rand) {		if (edges.size() > 0) {	    return (WeightedEdge)edges.get(0);		} else return null;	}	// simpler more heuristic-based approach	public WeightedEdge chooseEdge2 (List edges, double minVal, double total, java.util.Random rand) {		//return (WeightedEdge)edges.first();		if (edges.size() < 1)	    return null;				int x = rand.nextInt(10);		if (x > edges.size())	    x = edges.size();		WeightedEdge e = null;		Iterator i1 = edges.iterator();		int i=0;		while (i1.hasNext() && i < x) {	    e = (WeightedEdge)i1.next();	    i++;		}		if (e != null)	    return e;		else 	    return (WeightedEdge)edges.get(0);	}	/*		Algorithm: Sort edges by magnitude.  Scale so they're all		positive. Choose a random number between 0 and the sum of all the		magnitudes.		Select an edge in this fashion.		Merge the two vertices and 	*/	private WeightedEdge chooseEdge (List edges, double minVal, double total, java.util.Random rand) {				double x = rand.nextDouble() * total;  // 0 < x < total		double cur = 0.0;		Iterator i1 = edges.iterator();		while (i1.hasNext()) {	    WeightedEdge e = (WeightedEdge)i1.next();	    cur += (e.getWeight()-minVal); // SUBTRACT minVal	    if (cur > x) {				return e;	    }		}		// this shouldn't really happend unless there is some kind if numerical		// issues - default to the first edge		return (WeightedEdge)edges.get(0);	}	private PseudoEdge choosePseudoEdge (List edges, java.util.Random rand) {		if (edges.size() == 0)	    return null;		double factor = Math.ceil(Math.log(edges.size()))*20;		int x = rand.nextInt(10);		if (x > edges.size())	    x = edges.size();		PseudoEdge e = null;		Iterator i1 = edges.iterator();		int i=0;		while (i1.hasNext() && i < x) {	    e = (PseudoEdge)i1.next();	    i++;		}		if (e != null)	    return e;		else 	    return (PseudoEdge)edges.get(0);	}	public double evaluatePartitioningExternal (InstanceList ilist, List mentions, Collection collection) {		return evaluatePartitioningExternal (ilist, mentions, collection, -1);			}	public double evaluatePartitioningExternal (InstanceList ilist, List mentions, Collection collection,																							int nBestList) {		if (nBestList > 0 ) {	    return evaluatePartitioning (collection, wgraph);		}		else	    return evaluatePartitioning (collection, wgraph);	}	private double evaluatePartitioningAgree (Collection clustering, WeightedGraph graph) {		Set edges = (Set)graph.getEdgeSet();		Iterator i1 = edges.iterator();		double cost = 0.0;		while (i1.hasNext()) {	    WeightedEdge e = (WeightedEdge)i1.next();	    VertexImpl v1 = (VertexImpl)e.getVertexA();	    VertexImpl v2 = (VertexImpl)e.getVertexB();	    	    if (inSameCluster (clustering, ((List)v1.getObject()).get(0), ((List)v2.getObject()).get(0))) {				cost += e.getWeight();	    }		}		return cost;	}	private double evaluatePartitioningDisAgree (Collection clustering, WeightedGraph graph) {		Set edges = (Set)graph.getEdgeSet();		Iterator i1 = edges.iterator();		double cost = 0.0;		while (i1.hasNext()) {	    WeightedEdge e = (WeightedEdge)i1.next();	    VertexImpl v1 = (VertexImpl)e.getVertexA();	    VertexImpl v2 = (VertexImpl)e.getVertexB();	    if (!inSameCluster (clustering, ((List)v1.getObject()).get(0), 													((List)v2.getObject()).get(0)))				cost -= e.getWeight();		}		return cost;	}		public double evaluatePartitioning (Collection clustering, WeightedGraph graph) {		Set edges = (Set)graph.getEdgeSet();		Iterator i1 = edges.iterator();		double cost = 0.0;		Citation c1,c2;		Object o1,o2;		if (clustering == null) {	    System.out.println(" YIKES: clustering is null");	    return 0.0;		}				while (i1.hasNext()) {	    WeightedEdge e = (WeightedEdge)i1.next();	    VertexImpl v1 = (VertexImpl)e.getVertexA();	    VertexImpl v2 = (VertexImpl)e.getVertexB();	    o1 = v1.getObject();	    o2 = v2.getObject();	    if ((o1 instanceof List) && ((List)o1).size() == 1)				c1 = (Citation)((List)o1).get(0);	    else break;	    if ((o2 instanceof List) && ((List)o2).size() == 1)				c2 = (Citation)((List)o2).get(0);	    else break;	    if (inSameCluster (clustering, c1, c2)) {				/*				System.out.println("SAME: " + c1.getIndex() + " and " +									c2.getIndex() + ": " + e.getWeight());*/				cost += e.getWeight();	    }	    else {				/*				System.out.println("DIFFERENT: " + c1.getIndex() + " and " +									c2.getIndex() + ": " +									(-e.getWeight())); */				cost -= e.getWeight();	    }		}		return cost;	}	public boolean inSameCluster (Collection clustering, Object o1, Object o2) {		Iterator i1 = clustering.iterator();		while (i1.hasNext()) {	    Collection c = (Collection)i1.next();	    if (c.contains(o1))				return (c.contains(o2)) ? true : false;	    if (c.contains(o2))				return (c.contains(o1)) ? true : false;						}		return false;	}	public class PseudoEdge {		double weight;		PseudoVertex v1;		PseudoVertex v2;		public PseudoEdge (PseudoVertex v1, PseudoVertex v2, double weight) {	    this.v1 = v1;	    this.v2 = v2;	    this.weight = weight;		}		public double getWeight () {	    return weight;		}		public PseudoVertex getV1 () {	    return v1;		}		public PseudoVertex getV2 () {	    return v2;		}	}	public List createPseudoEdges (InstanceList instances, Map map) {		List al = (List)new ArrayList();		for (Iterator i1 = instances.iterator(); i1.hasNext();) {	    Instance inst = (Instance)i1.next();	    Object o1 = ((NodePair)inst.getSource()).getObject1();	    Object o2 = ((NodePair)inst.getSource()).getObject2();	    PseudoVertex po1 = (PseudoVertex)map.get(o1);	    PseudoVertex po2 = (PseudoVertex)map.get(o2);	    //			System.out.println("Creating edge out of " + po1 + " and " +	    //			po2);	    if (useNBestInference)				al.add (new PseudoEdge(po1, po2, computeScore_NBest(meClassifier, inst)));	    else				al.add (new PseudoEdge(po1, po2, computeScore(meClassifier, inst)));		}		return al;	}	// this is similar to pseudo edge	// the graph is implicit and this has structures to optimize	// the agglomerative clustering AND maintain the objective	// function score as we go	public class PseudoVertex  {		Set cluster; // let this be a set for faster duplicate detection		Object obj;		HashMap map;		double treeVal; // the current tree value the cluster to which this vertex belongs				public PseudoVertex (InstanceList instances, Object mention) {	    cluster = new LinkedHashSet(); // list of other vertices in	    this.obj = mention;	    this.map = new HashMap();	    initializeMap (instances, mention);	    cluster.add(this);		}		public double lookupEdgeWeight (PseudoVertex v2) {	    Double d = (Double)map.get(v2.getObject());	    if (d == null) {				return 0.0;	    }	    return (double)d.doubleValue();		}		public Set getCluster () {	    return cluster;		}		public Map getMap () {	    return map;		}		public Object getObject() {	    return obj;		}				private void initializeMap (InstanceList l1, Object o1) {	    for (Iterator i1 = l1.iterator(); i1.hasNext();) {				Instance inst = (Instance)i1.next();				NodePair p1 = (NodePair)inst.getSource();				if (p1.getObject1() == o1)					map.put(p1.getObject2(), new Double(computeScore(meClassifier, inst)));				else if (p1.getObject2() == o1)					map.put(p1.getObject1(), new Double(computeScore(meClassifier, inst)));	    }		}	}	public Collection createPseudoVertices (InstanceList instances, List mentions, HashMap map) {		Collection vs = new ArrayList();		for (Iterator i1 = mentions.iterator(); i1.hasNext();) {	    Object o1 = i1.next();	    PseudoVertex pv = new PseudoVertex (instances, o1);	    vs.add (pv);	    map.put (o1, pv);		}		return vs;	}	private double computeInitialObjFnVal (Collection edges) {		double val = 0.0;		for (Iterator i1 = edges.iterator(); i1.hasNext(); ) {	    val -= ((PseudoEdge)i1.next()).getWeight();		}		return val;	}	public double updateScore (double curScore, double [] treeScore, PseudoVertex v1, PseudoVertex v2,														 Set s1, Set s2, boolean over_ride) {		double origScore = curScore;		double nScore = 0.0;		double newScore = 0.0;				for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {	    PseudoVertex v11 = (PseudoVertex)i1.next();	    for (Iterator i2 = v2.getCluster().iterator(); i2.hasNext(); ) {				PseudoVertex v22 = (PseudoVertex)i2.next();				nScore += (2.0 * v11.lookupEdgeWeight(v22));	    }		}		newScore = nScore + curScore;		/*			This section will update the tree model score efficiently.		*/		double updatedVal = 0.0;				if (treeModel != null) {	    Collection clusterpair = (Collection)new ArrayList();	    Collection c1 = (Collection)new ArrayList();	    Collection c2 = (Collection)new ArrayList();	    Collection cBoth = (Collection)new ArrayList();	    for (Iterator ii = s1.iterator(); ii.hasNext(); ) {				PseudoVertex ppv = (PseudoVertex)ii.next();				c1.add((Citation)ppv.getObject());				cBoth.add((Citation)ppv.getObject());	    }	    for (Iterator ii = s2.iterator(); ii.hasNext(); ) {				PseudoVertex ppv = (PseudoVertex)ii.next();				c2.add((Citation)ppv.getObject());					cBoth.add((Citation)ppv.getObject());			    }	    clusterpair.add(c1);	    clusterpair.add(c2);	    //System.out.println("--------------");	    //System.out.println("Pair: ");	    double pairVal = treeModel.computeTreeObjFn(clusterpair, false);	    Collection clusterWrap = (Collection)new ArrayList();	    clusterWrap.add(cBoth);	    //System.out.println("New group: ");	    double newVal = treeModel.computeTreeObjFn(clusterWrap, false);	    //System.out.println("pairVal: " + pairVal + "  newVal" + newVal);	    //System.out.println("--------------");	    updatedVal = (treeScore[0] + (newVal - pairVal));		}				//now commit to the results if the newScore is higher		if ((newScore >= origScore) || over_ride) {	    // update tree score, as we're committing to this update	    treeScore[0] = updatedVal;				    for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {				PseudoVertex v11 = (PseudoVertex)i1.next();				Set s11 = v11.getCluster();				s11.addAll(s2);				s11.addAll(s1);	    }	    for (Iterator i2 = s2.iterator(); i2.hasNext(); ) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?