📄 corefclusteradv.java
  // tail of the preceding method, which delegates with nBestList = -1
    return evaluatePartitioningExternal (ilist, mentions, collection, -1);
  }

  public double evaluatePartitioningExternal (InstanceList ilist, List mentions,
                                              Collection collection, int nBestList) {
    if (nBestList > 0) {
      return evaluatePartitioning (collection, wgraph);
    } else
      return evaluatePartitioning (collection, wgraph);
  }

  // sum of edge weights whose endpoints the clustering places together
  private double evaluatePartitioningAgree (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 = edges.iterator();
    double cost = 0.0;
    while (i1.hasNext()) {
      WeightedEdge e = (WeightedEdge)i1.next();
      VertexImpl v1 = (VertexImpl)e.getVertexA();
      VertexImpl v2 = (VertexImpl)e.getVertexB();
      if (inSameCluster (clustering, ((List)v1.getObject()).get(0),
                         ((List)v2.getObject()).get(0))) {
        cost += e.getWeight();
      }
    }
    return cost;
  }

  // negated sum of edge weights whose endpoints the clustering separates
  private double evaluatePartitioningDisAgree (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 = edges.iterator();
    double cost = 0.0;
    while (i1.hasNext()) {
      WeightedEdge e = (WeightedEdge)i1.next();
      VertexImpl v1 = (VertexImpl)e.getVertexA();
      VertexImpl v2 = (VertexImpl)e.getVertexB();
      if (!inSameCluster (clustering, ((List)v1.getObject()).get(0),
                          ((List)v2.getObject()).get(0)))
        cost -= e.getWeight();
    }
    return cost;
  }

  // full objective: +weight for edges kept inside a cluster, -weight for edges that are cut
  private double evaluatePartitioning (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 = edges.iterator();
    double cost = 0.0;
    Citation c1, c2;
    Object o1, o2;
    if (clustering == null) {
      System.out.println(" YIKES: clustering is null");
      return 0.0;
    }
    while (i1.hasNext()) {
      WeightedEdge e = (WeightedEdge)i1.next();
      VertexImpl v1 = (VertexImpl)e.getVertexA();
      VertexImpl v2 = (VertexImpl)e.getVertexB();
      o1 = v1.getObject();
      o2 = v2.getObject();
      if ((o1 instanceof List) && ((List)o1).size() == 1)
        c1 = (Citation)((List)o1).get(0);
      else break;
      if ((o2 instanceof List) && ((List)o2).size() == 1)
        c2 = (Citation)((List)o2).get(0);
      else break;
      if (inSameCluster (clustering, c1, c2)) {
        /* System.out.println("SAME: " + c1.getIndex() + " and " + c2.getIndex() + ": " + e.getWeight()); */
        cost += e.getWeight();
      } else {
        /* System.out.println("DIFFERENT: " + c1.getIndex() + " and " + c2.getIndex() + ": " + (-e.getWeight())); */
        cost -= e.getWeight();
      }
    }
    return cost;
  }

  // true iff o1 and o2 appear in the same cluster of the given clustering
  public boolean inSameCluster (Collection clustering, Object o1, Object o2) {
    Iterator i1 = clustering.iterator();
    while (i1.hasNext()) {
      Collection c = (Collection)i1.next();
      if (c.contains(o1)) return c.contains(o2);
      if (c.contains(o2)) return c.contains(o1);
    }
    return false;
  }
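  // A minimal sketch, not part of the original file (the helper name is made up),
  // of the sign convention used by evaluatePartitioning above: an edge between two
  // citations contributes +weight when the proposed clustering puts them in the
  // same cluster and -weight when it separates them.
  private double exampleEdgeContribution (Collection clustering, Object cit1, Object cit2, double weight) {
    return inSameCluster (clustering, cit1, cit2) ? weight : -weight;
  }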
  public class PseudoEdge {
    double weight;
    PseudoVertex v1;
    PseudoVertex v2;

    public PseudoEdge (PseudoVertex v1, PseudoVertex v2, double weight) {
      this.v1 = v1;
      this.v2 = v2;
      this.weight = weight;
    }
    public double getWeight () { return weight; }
    public PseudoVertex getV1 () { return v1; }
    public PseudoVertex getV2 () { return v2; }
  }

  public List createPseudoEdges (InstanceList instances, Map map) {
    List al = (List)new ArrayList();
    for (Iterator i1 = instances.iterator(); i1.hasNext();) {
      Instance inst = (Instance)i1.next();
      Object o1 = ((NodePair)inst.getSource()).getObject1();
      Object o2 = ((NodePair)inst.getSource()).getObject2();
      PseudoVertex po1 = (PseudoVertex)map.get(o1);
      PseudoVertex po2 = (PseudoVertex)map.get(o2);
      // System.out.println("Creating edge out of " + po1 + " and " + po2);
      if (useNBestInference)
        al.add (new PseudoEdge(po1, po2, computeScore_NBest(meClassifier, inst)));
      else
        al.add (new PseudoEdge(po1, po2, computeScore(meClassifier, inst)));
    }
    return al;
  }

  // This is similar to PseudoEdge: the graph is implicit, and this has
  // structures to optimize the agglomerative clustering AND maintain the
  // objective function score as we go.
  public class PseudoVertex {
    Set cluster;     // let this be a set for faster duplicate detection
    Object obj;
    HashMap map;
    double treeVal;  // the current tree value of the cluster to which this vertex belongs

    public PseudoVertex (InstanceList instances, Object mention) {
      cluster = new LinkedHashSet(); // the vertices in this vertex's cluster
      this.obj = mention;
      this.map = new HashMap();
      initializeMap (instances, mention);
      cluster.add(this);
    }

    public double lookupEdgeWeight (PseudoVertex v2) {
      Double d = (Double)map.get(v2.getObject());
      if (d == null) {
        return 0.0;
      }
      return d.doubleValue();
    }

    public Set getCluster () { return cluster; }
    public Map getMap () { return map; }
    public Object getObject () { return obj; }

    private void initializeMap (InstanceList l1, Object o1) {
      for (Iterator i1 = l1.iterator(); i1.hasNext();) {
        Instance inst = (Instance)i1.next();
        NodePair p1 = (NodePair)inst.getSource();
        if (p1.getObject1() == o1)
          map.put(p1.getObject2(), new Double(computeScore(meClassifier, inst)));
        else if (p1.getObject2() == o1)
          map.put(p1.getObject1(), new Double(computeScore(meClassifier, inst)));
      }
    }
  }

  public Collection createPseudoVertices (InstanceList instances, List mentions, HashMap map) {
    Collection vs = new ArrayList();
    for (Iterator i1 = mentions.iterator(); i1.hasNext();) {
      Object o1 = i1.next();
      PseudoVertex pv = new PseudoVertex (instances, o1);
      vs.add (pv);
      map.put (o1, pv);
    }
    return vs;
  }

  private double computeInitialObjFnVal (Collection edges) {
    double val = 0.0;
    for (Iterator i1 = edges.iterator(); i1.hasNext(); ) {
      val -= ((PseudoEdge)i1.next()).getWeight();
    }
    return val;
  }
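  // Hypothetical illustration, not in the original source (the method name is made up):
  // before any merges every vertex sits in its own cluster, so every pseudo edge is
  // "cut" and the starting objective is just the negated sum of the edge weights;
  // for weights 0.8, -0.3 and 0.5 that is -(0.8) - (-0.3) - (0.5) = -1.0.
  private double exampleInitialObjective () {
    List edges = new ArrayList();
    edges.add (new PseudoEdge (null, null, 0.8));
    edges.add (new PseudoEdge (null, null, -0.3));
    edges.add (new PseudoEdge (null, null, 0.5));
    return computeInitialObjFnVal (edges); // == -1.0
  }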
  public double updateScore (double curScore, double[] treeScore,
                             PseudoVertex v1, PseudoVertex v2,
                             Set s1, Set s2, boolean over_ride) {
    double origScore = curScore;
    double nScore = 0.0;
    double newScore = 0.0;
    for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {
      PseudoVertex v11 = (PseudoVertex)i1.next();
      for (Iterator i2 = v2.getCluster().iterator(); i2.hasNext(); ) {
        PseudoVertex v22 = (PseudoVertex)i2.next();
        nScore += (2.0 * v11.lookupEdgeWeight(v22));
      }
    }
    newScore = nScore + curScore;

    /* This section will update the tree model score efficiently. */
    double updatedVal = 0.0;
    if (treeModel != null) {
      Collection clusterpair = (Collection)new ArrayList();
      Collection c1 = (Collection)new ArrayList();
      Collection c2 = (Collection)new ArrayList();
      Collection cBoth = (Collection)new ArrayList();
      for (Iterator ii = s1.iterator(); ii.hasNext(); ) {
        PseudoVertex ppv = (PseudoVertex)ii.next();
        c1.add((Citation)ppv.getObject());
        cBoth.add((Citation)ppv.getObject());
      }
      for (Iterator ii = s2.iterator(); ii.hasNext(); ) {
        PseudoVertex ppv = (PseudoVertex)ii.next();
        c2.add((Citation)ppv.getObject());
        cBoth.add((Citation)ppv.getObject());
      }
      clusterpair.add(c1);
      clusterpair.add(c2);
      //System.out.println("--------------");
      //System.out.println("Pair: ");
      double pairVal = treeModel.computeTreeObjFn(clusterpair, false);
      Collection clusterWrap = (Collection)new ArrayList();
      clusterWrap.add(cBoth);
      //System.out.println("New group: ");
      double newVal = treeModel.computeTreeObjFn(clusterWrap, false);
      //System.out.println("pairVal: " + pairVal + " newVal" + newVal);
      //System.out.println("--------------");
      updatedVal = (treeScore[0] + (newVal - pairVal));
    }

    // now commit to the results if the newScore is higher
    if ((newScore >= origScore) || over_ride) {
      // update tree score, as we're committing to this update
      treeScore[0] = updatedVal;
      for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {
        PseudoVertex v11 = (PseudoVertex)i1.next();
        Set s11 = v11.getCluster();
        s11.addAll(s2);
        s11.addAll(s1);
      }
      for (Iterator i2 = s2.iterator(); i2.hasNext(); ) {
        PseudoVertex v22 = (PseudoVertex)i2.next();
        Set s22 = v22.getCluster();
        s22.addAll(s1);
        s22.addAll(s2);
      }
    }
    return newScore;
  }

  public double computeInitialTreeObjScore (Collection pvertices) {
    Collection c1 = (Collection)new ArrayList();
    for (Iterator ii = pvertices.iterator(); ii.hasNext(); ) {
      PseudoVertex ppv = (PseudoVertex)ii.next();
      Collection c2 = (Collection)new ArrayList();
      c2.add((Citation)ppv.getObject());
      c1.add(c2);
    }
    return treeModel.computeTreeObjFn(c1);
  }
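  // Hypothetical usage sketch, not in the original file (exampleMerge is made up):
  // how the greedy clustering below drives updateScore. The caller snapshots both
  // clusters first, because updateScore mutates the vertices' cluster sets only
  // when it decides to commit to the merge (newScore >= curScore, or over_ride).
  private double exampleMerge (double objFnVal, PseudoVertex v1, PseudoVertex v2) {
    Set s1 = new LinkedHashSet (v1.getCluster());
    Set s2 = new LinkedHashSet (v2.getCluster());
    double[] treeScore = new double[]{0.0}; // tree-score slot; only meaningful when treeModel != null
    return updateScore (objFnVal, treeScore, v1, v2, s1, s2, false);
  }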
  public Collection absoluteCluster (InstanceList ilist, List mentions) {
    List mCopy = new ArrayList();
    boolean newCluster;
    java.util.Random r = new java.util.Random();
    int numTries = 0;
    // get pseudo edges and sort them
    HashMap vsToPvs = new HashMap(); // vsToPvs set destructively
    Collection pvertices = createPseudoVertices (ilist, mentions, vsToPvs);
    for (Iterator i1 = pvertices.iterator(); i1.hasNext(); ) {
      PseudoVertex v = (PseudoVertex)i1.next();
      mCopy.add(v);
    }
    List pedges = createPseudoEdges (ilist, vsToPvs);
    Collections.sort(pedges, new PseudoEdgeComparator());
    double initialObjVal = computeInitialObjFnVal (pedges);
    System.out.println("initial obj fn: " + initialObjVal);
    double objFnVal = initialObjVal;
    double prevVal = -10000000000.0;
    int i = 0;
    while (true) {
      prevVal = objFnVal;
      PseudoEdge pedge = (PseudoEdge)pedges.get(i); // choosePseudoEdge (pedges, r);
      PseudoVertex v1 = (PseudoVertex)pedge.getV1();
      PseudoVertex v2 = (PseudoVertex)pedge.getV2();
      Set s1 = new LinkedHashSet();
      Set s2 = new LinkedHashSet();
      // make copies of the sets
      for (Iterator i1 = v1.getCluster().iterator(); i1.hasNext(); ) {
        s1.add(i1.next());
      }
      for (Iterator i1 = v2.getCluster().iterator(); i1.hasNext(); ) {
        s2.add(i1.next());
      }
      // this is the case for when the edge is now irrelevant through
      // transitive closure - just skip past it
      if (s1.contains(v2) || s2.contains(v1)) {
        //pedges.remove(i);
        i++; // advance, otherwise the same irrelevant edge is picked forever
        continue;
      }
      double[] n = new double[]{0.0};
      objFnVal = updateScore (objFnVal, n, v1, v2, s1, s2, false);
      i++;
      if (objFnVal <= prevVal) {
        numTries++;
        objFnVal = prevVal; // reset it, since we don't commit to this
      } else {
        numTries = 0;
      }
      if (numTries > MAX_REDUCTIONS) break;
      //System.out.println("ObjFnVal: " + objFnVal);
    }
    // build a proper graph from the edges since
    // the evaluation code relies on this structure
    this.wgraph = buildGraphFromPseudoEdges (pedges, mentions);
    Collection citClustering = new ArrayList();
    for (Iterator i1 = pvertices.iterator(); i1.hasNext(); ) {
      PseudoVertex v1 = (PseudoVertex)i1.next();
      Collection cluster = v1.getCluster();
      newCluster = true;
      if (citClustering.size() == 0) newCluster = true;
      for (Iterator i2 = citClustering.iterator(); i2.hasNext(); ) {
        Collection c2 = (Collection)i2.next();
        if (c2.containsAll(cluster)) {
          newCluster = false;
          break;
        }
      }
      if (newCluster) {
        citClustering.add(cluster);
      }
    }
    Collection realClustering = new ArrayList();
    for (Iterator i1 = citClustering.iterator(); i1.hasNext(); ) {
      Collection s1 = (Collection)i1.next();
      Collection n1 = new ArrayList();
      for (Iterator i2 = s1.iterator(); i2.hasNext(); ) {
        n1.add((Citation)((PseudoVertex)i2.next()).getObject());
      }
      realClustering.add (n1);
    }
    return (Collection)realClustering;
  }

  protected Collection getPseudoClustering (Collection pvertices) {
    Collection citClustering = new ArrayList();
    boolean newCluster;
    for (Iterator i1 = pvertices.iterator(); i1.hasNext(); ) {
      PseudoVertex v1 = (PseudoVertex)i1.next();
      Collection cluster = v1.getCluster();
      newCluster = true;
      if (citClustering.size() == 0) newCluster = true;
      for (Iterator i2 = citClustering.iterator(); i2.hasNext(); ) {
        Collection c2 = (Collection)i2.next();
        if (c2.containsAll(cluster)) {
          newCluster = false;
          break;
        }
      }
      if (newCluster) {
        citClustering.add(cluster);
      }
    }
    return citClustering;
  }

  public Collection typicalClusterAdv (InstanceList ilist, List mentions) {
    List pedgesC = null;
    Collection pvertices = null;
    for (int j = 0; j < MAX_ITERS; j++) {
      List mCopy = new ArrayList();
      java.util.Random r = new java.util.Random();
      int numTries = 0;
      // get pseudo edges and sort them
      HashMap vsToPvs = new HashMap(); // vsToPvs set destructively
      pvertices = createPseudoVertices (ilist, mentions, vsToPvs);
      for (Iterator i1 = pvertices.iterator(); i1.hasNext(); ) {
        PseudoVertex v = (PseudoVertex)i1.next();
        mCopy.add(v);
      }
      List pedges = createPseudoEdges (ilist, vsToPvs);
      pedgesC = new ArrayList();
      for (Iterator it = pedges.iterator(); it.hasNext();) {
        pedgesC.add((PseudoEdge)it.next());
      }
      // build the actual graph for scoring purposes
      this.wgraph = buildGraphFromPseudoEdges (pedgesC, mentions);
      Collections.sort(pedges, new PseudoEdgeComparator());
      // debugging leftover: walks the top-ranked edges but has no side effects
      for (int k = 0; k < 50; k++) {
        PseudoEdge e1 = (PseudoEdge)pedges.get(k);
      }
      double initialObjVal = computeInitialObjFnVal (pedges);
      double initialTreeObjVal;
      if (treeModel != null)
        initialTreeObjVal = computeInitialTreeObjScore (pvertices);
      else
        initialTreeObjVal = 0.0;
      double[] treeObjVal;
      treeObjVal = new double[]{initialTreeObjVal};
      double objFnVal = initialObjVal;
      double prevVal = -10000000000.0;
      int i = 0;
      int numClusters = pvertices.size();
      while (true) {
        prevVal = objFnVal;
        int choice = r.nextInt(rBeamSize); // selection size
        if (choice > pedges.size()) break;