📄 corefclusteradv.java
字号:
// NOTE(review): this chunk starts and ends mid-method — the enclosing method's
// header (and its loop openings) lie above the visible region, and
// mergeVertices is cut off below it. The collapsed one-line paste has been
// reflowed; code tokens are unchanged, only comments were added/translated.

// Resolve the chosen pseudo-edge and the two pseudo-vertices it connects.
PseudoEdge pedge = (PseudoEdge)pedges.get(choice); // choosePseudoEdge (pedges, r);
PseudoVertex v1 = (PseudoVertex)pedge.getV1();
PseudoVertex v2 = (PseudoVertex)pedge.getV2();
Set s1 = new LinkedHashSet();
Set s2 = new LinkedHashSet();
// make copies of the sets of vertices represented by each pseudovertex
for (Iterator i1 = v1.getCluster().iterator(); i1.hasNext(); ) {
    s1.add(i1.next());
}
for (Iterator i1 = v2.getCluster().iterator(); i1.hasNext(); ) {
    s2.add(i1.next());
}
// this is the case for when the edge is now irrelevant through
// transitive closure - just remove it and start at beginning
if (s1.contains(v2) || s2.contains(v1)) {
    pedges.remove(choice);
    continue;
}
numClusters--;
if (trueNumStop) {
    objFnVal = updateScore (objFnVal, treeObjVal, v1, v2, s1, s2, true);
    //Collection cl1 = (Collection)getClusteringFromPseudo(pvertices);
    //System.out.println("+++++++++++++++++++++++++++");
    //double treeV = treeModel.computeTreeObjFn(cl1, false);
    //System.out.println("+++++++++++++++++++++++++++");
    //System.out.println("treeV: " + treeV + " updated: " + treeObjVal[0]);
} else
    objFnVal = updateScore (objFnVal, treeObjVal, v1, v2, s1, s2, false);
// Reject the merge if it did not improve the objective (only applies when we
// are not running down to a fixed number of clusters).
if (!trueNumStop && objFnVal <= prevVal) {
    numTries++;
    objFnVal = prevVal; // reset it, since we don't commit to this
} else {
    //System.out.println(objFnVal + "," + treeObjVal[0]);
    pedges.remove(choice);
    numTries = 0;
}
if (trueNumStop && numClusters <= keyPartitioning.size()) {
    // Stopping condition 1: reached the key partitioning's cluster count.
    Collection cl1 = (Collection)getClusteringFromPseudo(pvertices);
    PairEvaluate pairEval = new PairEvaluate (keyPartitioning, cl1);
    pairEval.evaluate();
    double curAgree = evaluatePartitioningAgree (cl1, this.wgraph);
    double curDisAgree = evaluatePartitioningDisAgree (cl1, this.wgraph);
    /* double treeV = treeModel.computeTreeObjFn(cl1, false);
       if (Math.abs(treeV - treeObjVal[0]) > 0.01)
       System.out.println("Tree values don't match: " + treeV + ":" + treeObjVal[0]);*/
    int singles = numSingletons(cl1);
    System.out.println(objFnVal + "," + treeObjVal[0]
                       + "," + curAgree + "," + curDisAgree + "," + singles
                       + "," + pairEval.getF1());
    break;
} else if (numTries > MAX_REDUCTIONS) {
    // Stopping condition 2: too many consecutive non-improving merge attempts.
    Collection cl1 = (Collection)getClusteringFromPseudo(pvertices);
    PairEvaluate pairEval = new PairEvaluate (keyPartitioning, cl1);
    pairEval.evaluate();
    System.out.println(objFnVal + "," + treeObjVal[0] + "," + pairEval.getF1());
    break;
}
//System.out.println("ObjFnVal: " + objFnVal);
} // closes a loop opened above the visible region
} // closes a scope opened above the visible region
// build a proper graph from the edges since
// the evaluation code relies on this structure
return (Collection)getClusteringFromPseudo(pvertices);
} // end of the (partially visible) enclosing method

/**
 * Counts clusters of size one in the given clustering (a Collection of
 * Collections).
 */
protected int numSingletons (Collection clustering) {
    int total = 0;
    for (Iterator it = clustering.iterator(); it.hasNext(); ) {
        if (((Collection)it.next()).size() == 1)
            total++;
    }
    return total;
}

/**
 * Maps a clustering over PseudoVertex objects to a clustering over the
 * underlying Citation objects each pseudo-vertex wraps.
 */
protected Collection getClusteringFromPseudo (Collection pvertices) {
    Collection citClustering = getPseudoClustering (pvertices);
    Collection realClustering = new ArrayList();
    for (Iterator i1 = citClustering.iterator(); i1.hasNext(); ) {
        Collection s1 = (Collection)i1.next();
        Collection n1 = new ArrayList();
        for (Iterator i2 = s1.iterator(); i2.hasNext(); ) {
            // unwrap the Citation stored inside each pseudo-vertex
            n1.add((Citation)((PseudoVertex)i2.next()).getObject());
        }
        realClustering.add (n1);
    }
    return (Collection)realClustering;
}

/**
 * Builds a WeightedGraph from the given pseudo-edges, then adds any mentions
 * not already covered by an edge as isolated vertices.
 */
protected WeightedGraph buildGraphFromPseudoEdges (List pedges, List mentions) {
    HashMap alreadyAdded = new HashMap();
    WeightedGraph w = (WeightedGraph)new WeightedGraphImpl();
    for (Iterator it = pedges.iterator(); it.hasNext(); ) {
        constructEdgesFromPseudoEdges (w, (PseudoEdge)it.next(), alreadyAdded);
    }
    addVerticesToGraph (w, mentions, alreadyAdded);
    return w;
}

/**
 * Greedy agglomerative partitioning: repeatedly merges the endpoints of the
 * current highest-weight edge until the best remaining edge weight falls
 * below <code>threshold</code>. Returns the clusters of original objects.
 */
public Collection typicalClusterPartition (WeightedGraph graph) {
    /* Iterator i0 = ((Set)graph.getVertexSet()).iterator();
       while (i0.hasNext()) {
       VertexImpl v = (VertexImpl)i0.next();
       System.out.println("Map: " + v.getObject() + " -> " +
       ((Citation)((Node)v.getObject()).getObject()).getBaseString() );
       }*/
    //System.out.println("Top Graph: " + graph);
    while (true) {
        // NOTE(review): sentinel minimum; an edge weighted below -1e8 would
        // never be selected — presumably weights stay well above that. Confirm.
        double bestEdgeVal = -100000000;
        WeightedEdge bestEdge = null;
        //System.out.println("Top Graph: " + graph);
        Set edgeSet = graph.getEdgeSet();
        Iterator i1 = edgeSet.iterator();
        // get highest edge value in this loop
        while (i1.hasNext()) {
            WeightedEdge e1 = (WeightedEdge)i1.next();
            if (e1.getWeight() > bestEdgeVal) {
                bestEdgeVal = e1.getWeight();
                bestEdge = e1;
            }
        }
        if (bestEdgeVal < threshold)
            break;
        else {
            if (bestEdge != null) {
                VertexImpl v1 = (VertexImpl)bestEdge.getVertexA();
                VertexImpl v2 = (VertexImpl)bestEdge.getVertexB();
                /* System.out.println("Best edge val: " + bestEdgeVal);
                   System.out.println("Merging vertices: " + v1.getObject() +
                   " and " + v2.getObject()); */
                mergeVertices(graph, v1, v2);
            }
        }
    }
    System.out.println("Final graph now has " + graph.getVertexSet().size() + " nodes");
    return getCollectionOfOriginalObjects ((Collection)graph.getVertexSet());
}

/**
 * Stochastic partitioning with random restarts: for each of MAX_ITERS
 * restarts, repeatedly samples an edge (chooseEdge2) proportionally to
 * shifted weight and merges its endpoints, periodically evaluating the
 * objective and backtracking (restoring the pre-merge graph copy) when the
 * objective drops. Returns the best partitioning found across restarts.
 */
public Collection partitionGraph (WeightedGraph origGraph) {
    java.util.Random rand = new java.util.Random();
    double bestCost = -100000000000.0;
    double curCost = bestCost;
    Collection bestPartitioning = null;
    // evalFreq is the frequency with which evaluations occur
    // in the early stages, it is silly to keep doing a complete
    // evaluation of the objective fn
    for (int i=0; i < MAX_ITERS; i++) {
        //System.out.println("Iteration " + i);
        double cost = -100000000.0;
        double bCost = cost;
        int evalFreq = 10;
        // this is a counter that will increment each time
        // a new edge is tried and the result is a graph
        // with a reduced total objective value
        int numReductions = 0;
        double treeCost = 0.0;
        int iter = 0;
        Collection curPartitioning = null;
        Collection localBestPartitioning = null;
        //System.out.println("Initial Graph: ");
        //printGraph(origGraph);
        WeightedGraph graph = copyGraph(origGraph);
        WeightedGraph graph1 = copyGraph(graph); // backtracking snapshot
        while (true) {
            Collection c0 = (Collection)graph.getEdgeSet();
            List sortedEdges = Collections.list(Collections.enumeration(c0));
            System.out.println("Size of sorted edges: " + sortedEdges.size());
            if (sortedEdges.size() > 0) {
                // NOTE(review): EdgeComparator violates the Comparator
                // contract for distinct equal-weight edges (see class below);
                // Collections.sort can throw under TimSort (Java 7+).
                EdgeComparator comp = new EdgeComparator();
                Collections.sort(sortedEdges, comp);
                // minVal is the smallest weight (list is sorted descending);
                // shifting every weight by -minVal makes the sampling mass
                // non-negative
                double minVal = ((WeightedEdge)sortedEdges.get(sortedEdges.size()-1)).getWeight();
                double totalVal = 0.0;
                Iterator il = (Iterator)sortedEdges.iterator();
                while (il.hasNext()) {
                    totalVal += ((WeightedEdge)il.next()).getWeight();
                }
                totalVal += sortedEdges.size()*(-minVal);
                //WeightedEdge chosenEdge = chooseEdge(sortedEdges, minVal,
                //totalVal, rand);
                //WeightedEdge chosenEdge = chooseEdge2(sortedEdges, minVal,
                //totalVal, rand);
                WeightedEdge chosenEdge = chooseEdge2(sortedEdges, minVal, totalVal, rand);
                if (chosenEdge != null) {
                    VertexImpl v1 = (VertexImpl)chosenEdge.getVertexA();
                    VertexImpl v2 = (VertexImpl)chosenEdge.getVertexB();
                    /* System.out.println("Best edge val: " + chosenEdge.getWeight());
                       System.out.println("Merging vertices: " + v1.getObject() +
                       " and " + v2.getObject()); */
                    mergeVertices(graph, v1, v2);
                }
            } else // edges has size zero
                break;
            if ((evalFreq > 0) && ((iter % evalFreq) == 0)) {
                curPartitioning = getCollectionOfOriginalObjects ((Collection)graph.getVertexSet());
                cost = evaluatePartitioning(curPartitioning, origGraph);
                if (keyPartitioning != null && curPartitioning != null) {
                    PairEvaluate pairEval = new PairEvaluate (keyPartitioning, curPartitioning);
                    pairEval.evaluate();
                    treeCost = 0.0;
                    if (treeModel != null)
                        treeCost = treeModel.computeTreeObjFn (curPartitioning);
                    System.out.println(cost + "," + evaluateAgainstKey (curPartitioning)
                                       + "," + pairEval.getF1() + "," + treeCost
                                       + "," + curPartitioning.size());
                } else {
                    if (keyPartitioning == null)
                        System.out.println(" keyPart is NULL!!");
                    if (curPartitioning == null)
                        System.out.println(" curPart is NULL!!");
                }
                //System.out.println("Cost at iteration " + iter + " is: " + cost);
                // in this case, we've gone too far and have lowered the FN value
                if (!trueNumStop && cost <= bCost) {
                    //System.out.println("Graph val reduced to " + cost + " from " + prevCost);
                    //System.out.println("Due to merging on edge with weight: " +
                    //chosenEdge.getWeight());
                    graph = graph1; // set graph to what it was before the merge
                    numReductions++; // increment the number of reductions
                    //reset the evaluation frequency
                    // NOTE(review): evalFreq/2 is integer division, so
                    // Math.ceil is a no-op — evalFreq decays 10,5,2,1,0 and
                    // evaluations stop entirely once it hits 0. If ceiling
                    // halving was intended, this should be evalFreq/2.0.
                    evalFreq = (int)Math.ceil(evalFreq/2);
                } else {
                    numReductions = 0;
                    localBestPartitioning = curPartitioning;
                    bCost = cost; // let prevCost now equal the new higher cost
                }
                // only copy graph if we allow back-tracking in search
                if (!trueNumStop)
                    graph1 = copyGraph(graph); // make a copy of the graph
            }
            iter++;
            // we stop when we've tried to increase the obj fn value
            // MAX_REDUCTIONS times
            if (!trueNumStop && (numReductions > MAX_REDUCTIONS))
                break;
            if (trueNumStop && (graph.getVertexSet().size() <= keyPartitioning.size())) {
                localBestPartitioning = curPartitioning;
                break;
            }
        }
        curPartitioning = getCollectionOfOriginalObjects ((Collection)graph.getVertexSet());
        if (localBestPartitioning == null)
            localBestPartitioning = curPartitioning;
        //System.out.println("best cost was: " + bCost);
        curCost = evaluatePartitioning(localBestPartitioning, origGraph);
        //System.out.println("curCost is: " + curCost);
        //double curAgree = evaluatePartitioningAgree(localBestPartitioning, origGraph);
        //double curDisAgree = evaluatePartitioningDisAgree(localBestPartitioning, origGraph);
        PairEvaluate pairEval = new PairEvaluate (keyPartitioning, localBestPartitioning);
        pairEval.evaluate();
        if (treeModel != null)
            treeCost = treeModel.computeTreeObjFn (curPartitioning);
        if (curCost > bestCost) {
            bestCost = curCost;
            bestPartitioning = localBestPartitioning;
        }
        System.out.println(curCost + "," + evaluateAgainstKey (localBestPartitioning)
                           + "," + pairEval.getF1() + "," + treeCost
                           + "," + keyPartitioning.size());
    }
    return bestPartitioning;
}

/**
 * Returns the cluster-level recall of <code>col</code> against the key
 * partitioning, or 0.0 when no key partitioning is available.
 */
public double evaluateAgainstKey (Collection col) {
    if (keyPartitioning != null) {
        ClusterEvaluate cl = new ClusterEvaluate(keyPartitioning, col);
        cl.evaluate();
        //cl.printResults();
        //cl.printVerbose();
        return cl.getRecall();
    }
    return 0.0;
}

/**
 * Orders WeightedEdges by descending weight.
 *
 * NOTE(review): for two distinct edges with equal weights this returns -1
 * regardless of argument order, violating the Comparator antisymmetry
 * contract; Collections.sort (TimSort, Java 7+) may throw "Comparison method
 * violates its general contract". Returning 0 when val == 0 would fix it.
 * Also, equals(Object,Object) is an overload — it does not override
 * Object.equals(Object) or Comparator.equals(Object).
 */
public class EdgeComparator implements Comparator {
    public EdgeComparator () {}
    public int compare (Object e1, Object e2) {
        // note that this is backwards because we want it to sort in
        // descending order
        if (e1 == e2)
            return 0;
        double val = ((WeightedEdge)e2).getWeight()-((WeightedEdge)e1).getWeight();
        if (val > 0)
            return 1;
        else
            return -1;
    }
    public boolean equals (Object e1, Object e2) {
        return e1 == e2;
    }
}

/**
 * Orders PseudoEdges by descending weight.
 *
 * NOTE(review): same Comparator-contract violation as EdgeComparator above
 * (distinct equal-weight elements compare as -1 in both orders), and the
 * same equals(Object,Object) overload-vs-override issue.
 */
public class PseudoEdgeComparator implements Comparator {
    public PseudoEdgeComparator () {}
    public int compare (Object e1, Object e2) {
        // note that this is backwards because we want it to sort in
        // descending order
        if (e1 == e2)
            return 0;
        double val = ((PseudoEdge)e2).getWeight()-((PseudoEdge)e1).getWeight();
        if (val > 0)
            return 1;
        else
            return -1;
    }
    public boolean equals (Object e1, Object e2) {
        return e1 == e2;
    }
}

/**
 * Construct a Collection of Collections where the objects in the
 * collections are the original objects (i.e. the object of the vertices).
 * Vertices whose underlying object is null are skipped.
 */
private Collection getCollectionOfOriginalObjects (Collection vertices) {
    Collection collection = new LinkedHashSet();
    Iterator i1 = vertices.iterator();
    while (i1.hasNext()) {
        VertexImpl v = (VertexImpl)i1.next();
        Object o = v.getObject(); // underlying object
        if (o != null) {
            Collection c1 = new LinkedHashSet();
            if (o instanceof Collection) {
                Iterator i2 = ((Collection)o).iterator();
                while (i2.hasNext()) {
                    c1.add(i2.next()); // add the underlying object
                }
            } else {
                // in this case, the vertex is a singleton, but we wrap it in a
                // Collection so that we always have a collection of collections
                c1.add(o);
            }
            collection.add(c1); // add the cluster to the collection of clusters
        }
    }
    return collection;
}

/**
 * The graph may not be fully connected. If an edge does not exist,
 * that corresponds to an edge weight of negative infinity. So,
 * before merging two vertices, one must add in any negative weights.
 * For example, if v1 has an edge to v3 but v2 does not, then the
 * merged v1-v2 should have a negative weighted edge to v3.
 * @param g  the graph to add missing edges to
 * @param v1 first merge endpoint
 * @param v2 second merge endpoint
 */
private void addNegativeWeights(WeightedGraph g, Vertex v1, Vertex v2) {
    List adjacentToV1;
    List adjacentToV2;
    adjacentToV1 = g.getAdjacentVertices(v1);
    adjacentToV2 = g.getAdjacentVertices(v2);
    // Check that v1 is connected to all of v2's adjacent vertices
    for (int i=0; i < adjacentToV2.size(); i++) {
        Vertex v = (Vertex)adjacentToV2.get(i);
        //System.out.println("v1: " + v1 + " v: " + v);
        // NOTE(review): identity (==) comparison assumes each vertex is a
        // unique object instance within the graph — confirm.
        if (v == v1) {
            continue;
        }
        if (!adjacentToV1.contains(v)) {
            try {
                g.addEdge(v, v1, NegativeInfinite);
            } catch (Exception e) {
                // best-effort: log and continue with the remaining vertices
                e.printStackTrace();
            }
        }
    }
    // Check that v2 is connected to all of v1's adjacent vertices
    for (int i=0; i < adjacentToV1.size(); i++) {
        Vertex v = (Vertex)adjacentToV1.get(i);
        //System.out.println("v2: " + v2 + " v " + v + " " + g.isConnected(v, v2));
        if (v == v2) {
            continue;
        }
        if (!adjacentToV2.contains(v)) {
            try {
                System.out.println("Adding negative infinite edge: " + v2 + " to " + v);
                g.addEdge(v, v2, NegativeInfinite);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Prints the Citation index(es) wrapped by a vertex object to stdout;
 * the object is either a List of Citations (a merged vertex) or a single
 * Citation.
 */
private void printVObj (Object o1) {
    if (o1 instanceof List) {
        List l10 = (List)o1;
        for (int k=0; k < l10.size(); k++) {
            System.out.print(" " + ((Citation)l10.get(k)).getIndex());
        }
    } else
        System.out.print(" " + ((Citation)o1).getIndex());
}

/**
 * Merges two vertices of g into a single new vertex whose underlying object
 * is the flattened list of both vertices' underlying objects. The new vertex
 * is added to the graph; edge rewiring continues below the visible region of
 * this file chunk.
 */
public void mergeVertices (WeightedGraph g, VertexImpl v1, VertexImpl v2) {
    // Change: mhay 1/10/04
    //addNegativeWeights(g, v1, v2);
    Object o1 = v1.getObject();
    Object o2 = v2.getObject();
    List l1 = new ArrayList();
    //System.out.println("Merging o1 = " + toString(o1) + " and o2 = " +
    //toString(o2));
    // flatten: each vertex object is either a List of previously-merged
    // originals or a single original object
    if ((o1 instanceof List) && (o2 instanceof List)) {
        l1.addAll((List)o1);
        l1.addAll((List)o2);
    } else if (o1 instanceof List) {
        l1.addAll((List)o1);
        l1.add(o2);
    } else if (o2 instanceof List) {
        l1.addAll((List)o2);
        l1.add(o1);
    } else {
        l1.add(o1);
        l1.add(o2);
    }
    VertexImpl newVertex = new VertexImpl(l1);
    try {
        g.add(newVertex);
    } catch (Exception e) {
        e.printStackTrace();
    }
    List edges1 = (List) g.getEdges(v1);
    Iterator i1 = edges1.iterator();
    // NOTE(review): method body continues beyond the visible region.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -