corefclusteradv.java
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 1,911 行 · 第 1/5 页
JAVA
1,911 行
// NOTE(review): this chunk starts mid-method — the lines below are the tail of a
// graph-copying routine whose signature appears before this excerpt; it clones
// one remaining edge into `copy` and returns the copied graph.
WeightedEdge ne = new WeightedEdgeImpl(n1, n2, e1.getWeight());
try {
    copy.addEdge(ne);
} catch (Exception e) {e.printStackTrace();} // failure is only printed, never rethrown
}
return copy;
}

/**
 * Adds one vertex per mention to the graph, skipping any mention already
 * recorded in alreadyAddedVertices.  Each vertex wraps a singleton List
 * holding the mention object.
 * NOTE(review): this method never puts into alreadyAddedVertices itself —
 * presumably the caller records additions; confirm against call sites.
 */
public void addVerticesToGraph(WeightedGraph graph, List mentions, HashMap alreadyAddedVertices) {
    for (int i=0; i < mentions.size(); i++) {
        Object o = mentions.get(i);
        if (alreadyAddedVertices.get(o) == null) { // add only if it hasn't been added
            List l = new ArrayList();
            l.add(o);
            VertexImpl v = new VertexImpl(l);
            try {
                graph.add(v); // add the vertex
            } catch (Exception e) {e.printStackTrace();}
        }
    }
}

/**
 * Degenerate edge chooser: always returns the first edge of the list, or
 * null when the list is empty.  minVal, total and rand are ignored.
 */
public WeightedEdge chooseEdge3 (List edges, double minVal, double total, java.util.Random rand) {
    if (edges.size() > 0) {
        return (WeightedEdge)edges.get(0);
    } else
        return null;
}

// simpler more heuristic-based approach
/**
 * Picks an edge at a random rank in [0, 10) (clamped to the list size) by
 * walking the iterator; falls back to the first edge when the draw is 0.
 * minVal and total are unused by this variant.
 */
public WeightedEdge chooseEdge2 (List edges, double minVal, double total, java.util.Random rand) {
    //return (WeightedEdge)edges.first();
    if (edges.size() < 1)
        return null;
    int x = rand.nextInt(10);
    if (x > edges.size())
        x = edges.size();
    WeightedEdge e = null;
    // advance x steps; e ends up as the x-th edge (1-based)
    Iterator i1 = edges.iterator();
    int i=0;
    while (i1.hasNext() && i < x) {
        e = (WeightedEdge)i1.next();
        i++;
    }
    if (e != null)
        return e;
    else
        return (WeightedEdge)edges.get(0); // x == 0: default to the first edge
}

/* Algorithm: Sort edges by magnitude. Scale so they're all positive. Choose a random number between 0 and the sum of all the magnitudes. Select an edge in this fashion.
Merge the two vertices and */
/**
 * Weight-proportional (roulette-wheel) random edge selection: draws x
 * uniformly in [0, total) and returns the first edge at which the running
 * sum of (weight - minVal) exceeds x.  minVal shifts all weights so the
 * accumulated mass is non-negative.
 */
private WeightedEdge chooseEdge (List edges, double minVal, double total, java.util.Random rand) {
    double x = rand.nextDouble() * total; // 0 < x < total
    double cur = 0.0;
    Iterator i1 = edges.iterator();
    while (i1.hasNext()) {
        WeightedEdge e = (WeightedEdge)i1.next();
        cur += (e.getWeight()-minVal); // SUBTRACT minVal
        if (cur > x) {
            return e;
        }
    }
    // this shouldn't really happen unless there is some kind of numerical
    // issue - default to the first edge
    return (WeightedEdge)edges.get(0);
}

/**
 * Random pseudo-edge chooser, same heuristic as chooseEdge2: walk to a
 * random rank in [0, 10) (clamped) and fall back to the first edge.
 */
private PseudoEdge choosePseudoEdge (List edges, java.util.Random rand) {
    if (edges.size() == 0)
        return null;
    double factor = Math.ceil(Math.log(edges.size()))*20; // NOTE(review): computed but never used
    int x = rand.nextInt(10);
    if (x > edges.size())
        x = edges.size();
    PseudoEdge e = null;
    Iterator i1 = edges.iterator();
    int i=0;
    while (i1.hasNext() && i < x) {
        e = (PseudoEdge)i1.next();
        i++;
    }
    if (e != null)
        return e;
    else
        return (PseudoEdge)edges.get(0);
}

/** Convenience overload: evaluates with no n-best list (nBestList = -1). */
public double evaluatePartitioningExternal (InstanceList ilist, List mentions, Collection collection) {
    return evaluatePartitioningExternal (ilist, mentions, collection, -1);
}

/**
 * Scores the proposed clustering against the field wgraph.  ilist and
 * mentions are currently unused.
 * NOTE(review): both branches of the conditional are identical, so
 * nBestList has no effect here — the n-best path was likely meant to call
 * a different evaluation; confirm intended behavior.
 */
public double evaluatePartitioningExternal (InstanceList ilist, List mentions, Collection collection, int nBestList) {
    if (nBestList > 0 ) {
        return evaluatePartitioning (collection, wgraph);
    } else
        return evaluatePartitioning (collection, wgraph);
}

/**
 * Agreement score: the sum of weights of graph edges whose two endpoint
 * mentions fall in the same cluster of the proposed clustering.
 */
private double evaluatePartitioningAgree (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 = edges.iterator();
    double cost = 0.0;
    while (i1.hasNext()) {
        WeightedEdge e = (WeightedEdge)i1.next();
        VertexImpl v1 = (VertexImpl)e.getVertexA();
        VertexImpl v2 = (VertexImpl)e.getVertexB();
        // vertex payloads are singleton Lists; compare their sole elements
        if (inSameCluster (clustering, ((List)v1.getObject()).get(0), ((List)v2.getObject()).get(0))) {
            cost += e.getWeight();
        }
    }
    return cost;
}

/**
 * Disagreement score: the negated sum of weights of edges that cross
 * cluster boundaries (result is always <= 0).
 */
private double evaluatePartitioningDisAgree (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 =
edges.iterator();
    double cost = 0.0;
    // subtract the weight of every edge whose endpoints lie in different clusters
    while (i1.hasNext()) {
        WeightedEdge e = (WeightedEdge)i1.next();
        VertexImpl v1 = (VertexImpl)e.getVertexA();
        VertexImpl v2 = (VertexImpl)e.getVertexB();
        if (!inSameCluster (clustering, ((List)v1.getObject()).get(0), ((List)v2.getObject()).get(0)))
            cost -= e.getWeight();
    }
    return cost;
}

/**
 * Objective value of a clustering over the graph: adds the weight of each
 * within-cluster edge and subtracts the weight of each cross-cluster edge.
 * Expects every vertex payload to be a singleton List holding a Citation;
 * returns 0.0 (with a warning) when clustering is null.
 */
public double evaluatePartitioning (Collection clustering, WeightedGraph graph) {
    Set edges = (Set)graph.getEdgeSet();
    Iterator i1 = edges.iterator();
    double cost = 0.0;
    Citation c1,c2;
    Object o1,o2;
    if (clustering == null) {
        System.out.println(" YIKES: clustering is null");
        return 0.0;
    }
    while (i1.hasNext()) {
        WeightedEdge e = (WeightedEdge)i1.next();
        VertexImpl v1 = (VertexImpl)e.getVertexA();
        VertexImpl v2 = (VertexImpl)e.getVertexB();
        o1 = v1.getObject();
        o2 = v2.getObject();
        if ((o1 instanceof List) && ((List)o1).size() == 1)
            c1 = (Citation)((List)o1).get(0);
        else
            break; // NOTE(review): `break` abandons ALL remaining edges on the first
                   // non-singleton vertex; `continue` may have been intended — confirm
        if ((o2 instanceof List) && ((List)o2).size() == 1)
            c2 = (Citation)((List)o2).get(0);
        else
            break; // NOTE(review): same concern as above
        if (inSameCluster (clustering, c1, c2)) {
            /* System.out.println("SAME: " + c1.getIndex() + " and " + c2.getIndex() + ": " + e.getWeight());*/
            cost += e.getWeight();
        } else {
            /* System.out.println("DIFFERENT: " + c1.getIndex() + " and " + c2.getIndex() + ": " + (-e.getWeight())); */
            cost -= e.getWeight();
        }
    }
    return cost;
}

/**
 * True iff o1 and o2 appear in the same member collection of clustering.
 * The first cluster containing either object decides the answer; returns
 * false when neither object is found in any cluster.
 */
public boolean inSameCluster (Collection clustering, Object o1, Object o2) {
    Iterator i1 = clustering.iterator();
    while (i1.hasNext()) {
        Collection c = (Collection)i1.next();
        if (c.contains(o1))
            return (c.contains(o2)) ? true : false; // i.e. c.contains(o2)
        if (c.contains(o2))
            return (c.contains(o1)) ?
true : false;
    }
    return false;
}

/**
 * Lightweight edge between two PseudoVertex endpoints carrying a
 * real-valued weight (the pairwise classifier score).
 */
public class PseudoEdge {
    double weight;
    PseudoVertex v1;
    PseudoVertex v2;
    public PseudoEdge (PseudoVertex v1, PseudoVertex v2, double weight) {
        this.v1 = v1;
        this.v2 = v2;
        this.weight = weight;
    }
    public double getWeight () { return weight; }
    public PseudoVertex getV1 () { return v1; }
    public PseudoVertex getV2 () { return v2; }
}

/**
 * Builds one PseudoEdge per classifier instance: each instance's source is
 * a NodePair whose two objects are mapped (via map) to their PseudoVertex,
 * and the edge weight is the classifier's score for the pair (n-best or
 * plain, depending on useNBestInference).
 */
public List createPseudoEdges (InstanceList instances, Map map) {
    List al = (List)new ArrayList();
    for (Iterator i1 = instances.iterator(); i1.hasNext();) {
        Instance inst = (Instance)i1.next();
        Object o1 = ((NodePair)inst.getSource()).getObject1();
        Object o2 = ((NodePair)inst.getSource()).getObject2();
        PseudoVertex po1 = (PseudoVertex)map.get(o1);
        PseudoVertex po2 = (PseudoVertex)map.get(o2);
        // System.out.println("Creating edge out of " + po1 + " and " +
        // po2);
        if (useNBestInference)
            al.add (new PseudoEdge(po1, po2, computeScore_NBest(meClassifier, inst)));
        else
            al.add (new PseudoEdge(po1, po2, computeScore(meClassifier, inst)));
    }
    return al;
}

// this is similar to pseudo edge
// the graph is implicit and this has structures to optimize
// the agglomerative clustering AND maintain the objective
// function score as we go
/**
 * A mention wrapped for agglomerative clustering: holds the (shared) set
 * of vertices currently in its cluster, plus a map from every other
 * mention to the pairwise classifier score so edge weights can be looked
 * up without materializing a graph.
 */
public class PseudoVertex {
    Set cluster; // let this be a set for faster duplicate detection
    Object obj;
    HashMap map;
    double treeVal; // the current tree value of the cluster to which this vertex belongs
    public PseudoVertex (InstanceList instances, Object mention) {
        cluster = new LinkedHashSet(); // list of other vertices in this cluster
        this.obj = mention;
        this.map = new HashMap();
        initializeMap (instances, mention);
        cluster.add(this); // every vertex starts in its own singleton cluster
    }
    /** Score of the edge to v2, or 0.0 when no instance scored the pair. */
    public double lookupEdgeWeight (PseudoVertex v2) {
        Double d = (Double)map.get(v2.getObject());
        if (d == null) {
            return 0.0;
        }
        return (double)d.doubleValue();
    }
    public Set getCluster () { return cluster; }
    public Map getMap () { return map; }
    public Object getObject() { return obj; }
    // caches the classifier score for every instance pair that involves this mention
    private void initializeMap (InstanceList l1, Object o1) {
        for (Iterator i1 =
l1.iterator(); i1.hasNext();) {
            Instance inst = (Instance)i1.next();
            NodePair p1 = (NodePair)inst.getSource();
            // key the score under the OTHER mention of the pair (identity comparison)
            if (p1.getObject1() == o1)
                map.put(p1.getObject2(), new Double(computeScore(meClassifier, inst)));
            else if (p1.getObject2() == o1)
                map.put(p1.getObject1(), new Double(computeScore(meClassifier, inst)));
        }
    }
}

/**
 * Creates one PseudoVertex per mention and records the mention -> vertex
 * association in map (filled in as a side effect for the caller).
 */
public Collection createPseudoVertices (InstanceList instances, List mentions, HashMap map) {
    Collection vs = new ArrayList();
    for (Iterator i1 = mentions.iterator(); i1.hasNext();) {
        Object o1 = i1.next();
        PseudoVertex pv = new PseudoVertex (instances, o1);
        vs.add (pv);
        map.put (o1, pv);
    }
    return vs;
}

/**
 * Objective value of the all-singletons clustering: every edge crosses a
 * cluster boundary, so the value is the negated sum of all edge weights.
 */
private double computeInitialObjFnVal (Collection edges) {
    double val = 0.0;
    for (Iterator i1 = edges.iterator(); i1.hasNext(); ) {
        val -= ((PseudoEdge)i1.next()).getWeight();
    }
    return val;
}

/**
 * Scores the merge of v1's cluster (s1) with v2's cluster (s2) and commits
 * the merge when the merged score is at least the current score (or when
 * over_ride is set).  treeScore is a one-element in/out array carrying the
 * tree-model component of the objective.
 */
public double updateScore (double curScore, double [] treeScore, PseudoVertex v1, PseudoVertex v2, Set s1, Set s2, boolean over_ride) {
    double origScore = curScore;
    double nScore = 0.0;
    double newScore = 0.0;
    // accumulate 2*w for each cross pair — presumably because merging flips
    // each edge's contribution from -w to +w; TODO confirm against the
    // objective definition
    for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {
        PseudoVertex v11 = (PseudoVertex)i1.next();
        for (Iterator i2 = v2.getCluster().iterator(); i2.hasNext(); ) {
            PseudoVertex v22 = (PseudoVertex)i2.next();
            nScore += (2.0 * v11.lookupEdgeWeight(v22));
        }
    }
    newScore = nScore + curScore;
    /* This section will update the tree model score efficiently.
*/
    double updatedVal = 0.0;
    if (treeModel != null) {
        // materialize the two clusters as Citation collections: c1 and c2
        // separately, plus cBoth as their union
        Collection clusterpair = (Collection)new ArrayList();
        Collection c1 = (Collection)new ArrayList();
        Collection c2 = (Collection)new ArrayList();
        Collection cBoth = (Collection)new ArrayList();
        for (Iterator ii = s1.iterator(); ii.hasNext(); ) {
            PseudoVertex ppv = (PseudoVertex)ii.next();
            c1.add((Citation)ppv.getObject());
            cBoth.add((Citation)ppv.getObject());
        }
        for (Iterator ii = s2.iterator(); ii.hasNext(); ) {
            PseudoVertex ppv = (PseudoVertex)ii.next();
            c2.add((Citation)ppv.getObject());
            cBoth.add((Citation)ppv.getObject());
        }
        clusterpair.add(c1);
        clusterpair.add(c2);
        //System.out.println("--------------");
        //System.out.println("Pair: ");
        double pairVal = treeModel.computeTreeObjFn(clusterpair, false);
        Collection clusterWrap = (Collection)new ArrayList();
        clusterWrap.add(cBoth);
        //System.out.println("New group: ");
        double newVal = treeModel.computeTreeObjFn(clusterWrap, false);
        //System.out.println("pairVal: " + pairVal + " newVal" + newVal);
        //System.out.println("--------------");
        // delta update: swap the separate-cluster contribution for the merged one
        updatedVal = (treeScore[0] + (newVal - pairVal));
    }
    //now commit to the results if the newScore is higher (ties also commit)
    if ((newScore >= origScore) || over_ride) {
        // update tree score, as we're committing to this update
        treeScore[0] = updatedVal;
        // point every member of both clusters at the merged member set
        for (Iterator i1 = s1.iterator(); i1.hasNext(); ) {
            PseudoVertex v11 = (PseudoVertex)i1.next();
            Set s11 = v11.getCluster();
            s11.addAll(s2);
            s11.addAll(s1);
        }
        for (Iterator i2 = s2.iterator(); i2.hasNext(); ) {
        // NOTE(review): chunk ends here mid-method — the remainder of
        // updateScore continues past this excerpt.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?