📄 peoplegraph.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */package edu.umass.cs.mallet.projects.dex.graph;import edu.umass.cs.mallet.projects.dex.types.*;import salvo.jesus.graph.*;import salvo.jesus.graph.algorithm.*;import salvo.jesus.util.HeapNodeComparator;import java.util.ArrayList;import java.util.Vector;import java.util.TreeMap;import java.util.Iterator;import java.util.Stack;import java.util.Set;import java.util.List;import java.util.HashMap;import java.io.*;/** Social network of People */public class PeopleGraph extends WeightedGraphImpl { public PeopleGraph (People people) { this (people, false); } public PeopleGraph (People people, boolean useEmailLinks) { try { HashMap person2vertex = new HashMap (people.size()); System.err.println ("Adding " + people.size() + " nodes to social network."); Iterator keyIter = people.getMap().keySet().iterator(); while (keyIter.hasNext()) { Integer id = (Integer)keyIter.next(); Person p = (Person)people.getMap().get(id); VertexImpl v = (VertexImpl)person2vertex.get(p); if (v == null) { v = new VertexImpl (p); v.setLabel (String.valueOf(p.getId())); add (v); person2vertex.put (p, v); } Iterator pIter = p.outLinks.iterator(); while (pIter.hasNext()) { Person neighbor = people.getPerson ((Integer)pIter.next()); VertexImpl neighborVertex = (VertexImpl)person2vertex.get(neighbor); if (neighborVertex == null) { neighborVertex = new VertexImpl (neighbor); neighborVertex.setLabel (String.valueOf (neighbor.getId())); add (neighborVertex); person2vertex.put (neighbor, neighborVertex); } if (useEmailLinks) addEdge (v, neighborVertex, 1.0); else { if (p.keyWords.numLocations() == 0 || neighbor.keyWords.numLocations() == 0) addEdge (v, neighborVertex, 0.0); else { addEdge (v, neighborVertex, p.calculateCosineWithKeyWords (neighbor.keyWords)); } } } if (useEmailLinks) { Iterator lIter = p.emailLinks.keySet().iterator(); while (lIter.hasNext()) { String name = (String) lIter.next(); int pid = people.findPersonByName (name); Person emailNeighbor = people.getPerson (pid); if (p.equals(emailNeighbor)) { System.err.println ("Skipping self edge"); continue; } if (pid == -1) { System.err.println ("person " + name + " not in people object"); continue; } VertexImpl emailNeighborVertex = (VertexImpl)person2vertex.get(emailNeighbor); if (emailNeighborVertex == null) { emailNeighborVertex = new VertexImpl (emailNeighbor); emailNeighborVertex.setLabel (String.valueOf (emailNeighbor.getId())); add (emailNeighborVertex); person2vertex.put (emailNeighbor, emailNeighborVertex); } System.err.println ("adding edge with weight " + p.emailLinks.get(name)); addEdge (v, emailNeighborVertex, ((Integer)p.emailLinks.get (name)).intValue()); } } } System.err.println (getEdgesCount() + " edges added to social network."); // omit isolated people Vertex[] vs = (Vertex[])getVertices (0).toArray(new Vertex[] {}); for (int i=0; i < vs.length; i++) remove (vs[i]); }catch (Exception e) { System.err.println ("Error constructing graph: " + e); e.printStackTrace(); System.exit(-1); } } private String getName (VertexImpl v) { String name = ""; Person p = (Person)v.getObject(); return (p.getFirstName()==null) ? v.toString() : p.getFirstName(); } public void printZoomGraphFile (File f) { try { BufferedWriter out = new BufferedWriter (new FileWriter (f)); // print verts out.write ("nodedef> name VARCHAR(256)\n"); Iterator viter = getVerticesIterator (); while (viter.hasNext()) { VertexImpl v = (VertexImpl) viter.next(); out.write (getName (v) + "\n"); } out.write ("edgedef> n1 VARCHAR(256),n2 VARCHAR(256)\n"); Iterator eiter = getEdgeSet().iterator(); while (eiter.hasNext()) { Edge e = (Edge) eiter.next(); out.write (getName((VertexImpl)e.getVertexA()) + "," + getName((VertexImpl)e.getVertexB()) + "\n"); } out.close(); } catch (IOException e) { System.err.println (e); System.exit(-1); } } public void printAllStatistics () { printAllStatistics (true); } public void printAllStatistics (boolean cluster) { try { System.err.println ("Printing All Statistics"); System.err.println ("ORDERED BY DEGREE:"); printPeopleByDegree (); System.err.println ("DEGREE DISTRIBUTION:"); printDegreeDistribution (); printConnectedComponentsStatistics (); System.err.println ("GRAPH DEGREE: " + getDegree()); System.err.println ("NUMBER VERTICES: " + getVerticesCount()); System.err.println ("NUMBER EDGES: " + getEdgesCount()); HashMap between = getBetweennessOfEdges (); printTopNBetweennessNodes (10, between); if (cluster) clusterByBetweenness (between); printConnectedComponentsStatistics (); printConnectedComponents (); printZoomGraphFile (new File ("zoom.dat")); writeObjectFile (new File ("graph.obj")); } catch (Exception e) { System.err.println ("Exception printing graph statistics: " + e); System.exit(-1); } } private void writeObjectFile (File f) throws Exception { ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); oos.writeObject (this); } /** Cluster the graph by iteratively removing edges of highest * betweenness. See Joshua R. Tyler, "Email as Spectroscopy: * Automated Discovery of Community Structure within Organizations" * NOTE- this method removes edges and is not recoverable. */ private void clusterByBetweenness (HashMap betweenness) throws Exception { // xxx remove top edge that is valid for now. later, choose // randomly to get different clusterings xxx just remove edge for // now. later, duplicate vertex to include in both clusters. xxx // make this iterate over connected components so we don't // recalculate betweenness for components not affected by edge // removal TreeMap rankedEdges = null; Vector highestEdges = null; Edge highestEdge = null; Integer[] keys = null; while (true) { rankedEdges = sortEdgesByBetweenness (betweenness); keys = new Integer [rankedEdges.size()]; keys = (Integer[]) rankedEdges.keySet().toArray (keys); boolean found = false; for (int i=keys.length-1; i >= 0 && !found; i--) { highestEdges = (Vector) rankedEdges.get (keys[i]); for (int j=0; j < highestEdges.size() && !found; j++) { highestEdge = (Edge) highestEdges.get (j); if (canRemoveEdge (highestEdge, betweenness)) { System.err.println ("Removing Edge: " + highestEdge); removeEdge (highestEdge); printConnectedComponentsStatistics(); betweenness = getBetweennessOfEdges (); found = true; } } } // no valid edges to remove if (!found) { System.err.println ("No more valid edges to remove. Clustering complete."); break; } } } /** We do not remove an edge if (1) it divides a component of size < * 6, or (2) the highest betweenness of any edge in the component <= * N-1, where N is the size of the component*/ private boolean canRemoveEdge (Edge e, HashMap betweenness) { Set component = getConnectedSet (e.getVertexA()); if (component.size() < 6 || highestBetweenness (component, betweenness) <= component.size()-1) return false; return true; } /** Return the highest betweenness of all edges in a set of vertices. */ private int highestBetweenness (Set vertices, HashMap betweenness) { TreeMap sortedEdges = new TreeMap ();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -