📄 people.java
字号:
/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/** Maintains a collection of persons, each stored under an integer id.
* @author Ron Bekkerman <A HREF="mailto:ronb@cs.umass.edu">ronb@cs.umass.edu</A>
*/
package edu.umass.cs.mallet.projects.dex.types;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.AugmentableFeatureVector;
import java.util.*;
import java.io.*;
public class People implements Serializable {
/** Creates an empty collection: no persons, a fresh alphabet, id counter at zero. */
public People() {
    this.peopleAdded = 0;
    this.id2person = new HashSetMap();
    this.alphabet = new Alphabet();
}
public People(Vector _people) {
System.err.println ("Constructing " + _people.size() + " people");
id2person = new HashSetMap (_people.size());
alphabet = new Alphabet ();
this.peopleAdded = 0;
for (int i=0; i < _people.size(); i++) {
Person p = (Person)_people.get (i);
addPerson (p);
}
}
/**
 * @return the size of the value collection of the id map
 */
public int size() {
    final int count = this.id2person.values().size();
    return count;
}
/**
 * @return the internal id-to-person map itself (not a copy)
 */
public HashSetMap getMap() {
    return id2person;
}
/**
 * @return an iterator over the values of the id map
 */
public Iterator iterator() {
    return id2person.values().iterator();
}
/**
 * Looks up the person stored under the given boxed id.
 *
 * @param id key into the id map
 * @return whatever the map holds for that key, cast to Person
 */
public Person getPerson(Integer id) {
    Object found = id2person.get(id);
    return (Person) found;
}
/**
 * Convenience overload: boxes the id and delegates to {@link #getPerson(Integer)}.
 *
 * @param id primitive id
 * @return the result of the boxed lookup
 */
public Person getPerson(int id) {
    Integer key = new Integer(id);
    return getPerson(key);
}
/**
 * Adds a person to this collection.
 *
 * Steps, in order:
 * 1. re-index the person's feature vectors onto this collection's alphabet;
 * 2. assign the next id from the running counter and store the person;
 * 3. register the new id as an out-link on every person listed in its in-links;
 * 4. scan the stored persons and, whenever one shares a login or a name with
 *    the current person, merge the current person into it, repoint links,
 *    and map the absorbed id onto the survivor. After each merge the scan
 *    restarts from the beginning with the survivor as the current person.
 *
 * @param person the person to add; its id is overwritten
 * @throws IllegalArgumentException if the alphabets still differ after consolidation
 */
public void addPerson(Person person) {
    person = consolidateAlphabetFor(person);
    if (!person.alphabet.equals(this.alphabet))
        throw new IllegalArgumentException("Alphabets don't match!");
    person.setId(peopleAdded++);
    id2person.put(new Integer(person.getId()), person);

    // Tell every person that links to the newcomer about the newcomer's id.
    Iterator iiter = person.inLinks.iterator();
    while (iiter.hasNext()) {
        Integer fromId = (Integer) iiter.next();
        Person from = (Person) id2person.get(fromId);
        if (from == null) {
            // Same policy as changeInLinks: report a dead link and carry on
            // instead of failing with a NullPointerException.
            System.err.println("WARNING: In-link to " + person + " from index " + fromId +
                " is dead since no person is stored at this index");
            continue;
        }
        from.addOutLink(person.getId());
    }

    Person[] people = (Person[]) id2person.values().toArray(new Person[]{});
    for (int i = 0; i < people.length; i++) {
        Person p = people[i];
        if (p != null && (p.loginsIntersect(person) || p.namesIntersect(person))
            && !p.equals(person)) {
            System.err.println("Merging " + p + " AND " + person);
            p.mergeTwoPeople(person);
            int id = person.getId();
            changeLinks(p, id, p.getId());
            removePerson(person);
            // Keep the absorbed id resolvable: it now points at the survivor.
            // NOTE(review): only this one id is repointed; if the absorbed
            // person was itself the survivor of an earlier merge, older alias
            // ids still map to it. Confirm whether that case needs handling.
            id2person.put(new Integer(id), id2person.get(new Integer(p.getId())));
            people = (Person[]) id2person.values().toArray(new Person[]{});
            // Restart the scan. The loop's i++ runs next, so -1 is required
            // for index 0 to be examined again (0 would skip it).
            i = -1;
            person = p;
        }
    }
}
/**
 * Adds every person of another collection to this one by calling
 * {@link #addPerson(Person)} on each, in the other collection's iteration order.
 *
 * An earlier, now-disabled implementation renumbered ids and links directly;
 * a legacy note on it warned that it only worked when the incoming people
 * did not overlap the people already present.
 *
 * @param otherPeople the collection whose persons are added
 */
public void addAll(People otherPeople) {
    System.err.println("Merging " + otherPeople.size() + " people into " +
        size() + " existing people...");
    for (Iterator incoming = otherPeople.iterator(); incoming.hasNext(); ) {
        Person next = (Person) incoming.next();
        addPerson(next);
    }
}
/**
 * Rewrites references to oldId as newId in the neighbours of p:
 * first through p's in-links, then through p's out-links.
 */
private void changeLinks(Person p, int oldId, int newId) {
    // Order is kept as in-links first, then out-links.
    this.changeInLinks(p, oldId, newId);
    this.changeOutLinks(p, oldId, newId);
}
/**
 * For every id in p's in-links, looks up that person and replaces oldId
 * with newId in its out-links. Ids that resolve to no person are reported
 * on stderr and skipped.
 */
private void changeInLinks(Person p, int oldId, int newId) {
    for (Iterator sources = p.inLinks.iterator(); sources.hasNext(); ) {
        Integer boxed = (Integer) sources.next();
        int index = boxed.intValue();
        Person source = getPerson(index);
        if (source != null) {
            source.outLinks.remove(new Integer(oldId));
            source.outLinks.add(new Integer(newId));
        } else {
            System.err.println("WARNING: Link to " + p + " from index " + index +
                " is dead since the person at this index no longer exists\n");
        }
    }
}
/**
 * For every id in p's out-links, looks up that person and replaces oldId
 * with newId in its in-links.
 *
 * Ids that resolve to no person are reported on stderr and skipped, matching
 * the handling in changeInLinks; previously such an id caused a
 * NullPointerException.
 */
private void changeOutLinks(Person p, int oldId, int newId) {
    Iterator iter = p.outLinks.iterator();
    while (iter.hasNext()) {
        int id = ((Integer) iter.next()).intValue();
        Person n = getPerson(id);
        if (n == null) {
            System.err.println("WARNING: Link from " + p + " to index " + id +
                " is dead since the person at this index no longer exists\n");
            continue;
        }
        n.inLinks.remove(new Integer(oldId));
        n.inLinks.add(new Integer(newId));
    }
}
/**
 * Makes the person use this collection's alphabet.
 *
 * If both of the person's feature vectors report a singleSize() of zero, the
 * alphabet is simply swapped. Otherwise each location of keyWords and
 * contextModel is looked up in the person's current alphabet and re-added,
 * by entry, to new vectors built on this collection's alphabet.
 *
 * @param p the person to convert (modified in place)
 * @return the same person instance
 */
private Person consolidateAlphabetFor(Person p) {
    boolean nothingToCopy =
        p.keyWords.singleSize() == 0 && p.contextModel.singleSize() == 0;
    if (nothingToCopy) {
        p.setAlphabet(this.alphabet);
        return p;
    }

    // Entries are translated through the person's current alphabet.
    Alphabet sourceAlphabet = p.alphabet;
    AugmentableFeatureVector keyWordsCopy = new AugmentableFeatureVector(this.alphabet);
    AugmentableFeatureVector contextCopy = new AugmentableFeatureVector(this.alphabet);

    int loc = 0;
    while (loc < p.keyWords.numLocations()) {
        int sourceIndex = p.keyWords.indexAtLocation(loc);
        double weight = p.keyWords.valueAtLocation(loc);
        String word = (String) sourceAlphabet.lookupObject(sourceIndex);
        keyWordsCopy.add(word, weight);
        loc++;
    }

    loc = 0;
    while (loc < p.contextModel.numLocations()) {
        int sourceIndex = p.contextModel.indexAtLocation(loc);
        double weight = p.contextModel.valueAtLocation(loc);
        String word = (String) sourceAlphabet.lookupObject(sourceIndex);
        contextCopy.add(word, weight);
        loc++;
    }

    p.keyWords = keyWordsCopy;
    p.contextModel = contextCopy;
    p.setAlphabet(alphabet);
    return p;
}
/**
 * Creates new persons from names found in the contact records of persons
 * that have been processed for web pages but not yet for contact information.
 *
 * Works on a snapshot of the stored persons taken at entry. Each qualifying
 * person is flagged as processed, then every name in its contact record
 * becomes a one-name Person that carries the source's domains, one page of
 * the source as context, and an in-link from the source; that person is
 * then added through addPerson.
 */
public void expand() {
    Person[] snapshot = (Person[]) id2person.values().toArray(new Person[]{});
    for (int i = 0; i < snapshot.length; i++) {
        Person source = snapshot[i];
        boolean eligible =
            source.processedForWebPages && !source.processedForContactInformation;
        if (eligible) {
            source.processedForContactInformation = true;
            ArrayList namesFromWeb = source.getContactRecord().getNames();
            int j = 0;
            while (j < namesFromWeb.size()) {
                CountedString counted = (CountedString) namesFromWeb.get(j);
                Vector singleName = new Vector(1);
                singleName.add(new CountedString(counted.str));
                Person discovered = new Person(singleName, null, source.domains, null, null);
                // NOTE(review): cnt is used here as an index into the source's
                // pages - confirm that is what the field holds.
                discovered.contextPages.add(source.pages.get(counted.cnt));
                discovered.addInLink(source.getId());
                addPerson(discovered);
                j++;
            }
        }
    }
}
/**
 * Removes the entry stored under the given person's id.
 *
 * Before removing, walks the person's in-links and prints a warning for each
 * source id that no longer resolves to a person; this pass is diagnostic only
 * and changes nothing.
 *
 * The result now follows the same convention as removePerson(String): true
 * means something was removed. The previous code compared the map's remove
 * result with {@code == null} and therefore reported true exactly when
 * nothing had been stored under the id.
 * NOTE(review): this assumes HashSetMap.remove returns the previous value
 * (null if absent), as java.util.Map does - confirm.
 *
 * @param p the person whose id entry is removed
 * @return true if an entry was stored under the person's id and was removed
 */
public boolean removePerson(Person p) {
    Iterator iiter = p.inLinks.iterator();
    while (iiter.hasNext()) {
        int index = ((Integer) iiter.next()).intValue();
        Person n = getPerson(index);
        if (n == null) {
            System.err.println("WARNING: can't remove person " + index + " because it no longer exists");
        }
    }
    return (id2person.remove(new Integer(p.getId())) != null);
}
/**
 * Removes every stored person for which findLogin(login) is true.
 *
 * Iterates over a snapshot of the map's keys, so removals during the scan
 * do not disturb the iteration.
 *
 * @param login login passed to each person's findLogin
 * @return true if at least one person matched
 */
public boolean removePerson(String login) {
    Integer[] ids = (Integer[]) id2person.keySet().toArray(new Integer[]{});
    boolean matchedAny = false;
    for (int k = 0; k < ids.length; k++) {
        Person candidate = (Person) id2person.get(ids[k]);
        if (candidate == null || !candidate.findLogin(login)) {
            continue;
        }
        removePerson(candidate);
        matchedAny = true;
    }
    return matchedAny;
}
/**
 * Forwards the given stop set to stopKeyWords on every stored person.
 *
 * @param stop set handed unchanged to each person
 */
public void stopKeyWords(HashSet stop) {
    for (Iterator it = id2person.values().iterator(); it.hasNext(); ) {
        Person current = (Person) it.next();
        current.stopKeyWords(stop);
    }
}
/**
 * Calls addEmailLink for every ordered pair of stored persons that are not
 * the same object reference.
 */
public void buildEmailLinks() {
    for (Iterator outer = id2person.values().iterator(); outer.hasNext(); ) {
        Person left = (Person) outer.next();
        for (Iterator inner = id2person.values().iterator(); inner.hasNext(); ) {
            Person right = (Person) inner.next();
            if (left == right) {
                continue;
            }
            left.addEmailLink(right);
        }
    }
}
/**
 * Finds the first stored person, in iteration order, whose findName(name) is true.
 *
 * @param name name passed to each person's findName
 * @return that person's id, or -1 if no person matches
 */
public int findPersonByName(String name) {
    for (Iterator it = id2person.values().iterator(); it.hasNext(); ) {
        Person candidate = (Person) it.next();
        if (candidate.findName(name)) {
            return candidate.getId();
        }
    }
    return -1;
}
/**
 * Writes every stored person to the given file by calling the person's own
 * writeToFile on a shared buffered writer.
 *
 * Any IOException (opening, writing or closing) is reported on stdout with
 * the message "Cannot open file " followed by the file name; it is not
 * rethrown. The writer is now closed in a finally block so that a failure
 * part-way through no longer leaks the open file.
 *
 * @param file destination file
 */
public void writeToFile(File file) {
    BufferedWriter out = null;
    try {
        out = new BufferedWriter(new FileWriter(file));
        Iterator iter = id2person.values().iterator();
        while (iter.hasNext()) {
            ((Person) iter.next()).writeToFile(out);
        }
        // Close on the normal path inside the try so flush errors are reported.
        out.close();
    } catch (IOException e) {
        System.out.print("Cannot open file ");
        System.out.println(file.getName());
    } finally {
        if (out != null) {
            try {
                // No effect if already closed above.
                out.close();
            } catch (IOException ignored) {
                // The failure, if any, has already been reported.
            }
        }
    }
}
/**
 * Prints every stored person to stdout: a separator line, the person's id,
 * then the person's own print() output.
 */
public void print() {
    for (Iterator it = id2person.values().iterator(); it.hasNext(); ) {
        Person current = (Person) it.next();
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println("Person Number " + current.getId());
        current.print();
    }
}
/**
 * Serializes this collection to the given file with Java object serialization.
 *
 * On success a confirmation with the current size is printed to stderr. An
 * IOException is reported on stderr and not rethrown. The underlying file
 * stream is now closed in a finally block, so a failure while constructing
 * the object stream or while writing no longer leaks the open file.
 *
 * @param f destination file
 */
public void write (File f) {
    FileOutputStream fileOut = null;
    try {
        fileOut = new FileOutputStream(f);
        ObjectOutputStream oos = new ObjectOutputStream(fileOut);
        oos.writeObject(this);
        // Closing the object stream flushes it and closes the file stream.
        oos.close();
        System.err.println ("Wrote " + size() + " People Successfully");
    }
    catch (IOException e) {
        System.err.println("Exception writing file " + f + ": " + e);
    }
    finally {
        if (fileOut != null) {
            try {
                // Harmless if the stream was already closed on the normal path.
                fileOut.close();
            } catch (IOException ignored) {
                // The failure, if any, has already been reported.
            }
        }
    }
}
/**
 * Writes an HTML table of all stored persons to the given file: a fixed
 * preamble, one header cell per column, one writeHTML call per person, and
 * a fixed closing fragment.
 *
 * On IOException a message is printed to stdout and the JVM exits with
 * status -1.
 *
 * @param f destination file
 */
public void writeHTML (File f) {
    // Header cells, written in this order right after the preamble.
    final String[] headerCells = {
        "<tr><td> ID </td>\n",
        "<td> First </td>\n",
        "<td> Middle </td>\n",
        "<td> Last </td>\n",
        "<td> Email </td>\n",
        "<td> URL </td>\n",
        "<td> Suffix </td>\n",
        "<td> Title </td>\n",
        "<td> JobTitle </td>\n",
        "<td> CompanyName </td>\n",
        "<td> Department </td>\n",
        "<td> AddressLine </td>\n",
        "<td> City </td>\n",
        "<td> State </td>\n",
        "<td> Country </td>\n",
        "<td> PostalCode </td>\n",
        "<td> HomePhoneNumber </td>\n",
        "<td> DirectPhoneNumber </td>\n",
        "<td> MobilePhoneNumber </td>\n",
        "<td> OutLinks </td>\n",
        "<td> InLinks </td>\n"
    };
    try {
        BufferedWriter out = new BufferedWriter(new FileWriter(f));
        out.write("<html>\n<body><h2> Extracted Contact Information</h2>\n<br>\n<table border=1 cellspacing=3>\n");
        for (int c = 0; c < headerCells.length; c++) {
            out.write(headerCells[c]);
        }
        for (Iterator rows = id2person.values().iterator(); rows.hasNext(); ) {
            Person row = (Person) rows.next();
            row.writeHTML(out, this);
        }
        out.write ("</table>\n</body>\n<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br></html>");
        out.flush();
        out.close();
    } catch (IOException e) {
        System.out.print("Cannot open file " + f);
        System.exit(-1);
    }
}
/**
 * Plain-text export; simply delegates to writeToFile.
 *
 * @param f destination file
 */
public void writeTXT(File f) {
    this.writeToFile(f);
}
/**
 * CSV export is not implemented; a draft implementation was previously left
 * here as commented-out code.
 *
 * @param f ignored
 * @throws UnsupportedOperationException always
 */
public void writeCSV(File f) {
    final String reason = "csv output not yet implemented";
    throw new UnsupportedOperationException(reason);
}
/**
 * Writes every stored person to the given file by calling the person's own
 * writeVCF on a shared buffered writer.
 *
 * An IOException is reported on stdout and not rethrown. The writer is now
 * closed in a finally block so that a failure part-way through no longer
 * leaks the open file.
 *
 * @param f destination file
 */
public void writeVCF (File f) {
    BufferedWriter out = null;
    try {
        out = new BufferedWriter(new FileWriter(f));
        Iterator iter = id2person.values().iterator();
        while (iter.hasNext()) {
            ((Person) iter.next()).writeVCF(out, this);
        }
        out.flush();
        // Close on the normal path inside the try so close errors are reported.
        out.close();
    } catch (IOException e) {
        System.out.print("Cannot open file " + f);
    } finally {
        if (out != null) {
            try {
                // No effect if already closed above.
                out.close();
            } catch (IOException ignored) {
                // The failure, if any, has already been reported.
            }
        }
    }
}
/**
 * Reads a serialized People object from the given file.
 *
 * On success the size is reported on stderr. On IOException the JVM exits
 * with status -1; on ClassNotFoundException it exits with status -2.
 *
 * NOTE(review): this uses Java native deserialization; only use it on files
 * from a trusted source.
 *
 * @param f file previously produced by write(File)
 * @return the deserialized collection
 */
public static People read(File f) {
    People loaded = null;
    try {
        FileInputStream fileIn = new FileInputStream(f);
        ObjectInputStream ois = new ObjectInputStream(fileIn);
        loaded = (People) ois.readObject();
        ois.close();
        System.err.println("Read " + loaded.size() + " People successfully!");
    } catch (ClassNotFoundException cnfe) {
        System.err.println("Cound not find class reading in object: " + cnfe);
        System.exit(-2);
    } catch (IOException e) {
        System.err.println("Exception reading file: " + e);
        System.exit(-1);
    }
    return loaded;
}
// Fields
// (An earlier version kept the persons in a public Vector named "people".)
//public Vector people;
// Maps a boxed Integer id to a Person. After a merge in addPerson, the
// absorbed person's id is mapped to the surviving person as well.
private HashSetMap id2person;
// Running counter; addPerson assigns its current value as the new person's
// id and then increments it.
private int peopleAdded;
// Alphabet that every added person is converted to by consolidateAlphabetFor.
protected Alphabet alphabet;
// Serialization
private static final long serialVersionUID = 1;
// Format version written first by writeObject and checked by readObject.
private static final int CURRENT_SERIAL_VERSION = 1;
/**
 * Custom serialization: writes the format version, then for version 1 and
 * above the id map, the alphabet and the id counter, in that order. The
 * branch for versions below 1 writes the size, each person by index, and
 * the alphabet.
 */
private void writeObject(ObjectOutputStream out) throws IOException {
    out.writeInt(CURRENT_SERIAL_VERSION);
    if (CURRENT_SERIAL_VERSION >= 1) {
        out.writeObject(id2person);
        out.writeObject(alphabet);
        out.writeInt(peopleAdded);
    } else {
        out.writeInt(size());
        int i = 0;
        while (i < size()) {
            out.writeObject(getPerson(i));
            i++;
        }
        out.writeObject(alphabet);
    }
}
/**
 * Custom deserialization: reads the format version, then the id map, the
 * alphabet and the id counter, in the order writeObject wrote them.
 *
 * A stream with a version below 1 is rejected. Previously only a message was
 * printed and the object was returned with its fields unset, which surfaced
 * later as a NullPointerException far from the cause.
 *
 * @throws InvalidObjectException if the stream's version is below 1
 * @throws IOException on any other read failure
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
    int version = in.readInt ();
    if (version < 1) {
        System.err.println ("Version 0 serialization no longer supported\n");
        // Fail here instead of handing back an object with null fields.
        throw new InvalidObjectException (
            "Version 0 serialization no longer supported (stream version " + version + ")");
    }
    this.id2person = (HashSetMap)in.readObject();
    this.alphabet = (Alphabet)in.readObject();
    this.peopleAdded = in.readInt();
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -