📄 person.java
字号:
/**
 * Computes the cosine similarity between the given vector and this
 * person's {@code keyWords} vector.
 *
 * @param tocompare vector to compare against {@code keyWords}
 * @return the dot product of the two vectors divided by the product of
 *         their 2-norms, or {@code 0.0} when this person has no key-word
 *         vector or when either vector has a zero norm
 */
public double calculateCosineWithKeyWords (AugmentableFeatureVector tocompare) {
    // keyWords may be null (other methods of this class check for that);
    // treat a missing vector like a zero vector instead of throwing.
    if (keyWords == null) {
        return 0.0;
    }
    double tocompareTwoNorm = tocompare.twoNorm();
    double keyWordsTwoNorm = keyWords.twoNorm();
    // A zero norm would make the denominator zero.
    if (tocompareTwoNorm == 0 || keyWordsTwoNorm == 0) {
        return 0.0;
    }
    return tocompare.dotProduct (keyWords) / (keyWordsTwoNorm*tocompareTwoNorm);
}
/**
 * Combines two feature vectors, tolerating null on either side.
 *
 * @param v1 vector to merge into; may be null
 * @param v2 vector to merge from; may be null
 * @return {@code v1} when {@code v2} is null; {@code v2} itself (the same
 *         object, not a copy) when only {@code v1} is null; otherwise
 *         {@code v1} after {@code v1.plusEquals (v2, 1.0)} has been applied
 */
private AugmentableFeatureVector mergeFeatureVectors (AugmentableFeatureVector v1,
        AugmentableFeatureVector v2) {
    if (v2 == null) {
        // Nothing to merge in.
        return v1;
    }
    if (v1 == null) {
        // Nothing to merge into: hand back the other vector as-is.
        return v2;
    }
    // xxx does this handle correctly the case where v1 does not have feature x, but v2 does??
    v1.plusEquals (v2, 1.0);
    return v1;
}
/**
 * Folds the data of another person into this one.
 *
 * Names, logins and domains go through {@code mergeVectors}; the context
 * model and key words go through {@code mergeFeatureVectors}; the two
 * "processed" flags become true if they are true on either person; email
 * links, context pages, in-links and out-links of the other person are
 * added to this person's collections; occurrence counts are summed.
 *
 * NOTE(review): {@code pages}, {@code topKeyWords} and
 * {@code contactRecord} of the other person are not touched here --
 * confirm that is intended.
 *
 * @param person the person whose data is merged into this one
 */
public void mergeTwoPeople(Person person) {
    names = mergeVectors(names, person.names);
    logins = mergeVectors(logins, person.logins);
    domains = mergeVectors(domains, person.domains);

    this.contextModel = mergeFeatureVectors (this.contextModel, person.contextModel);
    this.keyWords = mergeFeatureVectors (this.keyWords, person.keyWords);

    // A flag set on the other person carries over; otherwise keep ours.
    if (person.processedForWebPages) {
        this.processedForWebPages = true;
    }
    if (person.processedForContactInformation) {
        this.processedForContactInformation = true;
    }

    // putAll: for a key present in both maps the other person's value wins.
    emailLinks.putAll (person.emailLinks);
    contextPages.addAll (person.contextPages);
    numberOfOccurrences = numberOfOccurrences + person.getNumberOfOccurrences();
    outLinks.addAll (person.outLinks);
    inLinks.addAll (person.inLinks);
}
/**
 * Builds a new vector holding every link from {@code links} followed by
 * every link from {@code newLinks}, skipping any value already collected
 * (as decided by this class's {@code contains} helper).
 *
 * The two inputs are scanned one after the other. Previously the scan of
 * {@code newLinks} was nested inside the loop over {@code links}, so an
 * empty {@code links} silently dropped every new link and the new links
 * were re-scanned once per existing link.
 *
 * @param links    existing links (Integers); not modified
 * @param newLinks links to add (Integers); not modified
 * @return a new vector with the de-duplicated union, existing links first
 */
private Vector mergeLinks (Vector links, Vector newLinks) {
    Vector ret = new Vector();
    for (int i=0; i < links.size(); i++) {
        Integer l1 = (Integer) links.get (i);
        if (!contains (ret, l1)) {
            ret.add (l1);
        }
    }
    for (int j=0; j < newLinks.size(); j++) {
        Integer l2 = (Integer) newLinks.get (j);
        if (!contains (ret, l2)) {
            ret.add (l2);
        }
    }
    return ret;
}
/**
 * Stores the given list as this person's {@code topKeyWords}.
 * The reference is kept as-is; no copy is made.
 *
 * @param sortedWords list to store; elements are read back as
 *        WeightedString elsewhere in this class (presumably ordered by
 *        weight, as the parameter name suggests -- confirm with callers)
 */
public void setTopKeyWordWeights(ArrayList sortedWords) {
this.topKeyWords = sortedWords;
}
/**
 * Writes a plain-text summary of this person to {@code out}: a
 * "Person:" header, then one line each for names, logins and domains
 * (comma-separated), then the in-links and out-links sets.
 *
 * An IOException is not propagated; it is reported on standard output
 * as "Cannot write to file".
 *
 * @param out destination writer
 */
public void writeToFilePersonalInfo(BufferedWriter out) {
    // The three list sections share one layout, so drive them from a table.
    String[] headings = { "\tNames:", "\tLogins:", "\tDomains:" };
    Vector[] sections = { names, logins, domains };
    try {
        out.write("Person:");
        out.newLine();
        for (int s = 0; s < headings.length; s++) {
            out.write(headings[s]);
            Vector entries = sections[s];
            for (int pos = 0; pos < entries.size(); pos++) {
                if (pos != 0) {
                    out.write(",");
                }
                out.write(" " + entries.elementAt(pos));
            }
            out.newLine();
        }
        out.write("\tInLinks: " + inLinks);
        out.newLine();
        out.write("\tOutLinks:" + outLinks);
        out.newLine();
    } catch(IOException ioe) {
        System.out.println("Cannot write to file");
    }
}
/**
 * Writes the personal-info summary (via {@code writeToFilePersonalInfo})
 * followed by a "Words:" line holding {@code keyWords.toString(true)};
 * the line is left empty after the label when {@code keyWords} is null.
 *
 * An IOException is not propagated; it is reported on standard output
 * as "Cannot write to file".
 *
 * @param out destination writer
 */
public void writeToFile(BufferedWriter out) {
    try {
        writeToFilePersonalInfo(out);
        out.write("\tWords:");
        if (null != keyWords) {
            out.write (keyWords.toString(true));
        }
        out.newLine();
    } catch(IOException ioe) {
        System.out.println("Cannot write to file");
    }
}
/**
 * Write this person's contact information as one row of an html table.
 *
 * Nothing is written when there is no contact record, no names, or when
 * {@code getNames()} returns null. Otherwise the row holds: the id (with a
 * named anchor), the three name parts, email, url, thirteen contact-record
 * fields, a cell linking to each out-link person, and a cell linking to
 * each in-link person followed by numbered links to the distinct context
 * pages.
 *
 * An IOException is not propagated; it is reported on standard error as
 * "Cannot write to file".
 *
 * NOTE(review): values are written into the markup without HTML escaping,
 * and the result of {@code people.getPerson} is used without a null
 * check -- confirm both are acceptable for the data this runs on.
 *
 * @param out    destination writer
 * @param people lookup used to resolve linked person ids
 */
public void writeHTML (BufferedWriter out, People people) {
    try {
        if (this.contactRecord == null) {
            return;
        }
        if (this.names.size() == 0) {
            return;
        }
        String[] nameParts = getNames();
        if (nameParts == null) {
            return;
        }

        // id cell, carrying an anchor other rows can link to
        int rowId = this.id;
        String anchor = "<a name=\"" + rowId + "\"></a> ";
        out.write ("<tr><td>" + anchor + rowId + "</td>");

        // the three name cells
        for (int n = 0; n < 3; n++) {
            out.write ("<td>" + nameParts[n] + "</td>");
        }

        String email = getEmail ();
        String emailText = (email == null) ? " " : email;
        out.write ("<td>" + emailText + "</td>");

        String url = getURL ();
        String urlText = (url == null) ? " " : url;
        out.write ("<td><a href=\"" + urlText + "\"> " +
                   urlText + "</a></td>");

        String[] wanted = new String[] {"Suffix", "Title", "JobTitle", "CompanyName", "Department", "AddressLine", "City1", "State", "Country", "PostalCode", "HomePhoneNumber", "DirectPhoneNumber", "MobilePhoneNumber"};
        String[] fields = getFields (wanted);
        for (int f = 0; f < fields.length; f++) {
            String cell = (fields[f] == null) ? " " : fields[f];
            out.write ("<td>" + cell + "</td>");
        }

        // write outlinks
        out.write ("<td>");
        for (Iterator outIt = outLinks.iterator(); outIt.hasNext(); ) {
            Person target = people.getPerson ((Integer) outIt.next());
            out.write ("<a href=\"#" + target.getId() + "\">" +
                       target.getFirstName() + "</a><br>");
        }
        out.write ("</td>");

        // write inlinks, then the context pages in the same cell
        out.write ("<td>");
        for (Iterator inIt = inLinks.iterator(); inIt.hasNext(); ) {
            Person source = people.getPerson ((Integer) inIt.next());
            out.write ("<a href=\"#" + source.getId() + "\">" +
                       source.getFirstName() + "</a><br>");
        }
        // Copy into a set so each context page is listed once.
        HashSet distinctPages = new HashSet (contextPages);
        int pageNumber = 1;
        for (Iterator pageIt = distinctPages.iterator(); pageIt.hasNext(); ) {
            WebPage page = (WebPage) pageIt.next();
            out.write ("<a href=\"" + page.url + "\">" + pageNumber + "</a> ");
            pageNumber++;
        }
        out.write ("</td>");
        out.write ("</tr>\n");
    } catch (IOException ioe) {
        System.err.println ("Cannot write to file");
    }
}
/**
 * Placeholder, presumably for CSV export (judging by the name).
 * The body is empty: nothing is written to {@code out} and neither
 * argument is used.
 */
public void writeCSV (BufferedWriter out, People people) {
}
/**
 * Write this person's contact information in VCard 3.0 format.
 *
 * Nothing is written when there is no contact record, no names, or when
 * {@code getNames()} returns null. Otherwise one card is written holding
 * the name, email, url, job title, organisation, postal address, phone
 * numbers, one related-name entry per out-link ("Friend") and per in-link
 * ("PointedFrom"), and a NOTE listing up to ten top key words.
 *
 * Missing (null) values are written as empty text rather than as the
 * literal string "null". Email and url come from {@code getEmail()} and
 * {@code getURL()}, which apply the same fallback rules this method used
 * to repeat inline.
 *
 * An IOException is not propagated; it is reported on standard error as
 * "Cannot write to file".
 *
 * @param out    destination writer
 * @param people lookup used to resolve linked person ids
 */
public void writeVCF (BufferedWriter out, People people) {
    try {
        if (this.contactRecord == null)
            return;
        if (this.names.size() == 0)
            return;
        String[] nameParts = getNames ();
        if (nameParts == null) {
            return;
        }
        out.write ("BEGIN:VCARD\nVERSION:3.0\n");
        // N is written as last;first;middle;title
        out.write ("N: " + nameParts[2] + ";" + nameParts[0] + ";" + nameParts[1] + ";" +
                   vcfText (contactRecord.getFirstValue ("Title")) + "\n");
        out.write ("EMAIL;Internet: " + vcfText (getEmail ()) + "\n");
        out.write ("URL: " + vcfText (getURL ()) + "\n");
        out.write ("TITLE: " + vcfText (contactRecord.getFirstValue ("JobTitle")) + "\n");
        out.write ("ORG: " + vcfText (contactRecord.getFirstValue ("CompanyName")) + ";" +
                   vcfText (contactRecord.getFirstValue ("Department")) + "\n");
        out.write ("ADR;TYPE=POSTAL:;;" + vcfText (contactRecord.getFirstValue ("AddressLine")) + ";" +
                   vcfText (contactRecord.getFirstValue ("City1")) + ";" +
                   vcfText (contactRecord.getFirstValue ("State")) + ";" +
                   vcfText (contactRecord.getFirstValue ("PostalCode")) + "\n");
        out.write ("TEL;Home:" + vcfText (contactRecord.getFirstValue("HomePhoneNumber")) + "\n");
        out.write ("TEL;Fax:" + vcfText (contactRecord.getFirstValue("FaxNumber")) + "\n");
        out.write ("TEL;Work:" + vcfText (contactRecord.getFirstValue("DirectPhoneNumber")) + "\n");
        out.write ("TEL;Voice:" + vcfText (contactRecord.getFirstValue("CompanyPhoneNumber")) + "\n");
        out.write ("TEL;Cell:" + vcfText (contactRecord.getFirstValue("MobilePhoneNumber")) + "\n");

        // people this person links to
        Iterator oiter = outLinks.iterator();
        while (oiter.hasNext()) {
            Person p = people.getPerson ((Integer)oiter.next());
            out.write ("item1.X-ABRELATEDNAMES;type=pref:" + vcfText (p.getFirstName()) +
                       "\nitem1.X-ABLabel:_$!<Friend>!$_\n");
        }
        // people recorded in inLinks
        Iterator iiter = inLinks.iterator();
        while (iiter.hasNext()) {
            Person p = people.getPerson ((Integer)iiter.next());
            out.write ("item2.X-ABRELATEDNAMES:" + vcfText (p.getFirstName()) +
                       "\nitem2.X-ABLabel:PointedFrom\n");
        }

        // at most the first ten top key words, when any have been set
        String topWords = "";
        for (int fi=0; topKeyWords != null && fi < 10 && fi < topKeyWords.size(); fi++) {
            WeightedString s = (WeightedString) topKeyWords.get (fi);
            topWords += s.str + " . ";
        }
        out.write ("NOTE: " + topWords + "\n");
        out.write ("END:VCARD\n");
    } catch (IOException e) {
        System.err.println ("Cannot write to file");
    }
}

/** Returns {@code value} unchanged, or an empty string when it is null. */
private String vcfText (String value) {
    return (value == null) ? "" : value;
}
/**
 * Looks up the first value of each requested field in the contact record
 * and returns the values with every comma replaced by a space.
 *
 * Strings are immutable, so the replaced text has to be stored back;
 * previously the result of the replacement was discarded and commas were
 * returned unchanged.
 *
 * @param fields names of the contact-record fields to fetch
 * @return an array parallel to {@code fields}; an entry is null when the
 *         contact record returned null for that field
 */
public String[] getFields (String[] fields) {
    String[] ret = new String[fields.length];
    for (int i=0; i < fields.length; i++) {
        String value = contactRecord.getFirstValue (fields[i]);
        ret[i] = (value == null) ? null : value.replace (',', ' ');
    }
    return ret;
}
/**
 * Returns the url to show for this person: the url of the first entry of
 * {@code pages} when there is one, otherwise the contact record's first
 * "WebPageURL" value.
 *
 * @return the chosen url; the contact-record value is returned as-is
 */
public String getURL () {
    // Looked up first, exactly as before, even when a page overrides it.
    String recorded = contactRecord.getFirstValue ("WebPageURL");
    if (pages.isEmpty()) {
        return recorded;
    }
    WebPage first = (WebPage) pages.get(0);
    return first.url;
}
/**
 * Returns the email address to show for this person: first login + "@" +
 * first domain when both lists are non-empty, otherwise the contact
 * record's first "Email" value.
 *
 * @return the chosen address; the contact-record value is returned as-is
 */
public String getEmail () {
    // Looked up first, exactly as before, even when login/domain override it.
    String recorded = contactRecord.getFirstValue ("Email");
    if (logins.size() == 0 || domains.size() == 0) {
        return recorded;
    }
    CountedString login = (CountedString) logins.get (0);
    CountedString domain = (CountedString) domains.get (0);
    return login.str + "@" + domain.str;
}
/**
 * Splits the value of {@code getFirstName()} on spaces and returns it as
 * three capitalised parts: first, middle, last.
 *
 * Surrounding whitespace is dropped and runs of spaces count as a single
 * separator, so stray or doubled spaces no longer produce empty tokens
 * (which made capitalisation throw).
 *
 * @return a three-element array {first, middle, last} -- the middle part
 *         is "" for a two-word name -- or null when the name is null or
 *         does not consist of exactly two or three words
 */
public String[] getNames () {
    String fullName = this.getFirstName();
    // Other code in this class treats a null name as possible.
    if (fullName == null) {
        return null;
    }
    String[] parts = fullName.trim().split(" +");
    // Reject before capitalising: only two- or three-word names are handled.
    if (parts.length != 2 && parts.length != 3) {
        return null;
    }
    parts = firstLetterCapital (parts);
    if (parts.length == 3) {
        return parts;
    }
    return new String[] { parts[0], "", parts[1] };
}
/**
 * Upper-cases the first character of every entry, in place.
 *
 * Null and empty entries are left untouched; previously an empty entry
 * caused a StringIndexOutOfBoundsException.
 *
 * @param names strings to capitalise; the array itself is modified
 * @return the same array instance that was passed in
 */
public static String[] firstLetterCapital(String[] names) {
    for(int i = 0; i < names.length; i++) {
        String name = names[i];
        if (name == null || name.length() == 0) {
            continue;
        }
        names[i] = name.substring(0,1).toUpperCase() + name.substring(1);
    }
    return names;
}
/**
 * Prints a plain-text summary of this person to standard output: a
 * "Person:" header, one comma-separated line each for names, logins and
 * domains, then the occurrence count, the in-links and the out-links.
 */
public void printPersonalInfo() {
    // The three list sections share one layout, so drive them from a table.
    String[] headings = { "\tNames:", "\tLogins:", "\tDomains:" };
    Vector[] sections = { names, logins, domains };

    System.out.println("Person:");
    for (int s = 0; s < headings.length; s++) {
        System.out.print(headings[s]);
        Vector entries = sections[s];
        for (int pos = 0; pos < entries.size(); pos++) {
            if (pos != 0) {
                System.out.print(",");
            }
            System.out.print(" " + entries.elementAt(pos));
        }
        System.out.println();
    }
    System.out.println("\tNumber of occurrences: " + numberOfOccurrences);
    System.out.println("\tInLinks: " + inLinks);
    System.out.println("\tOutLinks: " + outLinks);
}
/**
 * Prints the personal-info summary (via {@code printPersonalInfo})
 * followed by a "Words:" line showing {@code keyWords}.
 */
public void print() {
    printPersonalInfo();
    System.out.println("\tWords:" + keyWords);
}
/**
 * Returns a short label of the form {@code name(id)}. The name is the
 * value of {@code getFirstName()}; when that is null, the first login is
 * used, or the text "null" when there are no logins either.
 */
public String toString () {
    String label = getFirstName();
    if (label == null) {
        if (logins.size() > 0) {
            label = ((CountedString)logins.get(0)).str;
        } else {
            label = "null";
        }
    }
    return label + "(" + id + ")";
}
// Comparable interface
/**
 * Orders people by ascending id.
 *
 * The ids are compared directly instead of being subtracted: a
 * subtraction can overflow for ids of opposite sign and large magnitude
 * and then report the wrong order.
 *
 * @param o the other Person
 * @return -1, 0 or 1 as this id is less than, equal to, or greater than
 *         the other person's id
 * @throws ClassCastException if {@code o} is not a Person
 */
public int compareTo (Object o) {
    int otherId = ((Person)o).getId();
    if (this.id < otherId) {
        return -1;
    }
    if (this.id > otherId) {
        return 1;
    }
    return 0;
}
/**
 * Custom serialization. Fields are written in a fixed order that
 * {@code readObject} must mirror exactly: version tag, the two
 * "processed" flags, names, logins, domains (each as a count followed by
 * the elements), keyWords, topKeyWords (count + elements), contextModel,
 * alphabet, emailLinks, pages, contextPages (count + elements each),
 * contactRecord, inLinks, outLinks, numberOfOccurrences, id.
 *
 * A null {@code topKeyWords} is written as a count of zero instead of
 * throwing a NullPointerException; the stream layout is unchanged. With
 * the {@code readObject} in this class it is read back as an empty list.
 *
 * @param out stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject (ObjectOutputStream out) throws IOException {
    int i,size;
    out.writeInt (CURRENT_SERIAL_VERSION);
    out.writeBoolean (processedForWebPages);
    out.writeBoolean (processedForContactInformation);
    size = names.size();
    out.writeInt (size);
    for (i=0; i < size; i++)
        out.writeObject (names.get (i));
    size = logins.size();
    out.writeInt (size);
    for (i=0; i < size; i++)
        out.writeObject (logins.get(i));
    size = domains.size();
    out.writeInt (size);
    for (i=0; i < size; i++)
        out.writeObject (domains.get(i));
    out.writeObject (keyWords);
    // topKeyWords may never have been set; other code checks it for null.
    size = (topKeyWords == null) ? 0 : topKeyWords.size();
    out.writeInt (size);
    for (i=0; i<size; i++)
        out.writeObject (topKeyWords.get(i));
    out.writeObject (contextModel);
    out.writeObject (alphabet);
    out.writeObject (emailLinks);
    size = pages.size();
    out.writeInt (size);
    for (i=0; i < size; i++)
        out.writeObject (pages.get(i));
    size = contextPages.size();
    out.writeInt (size);
    for (i=0; i < size; i++)
        out.writeObject (contextPages.get(i));
    out.writeObject (contactRecord);
    out.writeObject (inLinks);
    out.writeObject (outLinks);
    out.writeInt (numberOfOccurrences);
    out.writeInt (id);
}
/**
 * Custom deserialization. Reads the fields back in exactly the order
 * {@code writeObject} emits them: version tag, the two "processed" flags,
 * names, logins, domains (each a count followed by that many elements),
 * keyWords, topKeyWords, contextModel, alphabet, emailLinks, pages,
 * contextPages, contactRecord, inLinks, outLinks, numberOfOccurrences, id.
 *
 * @param in stream to read from
 * @throws IOException            if reading from the stream fails
 * @throws ClassNotFoundException if the class of a stored object cannot be found
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
    // The version tag is consumed to stay aligned with the stream; its
    // value is not used.
    int storedVersion = in.readInt ();

    processedForWebPages = in.readBoolean ();
    processedForContactInformation = in.readBoolean ();

    int nameCount = in.readInt();
    names = new Vector (nameCount);
    for (int k = 0; k < nameCount; k++) {
        names.add ((CountedString)in.readObject());
    }

    int loginCount = in.readInt();
    logins = new Vector (loginCount);
    for (int k = 0; k < loginCount; k++) {
        logins.add ((CountedString)in.readObject());
    }

    int domainCount = in.readInt();
    domains = new Vector (domainCount);
    for (int k = 0; k < domainCount; k++) {
        domains.add ((CountedString)in.readObject());
    }

    keyWords = (AugmentableFeatureVector)in.readObject();

    int topWordCount = in.readInt();
    topKeyWords = new ArrayList (topWordCount);
    for (int k = 0; k < topWordCount; k++) {
        topKeyWords.add ((WeightedString)in.readObject());
    }

    contextModel = (AugmentableFeatureVector)in.readObject();
    alphabet = (Alphabet)in.readObject();
    emailLinks = (HashMap) in.readObject();

    int pageCount = in.readInt();
    pages = new Vector (pageCount);
    for (int k = 0; k < pageCount; k++) {
        pages.add ((WebPage)in.readObject ());
    }

    int contextPageCount = in.readInt();
    contextPages = new Vector (contextPageCount);
    for (int k = 0; k < contextPageCount; k++) {
        contextPages.add ((WebPage)in.readObject ());
    }

    contactRecord = (ContactRecord)in.readObject();
    inLinks = (HashSet) in.readObject();
    outLinks = (HashSet) in.readObject();
    numberOfOccurrences = in.readInt();
    id = in.readInt ();
}
/** Have we searched for home pages for this person? */
public boolean processedForWebPages;
/** Have we tried to extract contact information for this person? */
public boolean processedForContactInformation;
/** CountedStrings of names for this person */
public Vector names;
/** CountedStrings of login ids */
public Vector logins;
/** CountedStrings of domains (e.g. umass.edu) */
public Vector domains;
/** Feature vector of key words from web pages; may be null */
public AugmentableFeatureVector keyWords;
/** WeightedStrings assigned through setTopKeyWordWeights; checked for null before use in writeVCF */
public ArrayList topKeyWords;
/** Feature vector of words on pages/emails where this person is mentioned (not homepages) */;
public AugmentableFeatureVector contextModel;
/** Vocabulary for keyWords and contextModel */
public Alphabet alphabet;
/** Maps Strings (a person's name) to number of times this person
 * co-occurs with the other person in the header of emails */
public HashMap emailLinks;
/** WebPages that are part of this person's web presence */
public Vector pages;
/** WebPages this person is found on */
public Vector contextPages;
/** Contact information for this person; may be null (the write methods check for it) */
public ContactRecord contactRecord;
/** Set of Integers (Person ids). writeVCF labels these entries "PointedFrom", so
 * presumably the people who point to this person -- the earlier comment described
 * the opposite direction; confirm. */
public HashSet inLinks;
/** Set of Integers (Person ids). writeVCF labels these entries "Friend", so
 * presumably the people this person points to -- the earlier comment described
 * the opposite direction; confirm. */
public HashSet outLinks;
/** Number of times this person occurs in email/web search */
public int numberOfOccurrences;
/** Unique id */
private int id;
/** Serialization id of this class */
private static final long serialVersionUID = 1;
/** Version tag that writeObject puts at the start of the serialized form */
private static final int CURRENT_SERIAL_VERSION = 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -