⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 person.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	/**
	 * Cosine similarity between the given vector and this person's
	 * <code>keyWords</code> vector.
	 *
	 * @param tocompare vector to compare against <code>keyWords</code>
	 * @return the dot product divided by the product of the two norms,
	 *         or 0.0 when either norm is zero
	 */
	public double calculateCosineWithKeyWords (AugmentableFeatureVector tocompare) {
		final double otherNorm = tocompare.twoNorm();
		final double ownNorm = keyWords.twoNorm();
		boolean bothNonZero = (otherNorm != 0) && (ownNorm != 0);
		if (!bothNonZero) {
			// A zero-length vector has no direction; report no similarity.
			return 0.0;
		}
		double normProduct = ownNorm * otherNorm;
		return tocompare.dotProduct (keyWords) / normProduct;
	}

	/**
	 * Combines two feature vectors.  When both are present, <code>v2</code> is
	 * added into <code>v1</code> in place (scale 1.0) and <code>v1</code> is
	 * returned; when only one is present that same instance is returned
	 * (no copy is made); when both are null the result is null.
	 */
	private AugmentableFeatureVector mergeFeatureVectors (AugmentableFeatureVector v1,
																												AugmentableFeatureVector v2) {
		if (v2 == null) {
			return v1;
		}
		if (v1 == null) {
			return v2;
		}
		// xxx does this handle correctly the case where v1 does not have feature x, but v2 does??
		v1.plusEquals (v2, 1.0);
		return v1;
	}
	
	/**
	 * Folds the information held by <code>person</code> into this person:
	 * name/login/domain vectors, feature vectors, processing flags, email
	 * links, context pages, occurrence count and in/out links.
	 *
	 * @param person the person whose data is merged into this one
	 */
	public void mergeTwoPeople(Person person) {
		this.names = mergeVectors(this.names, person.names);
		this.logins = mergeVectors(this.logins, person.logins);
		this.domains = mergeVectors(this.domains, person.domains);
		this.contextModel = mergeFeatureVectors (this.contextModel, person.contextModel);
		this.keyWords = mergeFeatureVectors (this.keyWords, person.keyWords);

		// A flag is set after the merge if it was set on either side.
		if (person.processedForWebPages) {
			this.processedForWebPages = true;
		}
		if (person.processedForContactInformation) {
			this.processedForContactInformation = true;
		}

		this.emailLinks.putAll (person.emailLinks);
		this.contextPages.addAll (person.contextPages);
		this.numberOfOccurrences = this.numberOfOccurrences + person.getNumberOfOccurrences();
		this.outLinks.addAll (person.outLinks);
		this.inLinks.addAll (person.inLinks);
	}

	/**
	 * Returns a new Vector holding the duplicate-free union of two link lists.
	 *
	 * Entries of <code>links</code> come first, followed by the entries of
	 * <code>newLinks</code> that are not already present.  The loop over
	 * <code>newLinks</code> used to be nested inside the loop over
	 * <code>links</code>, which silently dropped every new link when
	 * <code>links</code> was empty and rescanned <code>newLinks</code> once per
	 * existing link; the two passes are now sequential.
	 *
	 * @param links    existing links (Integers)
	 * @param newLinks links to merge in (Integers)
	 * @return a fresh Vector; neither argument is modified
	 */
	private Vector mergeLinks (Vector links, Vector newLinks) {
		Vector ret = new Vector();
		for (int i=0; i < links.size(); i++) {
			Integer l1 = (Integer) links.get (i);
			if (!contains (ret, l1)) {
				ret.add (l1);
			}
		}
		for (int j=0; j < newLinks.size(); j++) {
			Integer l2 = (Integer) newLinks.get (j);
			if (!contains (ret, l2)) {
				ret.add (l2);
			}
		}
		return ret;
	}
	
	/**
	 * Stores the given list as this person's top key words.  The list is kept
	 * by reference, not copied.
	 *
	 * @param sortedWords list to store in <code>topKeyWords</code>
	 */
	public void setTopKeyWordWeights(ArrayList sortedWords) {
		topKeyWords = sortedWords;
	}
	
	/**
	 * Writes a plain-text summary of this person (names, logins, domains,
	 * in-links and out-links) to <code>out</code>.  An IOException is caught
	 * and reported on standard output; it is not rethrown.
	 *
	 * @param out destination writer
	 */
	public void writeToFilePersonalInfo(BufferedWriter out) {
		try {
			out.write("Person:");
			out.newLine();

			// The three CountedString lists share the same layout:
			// a label followed by comma-separated entries on one line.
			String[] labels = { "\tNames:", "\tLogins:", "\tDomains:" };
			Vector[] lists = { names, logins, domains };
			for (int k = 0; k < labels.length; k++) {
				out.write(labels[k]);
				Vector entries = lists[k];
				for (int idx = 0; idx < entries.size(); idx++) {
					String separator = (idx == 0) ? " " : ", ";
					out.write(separator + entries.elementAt(idx));
				}
				out.newLine();
			}

			out.write("\tInLinks: " + inLinks);
			out.newLine();
			out.write("\tOutLinks:" + outLinks);
			out.newLine();
		} catch(IOException e) {
			System.out.println("Cannot write to file");
		}
	}
	
	/**
	 * Writes the personal-info summary followed by a "Words:" line holding
	 * the key-word vector (left empty when <code>keyWords</code> is null).
	 * An IOException is caught and reported on standard output.
	 *
	 * @param out destination writer
	 */
	public void writeToFile(BufferedWriter out) {
		try {
			writeToFilePersonalInfo(out);
			out.write("\tWords:");
			if (keyWords != null) {
				String renderedWords = keyWords.toString(true);
				out.write (renderedWords);
			}
			out.newLine();
		} catch(IOException e) {
			System.out.println("Cannot write to file");
		}
	}

	/**
	 * Writes this person's contact information as one HTML table row.
	 *
	 * Nothing is written when there is no contact record, no names, or when
	 * getNames() cannot split the name into parts.  Cells, in order: id (with
	 * a named anchor), the three name parts, email, URL, the contact-record
	 * fields listed below, out-links, and a final cell holding the in-links
	 * followed by numbered links to the distinct context pages.
	 * An IOException is caught and reported on standard error.
	 *
	 * @param out    destination writer
	 * @param people lookup used to resolve linked person ids
	 */
	public void writeHTML (BufferedWriter out, People people) {
		try {
			if (this.contactRecord == null) {
				return;
			}
			if (this.names.size() == 0) {
				return;
			}
			String[] nameParts = getNames();
			if (nameParts == null) {
				return;
			}

			// id cell, carrying an anchor that link cells of other rows point at
			int rowId = this.id;
			out.write ("<tr><td>" + "<a name=\"" + rowId + "\"></a> " + rowId + "</td>");

			// name parts in the order returned by getNames()
			for (int k = 0; k < 3; k++) {
				out.write ("<td>" + nameParts[k] + "</td>");
			}

			String email = getEmail ();
			String emailCell = (email == null) ? "&nbsp;" : email;
			out.write ("<td>" + emailCell + "</td>");

			String url = getURL ();
			String urlText = (url == null) ? "&nbsp;" : url;
			out.write ("<td><a href=\"" + urlText + "\"> " + urlText + "</a></td>");

			String[] wanted = new String[] {"Suffix", "Title", "JobTitle", "CompanyName", "Department", "AddressLine", "City1", "State", "Country", "PostalCode", "HomePhoneNumber", "DirectPhoneNumber", "MobilePhoneNumber"};
			String[] values = getFields (wanted);
			for (int k = 0; k < values.length; k++) {
				String cell = (values[k] == null) ? "&nbsp;" : values[k];
				out.write ("<td>" + cell + "</td>");
			}

			// out-links cell
			out.write ("<td>");
			for (Iterator it = outLinks.iterator(); it.hasNext(); ) {
				Person target = people.getPerson ((Integer) it.next());
				out.write ("<a href=\"#" + target.getId() + "\">" + target.getFirstName() + "</a><br>");
			}
			out.write ("</td>");

			// in-links cell; the context-page links below share this cell
			out.write ("<td>");
			for (Iterator it = inLinks.iterator(); it.hasNext(); ) {
				Person source = people.getPerson ((Integer) it.next());
				out.write ("<a href=\"#" + source.getId() + "\">" + source.getFirstName() + "</a><br>");
			}
			// copy into a set so each context page is listed once
			HashSet distinctPages = new HashSet (contextPages);
			int pageNumber = 1;
			for (Iterator it = distinctPages.iterator(); it.hasNext(); ) {
				WebPage page = (WebPage) it.next();
				out.write ("<a href=\"" + page.url + "\">" + pageNumber + "</a> ");
				pageNumber++;
			}
			out.write ("</td>");
			out.write ("</tr>\n");
		} catch (IOException e) {
			System.err.println ("Cannot write to file");
		}
	}

	/**
	 * Write this person's contact information in CSV format.
	 * Not implemented: the body is empty, so calling this writes nothing.
	 *
	 * @param out    destination writer (currently unused)
	 * @param people person lookup (currently unused)
	 */
	public void writeCSV (BufferedWriter out, People people) {
	}
	
	/**
	 * Write this person's contact information in VCard 3.0 format.
	 *
	 * Nothing is written when there is no contact record, no names, or when
	 * getNames() cannot split the name into parts.  Contact fields that are
	 * missing (null) are written as empty values rather than the literal text
	 * "null".  Email and URL come from getEmail() and getURL(), so they
	 * follow the same precedence rules as the HTML output.  Up to ten top
	 * key words are emitted in the NOTE line.
	 * An IOException is caught and reported on standard error.
	 *
	 * @param out    destination writer
	 * @param people lookup used to resolve linked person ids
	 */
	public void writeVCF (BufferedWriter out, People people) {
		try {
			if (this.contactRecord == null)
				return;
			if (this.names.size() == 0)
				return;
			String[] names = getNames ();
			if (names == null) {
				return;
			}
			out.write ("BEGIN:VCARD\nVERSION:3.0\n");
			out.write ("N: " + names[2] + ";" + names[0] + ";" + names[1] + ";" +
								 vcardValue (contactRecord.getFirstValue ("Title")) + "\n");
			out.write ("EMAIL;Internet: " + vcardValue (getEmail ()) + "\n");
			out.write ("URL: " + vcardValue (getURL ()) + "\n");
			out.write ("TITLE: " + vcardValue (contactRecord.getFirstValue ("JobTitle")) + "\n");
			out.write ("ORG: " + vcardValue (contactRecord.getFirstValue ("CompanyName")) + ";" +
								 vcardValue (contactRecord.getFirstValue ("Department")) + "\n");
			out.write ("ADR;TYPE=POSTAL:;;" + vcardValue (contactRecord.getFirstValue ("AddressLine")) + ";" +
								 vcardValue (contactRecord.getFirstValue ("City1")) + ";" +
								 vcardValue (contactRecord.getFirstValue ("State")) + ";" +
								 vcardValue (contactRecord.getFirstValue ("PostalCode")) + "\n");
			out.write ("TEL;Home:" + vcardValue (contactRecord.getFirstValue("HomePhoneNumber")) + "\n");
			out.write ("TEL;Fax:" + vcardValue (contactRecord.getFirstValue("FaxNumber")) + "\n");
			out.write ("TEL;Work:" + vcardValue (contactRecord.getFirstValue("DirectPhoneNumber")) + "\n");
			out.write ("TEL;Voice:" + vcardValue (contactRecord.getFirstValue("CompanyPhoneNumber")) + "\n");
			out.write ("TEL;Cell:" + vcardValue (contactRecord.getFirstValue("MobilePhoneNumber")) + "\n");
			// people this person links to
			Iterator oiter = outLinks.iterator();
			while (oiter.hasNext()) {
				Person p = people.getPerson ((Integer)oiter.next());
				out.write ("item1.X-ABRELATEDNAMES;type=pref:" + p.getFirstName() +
									 "\nitem1.X-ABLabel:_$!<Friend>!$_\n");
			}
			// people recorded in inLinks, labelled "PointedFrom"
			Iterator iiter = inLinks.iterator();
			while (iiter.hasNext()) {
				Person p = (Person) people.getPerson ((Integer)iiter.next());
				out.write ("item2.X-ABRELATEDNAMES:" + p.getFirstName() +
									 "\nitem2.X-ABLabel:PointedFrom\n");
			}
			// at most the first ten top key words, each followed by " . "
			StringBuffer topWords = new StringBuffer();
			for (int fi=0; topKeyWords != null && fi < 10 && fi < topKeyWords.size(); fi++) {
				WeightedString s = (WeightedString) topKeyWords.get (fi);
				topWords.append (s.str).append (" . ");
			}
			out.write ("NOTE: " + topWords + "\n");
			out.write ("END:VCARD\n");
		} catch (IOException e) {
			System.err.println ("Cannot write to file");
		}
	}

	/** Maps a missing (null) contact value to the empty string for VCard output. */
	private static String vcardValue (String value) {
		return (value == null) ? "" : value;
	}

	/**
	 * Looks up the first value of each requested field in the contact record,
	 * replacing every comma in a value with a space.
	 *
	 * The result of the comma replacement used to be discarded (Strings are
	 * immutable), so commas were never actually removed; it is now stored
	 * back into the returned array.
	 *
	 * @param fields contact-record field names to look up
	 * @return an array parallel to <code>fields</code>; an entry is null when
	 *         the contact record has no value for that field
	 */
	public String[] getFields (String[] fields) {
		String[] ret = new String[fields.length];
		for (int i=0; i < fields.length; i++) {
			String value = contactRecord.getFirstValue (fields[i]);
			if (value != null) {
				// literal character replacement; no regex needed
				value = value.replace (',', ' ');
			}
			ret[i] = value;
		}
		return ret;
	}
	
	/**
	 * Returns this person's URL: the url of the first entry in
	 * <code>pages</code> when there is one, otherwise the contact record's
	 * "WebPageURL" value.
	 */
	public String getURL () {
		String recordedUrl = contactRecord.getFirstValue ("WebPageURL");
		if (pages.isEmpty()) {
			return recordedUrl;
		}
		WebPage firstPage = (WebPage) pages.get(0);
		return firstPage.url;
	}
	
	/**
	 * Returns this person's email address: first login + "@" + first domain
	 * when both lists are non-empty, otherwise the contact record's "Email"
	 * value.
	 */
	public String getEmail () {
		String recordedEmail = contactRecord.getFirstValue ("Email");
		boolean haveLoginAndDomain = !logins.isEmpty() && !domains.isEmpty();
		if (!haveLoginAndDomain) {
			return recordedEmail;
		}
		String user = ((CountedString) logins.get (0)).str;
		String host = ((CountedString) domains.get (0)).str;
		return user + "@" + host;
	}
	
	/**
	 * Splits the first name string into three parts, each with its first
	 * letter capitalised.
	 *
	 * A three-word name is returned as is; a two-word name is returned with an
	 * empty middle entry.  Any other word count yields null, as does a missing
	 * name: getFirstName() may return null (toString() handles that case), and
	 * the callers of this method already treat a null result as "skip this
	 * person".
	 *
	 * @return a three-element array, or null when the name is missing or does
	 *         not consist of two or three space-separated words
	 */
	public String[] getNames () {
		String fullName = this.getFirstName();
		if (fullName == null) {
			return null;
		}
		String[] names = firstLetterCapital (fullName.split(" "));
		if (names.length == 3) {
			return names;
		}
		if (names.length == 2) {
			return new String[] { names[0], "", names[1] };
		}
		return null;
	}
	/**
	 * Upper-cases the first character of every entry, in place.
	 *
	 * Null and empty entries are left untouched; previously an empty entry
	 * (for example the token between two consecutive spaces after
	 * <code>split(" ")</code>) raised StringIndexOutOfBoundsException and a
	 * null entry raised NullPointerException.
	 *
	 * @param names array to modify
	 * @return the same array instance that was passed in
	 */
	public static String[] firstLetterCapital(String[] names) {
		for(int i = 0; i < names.length; i++) {
			String name = names[i];
			if (name == null || name.length() == 0) {
				continue;
			}
			names[i] = name.substring(0,1).toUpperCase() + name.substring(1);
		}
		return names;
	}
	
	/**
	 * Prints a summary of this person (names, logins, domains, occurrence
	 * count, in-links and out-links) to standard output.
	 */
	public void printPersonalInfo() {
		System.out.println("Person:");

		// Each list is printed as a label followed by comma-separated entries.
		String[] labels = { "\tNames:", "\tLogins:", "\tDomains:" };
		Vector[] lists = { names, logins, domains };
		for (int k = 0; k < labels.length; k++) {
			System.out.print(labels[k]);
			Vector entries = lists[k];
			for (int idx = 0; idx < entries.size(); idx++) {
				String separator = (idx == 0) ? " " : ", ";
				System.out.print(separator + entries.elementAt(idx));
			}
			System.out.println();
		}

		System.out.println("\tNumber of occurrences: " + numberOfOccurrences);
		System.out.println("\tInLinks: " + inLinks);
		System.out.println("\tOutLinks: " + outLinks);
	}
	
	/**
	 * Prints the personal-info summary followed by a "Words:" line with the
	 * key-word vector to standard output.
	 */
	public void print() {
		printPersonalInfo();
		System.out.println("\tWords:" + keyWords);
	}

	/**
	 * Returns a short label of the form <code>name(id)</code>.  When
	 * getFirstName() yields null, the first login is used instead, or the
	 * text "null" when there are no logins.
	 */
	public String toString () {
		String label = getFirstName();
		if (label == null) {
			if (logins.size() > 0) {
				label = ((CountedString)logins.get(0)).str;
			} else {
				label = "null";
			}
		}
		return label + "(" + id + ")";
	}

	// Comparable interface
	/**
	 * Orders people by ascending id.
	 *
	 * The ids are compared explicitly rather than subtracted: a subtraction
	 * overflows when the ids have opposite signs and large magnitudes, which
	 * would report the wrong order.
	 *
	 * @param o the Person to compare with
	 * @return -1, 0 or 1 as this id is less than, equal to or greater than
	 *         the other person's id
	 * @throws ClassCastException if <code>o</code> is not a Person
	 */
	public int compareTo (Object o) {
		int otherId = ((Person)o).getId();
		if (this.id < otherId) {
			return -1;
		}
		return (this.id == otherId) ? 0 : 1;
	}
	
	/**
	 * Custom serialization.  Writes, in this order: the serial version, the
	 * two processed flags, the names / logins / domains lists (each as a
	 * count followed by its elements), keyWords, the topKeyWords list (count
	 * plus elements), contextModel, alphabet, emailLinks, the pages and
	 * contextPages lists (count plus elements), contactRecord, inLinks,
	 * outLinks, numberOfOccurrences and id.  readObject must read in exactly
	 * the same order.
	 *
	 * A null <code>topKeyWords</code> (other code in this class checks for
	 * that case) is written as an empty list instead of raising a
	 * NullPointerException; the stream layout is unchanged.
	 */
	private void writeObject (ObjectOutputStream out) throws IOException {
		int i,size;
		out.writeInt (CURRENT_SERIAL_VERSION);
		out.writeBoolean (processedForWebPages);
		out.writeBoolean (processedForContactInformation);

		size = names.size();
		out.writeInt (size);
		for (i=0; i < size; i++)
			out.writeObject (names.get (i));

		size = logins.size();
		out.writeInt (size);
		for (i=0; i < size; i++)
			out.writeObject (logins.get(i));

		size = domains.size();
		out.writeInt (size);
		for (i=0; i < size; i++)
			out.writeObject (domains.get(i));

		out.writeObject (keyWords);

		// topKeyWords may never have been set; serialize that as zero entries
		size = (topKeyWords == null) ? 0 : topKeyWords.size();
		out.writeInt (size);
		for (i=0; i<size; i++)
			out.writeObject (topKeyWords.get(i));

		out.writeObject (contextModel);

		out.writeObject (alphabet);

		out.writeObject (emailLinks);

		size = pages.size();
		out.writeInt (size);
		for (i=0; i < size; i++)
			out.writeObject (pages.get(i));

		size = contextPages.size();
		out.writeInt (size);
		for (i=0; i < size; i++)
			out.writeObject (contextPages.get(i));

		out.writeObject (contactRecord);

		out.writeObject (inLinks);
		out.writeObject (outLinks);

		out.writeInt (numberOfOccurrences);
		out.writeInt (id);
	}
	
	
	/**
	 * Custom deserialization; reads the fields back in exactly the order in
	 * which writeObject emits them.  Each list is stored as an element count
	 * followed by that many objects.
	 */
	private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
		// Serial version written first by writeObject; consumed but not inspected.
		in.readInt ();
		processedForWebPages = in.readBoolean ();
		processedForContactInformation = in.readBoolean ();

		int nameCount = in.readInt();
		names = new Vector (nameCount);
		for (int k = 0; k < nameCount; k++) {
			CountedString name = (CountedString) in.readObject();
			names.add (name);
		}

		int loginCount = in.readInt();
		logins = new Vector (loginCount);
		for (int k = 0; k < loginCount; k++) {
			CountedString login = (CountedString) in.readObject();
			logins.add (login);
		}

		int domainCount = in.readInt();
		domains = new Vector (domainCount);
		for (int k = 0; k < domainCount; k++) {
			CountedString domain = (CountedString) in.readObject();
			domains.add (domain);
		}

		keyWords = (AugmentableFeatureVector) in.readObject();

		int topWordCount = in.readInt();
		topKeyWords = new ArrayList (topWordCount);
		for (int k = 0; k < topWordCount; k++) {
			WeightedString word = (WeightedString) in.readObject();
			topKeyWords.add (word);
		}

		contextModel = (AugmentableFeatureVector) in.readObject();
		alphabet = (Alphabet) in.readObject();
		emailLinks = (HashMap) in.readObject();

		int pageCount = in.readInt();
		pages = new Vector (pageCount);
		for (int k = 0; k < pageCount; k++) {
			WebPage page = (WebPage) in.readObject ();
			pages.add (page);
		}

		int contextPageCount = in.readInt();
		contextPages = new Vector (contextPageCount);
		for (int k = 0; k < contextPageCount; k++) {
			WebPage page = (WebPage) in.readObject ();
			contextPages.add (page);
		}

		contactRecord = (ContactRecord) in.readObject();

		inLinks = (HashSet) in.readObject();
		outLinks = (HashSet) in.readObject();
		numberOfOccurrences = in.readInt();
		id = in.readInt ();
	}

	/** Have we searched for home pages for this person? */
	public boolean processedForWebPages;
	/** Have we tried to extract contact information for this person? */
	public boolean processedForContactInformation;
	/** CountedStrings of names for this person. */
	public Vector names;
	/** CountedStrings of login ids. */
	public Vector logins;
	/** CountedStrings of domains (e.g. umass.edu). */
	public Vector domains;
	/** Vector of key words from web pages. */
	public AugmentableFeatureVector keyWords;
	/** List of WeightedStrings (readObject casts each element to that type);
	 * may be null, which writeVCF checks for. */
	public ArrayList topKeyWords;
	/** Vector of words on pages/emails this person is mentioned (not homepages). */;
	public AugmentableFeatureVector contextModel;
	/** Vocabulary for keyWords and contextModel. */
	public Alphabet alphabet;
	/** Maps Strings (a person's name) to number of times this person
	 * co-occurs with the other person in the header of emails. */
	public HashMap emailLinks;
  /** WebPages that are part of this person's web presence. */
	public Vector pages;
	/** WebPages this person is found on. */
	public Vector contextPages;
	/** Contact information for this person. */
	public ContactRecord contactRecord;
	/** Set of Integers (Person ids).  NOTE(review): presumably the people who
	 * point to this person, since writeVCF labels these entries "PointedFrom"
	 * -- TODO confirm direction. */
	public HashSet inLinks;
	/** Set of Integers (Person ids).  NOTE(review): presumably the people this
	 * person points to, since writeVCF lists these entries as related names
	 * labelled "Friend" -- TODO confirm direction. */
	public HashSet outLinks;
	/** Number of times this person occurs in email/web search. */
	public int numberOfOccurrences;
  /** Unique id; compareTo orders people by it. */
	private int id;
	/** Class version identifier used by Java serialization. */
	private static final long serialVersionUID = 1;
	/** Version number written at the start of the stream by writeObject;
	 * readObject reads it back but does not inspect it. */
	private static final int CURRENT_SERIAL_VERSION = 0;
	
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -