📄 contactrecord.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */package edu.umass.cs.mallet.projects.dex.types;import java.util.Vector;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.regex.Pattern;import java.util.regex.Matcher;import java.io.*;/** Stores contact information about a person */public class ContactRecord implements Serializable { /** arbitrary list of fields and their values*/ private HashMap fields = null; private ArrayList names; // regex for types of fields to perform filtering private static final Pattern PHONE = Pattern.compile (".*[Nn][Uu][Mm][Bb][Ee][Rr].*"); private static final Pattern EMAIL = Pattern.compile ("[Ee][Mm][Aa][Ii][Ll]"); private static final Pattern URL = Pattern.compile (".*[Uu][Rr][Ll]"); private static final Pattern LASTNAME = Pattern.compile ("[Ll][Aa][Ss][Tt][Nn][Aa][Mm][Ee]"); private static final Pattern F_PHONE = Pattern.compile ("[x\0-9\\(\\)\\[\\]\\-\\/\\.]+"); private static final Pattern F_EMAIL = Pattern.compile (".*@.*"); private static final Pattern F_URL = Pattern.compile (".*www\\.|http.*"); private static final Pattern F_LASTNAME = Pattern.compile (".*\\p{Alpha}\\p{Alpha}.*"); public ContactRecord () { this.fields = new HashMap(); this.names = new ArrayList(); } public void freeNames () {this.names = null; this.names = new ArrayList();} public void setNames (ArrayList peeps) {this.names = peeps;} public ArrayList getNames () {return this.names; } public void addName (CountedString n) {names.add (n);} public void addNames (ArrayList a) {names.addAll (a);} public void addAll (String key, ArrayList values) { Vector v = (Vector) fields.get (key); if (v == null) v = new Vector(); for (int i=0; i < values.size(); i++) { String w = (String)values.get(i); w = removeEndline (w); if (filter (key, w)) { v.add (w); } } if (v.size() > 0) this.fields.put (key, v); } public void setFieldValue (String key, String value) { Vector v = (Vector) fields.get (key); if (v == null) v = new Vector(); value = removeEndline (value); if (filter (key, value)) { v.add (value); this.fields.put (key, v); } } public int size () { return numberFields(); } public boolean hasField (String key) { return (((Vector)this.fields.get(key)) != null); } public String getFirstValue (String key) { if (hasField (key)) { Vector v = getFieldValues (key); return (String)v.get (0); } else return ""; } public int numberFields () { return this.fields.keySet().size(); } public Vector getFieldValues (String key) { return (Vector) this.fields.get (key); } /** Prints record in csv format, where each column corresponds to a * field in <code>fields</code>. NOTE - only prints the top listing * for each field.*/ public String toCSV (ArrayList fields) { String ret = ""; for (int i=0; i < fields.size(); i++) { String s = getFirstValue ((String)fields.get (i)); if (s == null) s = " "; s.replaceAll (",", " "); ret += s; if (i != fields.size()-1) ret += ", "; } return ret; } public String toString () { if (this.fields == null) return ""; String ret = ""; Iterator iter = this.fields.keySet().iterator(); while (iter.hasNext()) { String key = (String) iter.next(); Vector v = (Vector) this.fields.get (key); if (v == null) throw new IllegalStateException (key + " not in hash."); ret += key + ": "; for (int i=0; i < v.size(); i++) ret += (String)v.get (i) + " | "; ret += "\n"; } return ret; } private String removeEndline (String s) { return s.replaceAll ("ENDLINE", " ").replaceAll (",", " "); } /** Perform some heuristic filters to catch some obvious errors*/ private boolean filter (String key, String value) { // hack - ENDLINE used as a token in CRF, but shouldn't be in output Matcher mPhone = PHONE.matcher (key); Matcher mEmail = EMAIL.matcher (key); Matcher mURL = URL.matcher (key); Matcher mLastName = LASTNAME.matcher (key); if (mPhone.matches ()) { Matcher fPhone = F_PHONE.matcher (value); if (fPhone.matches()) { //System.err.println (key + ": " + value + " MATCHES"); return true; } else { System.err.println (key + ": " + value + " DOESN'T MATCH"); return false; } } else if (mEmail.matches ()) { Matcher fEmail = F_EMAIL.matcher (value); if (fEmail.matches()) { //System.err.println (key + ": " + value + " MATCHES"); return true; } else { System.err.println (key + ": " + value + " DOESN'T MATCH"); return false; } } else if (mURL.matches ()) { Matcher fURL = F_URL.matcher (value); if (fURL.matches()) { //System.err.println (key + ": " + value + " MATCHES"); return true; } else { System.err.println (key + ": " + value + " DOESN'T MATCH"); return false; } } else if (mLastName.matches ()) { Matcher fLastName = F_LASTNAME.matcher (value); if (fLastName.matches()) { //System.err.println (key + ": " + value + " MATCHES"); return true; } else { System.err.println (key + ": " + value + " DOESN'T MATCH"); return false; } } else return true; } private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; private void writeObject (ObjectOutputStream out) throws IOException { out.writeInt (CURRENT_SERIAL_VERSION); out.writeObject (fields); out.writeInt (names.size()); for (int i=0; i < names.size(); i++) out.writeObject ((CountedString)names.get (i)); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int version = in.readInt (); fields = (HashMap) in.readObject(); int size = in.readInt(); names = new ArrayList(); for (int i=0; i < size; i++) names.add ((CountedString) in.readObject()); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -