📄 query.java
字号:
/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/** Query to Google.
* @author Ron Bekkerman <A HREF="mailto:ronb@cs.umass.edu">ronb@cs.umass.edu</A>
*/
package edu.umass.cs.mallet.projects.dex.web;
import edu.umass.cs.mallet.projects.dex.types.*;
import java.util.Vector;
import java.util.regex.*;
/**
 * Builds quoted-phrase search query strings for a {@link Person}, optionally
 * restricted to a host suffix with the {@code site:} operator.
 *
 * <p>Collections are raw {@link Vector}s to stay compatible with existing
 * callers; name and domain vectors are read as {@code CountedString} elements
 * and query vectors receive {@code String} elements.
 */
public class Query {

  /** Splits a host name on literal dots; compiled once instead of per domain. */
  private static final Pattern DOT = Pattern.compile("\\.");

  /** At most this many entries from the head of a person's name list are used. */
  private static final int MAX_NAMES = 3;

  /** Persons whose occurrence count does not exceed this value get no queries. */
  private final int minOccurrences;

  /**
   * @param num occurrence threshold; {@link #buildQueries(Person)} returns an
   *            empty result for persons with {@code num} occurrences or fewer
   */
  public Query(int num) {
    minOccurrences = num;
  }

  /**
   * Appends one {@code "name" site:suffix} query per distinct domain suffix.
   *
   * <p>Each domain is reduced to its last {@code numOfFields} dot-separated
   * fields. Domains with fewer fields are skipped, and a suffix that was
   * already produced earlier in this call is not emitted a second time.
   * Every emitted query is also printed to standard output.
   *
   * @param queries     vector the new query strings are appended to (mutated)
   * @param name        name to search for; wrapped in double quotes
   * @param domains     vector of {@code CountedString}; only {@code str} is read
   * @param numOfFields number of trailing domain fields to keep
   * @return the same {@code queries} instance that was passed in
   */
  public Vector buildQueriesWithDomainFields(Vector queries, String name,
                                             Vector domains, int numOfFields) {
    // Suffixes already turned into a query during this call.
    Vector seenDomains = new Vector();
    for (int i = 0; i < domains.size(); i++) {
      String fullDomain = ((CountedString) domains.elementAt(i)).str;
      String domain = lastFields(fullDomain, numOfFields);
      if (domain == null) {
        // Not enough fields to build the requested suffix.
        continue;
      }
      // The original code ran an inner loop whose `continue` only advanced
      // that inner loop, so duplicates were never actually skipped. Skip the
      // whole domain here instead.
      if (seenDomains.contains(domain)) {
        continue;
      }
      seenDomains.add(domain);
      String query = "\"" + name + "\" site:" + domain;
      System.out.println("Query: " + query);
      queries.add(query);
    }
    return queries;
  }

  /**
   * Builds all queries for one person, in this order: for each of the first
   * {@value #MAX_NAMES} names, site-restricted queries with 3-field and then
   * 2-field domain suffixes; then unrestricted queries for those names that
   * contain a space; finally 3-field site-restricted queries for the surname,
   * when {@code person.getSurname()} is non-null.
   *
   * @param person the person to build queries for
   * @return a new vector of query strings; empty when the person's occurrence
   *         count is not above the threshold or the person has no names
   */
  public Vector buildQueries(Person person) {
    Vector queries = new Vector();
    if (person.getNumberOfOccurrences() <= minOccurrences) {
      // Rarely occurring people are not queried.
      return queries;
    }
    if (person.names.size() == 0) {
      // Nothing to search for.
      return queries;
    }

    int nameCount = Math.min(person.names.size(), MAX_NAMES);

    // Names combined with domain suffixes.
    for (int i = 0; i < nameCount; i++) {
      String name = ((CountedString) person.names.elementAt(i)).str;
      queries = buildQueriesWithDomainFields(queries, name, person.domains, 3);
      queries = buildQueriesWithDomainFields(queries, name, person.domains, 2);
    }

    // Names on their own; names without a space are skipped.
    for (int i = 0; i < nameCount; i++) {
      String name = ((CountedString) person.names.elementAt(i)).str;
      if (name.indexOf(" ") == -1) {
        continue;
      }
      String query = "\"" + name + "\"";
      System.out.println("Query: " + query);
      queries.add(query);
    }

    // Surname only, combined with 3-field domain suffixes.
    String surname = person.getSurname();
    if (surname != null) {
      queries = buildQueriesWithDomainFields(queries, surname, person.domains, 3);
    }
    return queries;
  }

  /**
   * Returns the last {@code numOfFields} dot-separated fields of
   * {@code domain}, re-joined with dots, or {@code null} when the domain has
   * fewer fields than requested or splits into no fields at all.
   */
  private static String lastFields(String domain, int numOfFields) {
    String[] fields = DOT.split(domain);
    // The length == 0 check covers dot-only input, which would otherwise
    // index fields[-1] when numOfFields <= 0.
    if (fields.length == 0 || fields.length < numOfFields) {
      return null;
    }
    StringBuffer suffix = new StringBuffer();
    for (int j = numOfFields; j > 1; j--) {
      suffix.append(fields[fields.length - j]).append('.');
    }
    suffix.append(fields[fields.length - 1]);
    return suffix.toString();
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -