📄 parseprofile.java
字号:
return textFound;
}
/**
 * Scans forward through the nodes produced by {@code l} for a tag whose
 * attribute {@code tagAttName} equals {@code pattern} (compared
 * case-insensitively), skips any tags that directly follow it, and returns
 * the text of the first non-tag node after them.
 *
 * The scan stops early, returning an empty string, when a node's text is a
 * section separator (as decided by {@code isASeperator}), when the node
 * source is exhausted, when {@code nodeCount} reaches {@code maxNodeCount},
 * or when the parser throws.
 *
 * @param tagAttName
 *            attribute name to inspect, e.g. "id" or "class".
 * @param pattern
 *            attribute value to look for (case-insensitive).
 * @return the text of the node following the matching tag, or the empty
 *         string when no matching tag was found.
 */
protected String getTextByTagAtt(String tagAttName, String pattern) {
    try {
        n = l.nextNode();
        while (n != null && nodeCount < maxNodeCount) {
            if (isASeperator(n.getText())) {
                // Step past the separator so the next scan starts after it.
                n = l.nextNode();
                return "";
            }
            if (n instanceof Tag) {
                String tagAttValue = ((Tag) n).getAttribute(tagAttName);
                if (tagAttValue != null && tagAttValue.equalsIgnoreCase(pattern)) {
                    // Skip the matching tag and any tags nested right after it.
                    while (n instanceof Tag) {
                        n = l.nextNode();
                        nodeCount++;
                    }
                    return n == null ? "" : n.getText();
                }
            }
            n = l.nextNode();
            nodeCount++;
        }
    } catch (ParserException e) {
        e.printStackTrace();
        return "";
    }
    // No tag matched: report "nothing found" instead of the text of whatever
    // unrelated node happened to be scanned last.
    return "";
}
/**
 * Extracts the raw page text that starts at the first occurrence of
 * {@code startString} at or after the current position of {@code l} and
 * ends just before the next occurrence of {@code endString}, with
 * {@code <BR>} / {@code <br>} replaced by newlines.
 *
 * @param startString
 *            marker where the extract begins (included in the result).
 * @param endString
 *            marker where the extract ends (excluded from the result).
 * @return the extracted text, or the empty string when either marker
 *         cannot be found.
 */
protected String getMultiLineRaw(String startString, String endString) {
    int start = page.indexOf(startString, l.getPosition());
    if (start < 0) {
        return "";
    }
    // Look for the end marker from the start marker onwards, so an earlier
    // occurrence of endString cannot produce an inverted range.
    int end = page.indexOf(endString, start);
    if (end < 0) {
        return "";
    }
    String firstField = page.substring(start, end);
    return firstField.replace("<BR>", "\n").replace("<br>", "\n");
}
/**
 * Concatenates the text of consecutive {@code Text} nodes read from
 * {@code l} until a tag named "P" (case-insensitive) is met, the node
 * source is exhausted, {@code nodeCount} has reached {@code maxNodeCount},
 * or the parser fails.
 *
 * NOTE(review): this loop does not advance {@code nodeCount} itself, so the
 * {@code maxNodeCount} check only reflects nodes counted elsewhere.
 *
 * @return the collected text passed through {@code Util.cleanString}.
 */
protected String getMultiLine() {
    StringBuilder sb = new StringBuilder();
    do {
        try {
            n = l.nextNode();
        } catch (ParserException e) {
            e.printStackTrace();
            // n still holds the previous node here; stop rather than
            // appending it again and possibly looping forever.
            break;
        }
        if (n instanceof Text) {
            sb.append(n.getText());
        }
        if (n instanceof Tag && ((Tag) n).getTagName().equalsIgnoreCase("P")) {
            break;
        }
    } while (n != null && nodeCount < maxNodeCount);
    return Util.cleanString(sb.toString());
}
}
/**
 * Section parser for the block keyed "experience". Each entry found is
 * turned into a {@code Job} and pushed onto {@code jobs}.
 */
class JobSection extends Section {
    public JobSection() {
        sectionStartKey = "experience";
        super.separators.add("HR");
        super.separators.add("hr /");
    }

    /**
     * Reads jobs one after another until no further "title" element is
     * found, then polls {@code sequence}.
     *
     * @throws SQLException
     *             declared by the original signature; kept for callers.
     */
    public void parse() throws SQLException {
        if (!super.navigateToSection()) {
            return;
        }
        String firstField;
        // Compare by content: an empty result ends the loop.
        while (!"".equals(firstField = getTextByTagAtt("class", "title"))) {
            Job job = new Job(profile.person);
            job.role = Util.cleanString(firstField);
            job.organization_name = Util.cleanString(getTextByTagAtt(
                    "class", "org summary"));

            String[] orgDetails = Util.cleanString(
                    getTextByTagAtt("class", "organization-details"))
                    .split(";");
            for (int i = 0; i < orgDetails.length; i++) {
                orgDetails[i] = Util.cleanString(orgDetails[i]);
            }
            if (orgDetails.length == 4) {
                // All four details present: they are taken positionally.
                job.organization_type = orgDetails[0];
                job.organization_size = orgDetails[1];
                job.organization_stock_ticker = orgDetails[2];
                job.industry = orgDetails[3];
            } else if (orgDetails.length < 4) {
                // Some details missing: classify each one by its content.
                for (String detail : orgDetails) {
                    if (detail.contains("industry")) {
                        job.industry = detail;
                    } else if (detail.contains("employees")
                            || detail.equalsIgnoreCase("Myself Only")) {
                        job.organization_size = detail;
                    } else if (detail.toUpperCase().equals(detail)) {
                        // A value with no lower-case letters is taken as the ticker.
                        job.organization_stock_ticker = detail;
                    } else {
                        job.organization_type = detail;
                    }
                }
            }

            job.start_date = Util.parseDate(getDateTag("dtstart", 5));
            job.end_date = Util.parseDate(getDateTag("dtend", 5));

            String act = getTextByTagAtt2("description", "/li");
            if (!act.equals("")) {
                job.description = Util.cleanString(act) + "\n"
                        + getMultiLine();
            }
            if (!"".equals(job.role)) {
                jobs.push(job);
            }
        }
        sequence.poll();
    }
}
/**
 * Section parser for the block keyed "education". Each entry found is
 * turned into a {@code School} and pushed onto {@code schools}.
 */
class SchoolSection extends Section {
    public SchoolSection() {
        super.sectionStartKey = "education";
        super.separators.add("HR");
        super.separators.add("hr /");
    }

    /**
     * Reads schools one after another until no further
     * "summary fn org" element is found, then polls {@code sequence}.
     */
    public void parse() {
        if (!super.navigateToSection()) {
            return;
        }
        String firstField;
        // Compare by content: an empty result ends the loop.
        while (!"".equals(firstField = getTextByTagAtt("class", "summary fn org"))) {
            School school = new School(profile.person);
            school.university = Util.cleanString(firstField);

            // Every field lookup below restarts from the same position, so
            // the fields may appear in any order after the school name.
            int pos = l.getPosition();
            school.degree = Util.cleanString(getTextByTagAtt2("degree",
                    "/li"));
            l.setPosition(pos);
            school.major = Util
                    .cleanString(getTextByTagAtt2("major", "/li"));
            l.setPosition(pos);
            school.start_date = Util.parseDate(getDateTag("dtstart", 20));
            l.setPosition(pos);
            school.end_date = Util.parseDate(getDateTag("dtend", 40));
            l.setPosition(pos);

            String act = getTextByTagAtt2("activities-societies", "/li");
            if (!act.equals("")) {
                // Advance six nodes; presumably the activities text sits at
                // that offset from the label - TODO confirm against the markup.
                for (int i = 0; i < 6; i++) {
                    try {
                        n = l.nextNode();
                    } catch (ParserException e) {
                        e.printStackTrace();
                    }
                }
                school.activities = n == null ? null : Util.cleanString(n
                        .getText());
            }
            if (!"".equals(school.university)) {
                schools.push(school);
            }
        }
        sequence.poll();
    }
}
/**
 * Section parser for the block keyed "additional-information". Collects
 * groups into {@code profile.groups} and honors into {@code profile.honors}.
 */
class AdditionalSection extends Section {
    public AdditionalSection() {
        super.sectionStartKey = "additional-information";
        super.separators.add("HR");
        super.separators.add("hr /");
    }

    /**
     * Parses the groups and honors sub-blocks when their labels occur in the
     * page at or after the current position.
     *
     * NOTE(review): unlike the sibling sections, this method does not call
     * sequence.poll() - confirm that is intended.
     */
    public void parse() {
        if (!super.navigateToSection()) {
            return;
        }
        // indexOf reports "not found" as -1, so any index >= 0 is a hit.
        if (page.indexOf("Groups:", l.getPosition()) >= 0) {
            parseGroups();
        }
        if (page.indexOf("Honors:", l.getPosition()) >= 0) {
            parseHonors();
        }
    }

    /** Adds one Group per non-empty "fn org" entry found. */
    private void parseGroups() {
        String firstField;
        // Result ignored: the call is only used to move to the group list.
        getTextByTagAtt("class", "affiliation vcard");
        // Compare by content: an empty result ends the loop.
        while (!"".equals(firstField = getTextByTagAtt2("fn org", "/ul"))) {
            Group group = new Group(profile.person);
            group.name = Util.cleanString(firstField);
            if (!group.name.equals("")) {
                profile.groups.add(group);
            }
        }
    }

    /**
     * Adds an Honor for the text after the "honors" element, then one more
     * Honor per non-empty line of the text returned by getMultiLine().
     */
    private void parseHonors() {
        String firstField = getTextByTagAtt("class", "honors");
        if ("".equals(firstField)) {
            return;
        }
        Honor honor = new Honor(profile.person);
        honor.name = Util.cleanString(firstField);
        profile.honors.add(honor);

        String sss = getMultiLine();
        if (sss.equals("")) {
            return;
        }
        for (String s : sss.split("\n")) {
            honor = new Honor(profile.person);
            honor.name = Util.cleanString(s);
            if (!honor.name.equals("")) {
                profile.honors.add(honor);
            }
        }
    }
}
/**
 * Section parser for the block keyed "summary". Nothing is extracted from
 * this section; parsing only polls {@code sequence}.
 */
class SummarySection extends Section {
    public SummarySection() {
        sectionStartKey = "summary";
        for (String marker : new String[] { "HR", "hr /" }) {
            separators.add(marker);
        }
    }

    /** Polls {@code sequence} without reading any content. */
    public void parse() {
        sequence.poll();
    }
}
/**
 * Section parser for the block keyed "nameplate". Fills in
 * {@code profile.person} and, unless disabled, the job and school outlines
 * listed in the overview.
 */
class PersonSection extends Section {
    /** When true, the job/school outline in the overview is not parsed. */
    public boolean ignorJobSchoolOutline;

    public PersonSection(boolean ignorJobSchoolOutline) {
        sectionStartKey = "nameplate";
        separators.add("HR");
        separators.add("hr /");
        this.ignorJobSchoolOutline = ignorJobSchoolOutline;
    }

    /**
     * Reads the person's name, headline, location and counters, then
     * optionally the job and school outlines, and finally polls
     * {@code sequence}. Returns early, without polling, when the section or
     * the given name cannot be found.
     */
    public void parse() {
        if (!super.navigateToSection()) {
            return;
        }
        int pos = l.getPosition();

        // Keys are the attribute values to look up; getTextsByTagAtts is
        // expected to replace the empty placeholders with the texts found.
        Hashtable<String, String> ht = new Hashtable<String, String>();
        ht.put("given-name", "");
        ht.put("family-name", "");
        ht.put("headline title", "");
        ht.put("locality", "");
        ht.put("country-name", "");
        ht.put("recommendation-count", "");
        ht.put("connection-count", "");
        getTextsByTagAtts(ht, "hr /");

        // Compare by content: an unfilled given name means nothing to parse.
        if ("".equals(ht.get("given-name"))) {
            return;
        }

        Person person = profile.person;
        person.first_name = ht.get("given-name");
        person.last_name = ht.get("family-name");
        person.career_heading = ht.get("headline title");
        person.location = ht.get("locality") + ht.get("country-name");
        person.recommendation_count = Util.getNumberFromString(ht
                .get("recommendation-count"));
        person.connection_count = Util.getNumberFromString(ht
                .get("connection-count"));

        // Rewind so the outline lookup starts from the top of the section.
        l.setPosition(pos);
        if (!ignorJobSchoolOutline) {
            getTextByTagAtt("id", "overview");
            parseJobOutline();
            parseSchoolOutLine();
        }
        sequence.poll();
    }

    /** Parses the "Current" and "Past" job lists when their labels occur. */
    private void parseJobOutline() {
        // indexOf reports "not found" as -1, so any index >= 0 is a hit.
        if (page.indexOf("<dt>Current</dt>", l.getPosition()) >= 0) {
            getSimpleJobsSub();
        }
        if (page.indexOf("<dt>Past</dt>", l.getPosition()) >= 0) {
            getSimpleJobsSub();
        }
    }

    /**
     * Reads one list of "role at organization" items and pushes a Job for
     * each item that splits into exactly two parts around " at ".
     */
    private void getSimpleJobsSub() {
        ArrayList<String> jobsList = getList();
        if (jobsList.isEmpty()) {
            return;
        }
        for (String s : jobsList) {
            String jobDesp = Util.cleanString(s);
            String[] jobArray = jobDesp.split(" at ");
            if (jobArray.length == 2) {
                Job job = new Job(profile.person);
                job.role = jobArray[0];
                job.organization_name = jobArray[1];
                jobs.push(job);
            }
        }
    }

    /** Pushes one School per list item when an "Education" label occurs. */
    private void parseSchoolOutLine() {
        if (page.indexOf("<dt>Education</dt>", l.getPosition()) < 0) {
            return;
        }
        ArrayList<String> schoolsList = getList();
        if (schoolsList.isEmpty()) {
            return;
        }
        for (String s : schoolsList) {
            School school = new School(profile.person);
            school.university = Util.cleanString(s);
            schools.push(school);
        }
    }

    /**
     * Collects the texts returned by getTextByTag("li", "/ul") until it
     * yields an empty string.
     */
    private ArrayList<String> getList() {
        ArrayList<String> list = new ArrayList<String>();
        String item;
        // Compare by content: an empty result ends the loop.
        while (!"".equals(item = getTextByTag("li", "/ul"))) {
            list.add(item);
        }
        return list;
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -