heuristicpipe.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 109 行

JAVA
109
字号
package edu.umass.cs.mallet.projects.seg_plus_coref.coreference;import com.wcohen.secondstring.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import edu.umass.cs.mallet.base.util.*;import java.util.*;import java.lang.*;import java.io.*;public class HeuristicPipe extends Pipe {	String[] fields;		public HeuristicPipe (String[] fields) {		this.fields = fields;	}	public boolean isConferencePaper (Citation c1) {		String booktitle = c1.getField(Citation.conference);		if (booktitle.length() > 0)			return true;				booktitle = c1.getField(Citation.booktitle);		if (booktitle.matches(".*proc\\..*") ||				booktitle.matches(".*proceedings.*") ||				booktitle.matches(".*workshop.*") ||				booktitle.matches(".*conference.*"))			return true;		return false;	}	public boolean isJournalPaper (Citation c1) {		String journal = c1.getField(Citation.journal);		if (journal.length() > 0)			return true;		String volume = c1.getField(Citation.volume);		if (volume.length() > 0)			return true;		return false;	}	public boolean isTechPaper (Citation c1) {		String journal = c1.getField(Citation.journal);		if (journal.length() > 0)			return false;		String volume = c1.getField(Citation.volume);		if (volume.length() > 0)			return false;		String tech = c1.getField(Citation.tech);		if (tech.length() > 0)			return true;		return false;	}	public String getPaperType (Citation c1) {		if (isJournalPaper(c1))			return "journal";		else if (isConferencePaper(c1))			return "conference";		else if (isTechPaper(c1))			return "tech";		else			return "none";	}	public Instance pipe (Instance carrier) {		NodePair pair = (NodePair)carrier.getData();		Citation c1 = (Citation)pair.getObject1();		Citation c2 = (Citation)pair.getObject2();		String paperType1 = getPaperType(c1);		String paperType2 = getPaperType(c2);		double authorVal = 0.0;		double titleVal = 0.0;		authorVal = pair.getFeatureValue("trigramTFIDF_author");		titleVal = pair.getFeatureValue("trigramTFIDF_title");				// only set if they're fairly close//		if ( authorVal > 0.2 && titleVal > 0.1) {		//System.out.println("Pair has high venue score!!!");		if (paperType1.equals(paperType2))			pair.setFeatureValue("SamePaperType",1.0);		else			pair.setFeatureValue("DiffPaperType",1.0);		if ((paperType1.equals("conference") && paperType2.equals("journal")) ||				(paperType2.equals("conference") && paperType1.equals("journal")))			pair.setFeatureValue("OneJournalOneConference", 1.0);		else if ((paperType1.equals("conference") && paperType2.equals("tech")) ||						 (paperType2.equals("conference") && paperType1.equals("tech")))			pair.setFeatureValue("OneConferenceOneTech", 1.0);		else if ((paperType1.equals("journal") && paperType2.equals("tech")) ||						 (paperType2.equals("journal") && paperType1.equals("tech")))			pair.setFeatureValue("OneJournalOneTech", 1.0);		/*			else if (paperType1.equals("conference") && paperType2.equals("conference"))			pair.setFeatureValue("BothConferencePapers", 1.0);			else if (paperType1.equals("journal") && paperType2.equals("journal"))			pair.setFeatureValue("BothJournalPapers", 1.0);			else if (paperType1.equals("tech") && paperType2.equals("tech"))			pair.setFeatureValue("BothTechPapers", 1.0);		*/				return carrier;	}	}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?