📄 mentionpairiterator.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* Copyright (C) 2002 Dept. of Computer Science, Univ. of Massachusetts, Amherst   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This program toolkit free software; you can redistribute it and/or   modify it under the terms of the GNU General Public License as   published by the Free Software Foundation; either version 2 of the   License, or (at your option) any later version.   This program is distributed in the hope that it will be useful, but   WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  For more   details see the GNU General Public License and the file README-LEGAL.   You should have received a copy of the GNU General Public License   along with this program; if not, write to the Free Software   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA   02111-1307, USA. *//**	 @author Ben Wellner */package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;import org.jdom.input.*;import org.xml.sax.*;import org.xml.sax.ext.LexicalHandler;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;import org.jdom.Document;import org.jdom.JDOMException;import org.jdom.*;import org.jdom.output.XMLOutputter;import java.util.*;import java.util.regex.Pattern;import java.util.regex.Matcher;import java.io.File;import java.io.*;import java.net.URI;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.*;public class MentionPairIterator extends AbstractPipeInputIterator{    public static final int NP_LOOKBACK = 20000;  // need to change this to look at sentences        FileIterator fileIterator;    public static final String [] pronouns = new String[] {"He", "he", "Him",							   "him",							   "His", "his",							   "She", "she", "Her",							   "her", "hers", "it",							   "It",							   "its",							   "Its", "itself",							   "himself",							   "herself"};        /*      public static final String [] pronouns = new String[] {"He", "he", "Him",      "him", "They",      "they", "I", "We",      "we", "Our",      "our","His", "his",      "She", "she", "Her",      "her", "hers", "it",      "their", "It",      "Their", "its",      "Its", "itself",      "himself", "herself",      "ourselves",      "themselves"};    */    public static final String [] malePronouns = new String[] {"He", "he", "Him", "him", "His", "his", "himself"};    public static final String [] femalePronouns = new String[] {"She","she","Her","her","hers","herself"};    public static final int pronounsSize = 18;    public static final int numMalePronouns = 7;    public static final int numFemalePronouns = 6;            int        refIndex = 0;    SAXBuilder builder = null;    Document currentDocument;  // JDOM structure    MalletDocument malletDocument; // Mallet document structure, optimized for access    DocumentMentionPairIterator docNodePairIterator;    java.util.Vector    allDocuments;    File       targetDocPath;    boolean   positiveAntecedent = false;    String    sourceType = "";    boolean   addNullAntecedent;    int       numberOfReferents = 0;    boolean   includeProperNouns = false;    boolean   includeEverything = false;    List      filters;    ArrayList    nodePairArray;        public MentionPairIterator (FileIterator fi, String sourceType)    {	this (fi, sourceType, true);    }    public MentionPairIterator (FileIterator fi, String sourceType, boolean addNullAntecedent,				boolean includeNNPs, boolean includeAll, List filters)    {	this.includeProperNouns = includeNNPs;	this.sourceType = sourceType;	this.addNullAntecedent = addNullAntecedent;	this.includeEverything = includeAll;	this.filters = filters;	constructAux(fi, sourceType);    }    public MentionPairIterator (FileIterator fi, String sourceType, boolean addNullAntecedent,				boolean includeNNPs)    {	this.includeProperNouns = includeNNPs;	this.sourceType = sourceType;	this.addNullAntecedent = addNullAntecedent;	constructAux(fi, sourceType);    }        public MentionPairIterator (FileIterator fi, String sourceType, boolean addNullAntecedent)    {	this.sourceType = sourceType;	this.addNullAntecedent = addNullAntecedent;	constructAux(fi, sourceType);    }    private void constructAux (FileIterator fi, String sourceType)    {	fileIterator = fi;	builder = new SAXBuilder();	allDocuments = new java.util.Vector();	if ((currentDocument == null) && fileIterator.hasNext()) {	    try {		File openDocPath = (File)fileIterator.nextInstance().getData();		targetDocPath = new		    File(((String)openDocPath.getAbsolutePath()).concat(".sys"));		currentDocument = builder.build(openDocPath);		malletDocument = new MalletDocument(currentDocument, sourceType);		allDocuments.add(currentDocument);		docNodePairIterator = new DocumentMentionPairIterator();	    } catch (JDOMException e) { e.printStackTrace(); }	}    }        public int getNumReferents ()    {	return numberOfReferents;    }        public java.util.Vector getAllDocuments ()    {	return allDocuments;    }        public MentionPair getNextMentionPairFromDocument()    {	if (docNodePairIterator.hasNext())	    return (MentionPair)docNodePairIterator.next();	else	    return null;    }		        public MentionPair getNextMentionPair ()    {	MentionPair pair = null;	File openDocPath = null;		while (((pair = (MentionPair)docNodePairIterator.next()) == null) && currentDocument != null)	    {		if (fileIterator.hasNext()) {		    try {			openDocPath = (File)fileIterator.nextInstance().getData();			targetDocPath = new			    File(((String)openDocPath.getAbsolutePath()).concat(".sys"));								currentDocument = builder.build(openDocPath);			malletDocument = new MalletDocument (currentDocument, sourceType);		    } catch (JDOMException e) { e.printStackTrace(); }		    docNodePairIterator = new DocumentMentionPairIterator();		}	    }	return pair;    }        public Instance nextInstance()    {	MentionPair pair = null;	String    target = null;	URI      nodePairURI = null;	Mention     referent = null;		if (this.hasNext()) {	    pair = (MentionPair)getNextMentionPair();	    if (pair.nullPair()) {		if (!positiveAntecedent) {		    target = "yes";		}		else {		    target = "no";		}		positiveAntecedent = false;	    } else {		String tval = null;		tval = docNodePairIterator.getTargetValue(pair);		if (tval != null) {		    target = new String("yes");		    pair.setEntityReference(tval);		} else {		    pair.setEntityReference(null);		    target = new String("no");		}		if (target.equals("yes")) {		    positiveAntecedent = true;		}	    }	    try {		nodePairURI = new URI("nodePairURI");	    } catch (Exception e) { e.printStackTrace(); }	}	// include pair as the "source" of this pipe object because we want this to carry	// into the feature vector	return new Instance (pair, target, nodePairURI, null);    }        // Iterator methods    public boolean hasNext() { return (docNodePairIterator.hasNext() || fileIterator.hasNext()); }    public void remove () { throw new UnsupportedOperationException(); }        public class DocumentMentionPairIterator implements Iterator    {	int              pairCount;	int           currentIndex;		public DocumentMentionPairIterator ()	{	    java.util.Vector candPatterns = new java.util.Vector();	    nodePairArray = new ArrayList ();	    //should parameterize this:	    //candPatterns.add(Pattern.compile("[A-Z]*NAME[A-Z]*"));	    //candPatterns.add(Pattern.compile("PRO[A-Z]*"));	    if (sourceType.equals("TB")) {		candPatterns.add(Pattern.compile("NP.*")); // for TreeBank		candPatterns.add(Pattern.compile("lex"));	    } else if (sourceType.equals("MUC")) {		candPatterns.add(Pattern.compile("NG")); // for the MUC data		candPatterns.add(Pattern.compile("COREF")); // for the MUC data	    }	    	    fillNodePairArray (candPatterns);	    nodePairArray = filterPairs();	    pairCount = nodePairArray.size();	    System.out.println("Pair array size = " + pairCount);	    currentIndex = 0;	}	private ArrayList filterPairs ()	{	    ArrayList newList = new ArrayList();	    Iterator i = filters.iterator();	    while (i.hasNext()) {		Filter filter = (Filter)i.next();		Iterator i2 = nodePairArray.iterator();		while (i2.hasNext()) {		    MentionPair pair = (MentionPair)i2.next();		    if (!filter.filters(pair)) {			/*			if (pair.getAntecedent() != null)			    System.out.println("Pair: " + pair.getAntecedent().getString() + " " + pair.getReferent().getString());			else			System.out.println("Pair: " + "NULL" + " " + pair.getReferent().getString()); */			newList.add(pair);		    }		}	    }	    return newList;	}		private boolean compatible (Element n, java.util.Vector patterns)	{	    if (sourceType.equals("TB")) {		List children = n.getChildren();		Iterator iter = children.iterator();		Pattern p1 = Pattern.compile("NP.*");
12 下一页
💿 文件大小 5351 K
👤 上传用户 lihuitao1987
📂 所属分类数学计算
🏷️ 相关标签

#java #机器学习 #分类算法 #文档
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -