📄 tuimuc.java

📁 这是一个基于 MALLET(马萨诸塞大学的机器学习工具包,并非 matlab)的 Java 共指消解实验程序。里面有许多内容,请大家慢慢琢磨。
💻 JAVA
字号:
package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.*;import edu.umass.cs.mallet.base.types.Instance;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.SerialPipes;import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;import java.io.*;import java.util.*;import java.util.regex.*;public class TUImuc{	public static void main (String[] args)	{	    String trainingDataPath;	    String testDataPath;		if (args.length != 2) {			// System.exit(-1);		    trainingDataPath = new String ("/odin.mitre.org/tmp/muc/tr/all-docs/training");		    testDataPath = new String ("/odin.mitre.org/tmp/muc/tr/all-docs/test-annotated");		} else {		    trainingDataPath = args[0];		    testDataPath = args[1];		}		// This iterator takes a directory and iterates over the files contained		// in it		XMLFileFilter filter = new XMLFileFilter(".*xml");				FileIterator fileIterator = new FileIterator (new File(trainingDataPath), (FileFilter)filter);		FileIterator testFileIterator = new FileIterator (new File(testDataPath), (FileFilter)filter);		// This iterator takes an iterator over files, and iterates over all (relevant)		// pairs of DOM nodes in each file		MentionPairIterator pairIterator = new MentionPairIterator (fileIterator,																																"MUC", true);		MentionPairIterator testPairIterator = new MentionPairIterator			(testFileIterator, "MUC", true);		// This pipeline takes individual pairs as input and produces a feature vector		Pipe instancePipe = new SerialPipes (new Pipe[] {			new Target2Label(),			new AffixOfMentionPair (),			new MentionPairSentenceDistance(),			new PartOfSpeechMentionPair(),			new HobbsDistanceMentionPair(),			new MentionPairAntecedentPosition(),			new NullAntecedentFeatureExtractor(),			new ModifierWordFeatures(),			new MentionPair2FeatureVector ()		});		InstanceList ilist = new 
InstanceList (instancePipe);		ilist.add (pairIterator);		//InstanceList[] ilists = ilist.split (new double[] {.7, .3});		Classifier classifier = new MaxEntTrainer().train (ilist);		System.out.println ("Training Accuracy on \"yes\" = "+												new Trial (classifier, ilist).labelF1("yes"));		/*		System.out.println ("Testing Accuracy on \"yes\" = "		+ new Trial (classifier, ilists[1]).labelF1("yes")); */		System.out.println ("Training Accuracy on \"no\" = "+												new Trial (classifier, ilist).labelF1("no"));/*		System.out.println ("Testing Accuracy on \"no\" = "			+ new Trial (classifier, ilists[1]).labelF1("no"));*/		//InstanceList testList = new InstanceList (instancePipe);		//testList.add (testPairIterator);		//System.out.println ("Testing Accuracy on \"yes\" = "+		//										new Trial (classifier, testList).labelF1("yes"));		//System.out.println ("Testing Accuracy on \"no\" = "+		//											new Trial (classifier, testList).labelF1("no"));		int curRefIndex = -1;		int candRefIndex;		double curBestValue = 0.0;		Instance curInst;		MentionPair candidatePair;		MentionPair bestPair = null;		Mention antecedent = null;		Mention referent = null;		LinkedHashSet allCoreferentials = new LinkedHashSet();		LinkedHashSet keyCoreferentials = new LinkedHashSet();		LinkedHashSet responseCoreferentials = new LinkedHashSet();		int numAntecedents = 0;		int numReferents = 0;		int entityId = 0;		int nullAntecedents = 0;		int numCorrect = 0;		int numTotalInstances = 0;		// try on training data - check for bugs		while (testPairIterator.hasNext()) {			numTotalInstances++;			Instance carrier = (Instance)testPairIterator.next();			candidatePair = (MentionPair)carrier.getData();			curInst = new Instance (candidatePair, null, null, null);			curInst.getData(instancePipe); // runs instance through the pipeline			//collect actual labels			if ((carrier.getTarget().toString().equals("yes")))			{				keyCoreferentials.add(candidatePair);			}						if (candidatePair.getReferentIndex() != 
curRefIndex) {				numReferents++;				if ((referent != null)) {					if (antecedent != null) {						if (antecedent.getEntityId() == null) {							entityId++;							antecedent.setEntityId(new Integer(entityId).toString());						}						allCoreferentials.add(antecedent);  // a "unique" add						System.out.println("++ Selected " + antecedent.getString());											}					else {						System.out.println("++ Selected NULL");						entityId++;					}					referent.setEntityId(new Integer(entityId).toString());					allCoreferentials.add(referent);					responseCoreferentials.add(bestPair);  // response coreffed					// pair				}				referent = candidatePair.getReferent();				curRefIndex = candidatePair.getReferentIndex();				curBestValue = 0.0;				antecedent = null;				numAntecedents = 0;				// do stuff for a new referent			} else {        // condition indicates that we're looking at possible antecedent still				Classification cl = classifier.classify(curInst);				LabelVector classDistribution = cl.getLabelVector();				double value = 0.0;				for (int k=0; k < classDistribution.singleSize(); k++) {					if (classDistribution.labelAtLocation(k).toString().equals("yes"))						value = classDistribution.valueAtLocation(k);				}				if (value > curBestValue) {					curBestValue = value;					antecedent = candidatePair.getAntecedent();					bestPair = candidatePair;				}				numAntecedents++;			}		}		System.out.println("Total instances inference: " + numTotalInstances);		System.out.println("Total instances training: " + ilist.size());		PairWiseEvaluator evaluator = new PairWiseEvaluator (keyCoreferentials,																												 responseCoreferentials);		evaluator.evaluate();		//CorpusOutputter corpusOutputter = new CorpusOutputter (allCoreferentials);		//corpusOutputter.begin();		System.out.println("Number of referents is: " + numReferents);		System.out.println("Number of null antecedents: " + nullAntecedents);		System.out.println("Score is : " + (double)evaluator.score());	}}
💿 文件大小 5351 K
👤 上传用户 wait2010
📂 所属分类 matlab例程
🏷️ 相关标签

#matlab #java #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -