📄 tui.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
字号:
/* Copyright (C) 2002 Dept. of Computer Science, Univ. of Massachusetts, Amherst   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This program toolkit free software; you can redistribute it and/or   modify it under the terms of the GNU General Public License as   published by the Free Software Foundation; either version 2 of the   License, or (at your option) any later version.   This program is distributed in the hope that it will be useful, but   WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  For more   details see the GNU General Public License and the file README-LEGAL.   You should have received a copy of the GNU General Public License   along with this program; if not, write to the Free Software   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA   02111-1307, USA. *//**	 @author Ben Wellner */package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.*;import edu.umass.cs.mallet.base.types.Instance;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.SerialPipes;import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;import java.io.*;import java.util.*;import java.util.regex.*;public class TUI{    public static void main (String[] args)    {	String trainingDataPath;	String testDataPath;	if (args.length != 2) {	    // System.exit(-1);	    //trainingDataPath = new String ("/odin.mitre.org/tmp/treebank/xml-bigger/train");	    //testDataPath = new String ("/odin.mitre.org/tmp/treebank/xml-bigger/train");	    trainingDataPath = new String("c:/JavaDevel/data/toy");	    testDataPath = new String("c:/JavaDevel/data/toy");	} else {	    trainingDataPath = args[0];	    testDataPath = args[1];	}		// This iterator takes a directory and iterates over the files contained	// in it		XMLFileFilter filter = new XMLFileFilter(".*xml");		FileIterator fileIterator = new FileIterator (new File(trainingDataPath), (FileFilter)filter);	FileIterator testFileIterator = new FileIterator (new File(testDataPath), (FileFilter)filter);		// This iterator takes an iterator over files, and iterates over all (relevant)	// pairs of DOM nodes in each file	MentionPairIterator pairIterator = new MentionPairIterator (fileIterator, "TB");	MentionPairIterator testPairIterator = new MentionPairIterator	    (testFileIterator, "TB");	// This pipeline takes individual pairs as input and produces a feature vector	Pipe instancePipe = new SerialPipes (new Pipe[] {	    new Target2Label(),	    new AffixOfMentionPair (),	    new MentionPairSentenceDistance(),	    new PartOfSpeechMentionPair(),	    new HobbsDistanceMentionPair(),	    new MentionPairAntecedentPosition(),	    new NullAntecedentFeatureExtractor(),	    new ModifierWordFeatures(),	    new MentionPair2FeatureVector ()	});	InstanceList ilist = new InstanceList (instancePipe);	ilist.add (pairIterator);		//InstanceList[] ilists = ilist.split (new double[] {.7, .3});	Classifier classifier = new MaxEntTrainer().train (ilist);	System.out.println ("Training Accuracy on \"yes\" = "+			    new Trial (classifier, ilist).labelF1("yes"));	/*	  System.out.println ("Testing Accuracy on \"yes\" = "	  + new Trial (classifier, ilists[1]).labelF1("yes")); */	System.out.println ("Training Accuracy on \"no\" = "+			    new Trial (classifier, ilist).labelF1("no"));	/*		System.out.println ("Testing Accuracy on \"no\" = "			+ new Trial (classifier, ilists[1]).labelF1("no"));*/	/*	  Iterator it1 = ilist.iterator();	  while (it1.hasNext())	  {	  Instance inst = (Instance)it1.next();	  System.out.println("-------");	  System.out.println(((FeatureVector)inst.getData(instancePipe)).toString());	  System.out.println("---");	  System.out.println(inst.getLabeling().toString());	  }	*/	/*	  Pipe testInstancePipe = new SerialPipes (new Pipe[] {	  //new CharSequence2TokenSequence(" ")	  new Target2Label(),	  new AffixOfMentionPair (),	  new MentionPairSentenceDistance(),	  new PartOfSpeechMentionPair(),	  new HobbsDistanceMentionPair(),	  new MentionPairAntecedentPosition(),	  new NullAntecedentFeatureExtractor(),	  new ModifierWordFeatures(),				  new MentionPair2FeatureVector ()	  });	*/	int curRefIndex = -1;	int candRefIndex;	double curBestValue = 0.0;	Instance curInst;	MentionPair candidatePair;	MentionPair bestPair = null;	Mention antecedent = null;	Mention referent = null;	Mention curKeyRef = null;		LinkedHashSet allCoreferentials = new LinkedHashSet();	LinkedHashSet keyCoreferentials = new LinkedHashSet();	LinkedHashSet responseCoreferentials = new LinkedHashSet();		int numAntecedents = 0;	int numReferents = 0;	int entityId = 0;	int nullAntecedents = 0;		int numCorrect = 0;		while (testPairIterator.hasNext()) {	    Instance carrier = (Instance)testPairIterator.next();	    candidatePair = (MentionPair)carrier.getData();	    curInst = new Instance (candidatePair, null, null, null);	    curInst.getData(instancePipe); // runs instance through the pipeline	    //collect actual labels	    if ((carrier.getTarget().toString().equals("yes")) &&		(!candidatePair.nullPair()) &&		candidatePair.getReferent() != curKeyRef)		// last condition ensures that we only add the first antecedent		// to the key - we have to evaluate pairwise like this		{		      System.out.print(candidatePair.getReferentIndex() + "::");		      if (candidatePair.getAntecedent() != null)			  System.out.println("key: " + candidatePair.getAntecedent().getString()					     + " & " +					     candidatePair.getReferent().getString());		      else			  System.out.println("key: " + "NULL"					     + " & " + candidatePair.getReferent().getString());		    keyCoreferentials.add(candidatePair);		    curKeyRef = candidatePair.getReferent();		}	    if (candidatePair.getReferent() != referent) {		numReferents++;		// condition checks if we have a new referent (i.e. anaphor) remember		// that we're iterating through PAIRS of mentions		if ((referent != null)) {		    if (antecedent != null) {			if (Integer.decode(antecedent.getEntityId()).intValue() == 0) {			    entityId++;			    antecedent.setEntityId(new Integer(entityId).toString());			}			allCoreferentials.add(antecedent);  // a "unique" add			//System.out.println("++ Selected " + antecedent.getString());		    }		    else {			entityId++;			//System.out.println("++ Selected NULL");		    }		    referent.setEntityId(new Integer(entityId).toString());		    allCoreferentials.add(referent);		    if (antecedent != null) // only add non NULL ones now			responseCoreferentials.add(bestPair);  // response coreffed		    // pair		}		referent = candidatePair.getReferent();  // set new referent		//System.out.print("\nNew referent: " + referent.getString());		curRefIndex = candidatePair.getReferentIndex();		curBestValue = 0.0;		antecedent = candidatePair.getAntecedent();		numAntecedents = 0;		// do stuff for a new referent	    } 	    Classification cl = classifier.classify(curInst);	    LabelVector classDistribution = cl.getLabelVector();	    double value = 0.0;	    for (int k=0; k < classDistribution.singleSize(); k++) {		if (classDistribution.labelAtLocation(k).toString().equals("yes"))		    value = classDistribution.valueAtLocation(k);	    }	    /*				if (candidatePair.getAntecedent() != null)		System.out.print(" (" + candidatePair.getAntecedent().getString() + ","		+ value + ")");		else		System.out.print(" (null," + value + ")"); */	    if (value > curBestValue) {		curBestValue = value;		antecedent = candidatePair.getAntecedent();		bestPair = candidatePair;	    }	    numAntecedents++;	}	Iterator i2 = responseCoreferentials.iterator();	while (i2.hasNext()) {	    MentionPair pair2 = (MentionPair)i2.next();	    if (!pair2.nullPair())		System.out.println(pair2.getAntecedent().getString() + " AND " +				   pair2.getReferent().getString() + " with " +				   pair2.getAntecedent().getEntityId() + "," + 				   pair2.getReferent().getEntityId());	    else		System.out.println("NULL AND " +				   pair2.getReferent().getString() + " with " +				   pair2.getReferent().getEntityId());	}	PairWiseEvaluator evaluator = new PairWiseEvaluator (keyCoreferentials,							     responseCoreferentials);	//CorpusOutputter corpusOutputter = new CorpusOutputter (allCoreferentials);	//corpusOutputter.begin();	evaluator.evaluate();	System.out.println("Number of referents is: " + numReferents);	System.out.println("Number of null antecedents: " + nullAntecedents);	System.out.println("Score is : " + (double)evaluator.score());    }}
💿 文件大小 5351 K
👤 上传用户 lihuitao1987
📂 所属分类数学计算
🏷️ 相关标签

#java #机器学习 #分类算法 #文档
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -