TUIGraph.java
/* Copyright (C) 2002 Dept. of Computer Science, Univ. of Massachusetts, Amherst
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This program toolkit is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.
   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. For more details see the GNU General
   Public License and the file README-LEGAL.
   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

/** @author Ben Wellner */

package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;

import edu.umass.cs.mallet.base.classify.*;
import edu.umass.cs.mallet.base.pipe.*;
import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;
import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.graphs.*;
import salvo.jesus.graph.*;

import java.io.*;
import java.lang.reflect.Array;
import java.util.*;
import java.util.regex.*;

public class TUIGraph {

  public static final String[] pronouns = new String[] {
    "He", "he", "Him", "him", "His", "his", "She", "she", "Her", "her",
    "hers", "it", "It", "its", "Its", "itself", "himself", "herself"};
  public static final int pronounsSize = 18; // == pronouns.length

  public static void main (String[] args) {
    // Identity check on boxed Integers: "new" always allocates distinct
    // objects, so this comparison is false and "INTERESTING" never prints.
    if (new Integer(4) == new Integer(4))
      System.out.println("INTERESTING");

    String trainingDataPath;
    String testDataPath;
    if (args.length != 2) {
      // Fall back to hard-coded defaults when paths are not supplied.
      // System.exit(-1);
      //trainingDataPath = "/odin.mitre.org/tmp/treebank/xml-bigger/train";
      //testDataPath = "/odin.mitre.org/tmp/treebank/xml-bigger/train";
      //trainingDataPath = "c:/JavaDevel/data/toy";
      //testDataPath = "c:/JavaDevel/data/toy";
      trainingDataPath = "/usr/dan/users8/wellner/data/all-docs/test-annotated";
      testDataPath = "/usr/dan/users8/wellner/data/all-docs/mini-train";
    } else {
      trainingDataPath = args[0];
      testDataPath = args[1];
    }

    // These iterators take a directory and iterate over the files it contains.
    XMLFileFilter filter = new XMLFileFilter(".*xml");
    FileIterator fileIterator = new FileIterator(new File(trainingDataPath), (FileFilter) filter);
    FileIterator testFileIterator = new FileIterator(new File(testDataPath), (FileFilter) filter);

    ArrayList pairFilters = new ArrayList();
    pairFilters.add(new MentionPairFilter());

    // These iterators take an iterator over files, and iterate over all
    // (relevant) pairs of DOM nodes in each file.
    //MentionPairIterator pairIterator = new MentionPairIterator (fileIterator, "TB", true, true);
    MentionPairIterator pairIterator =
      new MentionPairIterator(fileIterator, "MUC", true, true, true, pairFilters);
    //MentionPairIterator testPairIterator = new MentionPairIterator (testFileIterator, "TB", true, true);
    MentionPairIterator testPairIterator =
      new MentionPairIterator(testFileIterator, "MUC", true, true, true, pairFilters);

    // This pipeline takes individual pairs as input and produces a feature vector.
    Pipe instancePipe = new SerialPipes(new Pipe[] {
      new Target2Label(),
      new AffixOfMentionPair(),
      new MentionPairHeadIdentical(),
      new MentionPairIdentical(),
      new MentionPairSentenceDistance(),
      new PartOfSpeechMentionPair(),
      new HobbsDistanceMentionPair(),
      new MentionPairAntecedentPosition(),
      new NullAntecedentFeatureExtractor(),
      new ModifierWordFeatures(),
      new MentionPair2FeatureVector()
    });
    /* Pipe instancePipe = new SerialPipes (new Pipe[] {
      new Target2Label(),
      new AffixOfMentionPair (),
      //new MentionPairHeadIdentical(),
      //new MentionPairIdentical(),
      new NullAntecedentFeatureExtractor(),
      new MentionPair2FeatureVector() }); */

    InstanceList ilist = new InstanceList(instancePipe);
    ilist.add(pairIterator);
    InstanceList testList = new InstanceList(instancePipe);
    testList.add(testPairIterator);
    InstanceList[] ilists = ilist.split(new double[] {.7, .3}); // 70/30 split (unused below)

    MaxEnt classifier = (MaxEnt) new MaxEntTrainer().train(ilist);
    // Note: labelF1 reports per-label F1, not accuracy, so the messages say F1.
    System.out.println("Training F1 on \"yes\" = " + new Trial(classifier, ilist).labelF1("yes"));
    System.out.println("Training F1 on \"no\" = " + new Trial(classifier, ilist).labelF1("no"));
    System.out.println("Testing F1 on \"yes\" = " + new Trial(classifier, testList).labelF1("yes"));
    System.out.println("Testing F1 on \"no\" = " + new Trial(classifier, testList).labelF1("no"));

    Set trainingDocuments = MentionPairIterator.partitionIntoDocumentInstances(ilist); // (unused below)
    Set testDocuments = MentionPairIterator.partitionIntoDocumentInstances(testList);
    Clusterer clusterer = new Clusterer();
    int numInstances = testDocuments.size(); // (unused below)
    int documentIndex = 0;                   // (unused below)
    Iterator iter1 = testDocuments.iterator();
    int docIndex = 0;                        // (unused below)

    while (iter1.hasNext()) { // iterates over per-document instance lists
      // keyClusters is only used by the commented-out coalesceNewPair call below.
      LinkedHashSet keyClusters = new LinkedHashSet();
      // Need a MappedGraph because we need to be able to copy it.
      MappedGraph graph = new MappedGraph();
      // Create the graph with all the correct edge weights, using the
      // current (averaged?) lambdas.
      List testMentionPairs = (List) iter1.next();
      KeyClustering keyClustering = collectAllKeyClusters(testMentionPairs);
      keyClustering.print();
      Iterator trPairIterator = testMentionPairs.iterator();
      Clustering mortonClustering = getMortonClustering(testMentionPairs, classifier);
      System.out.println("Number of pairs: " + testMentionPairs.size());
      while (trPairIterator.hasNext()) {
        Instance mentionPair = (Instance) trPairIterator.next();
        //constructEdgesUsingTargets (graph, mentionPair);
        constructEdgesUsingModel(graph, classifier, mentionPair);
        //coalesceNewPair (keyClusters, mentionPair);
      }
      clusterer.setGraph(graph);
      Clustering clustering = clusterer.getClustering(); // this could have memory of graphs
      System.out.println("Model clusters: ");
      clustering.printDetailed();
      //System.out.println("Morton clusters: ");
      //mortonClustering.print();
      System.out.println("Key clusters: ");
      keyClustering.printDetailed();
      ClusterEvaluate eval = new ClusterEvaluate(keyClustering, mortonClustering);
      eval.evaluate();
      System.out.println("F1 morton is : " + eval.getF1());
      ClusterEvaluate eval1 = new ClusterEvaluate(keyClustering, clustering);
      eval1.evaluate();
      System.out.println("F1 using model is : " + eval1.getF1());
      // Sanity check: the key clustering scored against itself should give F1 = 1.0.
      ClusterEvaluate eval2 = new ClusterEvaluate(keyClustering, keyClustering);
      eval2.evaluate();
      System.out.println("F1 using keykey is : " + eval2.getF1());
      System.out.println("Pairwise key:morton");
      PairEvaluate pairEval1 = new PairEvaluate(keyClustering, mortonClustering);
      pairEval1.evaluate();
      System.out.println("Morton pairF1: " + pairEval1.getF1());
      System.out.println("Pairwise key:model");
      PairEvaluate pairEval2 = new PairEvaluate(keyClustering, clustering);
      pairEval2.evaluate();
      System.out.println("Model pairF1: " + pairEval2.getF1());
      System.out.println("\n\n Error analysis: MORTON");
      eval.printErrors(true);
      System.out.println("\n\n Error analysis: Model");
      eval1.printErrors(true);
      System.out.println("Mapping: ");
      graph.printMap();
      //System.out.println("Graph:" + graph.getGraph());
    }
  }

  public static Clustering getMortonClustering (List trainingMentionPairs, Classifier classifier) {
    MortonClustering mortClustering = new MortonClustering();
    Iterator iter = trainingMentionPairs.iterator();
    Mention curRef = null;
    Mention bestAntecedent = null;
    double bestValue = -10000.0;
    double edgeVal = -10000.0;
    while (iter.hasNext()) {
      Instance inst = (Instance) iter.next();
      MentionPair pair = (MentionPair) inst.getSource();
      LabelVector labelVec = classifier.classify(inst).getLabelVector();
      Mention ref = pair.getReferent();
      Mention ant = pair.getAntecedent();
      //if ((referentPronoun (ref))) {
      //if ((referentPronoun (ref)) || ((referentNNP(ref) && (ant != null) && referentNNP(ant)))) {
      //if (false) {
      if (true) {
        // Use the classifier's "yes" score as the edge value for this pair.
        for (int i = 0; i < labelVec.singleSize(); i++) {
          if (labelVec.labelAtLocation(i).toString().equals("yes"))
            edgeVal = labelVec.valueAtLocation(i);
        }
      } else if (pair.getEntityReference() != null) {
        edgeVal = 1.0; // automatically add
        //mortClustering.addToClustering(ref,ant);
        //System.out.println("Edge - " + edgeVal);
        //if (bestAntecedent != null)
        //  System.out.println(" -- best " + bestAntecedent.getString());
      } else {
        edgeVal = -10000.0;
      }
      if (ref != curRef) { // new referent: commit the best antecedent found so far
        bestValue = -10000.0;
        if (curRef != null) {
          if (bestAntecedent != null) {
            mortClustering.addToClustering(curRef, bestAntecedent);
            System.out.println("merging: " + curRef.getString() + ":" + bestAntecedent.getString());
          } else {
            mortClustering.addToClustering(curRef);
            System.out.println("merging: " + curRef.getString() + ":NULL");
          }
        }
        curRef = ref;
        if (edgeVal > bestValue) {
          bestAntecedent = ant;
          bestValue = edgeVal;
        } else
          bestAntecedent = null;
      } else {
        if (edgeVal > bestValue) {
          /* if ((bestAntecedent != null) && (ant != null)) {
            System.out.println(":: " + curRef.getString() + "-"
              + bestAntecedent.getString() + "(" + bestValue + ")"
              + " to " + ant.getString() + "(" + edgeVal + ")");
          } */
          bestAntecedent = ant;
          bestValue = edgeVal;
        }
      }
    }
    // Commit the final referent's best antecedent.
    if (bestAntecedent != null) {
      mortClustering.addToClustering(curRef, bestAntecedent);
    }
    // (Closing lines reconstructed from context: the original listing is
    // truncated above this point.)
    return mortClustering;
  }

  // NOTE: the original listing breaks off here. The helper methods referenced
  // above -- collectAllKeyClusters, constructEdgesUsingModel,
  // constructEdgesUsingTargets, coalesceNewPair, referentPronoun, referentNNP
  // -- are not included.
}
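For reference, the core of getMortonClustering is a link-to-best strategy: within each document, every referring mention is linked to the single candidate antecedent with the highest classifier "yes" score, or to nothing if no candidate beats the sentinel floor. Below is a minimal standalone sketch of that selection step, using plain strings and hard-coded scores instead of MALLET's Mention and MentionPair types; the class name and data are hypothetical.

class BestAntecedentSketch {
  public static void main(String[] args) {
    // Hypothetical candidate antecedents for one referent, with classifier
    // "yes" scores; in TUIGraph these come from classify(inst).getLabelVector().
    String[] candidates = { "John", "the car", "Mary" };
    double[] scores     = { 0.80,   0.10,      0.35 };

    String bestAntecedent = null;
    double bestValue = -10000.0; // same sentinel floor the code above uses
    for (int i = 0; i < candidates.length; i++) {
      if (scores[i] > bestValue) {
        bestValue = scores[i];
        bestAntecedent = candidates[i];
      }
    }
    // addToClustering(referent, bestAntecedent) would then merge the two
    // mentions into one coreference cluster.
    System.out.println("merging referent with: " + bestAntecedent);
  }
}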
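A side note on the "new Integer(4) == new Integer(4)" check at the top of main: the == operator on boxed Integers compares object identity rather than numeric value, so that branch can never fire. A short standalone demo (not part of the MALLET sources):

class IntegerIdentityDemo {
  public static void main(String[] args) {
    Integer a = new Integer(4);      // "new" always allocates a fresh object
    Integer b = new Integer(4);
    System.out.println(a == b);      // false: two distinct objects
    System.out.println(a.equals(b)); // true: same numeric value
    Integer c = Integer.valueOf(4);  // valueOf may reuse the -128..127 cache
    Integer d = Integer.valueOf(4);
    System.out.println(c == d);      // true on standard JVMs (cached range)
  }
}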