TUIGraph.java
/* Copyright (C) 2002 Dept. of Computer Science, Univ. of Massachusetts, Amherst
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This program toolkit is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.
   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. For more details see the GNU General
   Public License and the file README-LEGAL.
   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

/** @author Ben Wellner */

package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;

import edu.umass.cs.mallet.base.classify.*;
import edu.umass.cs.mallet.base.pipe.*;
import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;
import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.*;
import edu.umass.cs.mallet.projects.seg_plus_coref.graphs.*;
import salvo.jesus.graph.*;

import java.io.*;
import java.lang.reflect.Array;
import java.util.*;
import java.util.regex.*;

public class TUIGraph {

  public static final String[] pronouns = new String[] {
    "He", "he", "Him", "him", "His", "his", "She", "she", "Her", "her",
    "hers", "it", "It", "its", "Its", "itself", "himself", "herself"};
  public static final int pronounsSize = 18; // == pronouns.length

  public static void main (String[] args) {
    // Identity check on boxed Integers: "new" always allocates distinct
    // objects, so this comparison is false and "INTERESTING" never prints.
    if (new Integer(4) == new Integer(4))
      System.out.println("INTERESTING");

    String trainingDataPath;
    String testDataPath;
    if (args.length != 2) {
      // Fall back to hard-coded defaults when paths are not supplied.
      // System.exit(-1);
      //trainingDataPath = "/odin.mitre.org/tmp/treebank/xml-bigger/train";
      //testDataPath = "/odin.mitre.org/tmp/treebank/xml-bigger/train";
      //trainingDataPath = "c:/JavaDevel/data/toy";
      //testDataPath = "c:/JavaDevel/data/toy";
      trainingDataPath = "/usr/dan/users8/wellner/data/all-docs/test-annotated";
      testDataPath = "/usr/dan/users8/wellner/data/all-docs/mini-train";
    } else {
      trainingDataPath = args[0];
      testDataPath = args[1];
    }

    // These iterators take a directory and iterate over the files it contains.
    XMLFileFilter filter = new XMLFileFilter(".*xml");
    FileIterator fileIterator = new FileIterator(new File(trainingDataPath), (FileFilter) filter);
    FileIterator testFileIterator = new FileIterator(new File(testDataPath), (FileFilter) filter);

    ArrayList pairFilters = new ArrayList();
    pairFilters.add(new MentionPairFilter());

    // These iterators take an iterator over files, and iterate over all
    // (relevant) pairs of DOM nodes in each file.
    //MentionPairIterator pairIterator = new MentionPairIterator (fileIterator, "TB", true, true);
    MentionPairIterator pairIterator =
      new MentionPairIterator(fileIterator, "MUC", true, true, true, pairFilters);
    //MentionPairIterator testPairIterator = new MentionPairIterator (testFileIterator, "TB", true, true);
    MentionPairIterator testPairIterator =
      new MentionPairIterator(testFileIterator, "MUC", true, true, true, pairFilters);

    // This pipeline takes individual pairs as input and produces a feature vector.
    Pipe instancePipe = new SerialPipes(new Pipe[] {
      new Target2Label(),
      new AffixOfMentionPair(),
      new MentionPairHeadIdentical(),
      new MentionPairIdentical(),
      new MentionPairSentenceDistance(),
      new PartOfSpeechMentionPair(),
      new HobbsDistanceMentionPair(),
      new MentionPairAntecedentPosition(),
      new NullAntecedentFeatureExtractor(),
      new ModifierWordFeatures(),
      new MentionPair2FeatureVector()
    });
    /* Pipe instancePipe = new SerialPipes (new Pipe[] {
      new Target2Label(),
      new AffixOfMentionPair (),
      //new MentionPairHeadIdentical(),
      //new MentionPairIdentical(),
      new NullAntecedentFeatureExtractor(),
      new MentionPair2FeatureVector() }); */

    InstanceList ilist = new InstanceList(instancePipe);
    ilist.add(pairIterator);
    InstanceList testList = new InstanceList(instancePipe);
    testList.add(testPairIterator);
    InstanceList[] ilists = ilist.split(new double[] {.7, .3}); // 70/30 split (unused below)

    MaxEnt classifier = (MaxEnt) new MaxEntTrainer().train(ilist);
    // Note: labelF1 reports per-label F1, not accuracy, so the messages say F1.
    System.out.println("Training F1 on \"yes\" = " + new Trial(classifier, ilist).labelF1("yes"));
    System.out.println("Training F1 on \"no\" = " + new Trial(classifier, ilist).labelF1("no"));
    System.out.println("Testing F1 on \"yes\" = " + new Trial(classifier, testList).labelF1("yes"));
    System.out.println("Testing F1 on \"no\" = " + new Trial(classifier, testList).labelF1("no"));

    Set trainingDocuments = MentionPairIterator.partitionIntoDocumentInstances(ilist); // (unused below)
    Set testDocuments = MentionPairIterator.partitionIntoDocumentInstances(testList);
    Clusterer clusterer = new Clusterer();
    int numInstances = testDocuments.size(); // (unused below)
    int documentIndex = 0;                   // (unused below)
    Iterator iter1 = testDocuments.iterator();
    int docIndex = 0;                        // (unused below)

    while (iter1.hasNext()) { // iterates over per-document instance lists
      // keyClusters is only used by the commented-out coalesceNewPair call below.
      LinkedHashSet keyClusters = new LinkedHashSet();
      // Need a MappedGraph because we need to be able to copy it.
      MappedGraph graph = new MappedGraph();
      // Create the graph with all the correct edge weights, using the
      // current (averaged?) lambdas.
      List testMentionPairs = (List) iter1.next();
      KeyClustering keyClustering = collectAllKeyClusters(testMentionPairs);
      keyClustering.print();
      Iterator trPairIterator = testMentionPairs.iterator();
      Clustering mortonClustering = getMortonClustering(testMentionPairs, classifier);
      System.out.println("Number of pairs: " + testMentionPairs.size());
      while (trPairIterator.hasNext()) {
        Instance mentionPair = (Instance) trPairIterator.next();
        //constructEdgesUsingTargets (graph, mentionPair);
        constructEdgesUsingModel(graph, classifier, mentionPair);
        //coalesceNewPair (keyClusters, mentionPair);
      }
      clusterer.setGraph(graph);
      Clustering clustering = clusterer.getClustering(); // this could have memory of graphs
      System.out.println("Model clusters: ");
      clustering.printDetailed();
      //System.out.println("Morton clusters: ");
      //mortonClustering.print();
      System.out.println("Key clusters: ");
      keyClustering.printDetailed();
      ClusterEvaluate eval = new ClusterEvaluate(keyClustering, mortonClustering);
      eval.evaluate();
      System.out.println("F1 morton is : " + eval.getF1());
      ClusterEvaluate eval1 = new ClusterEvaluate(keyClustering, clustering);
      eval1.evaluate();
      System.out.println("F1 using model is : " + eval1.getF1());
      // Sanity check: the key clustering scored against itself should give F1 = 1.0.
      ClusterEvaluate eval2 = new ClusterEvaluate(keyClustering, keyClustering);
      eval2.evaluate();
      System.out.println("F1 using keykey is : " + eval2.getF1());
      System.out.println("Pairwise key:morton");
      PairEvaluate pairEval1 = new PairEvaluate(keyClustering, mortonClustering);
      pairEval1.evaluate();
      System.out.println("Morton pairF1: " + pairEval1.getF1());
      System.out.println("Pairwise key:model");
      PairEvaluate pairEval2 = new PairEvaluate(keyClustering, clustering);
      pairEval2.evaluate();
      System.out.println("Model pairF1: " + pairEval2.getF1());
      System.out.println("\n\n Error analysis: MORTON");
      eval.printErrors(true);
      System.out.println("\n\n Error analysis: Model");
      eval1.printErrors(true);
      System.out.println("Mapping: ");
      graph.printMap();
      //System.out.println("Graph:" + graph.getGraph());
    }
  }

  public static Clustering getMortonClustering (List trainingMentionPairs, Classifier classifier) {
    MortonClustering mortClustering = new MortonClustering();
    Iterator iter = trainingMentionPairs.iterator();
    Mention curRef = null;
    Mention bestAntecedent = null;
    double bestValue = -10000.0;
    double edgeVal = -10000.0;
    while (iter.hasNext()) {
      Instance inst = (Instance) iter.next();
      MentionPair pair = (MentionPair) inst.getSource();
      LabelVector labelVec = classifier.classify(inst).getLabelVector();
      Mention ref = pair.getReferent();
      Mention ant = pair.getAntecedent();
      //if ((referentPronoun (ref))) {
      //if ((referentPronoun (ref)) || ((referentNNP(ref) && (ant != null) && referentNNP(ant)))) {
      //if (false) {
      if (true) {
        // Use the classifier's "yes" score as the edge value for this pair.
        for (int i = 0; i < labelVec.singleSize(); i++) {
          if (labelVec.labelAtLocation(i).toString().equals("yes"))
            edgeVal = labelVec.valueAtLocation(i);
        }
      } else if (pair.getEntityReference() != null) {
        edgeVal = 1.0; // automatically add
        //mortClustering.addToClustering(ref,ant);
        //System.out.println("Edge - " + edgeVal);
        //if (bestAntecedent != null)
        //  System.out.println(" -- best " + bestAntecedent.getString());
      } else {
        edgeVal = -10000.0;
      }
      if (ref != curRef) { // new referent: commit the best antecedent found so far
        bestValue = -10000.0;
        if (curRef != null) {
          if (bestAntecedent != null) {
            mortClustering.addToClustering(curRef, bestAntecedent);
            System.out.println("merging: " + curRef.getString() + ":" + bestAntecedent.getString());
          } else {
            mortClustering.addToClustering(curRef);
            System.out.println("merging: " + curRef.getString() + ":NULL");
          }
        }
        curRef = ref;
        if (edgeVal > bestValue) {
          bestAntecedent = ant;
          bestValue = edgeVal;
        } else
          bestAntecedent = null;
      } else {
        if (edgeVal > bestValue) {
          /* if ((bestAntecedent != null) && (ant != null)) {
            System.out.println(":: " + curRef.getString() + "-"
              + bestAntecedent.getString() + "(" + bestValue + ")"
              + " to " + ant.getString() + "(" + edgeVal + ")");
          } */
          bestAntecedent = ant;
          bestValue = edgeVal;
        }
      }
    }
    // Commit the final referent's best antecedent.
    if (bestAntecedent != null) {
      mortClustering.addToClustering(curRef, bestAntecedent);
    }
    // (Closing lines reconstructed from context: the original listing is
    // truncated above this point.)
    return mortClustering;
  }

  // NOTE: the original listing breaks off here. The helper methods referenced
  // above -- collectAllKeyClusters, constructEdgesUsingModel,
  // constructEdgesUsingTargets, coalesceNewPair, referentPronoun, referentNNP
  // -- are not included.
}
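For reference, the core of getMortonClustering is a link-to-best strategy: within each document, every referring mention is linked to the single candidate antecedent with the highest classifier "yes" score, or to nothing if no candidate beats the sentinel floor. Below is a minimal standalone sketch of that selection step, using plain strings and hard-coded scores instead of MALLET's Mention and MentionPair types; the class name and data are hypothetical.

class BestAntecedentSketch {
  public static void main(String[] args) {
    // Hypothetical candidate antecedents for one referent, with classifier
    // "yes" scores; in TUIGraph these come from classify(inst).getLabelVector().
    String[] candidates = { "John", "the car", "Mary" };
    double[] scores     = { 0.80,   0.10,      0.35 };

    String bestAntecedent = null;
    double bestValue = -10000.0; // same sentinel floor the code above uses
    for (int i = 0; i < candidates.length; i++) {
      if (scores[i] > bestValue) {
        bestValue = scores[i];
        bestAntecedent = candidates[i];
      }
    }
    // addToClustering(referent, bestAntecedent) would then merge the two
    // mentions into one coreference cluster.
    System.out.println("merging referent with: " + bestAntecedent);
  }
}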
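A side note on the "new Integer(4) == new Integer(4)" check at the top of main: the == operator on boxed Integers compares object identity rather than numeric value, so that branch can never fire. A short standalone demo (not part of the MALLET sources):

class IntegerIdentityDemo {
  public static void main(String[] args) {
    Integer a = new Integer(4);      // "new" always allocates a fresh object
    Integer b = new Integer(4);
    System.out.println(a == b);      // false: two distinct objects
    System.out.println(a.equals(b)); // true: same numeric value
    Integer c = Integer.valueOf(4);  // valueOf may reuse the -128..127 cache
    Integer d = Integer.valueOf(4);
    System.out.println(c == d);      // true on standard JVMs (cached range)
  }
}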