📄 maxentsequenceconfidenceestimator.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>*/package edu.umass.cs.mallet.base.fst.confidence;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.fst.*;import java.util.*;/** * Estimates the confidence of a {@link Sequence} extracted by a {@link * Transducer} using a {@link MaxEnt} classifier to classify Sequences * as "correct" or "incorrect." xxx needs some interface work. */public class MaxEntSequenceConfidenceEstimator extends TransducerSequenceConfidenceEstimator{ MaxEntTrainer meTrainer; MaxEnt meClassifier; Pipe pipe; String correct, incorrect; public MaxEntSequenceConfidenceEstimator (Transducer model, double gaussianVariance) { this.model = model; meTrainer = new MaxEntTrainer (gaussianVariance); } public MaxEntSequenceConfidenceEstimator (Transducer model) { this (model, 10.0); } public MaxEnt getClassifier () { return this.meClassifier; } /** Train underlying classifier on <code>ilist</code>. Assumes ilist has targst <code>correct</code> or <code>incorrect</code>. @param ilist training list to build correct/incorrect classifier @param correct "correct" label @param incorrect "incorrect" label */ public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) { this.meClassifier = (MaxEnt) meTrainer.train (ilist); this.pipe = ilist.getPipe (); this.correct = correct; this.incorrect = incorrect; InfoGain ig = new InfoGain (ilist); int igl = Math.min (30, ig.numLocations()); for (int i = 0; i < igl; i++) System.out.println ("InfoGain["+ig.getObjectAtRank(i)+"]="+ig.getValueAtRank(i)); return this.meClassifier; } /** Calculates the confidence in the tagging of an {@link Instance}. */ public double estimateConfidenceFor (Instance instance, Object[] startTags, Object[] inTags) { Classification c = null; if (instance.getPipe() != this.pipe) c = this.meClassifier.classify (new SequenceConfidenceInstance (instance)); else c = this.meClassifier.classify (instance); return c.getLabelVector().value (this.correct); } public PipedInstanceWithConfidence[] rankPipedInstancesByConfidence (InstanceList ilist, Object[] startTags, Object[] continueTags) { ArrayList confidenceList = new ArrayList (); for (int i=0; i < ilist.size(); i++) { Instance instance = ilist.getInstance (i); boolean correctInstance = ((Labeling)instance.getTarget()).getBestLabel().toString().equals (this.correct); System.err.println ("Instance is " + (correctInstance ? "correct" : "incorrect")); confidenceList.add (new PipedInstanceWithConfidence (instance, estimateConfidenceFor (instance, startTags, continueTags), correctInstance)); } Collections.sort (confidenceList); PipedInstanceWithConfidence[] ret = new PipedInstanceWithConfidence[1]; ret = (PipedInstanceWithConfidence[]) confidenceList.toArray (ret); return ret; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -