📄 isolatedsegmenttransducercorrector.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */package edu.umass.cs.mallet.base.fst.confidence;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.fst.*;import java.util.ArrayList;import edu.umass.cs.mallet.base.util.MalletLogger;import java.util.logging.*;/** * Corrects a subset of the {@link Segment}s produced by a {@link * Transducer}. It's most useful to find the {@link Segment}s that the * {@link Transducer} is least confident in and correct those using * the true {@link Labeling} * (<code>correctLeastConfidenceSegments</code>). Unlike in {@link * ConstrainedViterbi}, the corrected segment does not affect the * labeling of other segments in the sequence. For comparison. */public class IsolatedSegmentTransducerCorrector implements TransducerCorrector{ private static Logger logger = MalletLogger.getLogger(IsolatedSegmentTransducerCorrector.class.getName()); TransducerConfidenceEstimator confidenceEstimator; Transducer model; public IsolatedSegmentTransducerCorrector (TransducerConfidenceEstimator confidenceEstimator, Transducer model) { this.confidenceEstimator = confidenceEstimator; this.model = model; } public IsolatedSegmentTransducerCorrector (Transducer model) { this (new ConstrainedForwardBackwardConfidenceEstimator (model), model); } /** @param ilist original Transducer InstanceList @param startTags start segment tags (B-) @param continueTags continue segment tags (I-) TransducerConfidenceEstimator} @return a list of {@link Sequence}s corresponding to the corrected tagging of each Instance in <code>ilist</code>. Note that these corrections will not affect tokens outside of the corrected segment. */ public ArrayList correctLeastConfidentSegments (InstanceList ilist, Object[] startTags, Object[] continueTags) { ArrayList correctedPredictionList = new ArrayList (); for (int i=0; i < ilist.size(); i++) { logger.info ("correcting instance# " + i + " / " + ilist.size()); Instance instance = ilist.getInstance (i); Segment[] orderedSegments = new Segment[1]; orderedSegments = confidenceEstimator.rankSegmentsByConfidence (instance, startTags, continueTags); Segment leastConfidentSegment = orderedSegments[0]; logger.fine ("Ordered Segments:\nTrue sequence: " + leastConfidentSegment.getTruth()); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); } // _do not_ run constrained viterbi on this sequence with the // constraint that this segment is tagged correctly. // instead, simply replace the labeling of the corrected // segment. MultiSegmentationEvaluator eval = new MultiSegmentationEvaluator (startTags, continueTags); Sequence truth = leastConfidentSegment.getTruth(); Sequence predicted = leastConfidentSegment.getPredicted(); int numIncorrect = eval.numIncorrectSegments (truth, predicted); String[] sequence = new String[truth.size()]; for (int j=0; j < truth.size(); j++) { if (j <= leastConfidentSegment.getEnd() && j >= leastConfidentSegment.getStart()) sequence[j] = (String)truth.get (j); else sequence[j] = (String) predicted.get (j); } ArraySequence segmentCorrectedOutput = new ArraySequence (sequence); logger.fine ("Original prediction: "); for (int j=0; j < predicted.size(); j++) logger.fine ((String)predicted.get (j) + "\t"); logger.fine ("\nCorrected prediction: "); for (int j=0; j < segmentCorrectedOutput.size(); j++) logger.fine ((String)segmentCorrectedOutput.get (j) + "\t"); logger.fine (""); if (numIncorrect > -1) correctedPredictionList.add (segmentCorrectedOutput); else correctedPredictionList.add (null); } return correctedPredictionList; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -