MCMaxEntTrainer.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package edu.umass.cs.mallet.base.classify;

import edu.umass.cs.mallet.base.classify.Classifier;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.MatrixOps;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.FeatureVector;
import edu.umass.cs.mallet.base.types.RankedFeatureVector;
import edu.umass.cs.mallet.base.types.Labeling;
import edu.umass.cs.mallet.base.types.LabelVector;
import edu.umass.cs.mallet.base.types.Vector;
import edu.umass.cs.mallet.base.types.FeatureSelection;
import edu.umass.cs.mallet.base.types.FeatureInducer;
import edu.umass.cs.mallet.base.types.ExpGain;
import edu.umass.cs.mallet.base.types.GradientGain;
import edu.umass.cs.mallet.base.types.InfoGain;
import edu.umass.cs.mallet.base.util.MalletLogger;
import edu.umass.cs.mallet.base.util.Maths;
import edu.umass.cs.mallet.base.maximize.Maximizable;
import edu.umass.cs.mallet.base.maximize.Maximizer;
import edu.umass.cs.mallet.base.maximize.tests.*;
import edu.umass.cs.mallet.base.maximize.LimitedMemoryBFGS;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.util.CommandOption;
import edu.umass.cs.mallet.base.util.MalletProgressMessageLogger;
import java.util.logging.*;
import java.util.*;
import java.io.*;

// Does not currently handle instances that are labeled with distributions
// instead of a single label.

/**
 * The trainer for a Maximum Entropy classifier.
 *
 * @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
 */
public class MCMaxEntTrainer extends ClassifierTrainer
	implements Boostable, Serializable
	//implements CommandOption.ListProviding
{
	private static Logger logger =
		MalletLogger.getLogger (MCMaxEntTrainer.class.getName());
	private static Logger progressLogger =
		MalletProgressMessageLogger.getLogger (MCMaxEntTrainer.class.getName()+"-pl");

	int numGetValueCalls = 0;
	int numGetValueGradientCalls = 0;
	int numIterations = 10;

	public static final String EXP_GAIN = "exp";
	public static final String GRADIENT_GAIN = "grad";
	public static final String INFORMATION_GAIN = "info";
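	// Added note (not in the original source): these gain names appear to
	// select the feature-ranking criterion used during feature induction
	// (compare the ExpGain, GradientGain and InfoGain imports above); they
	// are matched against the gain-name argument of trainWithFeatureInduction.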
	// xxx Why does TestMaximizable fail when this variance is very small?
	static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = .1;	// note: used to be 1
	static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
	static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0;
	static final Class DEFAULT_MAXIMIZER_CLASS = LimitedMemoryBFGS.class;

	// CPAL
	boolean usingMultiConditionalTraining = true;
	boolean usingHyperbolicPrior = false;
	double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
	double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
	double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
	Class maximizerClass = DEFAULT_MAXIMIZER_CLASS;
	double generativeWeighting = 1.0;

	// CPAL
	static CommandOption.Boolean usingMultiConditionalTrainingOption = new CommandOption.Boolean
		(MCMaxEntTrainer.class, "useMCTraining", "true|false", true, true,
		 "Use MultiConditional Training", null);

	static CommandOption.Boolean usingHyperbolicPriorOption = new CommandOption.Boolean
		(MCMaxEntTrainer.class, "useHyperbolicPrior", "true|false", false, false,
		 "Use hyperbolic (close to L1 penalty) prior over parameters", null);

	static CommandOption.Double gaussianPriorVarianceOption = new CommandOption.Double
		(MCMaxEntTrainer.class, "gaussianPriorVariance", "FLOAT", true, 10.0,
		 "Variance of the Gaussian prior over parameters", null);

	static CommandOption.Double hyperbolicPriorSlopeOption = new CommandOption.Double
		(MCMaxEntTrainer.class, "hyperbolicPriorSlope", "FLOAT", true, 0.2,
		 "Slope of the (L1 penalty) hyperbolic prior over parameters", null);

	static CommandOption.Double hyperbolicPriorSharpnessOption = new CommandOption.Double
		(MCMaxEntTrainer.class, "hyperbolicPriorSharpness", "FLOAT", true, 10.0,
		 "Sharpness of the (L1 penalty) hyperbolic prior over parameters", null);

	static final CommandOption.List commandOptions =
		new CommandOption.List (
			"MCMaximum Entropy Classifier",
			new CommandOption[] {
				usingHyperbolicPriorOption,
				gaussianPriorVarianceOption,
				hyperbolicPriorSlopeOption,
				hyperbolicPriorSharpnessOption,
				usingMultiConditionalTrainingOption,	// CPAL
			});

	public static CommandOption.List getCommandOptionList ()
	{
		return commandOptions;
	}

	/*
	public MCMaxEntTrainer (Maximizer.ByGradient maximizer)
	{
		this.maximizerByGradient = maximizer;
		this.usingHyperbolicPrior = false;
	}
	*/

	public MCMaxEntTrainer (CommandOption.List col)
	{
		this.usingHyperbolicPrior = usingHyperbolicPriorOption.value;
		this.gaussianPriorVariance = gaussianPriorVarianceOption.value;
		this.hyperbolicPriorSlope = hyperbolicPriorSlopeOption.value;
		this.hyperbolicPriorSharpness = hyperbolicPriorSharpnessOption.value;
		this.usingMultiConditionalTraining = usingMultiConditionalTrainingOption.value;
	}

	public MCMaxEntTrainer ()
	{
		this (false);
	}

	public MCMaxEntTrainer (boolean useHyperbolicPrior)
	{
		this.usingHyperbolicPrior = useHyperbolicPrior;
	}
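	// Added sketch (not in the original source): the two priors configured by
	// the constructors below enter the objective as penalties subtracted from
	// the training log-likelihood, roughly
	//   Gaussian:    sum_i  param_i^2 / (2 * gaussianPriorVariance)
	//   hyperbolic:  (slope / sharpness) * sum_i  log cosh (sharpness * param_i)
	// The hyperbolic term tends toward an L1-style penalty of slope * |param_i|
	// as sharpness grows, which is why the option text calls it "close to L1".
	// The exact expressions live in MaximizableTrainer.getValue(), not shown here.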
	/** Constructs a trainer with a parameter to avoid overtraining.  1.0 is
	 *  usually a reasonable default value. */
	public MCMaxEntTrainer (double gaussianPriorVariance)
	{
		this.usingHyperbolicPrior = false;
		this.gaussianPriorVariance = gaussianPriorVariance;
	}

	// CPAL - added this to do MultiConditionalTraining
	public MCMaxEntTrainer (double gaussianPriorVariance, boolean useMultiConditionalTraining)
	{
		this.usingHyperbolicPrior = false;
		this.usingMultiConditionalTraining = useMultiConditionalTraining;
		this.gaussianPriorVariance = gaussianPriorVariance;
	}

	public MCMaxEntTrainer (double hyperbolicPriorSlope,
	                        double hyperbolicPriorSharpness)
	{
		this.usingHyperbolicPrior = true;
		this.hyperbolicPriorSlope = hyperbolicPriorSlope;
		this.hyperbolicPriorSharpness = hyperbolicPriorSharpness;
	}

	public Maximizable.ByGradient getMaximizableTrainer (InstanceList ilist)
	{
		if (ilist == null)
			return new MaximizableTrainer ();
		return new MaximizableTrainer (ilist, null);
	}

	/**
	 * Specifies the maximum number of iterations to run during a single call
	 * to <code>train</code> or <code>trainWithFeatureInduction</code>.  Not
	 * currently functional.
	 * @return This trainer
	 */
	// XXX Since we maximize before using numIterations, this doesn't work.
	// Is that a bug?  If so, should the default numIterations be higher?
	public MCMaxEntTrainer setNumIterations (int i)
	{
		numIterations = i;
		return this;
	}

	public MCMaxEntTrainer setUseHyperbolicPrior (boolean useHyperbolicPrior)
	{
		this.usingHyperbolicPrior = useHyperbolicPrior;
		return this;
	}

	/**
	 * Sets a parameter to prevent overtraining.  A smaller variance for the prior
	 * means that feature weights are expected to hover closer to 0, so extra
	 * evidence is required to set a higher weight.
	 * @return This trainer
	 */
	public MCMaxEntTrainer setGaussianPriorVariance (double gaussianPriorVariance)
	{
		this.usingHyperbolicPrior = false;
		this.gaussianPriorVariance = gaussianPriorVariance;
		return this;
	}

	public MCMaxEntTrainer setHyperbolicPriorSlope (double hyperbolicPriorSlope)
	{
		this.usingHyperbolicPrior = true;
		this.hyperbolicPriorSlope = hyperbolicPriorSlope;
		return this;
	}

	public MCMaxEntTrainer setHyperbolicPriorSharpness (double hyperbolicPriorSharpness)
	{
		this.usingHyperbolicPrior = true;
		this.hyperbolicPriorSharpness = hyperbolicPriorSharpness;
		return this;
	}

	public Classifier train (InstanceList trainingSet,
	                         InstanceList validationSet,
	                         InstanceList testSet,
	                         ClassifierEvaluating evaluator,
	                         Classifier initialClassifier)
	{
		logger.fine ("trainingSet.size() = " + trainingSet.size());
		MaximizableTrainer mt =
			new MaximizableTrainer (trainingSet, (MCMaxEnt) initialClassifier);
		Maximizer.ByGradient maximizer = new LimitedMemoryBFGS ();
		// CPAL - change the tolerance for large vocab experiments
		((LimitedMemoryBFGS) maximizer).setTolerance (.00001);	// std is .0001
		maximizer.maximize (mt);	// XXX given the loop below, this seems wrong.
		logger.info ("MCMaxEnt ngetValueCalls:" + getValueCalls ()
		             + "\nMCMaxEnt ngetValueGradientCalls:" + getValueGradientCalls ());
//		boolean converged;
//
//		for (int i = 0; i < numIterations; i++) {
//			converged = maximizer.maximize (mt, 1);
//			if (converged)
//				break;
//			else if (evaluator != null)
//				if (!evaluator.evaluate (mt.getClassifier(), converged, i, mt.getValue(),
//				                         trainingSet, validationSet, testSet))
//					break;
//		}
//		TestMaximizable.testValueAndGradient (mt);
		progressLogger.info ("\n");	// progress messages are on one line; move on.
		return mt.getClassifier ();
	}
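	// Added example (not in the original source): a minimal usage sketch.
	// It relies only on the constructors and the five-argument train(...)
	// defined above; the nulls skip the validation set, test set, evaluator
	// and initial classifier. The variance value 1.0 follows the constructor
	// javadoc's suggested default.
	public static Classifier exampleTrain (InstanceList trainingSet)
	{
		MCMaxEntTrainer trainer = new MCMaxEntTrainer (1.0);	// Gaussian prior variance
		return trainer.train (trainingSet, null, null, null, null);
	}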
	/**
	 * <p>Trains a maximum entropy model using feature selection and feature induction
	 * (adding conjunctions of features as new features).</p>
	 *
	 * @param trainingData A list of <code>Instance</code>s whose <code>data</code>
	 * fields are binary, augmentable <code>FeatureVector</code>s
	 * and whose <code>target</code> fields are <code>Label</code>s.
	 * @param validationData [not currently used] As <code>trainingData</code>,
	 * or <code>null</code>.
	 * @param testingData As <code>trainingData</code>, or <code>null</code>.
	 * @param evaluator The evaluator to track training progress and decide whether
	 * to continue, or <code>null</code>.
	 * @param totalIterations The maximum total number of training iterations,