MaxEntTrainer.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package edu.umass.cs.mallet.base.classify;

import edu.umass.cs.mallet.base.classify.Classifier;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.MatrixOps;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.FeatureVector;
import edu.umass.cs.mallet.base.types.RankedFeatureVector;
import edu.umass.cs.mallet.base.types.Labeling;
import edu.umass.cs.mallet.base.types.LabelVector;
import edu.umass.cs.mallet.base.types.Vector;
import edu.umass.cs.mallet.base.types.FeatureSelection;
import edu.umass.cs.mallet.base.types.FeatureInducer;
import edu.umass.cs.mallet.base.types.ExpGain;
import edu.umass.cs.mallet.base.types.GradientGain;
import edu.umass.cs.mallet.base.types.InfoGain;
import edu.umass.cs.mallet.base.util.MalletLogger;
import edu.umass.cs.mallet.base.util.Maths;
import edu.umass.cs.mallet.base.maximize.Maximizable;
import edu.umass.cs.mallet.base.maximize.Maximizer;
import edu.umass.cs.mallet.base.maximize.tests.*;
import edu.umass.cs.mallet.base.maximize.LimitedMemoryBFGS;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.util.CommandOption;
import edu.umass.cs.mallet.base.util.MalletProgressMessageLogger;
import java.util.logging.*;
import java.util.*;

// Does not currently handle instances that are labeled with distributions
// instead of a single label.

/**
 * The trainer for a Maximum Entropy classifier.
 *
 * @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
 */
public class MaxEntTrainer extends ClassifierTrainer implements Boostable
  //implements CommandOption.ListProviding
{
  private static Logger logger =
    MalletLogger.getLogger (MaxEntTrainer.class.getName());
  private static Logger progressLogger =
    MalletProgressMessageLogger.getLogger (MaxEntTrainer.class.getName()+"-pl");

  int numGetValueCalls = 0;
  int numGetValueGradientCalls = 0;
  int numIterations = 10;

  public static final String EXP_GAIN = "exp";
  public static final String GRADIENT_GAIN = "grad";
  public static final String INFORMATION_GAIN = "info";
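
  // A note on the two priors configured below (an orientation sketch, not code
  // taken from elsewhere in this file): with the Gaussian prior, training
  // maximizes the penalized log-likelihood
  //
  //     L(w) = sum_i log P(y_i | x_i, w)  -  sum_j w_j^2 / (2 * gaussianPriorVariance)
  //
  // so a smaller variance pulls feature weights toward zero, which is why a
  // small variance can be used to fight overtraining.  The hyperbolic prior
  // plays the same regularizing role, with a penalty that is close to L1 away
  // from the origin, as the option descriptions below say.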
  // xxx Why does TestMaximizable fail when this variance is very small?
  static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1;
  static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
  static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0;
  static final Class DEFAULT_MAXIMIZER_CLASS = LimitedMemoryBFGS.class;

  boolean usingHyperbolicPrior = false;
  double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
  double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
  double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
  Class maximizerClass = DEFAULT_MAXIMIZER_CLASS;

  static CommandOption.Boolean usingHyperbolicPriorOption = new CommandOption.Boolean
    (MaxEntTrainer.class, "useHyperbolicPrior", "true|false", false, false,
     "Use hyperbolic (close to L1 penalty) prior over parameters", null);

  static CommandOption.Double gaussianPriorVarianceOption = new CommandOption.Double
    (MaxEntTrainer.class, "gaussianPriorVariance", "FLOAT", true, 10.0,
     "Variance of the gaussian prior over parameters", null);

  static CommandOption.Double hyperbolicPriorSlopeOption = new CommandOption.Double
    (MaxEntTrainer.class, "hyperbolicPriorSlope", "FLOAT", true, 0.2,
     "Slope of the (L1 penalty) hyperbolic prior over parameters", null);

  static CommandOption.Double hyperbolicPriorSharpnessOption = new CommandOption.Double
    (MaxEntTrainer.class, "hyperbolicPriorSharpness", "FLOAT", true, 10.0,
     "Sharpness of the (L1 penalty) hyperbolic prior over parameters", null);

  static final CommandOption.List commandOptions =
    new CommandOption.List (
      "Maximum Entropy Classifier",
      new CommandOption[] {
        usingHyperbolicPriorOption,
        gaussianPriorVarianceOption,
        hyperbolicPriorSlopeOption,
        hyperbolicPriorSharpnessOption,
      });

  public static CommandOption.List getCommandOptionList ()
  {
    return commandOptions;
  }

  /*
  public MaxEntTrainer (Maximizer.ByGradient maximizer)
  {
    this.maximizerByGradient = maximizer;
    this.usingHyperbolicPrior = false;
  }
  */

  public MaxEntTrainer (CommandOption.List col)
  {
    this.usingHyperbolicPrior = usingHyperbolicPriorOption.value;
    this.gaussianPriorVariance = gaussianPriorVarianceOption.value;
    this.hyperbolicPriorSlope = hyperbolicPriorSlopeOption.value;
    this.hyperbolicPriorSharpness = hyperbolicPriorSharpnessOption.value;
  }

  public MaxEntTrainer ()
  {
    this (false);
  }

  public MaxEntTrainer (boolean useHyperbolicPrior)
  {
    this.usingHyperbolicPrior = useHyperbolicPrior;
  }

  /** Constructs a trainer with a parameter to avoid overtraining.  1.0 is
   *  usually a reasonable default value. */
  public MaxEntTrainer (double gaussianPriorVariance)
  {
    this.usingHyperbolicPrior = false;
    this.gaussianPriorVariance = gaussianPriorVariance;
  }

  public MaxEntTrainer (double hyperbolicPriorSlope,
                        double hyperbolicPriorSharpness)
  {
    this.usingHyperbolicPrior = true;
    this.hyperbolicPriorSlope = hyperbolicPriorSlope;
    this.hyperbolicPriorSharpness = hyperbolicPriorSharpness;
  }

  public Maximizable.ByGradient getMaximizableTrainer (InstanceList ilist)
  {
    if (ilist == null)
      return new MaximizableTrainer ();
    return new MaximizableTrainer (ilist, null);
  }
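
  // Example usage (an illustrative sketch: "instances" and "someInstance" are
  // assumed to be built elsewhere with an appropriate pipe, and the
  // single-argument train(InstanceList) convenience overload is assumed to be
  // inherited from ClassifierTrainer):
  //
  //     MaxEntTrainer trainer = new MaxEntTrainer (1.0);  // Gaussian prior variance
  //     Classifier maxent = trainer.train (instances);
  //     Labeling labeling = maxent.classify (someInstance).getLabeling ();
  //     System.out.println (labeling.getBestLabel ());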
  /**
   * Specifies the maximum number of iterations to run during a single call
   * to <code>train</code> or <code>trainWithFeatureInduction</code>.  Not
   * currently functional.
   * @return This trainer
   */
  // XXX Since we maximize before using numIterations, this doesn't work.
  // Is that a bug?  If so, should the default numIterations be higher?
  public MaxEntTrainer setNumIterations (int i)
  {
    numIterations = i;
    return this;
  }

  public MaxEntTrainer setUseHyperbolicPrior (boolean useHyperbolicPrior)
  {
    this.usingHyperbolicPrior = useHyperbolicPrior;
    return this;
  }

  /**
   * Sets a parameter to prevent overtraining.  A smaller variance for the prior
   * means that feature weights are expected to hover closer to 0, so extra
   * evidence is required to set a higher weight.
   * @return This trainer
   */
  public MaxEntTrainer setGaussianPriorVariance (double gaussianPriorVariance)
  {
    this.usingHyperbolicPrior = false;
    this.gaussianPriorVariance = gaussianPriorVariance;
    return this;
  }

  public MaxEntTrainer setHyperbolicPriorSlope (double hyperbolicPriorSlope)
  {
    this.usingHyperbolicPrior = true;
    this.hyperbolicPriorSlope = hyperbolicPriorSlope;
    return this;
  }

  public MaxEntTrainer setHyperbolicPriorSharpness (double hyperbolicPriorSharpness)
  {
    this.usingHyperbolicPrior = true;
    this.hyperbolicPriorSharpness = hyperbolicPriorSharpness;
    return this;
  }

  public Classifier train (InstanceList trainingSet,
                           InstanceList validationSet,
                           InstanceList testSet,
                           ClassifierEvaluating evaluator,
                           Classifier initialClassifier)
  {
    logger.fine ("trainingSet.size() = "+trainingSet.size());
    MaximizableTrainer mt = new MaximizableTrainer (trainingSet, (MaxEnt)initialClassifier);
    Maximizer.ByGradient maximizer = new LimitedMemoryBFGS();
    maximizer.maximize (mt); // XXX given the loop below, this seems wrong.
    logger.info ("MaxEnt ngetValueCalls:"+getValueCalls()
                 +"\nMaxEnt ngetValueGradientCalls:"+getValueGradientCalls());
    boolean converged;
    for (int i = 0; i < numIterations; i++) {
      converged = maximizer.maximize (mt, 1);
      if (converged)
        break;
      else if (evaluator != null)
        if (!evaluator.evaluate (mt.getClassifier(), converged, i, mt.getValue(),
                                 trainingSet, validationSet, testSet))
          break;
    }
    //TestMaximizable.testValueAndGradient (mt);
    progressLogger.info ("\n"); // progress messages are on one line; move on.
    return mt.getClassifier ();
  }
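
  // Note on train() above (descriptive of the code as written): after an
  // initial full maximize() call, the optimizer is then run one L-BFGS
  // iteration at a time so the evaluator can veto continued training; training
  // stops early when the optimizer reports convergence or when
  // evaluator.evaluate(...) returns false.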
  /**
   * <p>Trains a maximum entropy model using feature selection and feature
   * induction (adding conjunctions of features as new features).</p>
   *
   * @param trainingData A list of <code>Instance</code>s whose <code>data</code>
   * fields are binary, augmentable <code>FeatureVector</code>s,
   * and whose <code>target</code> fields are <code>Label</code>s.
   * @param validationData [not currently used] As <code>trainingData</code>,
   * or <code>null</code>.
   * @param testingData As <code>trainingData</code>, or <code>null</code>.
   * @param evaluator The evaluator to track training progress and decide whether
   * to continue, or <code>null</code>.
   * @param totalIterations The maximum total number of training iterations,
   * including those taken during feature induction.
   * @param numIterationsBetweenFeatureInductions How many iterations to train
   * between one round of feature induction and the next; this should usually
   * be fairly small, like 5 or 10, to avoid overfitting with current features.
   * @param numFeatureInductions How many rounds of feature induction to run
   * before beginning normal training.
   * @param numFeaturesPerFeatureInduction The maximum number of features to
   * choose during each round of feature induction.
   *
   * @return The trained <code>MaxEnt</code> classifier
   */
  // added - cjmaloof@linc.cis.upenn.edu
  public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                               InstanceList validationData,
                                               InstanceList testingData,
                                               ClassifierEvaluating evaluator,
                                               int totalIterations,
                                               int numIterationsBetweenFeatureInductions,
                                               int numFeatureInductions,
                                               int numFeaturesPerFeatureInduction)
  {
    return trainWithFeatureInduction (trainingData,
                                      validationData,
                                      testingData,
                                      evaluator,
                                      null,
                                      totalIterations,
                                      numIterationsBetweenFeatureInductions,
                                      numFeatureInductions,
                                      numFeaturesPerFeatureInduction,
                                      EXP_GAIN);
  }

  /**
   * <p>Like the other version of <code>trainWithFeatureInduction</code>, but
   * allows some default options to be changed.</p>
   *
   * @param maxent An initial partially-trained classifier (default <code>null</code>).
   * This classifier may be modified during training.
   * @param gainName The estimate of gain (log-likelihood increase) we want our
   * chosen features to maximize.  Should be one of
   * <code>MaxEntTrainer.EXP_GAIN</code>, <code>MaxEntTrainer.GRADIENT_GAIN</code>,
   * or <code>MaxEntTrainer.INFORMATION_GAIN</code> (default <code>EXP_GAIN</code>).
   *
   * @return The trained <code>MaxEnt</code> classifier
   */
  public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                               InstanceList validationData,
                                               InstanceList testingData,
                                               ClassifierEvaluating evaluator,
                                               MaxEnt maxent,
                                               int totalIterations,
                                               int numIterationsBetweenFeatureInductions,
                                               int numFeatureInductions,
                                               int numFeaturesPerFeatureInduction,
                                               String gainName)
  {
    // XXX This ought to be a parameter, except that setting it to true can
    // crash training ("Jump too small").
    boolean saveParametersDuringFI = false;

    Alphabet inputAlphabet = trainingData.getDataAlphabet();
    Alphabet outputAlphabet = trainingData.getTargetAlphabet();

    if (maxent == null)
      maxent = new MaxEnt (trainingData.getPipe(),
                           new double[(1+inputAlphabet.size()) * outputAlphabet.size()]);

    int trainingIteration = 0;
    int numLabels = outputAlphabet.size();

    // Initialize feature selection
    FeatureSelection globalFS = trainingData.getFeatureSelection();
    if (globalFS == null) {
      // Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
      globalFS = new FeatureSelection (trainingData.getDataAlphabet());
      trainingData.setFeatureSelection (globalFS);
    }
    if (validationData != null) validationData.setFeatureSelection (globalFS);
    if (testingData != null) testingData.setFeatureSelection (globalFS);
    maxent = new MaxEnt (maxent.getInstancePipe(), maxent.getParameters(), globalFS);

    // Run feature induction
    for (int featureInductionIteration = 0;
         featureInductionIteration < numFeatureInductions;
         featureInductionIteration++) {

      // Print out some feature information
      logger.info ("Feature induction iteration "+featureInductionIteration);

      // Train the model a little bit.  We don't care whether it converges; we
      // execute all feature induction iterations no matter what.
      if (featureInductionIteration != 0) {
        // Don't train until we have added some features
        setNumIterations (numIterationsBetweenFeatureInductions);
        maxent = (MaxEnt) this.train (trainingData, validationData, testingData,
                                      evaluator, maxent);
      }
      trainingIteration += numIterationsBetweenFeatureInductions;

      logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+
                   " features over "+numLabels+" labels.");
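
  // Example usage of feature induction (an illustrative sketch; "instances" is
  // assumed to be an InstanceList of binary, augmentable FeatureVectors, the
  // iteration counts are arbitrary illustration values, and the cast assumes
  // the trainer returns a MaxEnt, as documented above):
  //
  //     MaxEntTrainer trainer = new MaxEntTrainer ();
  //     MaxEnt maxent = (MaxEnt) trainer.trainWithFeatureInduction (
  //         instances,   // trainingData
  //         null,        // validationData (not currently used)
  //         null,        // testingData
  //         null,        // evaluator
  //         100,         // totalIterations
  //         10,          // numIterationsBetweenFeatureInductions
  //         5,           // numFeatureInductions
  //         50);         // numFeaturesPerFeatureInduction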