// MaxEntTrainer.java
/**
 * <p>Trains a maximum entropy model using feature selection and feature induction
 * (adding conjunctions of features as new features).</p>
 *
 * @param trainingData A list of <code>Instance</code>s whose <code>data</code>
 * fields are binary, augmentable <code>FeatureVector</code>s,
 * and whose <code>target</code> fields are <code>Label</code>s.
 * @param validationData [not currently used] As <code>trainingData</code>,
 * or <code>null</code>.
 * @param testingData As <code>trainingData</code>, or <code>null</code>.
 * @param evaluator The evaluator to track training progress and decide whether
 * to continue, or <code>null</code>.
 * @param totalIterations The maximum total number of training iterations,
 * including those taken during feature induction.
 * @param numIterationsBetweenFeatureInductions How many iterations to train
 * between one round of feature induction and the next; this should usually
 * be fairly small, like 5 or 10, to avoid overfitting with current features.
 * @param numFeatureInductions How many rounds of feature induction to run
 * before beginning normal training.
 * @param numFeaturesPerFeatureInduction The maximum number of features to
 * choose during each round of feature induction.
 *
 * @return The trained <code>MaxEnt</code> classifier
 */
// added - cjmaloof@linc.cis.upenn.edu
public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                             InstanceList validationData,
                                             InstanceList testingData,
                                             ClassifierEvaluating evaluator,
                                             int totalIterations,
                                             int numIterationsBetweenFeatureInductions,
                                             int numFeatureInductions,
                                             int numFeaturesPerFeatureInduction) {
    return trainWithFeatureInduction (trainingData,
                                      validationData,
                                      testingData,
                                      evaluator,
                                      null,
                                      totalIterations,
                                      numIterationsBetweenFeatureInductions,
                                      numFeatureInductions,
                                      numFeaturesPerFeatureInduction,
                                      EXP_GAIN);
}
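// Example call (a sketch, not part of the original source; the InstanceList
// variable and the iteration counts below are hypothetical, chosen to match
// the "fairly small, like 5 or 10" guidance in the Javadoc above):
//
//   MaxEntTrainer trainer = new MaxEntTrainer ();
//   Classifier classifier = trainer.trainWithFeatureInduction (
//       trainingInstances,  // InstanceList of binary, augmentable FeatureVectors
//       null,               // validationData (not currently used)
//       null,               // testingData
//       null,               // evaluator
//       100,                // totalIterations
//       10,                 // numIterationsBetweenFeatureInductions
//       5,                  // numFeatureInductions
//       50);                // numFeaturesPerFeatureInduction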
/**
 * <p>Like the other version of <code>trainWithFeatureInduction</code>, but
 * allows some default options to be changed.</p>
 *
 * @param maxent An initial partially-trained classifier (default <code>null</code>).
 * This classifier may be modified during training.
 * @param gainName The estimate of gain (log-likelihood increase) we want our chosen
 * features to maximize.
 * Should be one of <code>MaxEntTrainer.EXP_GAIN</code>,
 * <code>MaxEntTrainer.GRADIENT_GAIN</code>, or
 * <code>MaxEntTrainer.INFORMATION_GAIN</code> (default <code>EXP_GAIN</code>).
 *
 * @return The trained <code>MaxEnt</code> classifier
 */
public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                             InstanceList validationData,
                                             InstanceList testingData,
                                             ClassifierEvaluating evaluator,
                                             MaxEnt maxent,
                                             int totalIterations,
                                             int numIterationsBetweenFeatureInductions,
                                             int numFeatureInductions,
                                             int numFeaturesPerFeatureInduction,
                                             String gainName) {

    // XXX This ought to be a parameter, except that setting it to true can
    // crash training ("Jump too small").
    boolean saveParametersDuringFI = false;

    Alphabet inputAlphabet = trainingData.getDataAlphabet();
    Alphabet outputAlphabet = trainingData.getTargetAlphabet();

    if (maxent == null)
        maxent = new MaxEnt(trainingData.getPipe(),
                            new double[(1+inputAlphabet.size()) * outputAlphabet.size()]);

    int trainingIteration = 0;
    int numLabels = outputAlphabet.size();

    // Initialize feature selection
    FeatureSelection globalFS = trainingData.getFeatureSelection();
    if (globalFS == null) {
        // Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
        globalFS = new FeatureSelection (trainingData.getDataAlphabet());
        trainingData.setFeatureSelection (globalFS);
    }
    if (validationData != null) validationData.setFeatureSelection (globalFS);
    if (testingData != null) testingData.setFeatureSelection (globalFS);
    maxent = new MaxEnt(maxent.getInstancePipe(), maxent.getParameters(), globalFS);

    // Run feature induction
    for (int featureInductionIteration = 0;
         featureInductionIteration < numFeatureInductions;
         featureInductionIteration++) {

        // Print out some feature information
        logger.info ("Feature induction iteration "+featureInductionIteration);

        // Train the model a little bit.  We don't care whether it converges; we
        // execute all feature induction iterations no matter what.
        if (featureInductionIteration != 0) {
            // Don't train until we have added some features
            setNumIterations(numIterationsBetweenFeatureInductions);
            maxent = (MaxEnt)this.train (trainingData, validationData, testingData,
                                         evaluator, maxent);
        }
        trainingIteration += numIterationsBetweenFeatureInductions;

        logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+
                     " features over "+numLabels+" labels.");

        // Create the list of error tokens
        InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
                                                        trainingData.getTargetAlphabet());

        // This errorInstances.featureSelection will get examined by FeatureInducer,
        // so it can know how to add "new" singleton features
        errorInstances.setFeatureSelection (globalFS);
        List errorLabelVectors = new ArrayList();    // these are length-1 vectors

        for (int i = 0; i < trainingData.size(); i++) {
            Instance instance = trainingData.getInstance(i);
            FeatureVector inputVector = (FeatureVector) instance.getData();
            Label trueLabel = (Label) instance.getTarget();

            // Having trained using just the current features, see how we classify
            // the training data now.
            Classification classification = maxent.classify(instance);

            if (!classification.bestLabelIsCorrect()) {
                errorInstances.add(inputVector, trueLabel, null, null);
                errorLabelVectors.add(classification.getLabelVector());
            }
        }
        logger.info ("Error instance list size = "+errorInstances.size());

        int s = errorLabelVectors.size();
        LabelVector[] lvs = new LabelVector[s];
        for (int i = 0; i < s; i++) {
            lvs[i] = (LabelVector)errorLabelVectors.get(i);
        }

        RankedFeatureVector.Factory gainFactory = null;
        if (gainName.equals (EXP_GAIN))
            gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
        else if (gainName.equals(GRADIENT_GAIN))
            gainFactory = new GradientGain.Factory (lvs);
        else if (gainName.equals(INFORMATION_GAIN))
            gainFactory = new InfoGain.Factory ();
        else
            throw new IllegalArgumentException("Unsupported gain name: "+gainName);

        FeatureInducer klfi = new FeatureInducer (gainFactory,
                                                  errorInstances,
                                                  numFeaturesPerFeatureInduction,
                                                  2*numFeaturesPerFeatureInduction,
                                                  2*numFeaturesPerFeatureInduction);

        // Note that this adds features globally, but not on a per-transition basis
        klfi.induceFeaturesFor (trainingData, false, false);
        if (testingData != null) klfi.induceFeaturesFor (testingData, false, false);
        logger.info ("MaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
        klfi = null;
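        // Parameter layout (a sketch inferred from the indexing below; this
        // comment is not in the original source): the weights live in one flat
        // array holding a contiguous block of (1 + numFeatures) doubles per
        // label, with the last slot of each block reserved for the default
        // (bias) feature, i.e.
        //
        //   weight(label, feature) = parameters[label*(1+numFeatures) + feature]
        //   weight(label, default) = parameters[label*(1+numFeatures) + numFeatures]
        //
        // so after the alphabet grows, each label's old block must be copied
        // into a wider array at a new offset.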
        double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];

        // XXX (Executing this block often causes an error during training; I don't know why.)
        if (saveParametersDuringFI) {
            // Keep current parameter values
            // XXX This relies on the implementation detail that the most recent features
            // added to an Alphabet get the highest indices.

            // Count parameters per output label
            int oldParamCount = maxent.parameters.length / outputAlphabet.size();
            int newParamCount = 1+inputAlphabet.size();
            // Copy params into the proper locations
            for (int i=0; i<outputAlphabet.size(); i++) {
                System.arraycopy(maxent.parameters, i*oldParamCount,
                                 newParameters, i*newParamCount,
                                 oldParamCount);
            }
            // Sanity check: label 0's block is copied to offset 0, so its old
            // and new values must agree element-by-element.
            for (int i=0; i<oldParamCount; i++)
                if (maxent.parameters[i] != newParameters[i]) {
                    System.out.println(maxent.parameters[i]+" "+newParameters[i]);
                    System.exit(0);
                }
        }

        maxent.parameters = newParameters;
        maxent.defaultFeatureIndex = inputAlphabet.size();
    }

    // Finished feature induction
    logger.info("Ended with "+globalFS.cardinality()+" features.");
    setNumIterations(totalIterations - trainingIteration);
    return this.train (trainingData, validationData, testingData,
                       evaluator, maxent);
}

// XXX Should these really be public?  Why?

/** Counts how many times this trainer has computed the gradient of the
 * log probability of training labels. */
public int getValueGradientCalls() {return numGetValueGradientCalls;}

/** Counts how many times this trainer has computed the
 * log probability of training labels. */
public int getValueCalls() {return numGetValueCalls;}

// public int getIterations() {return maximizerByGradient.getIterations();}

public String toString() {
    return "MaxEntTrainer"
        // + "("+maximizerClass.getName()+") "
        + ",numIterations=" + numIterations
        + (usingHyperbolicPrior
           ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope+
              ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness)
           : (",gaussianPriorVariance="+gaussianPriorVariance));
}

// A private inner class that wraps up a MaxEnt classifier and its training data.
// The result is a maximize.Maximizable function.
private class MaximizableTrainer implements Maximizable.ByGradient {
    double[] parameters, constraints, cachedGradient;
    MaxEnt theClassifier;
    InstanceList trainingList;
    // The expectations are (temporarily) stored in the cachedGradient
    double cachedValue;
    boolean cachedValueStale;
    boolean cachedGradientStale;
    int numLabels;
    int numFeatures;
    int defaultFeatureIndex;    // just for clarity
    FeatureSelection featureSelection;
    FeatureSelection[] perLabelFeatureSelection;

    public MaximizableTrainer () {}

    public MaximizableTrainer (InstanceList ilist, MaxEnt initialClassifier) {