MaxEntTrainer.java
      // Create the list of error tokens
      InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
                                                      trainingData.getTargetAlphabet());
      // This errorInstances.featureSelection will get examined by FeatureInducer,
      // so it can know how to add "new" singleton features
      errorInstances.setFeatureSelection (globalFS);
      List errorLabelVectors = new ArrayList();  // these are length-1 vectors
      for (int i = 0; i < trainingData.size(); i++) {
        Instance instance = trainingData.getInstance(i);
        FeatureVector inputVector = (FeatureVector) instance.getData();
        Label trueLabel = (Label) instance.getTarget();
        // Having trained using just the current features, see how we classify
        // the training data now.
        Classification classification = maxent.classify(instance);
        if (!classification.bestLabelIsCorrect()) {
          errorInstances.add(inputVector, trueLabel, null, null);
          errorLabelVectors.add(classification.getLabelVector());
        }
      }
      logger.info ("Error instance list size = "+errorInstances.size());
      int s = errorLabelVectors.size();
      LabelVector[] lvs = new LabelVector[s];
      for (int i = 0; i < s; i++) {
        lvs[i] = (LabelVector) errorLabelVectors.get(i);
      }
      RankedFeatureVector.Factory gainFactory = null;
      if (gainName.equals (EXP_GAIN))
        gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
      else if (gainName.equals (GRADIENT_GAIN))
        gainFactory = new GradientGain.Factory (lvs);
      else if (gainName.equals (INFORMATION_GAIN))
        gainFactory = new InfoGain.Factory ();
      else
        throw new IllegalArgumentException ("Unsupported gain name: "+gainName);
      FeatureInducer klfi = new FeatureInducer (gainFactory, errorInstances,
                                                numFeaturesPerFeatureInduction,
                                                2*numFeaturesPerFeatureInduction,
                                                2*numFeaturesPerFeatureInduction);
      // Note that this adds features globally, but not on a per-transition basis
      klfi.induceFeaturesFor (trainingData, false, false);
      if (testingData != null)
        klfi.induceFeaturesFor (testingData, false, false);
      logger.info ("MaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
      klfi = null;

      double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];
      // XXX (Executing this block often causes an error during training; I don't know why.)
      if (saveParametersDuringFI) {
        // Keep current parameter values
        // XXX This relies on the implementation detail that the most recent features
        // added to an Alphabet get the highest indices.

        // Count parameters per output label
        int oldParamCount = maxent.parameters.length / outputAlphabet.size();
        int newParamCount = 1+inputAlphabet.size();
        // Copy params into the proper locations
        for (int i = 0; i < outputAlphabet.size(); i++) {
          System.arraycopy (maxent.parameters, i*oldParamCount,
                            newParameters, i*newParamCount, oldParamCount);
        }
        for (int i = 0; i < oldParamCount; i++)
          if (maxent.parameters[i] != newParameters[i]) {
            System.out.println (maxent.parameters[i]+" "+newParameters[i]);
            System.exit(0);
          }
      }
      maxent.parameters = newParameters;
      maxent.defaultFeatureIndex = inputAlphabet.size();
    }  // Finished feature induction
    logger.info ("Ended with "+globalFS.cardinality()+" features.");
    setNumIterations (totalIterations - trainingIteration);
    return this.train (trainingData, validationData, testingData, evaluator, maxent);
  }
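  /*
   * Expository note (not part of the original source): the block above is
   * error-driven feature induction.  Only the instances the current model
   * misclassifies are collected; the selected gain criterion then ranks
   * candidate features on that error set, and the FeatureInducer adds the
   * top-ranked ones to the global FeatureSelection before training resumes.
   * As a concrete example of such a criterion, information gain scores a
   * binary feature f by the reduction in label entropy it buys:
   *   IG(f) = H(Y) - P(f) H(Y|f) - P(!f) H(Y|!f)
   */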
  // XXX Should these really be public?  Why?

  /** Counts how many times this trainer has computed the gradient of the
   *  log probability of training labels. */
  public int getValueGradientCalls () { return numGetValueGradientCalls; }

  /** Counts how many times this trainer has computed the
   *  log probability of training labels. */
  public int getValueCalls () { return numGetValueCalls; }

  //public int getIterations () { return maximizerByGradient.getIterations(); }

  public String toString ()
  {
    return "MaxEntTrainer"
      //+ "("+maximizerClass.getName()+") "
      + ",numIterations=" + numIterations
      + (usingHyperbolicPrior
         ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope
            + ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness)
         : (",gaussianPriorVariance="+gaussianPriorVariance));
  }
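  /*
   * Expository note (not part of the original source): the inner class below
   * is the objective handed to the gradient optimizer.  With a Gaussian prior
   * of variance sigma^2 on every parameter, the value it reports is
   *   L(theta) = sum_i w_i * log p(y_i | x_i; theta) - sum_j theta_j^2 / (2 sigma^2)
   * and the partial derivative for the parameter tied to (label y, feature f) is
   *   dL/dtheta_{y,f} = constraint_{y,f} - expectation_{y,f} - theta_{y,f} / sigma^2,
   * i.e. empirical feature count minus expected count minus the prior term,
   * which is exactly the comment at the top of getValueGradient().
   */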
  // A private inner class that wraps up a MaxEnt classifier and its training data.
  // The result is a maximize.Maximizable function.
  private class MaximizableTrainer implements Maximizable.ByGradient
  {
    double[] parameters, constraints, cachedGradient;
    MaxEnt theClassifier;
    InstanceList trainingList;
    // The expectations are (temporarily) stored in the cachedGradient
    double cachedValue;
    boolean cachedValueStale;
    boolean cachedGradientStale;
    int numLabels;
    int numFeatures;
    int defaultFeatureIndex;  // just for clarity
    FeatureSelection featureSelection;
    FeatureSelection[] perLabelFeatureSelection;

    public MaximizableTrainer () {}

    public MaximizableTrainer (InstanceList ilist, MaxEnt initialClassifier)
    {
      this.trainingList = ilist;
      Alphabet fd = ilist.getDataAlphabet();
      LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
      // Don't fd.stopGrowth, because someone might want to do feature induction
      ld.stopGrowth();
      // Add one feature for the "default feature".
      this.numLabels = ld.size();
      this.numFeatures = fd.size() + 1;
      this.defaultFeatureIndex = numFeatures - 1;
      this.parameters = new double [numLabels * numFeatures];
      this.constraints = new double [numLabels * numFeatures];
      this.cachedGradient = new double [numLabels * numFeatures];
      Arrays.fill (parameters, 0.0);
      Arrays.fill (constraints, 0.0);
      Arrays.fill (cachedGradient, 0.0);
      this.featureSelection = ilist.getFeatureSelection();
      this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection();
      // Add the default feature index to the selection
      if (featureSelection != null)
        featureSelection.add (defaultFeatureIndex);
      if (perLabelFeatureSelection != null)
        for (int i = 0; i < perLabelFeatureSelection.length; i++)
          perLabelFeatureSelection[i].add (defaultFeatureIndex);
      // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
      assert (featureSelection == null || perLabelFeatureSelection == null);
      if (initialClassifier != null) {
        this.theClassifier = initialClassifier;
        this.parameters = theClassifier.parameters;
        this.featureSelection = theClassifier.featureSelection;
        this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
        this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
        assert (initialClassifier.getInstancePipe() == ilist.getPipe());
      }
      else if (this.theClassifier == null) {
        this.theClassifier = new MaxEnt (ilist.getPipe(), parameters,
                                         featureSelection, perLabelFeatureSelection);
      }
      cachedValueStale = true;
      cachedGradientStale = true;

      // Initialize the constraints
      InstanceList.Iterator iter = trainingList.iterator ();
      logger.fine ("Number of instances in training list = " + trainingList.size());
      while (iter.hasNext()) {
        double instanceWeight = iter.getInstanceWeight();
        Instance inst = iter.nextInstance();
        Labeling labeling = inst.getLabeling ();
        //logger.fine ("Instance "+ii+" labeling="+labeling);
        FeatureVector fv = (FeatureVector) inst.getData ();
        Alphabet fdict = fv.getAlphabet();
        assert (fv.getAlphabet() == fd);
        int li = labeling.getBestIndex();
        MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, instanceWeight);
        assert (!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
        assert (li >= 0) : "bestIndex is invalid";
        boolean hasNaN = false;
        for (int i = 0; i < fv.numLocations(); i++) {
          if (Double.isNaN (fv.valueAtLocation(i))) {
            logger.info ("NaN for feature " + fdict.lookupObject (fv.indexAtLocation(i)).toString());
            hasNaN = true;
          }
        }
        if (hasNaN)
          logger.info ("NaN in instance: " + inst.getName());
        // For the default feature, whose weight is 1.0
        constraints[li*numFeatures + defaultFeatureIndex] += 1.0 * instanceWeight;
      }
      //TestMaximizable.testValueAndGradientCurrentParameters (this);
    }

    public MaxEnt getClassifier () { return theClassifier; }

    public double getParameter (int index) { return parameters[index]; }

    public void setParameter (int index, double v)
    {
      cachedValueStale = true;
      cachedGradientStale = true;
      parameters[index] = v;
    }

    public int getNumParameters () { return parameters.length; }

    public void getParameters (double[] buff)
    {
      if (buff == null || buff.length != parameters.length)
        buff = new double [parameters.length];
      System.arraycopy (parameters, 0, buff, 0, parameters.length);
    }

    public void setParameters (double[] buff)
    {
      assert (buff != null);
      cachedValueStale = true;
      cachedGradientStale = true;
      if (buff.length != parameters.length)
        parameters = new double[buff.length];
      System.arraycopy (buff, 0, parameters, 0, buff.length);
    }
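    // Expository note (not part of the original source): getValue() and
    // getValueGradient() below memoize their results in cachedValue and
    // cachedGradient; setParameter()/setParameters() mark both caches stale.
    // getValue() also leaves the negated model expectations in cachedGradient,
    // which getValueGradient() then completes into a full gradient.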
    // log probability of the training labels
    public double getValue ()
    {
      if (cachedValueStale) {
        numGetValueCalls++;
        cachedValue = 0;
        // We'll store the expectation values in "cachedGradient" for now
        cachedGradientStale = true;
        MatrixOps.setAll (cachedGradient, 0.0);
        // Incorporate likelihood of data
        double[] scores = new double[trainingList.getTargetAlphabet().size()];
        double value = 0.0;
        //System.out.println ("I Now "+inputAlphabet.size()+" regular features.");
        InstanceList.Iterator iter = trainingList.iterator();
        int ii = 0;
        while (iter.hasNext()) {
          ii++;
          double instanceWeight = iter.getInstanceWeight();
          Instance instance = iter.nextInstance();
          Labeling labeling = instance.getLabeling ();
          //System.out.println ("L Now "+inputAlphabet.size()+" regular features.");
          this.theClassifier.getClassificationScores (instance, scores);
          FeatureVector fv = (FeatureVector) instance.getData ();
          int li = labeling.getBestIndex();
          value = - (instanceWeight * Math.log (scores[li]));
          if (Double.isNaN (value)) {
            logger.fine ("MaxEntTrainer: Instance " + instance.getName()
                         + " has NaN value. log(scores)= " + Math.log (scores[li])
                         + " scores = " + scores[li]
                         + " has instance weight = " + instanceWeight);
          }
          if (Double.isInfinite (value)) {
            logger.warning ("Instance " + instance.getSource()
                            + " has infinite value; skipping value and gradient");
            cachedValue -= value;
            cachedValueStale = false;
            return -value;
            // continue;
          }
          cachedValue += value;
          for (int si = 0; si < scores.length; si++) {
            if (scores[si] == 0) continue;
            assert (!Double.isInfinite (scores[si]));
            MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv,
                                     -instanceWeight * scores[si]);
            cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]);
          }
        }
        //logger.info ("-Expectations:"); cachedGradient.print();

        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
          for (int li = 0; li < numLabels; li++)
            for (int fi = 0; fi < numFeatures; fi++)
              cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness
                              * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li*numFeatures + fi])));
        }
        else {
          for (int li = 0; li < numLabels; li++)
            for (int fi = 0; fi < numFeatures; fi++) {
              double param = parameters[li*numFeatures + fi];
              cachedValue += param * param / (2 * gaussianPriorVariance);
            }
        }
        cachedValue *= -1.0;  // MAXIMIZE, NOT MINIMIZE
        cachedValueStale = false;
        progressLogger.info ("Value (loglikelihood) = " + cachedValue);
      }
      return cachedValue;
    }

    public void getValueGradient (double[] buffer)
    {
      // Gradient is (constraint - expectation - parameters/gaussianPriorVariance)
      if (cachedGradientStale) {
        numGetValueGradientCalls++;
        if (cachedValueStale)
          // This will fill in the cachedGradient with the "-expectation"
          getValue ();
        MatrixOps.plusEquals (cachedGradient, constraints);
        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
          throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented.");
        }
        else {
          MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance);
        }
        // A parameter may be set to -infinity by an external user.
        // We set gradient to 0 because the parameter's value can
        // never change anyway and it will mess up future calculations
        // on the matrix, such as norm().
        MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0);
        // Set to zero all the gradient dimensions that are not among the selected features
        if (perLabelFeatureSelection == null) {
          for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
            MatrixOps.rowSetAll (cachedGradient, numFeatures,
                                 labelIndex, 0.0, featureSelection, false);
        }
        else {
          for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
            MatrixOps.rowSetAll (cachedGradient, numFeatures,
                                 labelIndex, 0.0, perLabelFeatureSelection[labelIndex], false);
        }
        cachedGradientStale = false;
      }
      assert (buffer != null && buffer.length == parameters.length);
      System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length);
    }
  }
}
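A minimal usage sketch, not from the original file: it assumes the pre-2.0 MALLET package layout (edu.umass.cs.mallet.base.*), an InstanceList whose pipe produces FeatureVector data with Label targets, and a setGaussianPriorVariance setter matching the gaussianPriorVariance field used above; the five-argument train(...) call mirrors the one the feature-induction code makes. MaxEntDemo and its demo method are hypothetical names.

    import edu.umass.cs.mallet.base.classify.*;
    import edu.umass.cs.mallet.base.types.*;

    public class MaxEntDemo {
      // "training" must be an InstanceList whose pipe produces
      // FeatureVector data with Label targets.
      public static void demo (InstanceList training) {
        MaxEntTrainer trainer = new MaxEntTrainer ();
        trainer.setGaussianPriorVariance (1.0);  // assumed setter for the prior used in getValue()
        // Same five-argument form the feature-induction code calls above;
        // validation/testing sets, evaluator, and initial classifier are omitted.
        MaxEnt maxent = (MaxEnt) trainer.train (training, null, null, null, null);
        Classification c = maxent.classify (training.getInstance (0));
        System.out.println ("best label index = " + c.getLabelVector ().getBestIndex ()
                            + ", correct: " + c.bestLabelIsCorrect ());
      }
    }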