📄 MaxEntTrainer.java
// NOTE: this listing begins mid-file, inside the constructor of the trainer's
// inner maximizable class, which takes (InstanceList ilist, MaxEnt initialClassifier).
this.trainingList = ilist;
Alphabet fd = ilist.getDataAlphabet ();
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet ();
// Don't fd.stopGrowth, because someone might want to do feature induction
ld.stopGrowth ();
// Add one feature for the "default feature".
this.numLabels = ld.size ();
this.numFeatures = fd.size () + 1;
this.defaultFeatureIndex = numFeatures - 1;
// Parameters, constraints and gradient are flat arrays laid out row-major:
// the entry for (label li, feature fi) lives at index li * numFeatures + fi.
this.parameters = new double[numLabels * numFeatures];
this.constraints = new double[numLabels * numFeatures];
this.cachedGradient = new double[numLabels * numFeatures];
Arrays.fill (parameters, 0.0);
Arrays.fill (constraints, 0.0);
Arrays.fill (cachedGradient, 0.0);
this.featureSelection = ilist.getFeatureSelection ();
this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection ();
// Add the default feature index to the selection
if (featureSelection != null)
    featureSelection.add (defaultFeatureIndex);
if (perLabelFeatureSelection != null)
    for (int i = 0; i < perLabelFeatureSelection.length; i++)
        perLabelFeatureSelection[i].add (defaultFeatureIndex);
// xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
assert (featureSelection == null || perLabelFeatureSelection == null);
if (initialClassifier != null) {
    this.theClassifier = initialClassifier;
    this.parameters = theClassifier.parameters;
    this.featureSelection = theClassifier.featureSelection;
    this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
    this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
    assert (initialClassifier.getInstancePipe () == ilist.getPipe ());
} else if (this.theClassifier == null) {
    this.theClassifier = new MaxEnt (ilist.getPipe (), parameters, featureSelection, perLabelFeatureSelection);
}
cachedValueStale = true;
cachedGradientStale = true;

// Initialize the constraints: for each training instance, accumulate its
// (weighted) feature counts into the row of its observed label.  These are
// the empirical expectations that the gradient is measured against.
InstanceList.Iterator iter = trainingList.iterator ();
logger.fine ("Number of instances in training list = " + trainingList.size ());
while (iter.hasNext ()) {
    double instanceWeight = iter.getInstanceWeight ();
    Instance inst = iter.nextInstance ();
    Labeling labeling = inst.getLabeling ();
    //logger.fine ("Instance "+ii+" labeling="+labeling);
    FeatureVector fv = (FeatureVector) inst.getData ();
    Alphabet fdict = fv.getAlphabet ();
    assert (fv.getAlphabet () == fd);
    int li = labeling.getBestIndex ();
    MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, instanceWeight);
    assert (!Double.isNaN (instanceWeight)) : "instanceWeight is NaN";
    assert (!Double.isNaN (li)) : "bestIndex is NaN";
    // Warn about any NaN feature values, which would poison training.
    boolean hasNaN = false;
    for (int i = 0; i < fv.numLocations (); i++) {
        if (Double.isNaN (fv.valueAtLocation (i))) {
            logger.info ("NaN for feature " + fdict.lookupObject (fv.indexAtLocation (i)).toString ());
            hasNaN = true;
        }
    }
    if (hasNaN)
        logger.info ("NaN in instance: " + inst.getName ());
    // For the default feature, whose value is always 1.0
    constraints[li * numFeatures + defaultFeatureIndex] += 1.0 * instanceWeight;
}
//TestMaximizable.testValueAndGradientCurrentParameters (this);
}

public MaxEnt getClassifier () {
    return theClassifier;
}

public double getParameter (int index) {
    return parameters[index];
}

public void setParameter (int index, double v) {
    cachedValueStale = true;
    cachedGradientStale = true;
    parameters[index] = v;
}

public int getNumParameters () {
    return parameters.length;
}

public void getParameters (double[] buff) {
    // Note: if buff is null or the wrong length, the freshly allocated array is
    // local to this method and never reaches the caller; callers must pass a
    // buffer of length getNumParameters() to actually receive the values.
    if (buff == null || buff.length != parameters.length)
        buff = new double[parameters.length];
    System.arraycopy (parameters, 0, buff, 0, parameters.length);
}

public void setParameters (double[] buff) {
    assert (buff != null);
    cachedValueStale = true;
    cachedGradientStale = true;
    if (buff.length != parameters.length)
        parameters = new double[buff.length];
    System.arraycopy (buff, 0, parameters, 0, buff.length);
}

// The value is the log probability of the training labels, minus the prior
// penalty; it is what the optimizer maximizes.
public double getValue () {
    if (cachedValueStale) {
        numGetValueCalls++;
        cachedValue = 0;
        // We'll store the expectation values in "cachedGradient" for now
        cachedGradientStale = true;
        MatrixOps.setAll (cachedGradient, 0.0);
        // Incorporate likelihood of data
        double[] scores = new double[trainingList.getTargetAlphabet ().size ()];
        double value = 0.0;
        InstanceList.Iterator iter = trainingList.iterator ();
        int ii = 0;
        while (iter.hasNext ()) {
            ii++;
            double instanceWeight = iter.getInstanceWeight ();
            Instance instance = iter.nextInstance ();
            Labeling labeling = instance.getLabeling ();
            this.theClassifier.getClassificationScores (instance, scores);
            FeatureVector fv = (FeatureVector) instance.getData ();
            int li = labeling.getBestIndex ();
            // Weighted negative log probability of the true label.
            value = - (instanceWeight * Math.log (scores[li]));
            if (Double.isNaN (value)) {
                logger.fine ("MaxEntTrainer: Instance " + instance.getName ()
                    + " has NaN value. log(scores)= " + Math.log (scores[li])
                    + " scores = " + scores[li]
                    + " has instance weight = " + instanceWeight);
            }
            if (Double.isInfinite (value)) {
                logger.warning ("Instance " + instance.getSource ()
                    + " has infinite value; skipping value and gradient");
                cachedValue -= value;
                cachedValueStale = false;
                return -value;
                // continue;
            }
            cachedValue += value;
            // CPAL - this is a loop over classes and their scores;
            // we compute the gradient by taking the dot product of the feature
            // values and the probability of the class.
            for (int si = 0; si < scores.length; si++) {
                if (scores[si] == 0)
                    continue;
                assert (!Double.isInfinite (scores[si]));
                // CPAL - accumulating the current classifier's expectation of the
                // feature vector counts for this class label.
                MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv,
                                         -instanceWeight * scores[si]);
                cachedGradient[numFeatures * si + defaultFeatureIndex] +=
                    (-instanceWeight * scores[si]);
            }
            // CPAL - if we wish to do multiconditional training, we need another
            // term for this accumulated expectation.
            if (usingMultiConditionalTraining) {
                // need something analogous to this:
                //   this.theClassifier.getClassificationScores (instance, scores);
                //   this.theClassifier.getFeatureDistributions (instance,
                // Note: li is the "label" for this instance
                for (int fi = 0; fi < numFeatures; fi++) {
                    cachedGradient[numFeatures * li + fi] +=
                        (-instanceWeight * Math.exp (parameters[numFeatures * li + fi]));
                }
            }
        }
        //logger.info ("-Expectations:"); cachedGradient.print();

        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
            for (int li = 0; li < numLabels; li++)
                for (int fi = 0; fi < numFeatures; fi++)
                    cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness
                        * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li * numFeatures + fi])));
        } else {
            // Gaussian prior: penalize each parameter by param^2 / (2 * variance).
            for (int li = 0; li < numLabels; li++)
                for (int fi = 0; fi < numFeatures; fi++) {
                    double param = parameters[li * numFeatures + fi];
                    cachedValue += param * param / (2 * gaussianPriorVariance);
                }
        }
        cachedValue *= -1.0; // MAXIMIZE, NOT MINIMIZE
        cachedValueStale = false;
        progressLogger.info ("Value (loglikelihood) = " + cachedValue);
    }
    return cachedValue;
}

// CPAL - first get the value, then the gradient.
// Gradient is (constraint - expectation - parameters/gaussianPriorVariance)
public void getValueGradient (double[] buffer) {
    if (cachedGradientStale) {
        numGetValueGradientCalls++;
        if (cachedValueStale)
            // This will fill in the cachedGradient with the "-expectation"
            getValue ();
        // cachedGradient contains the negated model expectations;
        // constraints are the empirical expectations.
        MatrixOps.plusEquals (cachedGradient, constraints);
        // CPAL - we need a second copy of the constraints
        if (usingMultiConditionalTraining) {
            MatrixOps.plusEquals (cachedGradient, constraints);
        }
        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
            throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented.");
        } else {
            MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance);
        }
        // A parameter may be set to -infinity by an external user.
        // We set its gradient to 0 because the parameter's value can never
        // change anyway, and it would mess up future calculations on the
        // matrix, such as norm().
        MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0);
        // Set to zero all the gradient dimensions that are not among the selected features
        if (perLabelFeatureSelection == null) {
            for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
                MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0,
                                     featureSelection, false);
        } else {
            for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
                MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0,
                                     perLabelFeatureSelection[labelIndex], false);
        }
        cachedGradientStale = false;
    }
    assert (buffer != null && buffer.length == parameters.length);
    System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length);
}
} // end of the inner maximizable-trainer class
} // end of MaxEntTrainer
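The class above exposes exactly what MALLET's gradient-based optimizer consumes: getValue() returns the Gaussian-prior-penalized conditional log-likelihood, and getValueGradient() fills a buffer with (constraints - expectations - parameters/gaussianPriorVariance). For orientation, here is a minimal usage sketch of the surrounding trainer. It is an illustration, not part of the file: it assumes the edu.umass.cs.mallet.base package layout this listing comes from and the ClassifierTrainer.train(InstanceList) convenience method, and buildTrainingSet() is a hypothetical stand-in for your own Pipe-based data loading.

import edu.umass.cs.mallet.base.classify.Classification;
import edu.umass.cs.mallet.base.classify.Classifier;
import edu.umass.cs.mallet.base.classify.MaxEntTrainer;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;

public class MaxEntUsageSketch {
    public static void main (String[] args) {
        // Hypothetical helper: must return an InstanceList whose Pipe produces
        // FeatureVector data and Label targets, as the constructor above expects.
        InstanceList training = buildTrainingSet ();

        // Train with the default Gaussian prior; the optimizer repeatedly calls
        // getValue()/getValueGradient() on the inner maximizable class above.
        MaxEntTrainer trainer = new MaxEntTrainer ();
        Classifier maxent = trainer.train (training);

        // Classify one training instance, using the iterator style from the listing.
        InstanceList.Iterator iter = training.iterator ();
        Instance inst = iter.nextInstance ();
        Classification c = maxent.classify (inst);
        System.out.println (inst.getName () + " => " + c.getLabeling ().getBestLabel ());
    }

    private static InstanceList buildTrainingSet () {
        // Application-specific; typically assembled from a SerialPipes chain.
        throw new UnsupportedOperationException ("supply your own data loading");
    }
}

If your MALLET version exposes it, the gaussianPriorVariance used in getValue() can also be set through the MaxEntTrainer constructor that takes a prior-variance argument; smaller variance means stronger regularization toward zero weights.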