📄 mcmaxenttrainer.java
      this.theClassifier = initialClassifier;
      this.parameters = theClassifier.parameters;
      this.featureSelection = theClassifier.featureSelection;
      this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
      this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
      assert (initialClassifier.getInstancePipe() == ilist.getPipe());
    } else if (this.theClassifier == null) {
      this.theClassifier = new MCMaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
    }
    cachedValueStale = true;
    cachedGradientStale = true;

    // Initialize the constraints
    InstanceList.Iterator iter = trainingList.iterator ();
    logger.fine("Number of instances in training list = " + trainingList.size());
    while (iter.hasNext()) {
      double instanceWeight = iter.getInstanceWeight();
      Instance inst = iter.nextInstance();
      Labeling labeling = inst.getLabeling ();
      //logger.fine ("Instance "+ii+" labeling="+labeling);
      FeatureVector fv = (FeatureVector) inst.getData ();
      Alphabet fdict = fv.getAlphabet();
      assert (fv.getAlphabet() == fd);
      int li = labeling.getBestIndex();
      // The "2*" below is because there is one copy for the p(y|x) and another for the p(x|y).
      MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, 2*instanceWeight);
      // For the default feature, whose weight is 1.0
      assert (!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
      assert (!Double.isNaN(li)) : "bestIndex is NaN";
      boolean hasNaN = false;
      for (int i = 0; i < fv.numLocations(); i++) {
        if (Double.isNaN(fv.valueAtLocation(i))) {
          logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
          hasNaN = true;
        }
      }
      if (hasNaN)
        logger.info("NaN in instance: " + inst.getName());
      // Only p(y|x) uses the default feature; p(x|y) doesn't use it.  The default feature value is 1.0.
      constraints[li*numFeatures + defaultFeatureIndex] += instanceWeight;
    }
    //TestMaximizable.testValueAndGradientCurrentParameters (this);
  }

  public MCMaxEnt getClassifier () {
    return theClassifier;
  }

  public double getParameter (int index) {
    return parameters[index];
  }

  public void setParameter (int index, double v) {
    cachedValueStale = true;
    cachedGradientStale = true;
    parameters[index] = v;
  }

  public int getNumParameters() {
    return parameters.length;
  }

  public void getParameters (double[] buff) {
    if (buff == null || buff.length != parameters.length)
      buff = new double[parameters.length];
    System.arraycopy (parameters, 0, buff, 0, parameters.length);
  }

  public void setParameters (double[] buff) {
    assert (buff != null);
    cachedValueStale = true;
    cachedGradientStale = true;
    if (buff.length != parameters.length)
      parameters = new double[buff.length];
    System.arraycopy (buff, 0, parameters, 0, buff.length);
  }

  // log probability of the training labels
  public double getValue () {
    if (cachedValueStale) {
      numGetValueCalls++;
      cachedValue = 0;
      // We'll store the expectation values in "cachedGradient" for now
      cachedGradientStale = true;
      java.util.Arrays.fill (cachedGradient, 0.0);
      // Incorporate likelihood of data
      double[] scores = new double[trainingList.getTargetAlphabet().size()];
      double value = 0.0;
      //System.out.println("I Now "+inputAlphabet.size()+" regular features.");
      InstanceList.Iterator iter = trainingList.iterator();
      //int ii = 0;

      // Normalize the parameters to be per-class multinomials
      double probs[][] = new double[scores.length][numFeatures];
      double lprobs[][] = new double[scores.length][numFeatures];

      for (int si = 0; si < scores.length; si++) {
        double sum = 0, max = MatrixOps.max (parameters);
        for (int fi = 0; fi < numFeatures; fi++) {
          // TODO Strongly consider some smoothing here.  What happens when all parameters are zero?
          // Oh, this should be no problem, because exp(0) == 1.
          probs[si][fi] = Math.exp(parameters[si*numFeatures+fi] - max);
          sum += probs[si][fi];
        }
        assert (sum > 0);
        for (int fi = 0; fi < numFeatures; fi++) {
          probs[si][fi] /= sum;
          lprobs[si][fi] = Math.log(probs[si][fi]);
        }
      }

      while (iter.hasNext()) {
        double instanceWeight = iter.getInstanceWeight();
        Instance instance = iter.nextInstance();
        Labeling labeling = instance.getLabeling ();
        //System.out.println("L Now "+inputAlphabet.size()+" regular features.");
        this.theClassifier.getClassificationScores (instance, scores);
        FeatureVector fv = (FeatureVector) instance.getData ();
        int li = labeling.getBestIndex();
        value = - (instanceWeight * Math.log (scores[li]));
        if (Double.isNaN(value)) {
          logger.fine ("MCMaxEntTrainer: Instance " + instance.getName()
                       + " has NaN value. log(scores)= " + Math.log(scores[li])
                       + " scores = " + scores[li]
                       + " has instance weight = " + instanceWeight);
        }
        if (Double.isInfinite(value)) {
          logger.warning ("Instance " + instance.getSource() + " has infinite value; skipping value and gradient");
          cachedValue -= value;
          cachedValueStale = false;
          return -value;   // continue;
        }
        cachedValue += value;
        // CPAL - this is a loop over classes and their scores
        //      - we compute the gradient by taking the dot product of the feature value
        //        and the probability of the class
        for (int si = 0; si < scores.length; si++) {
          if (scores[si] == 0) continue;
          assert (!Double.isInfinite(scores[si]));
          // CPAL - accumulating the current classifier's expectation of the feature
          //        vector counts for this class label
          // Current classifier has expectation over class label, not over feature vector
          MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv, -instanceWeight * scores[si]);
          cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]);
        }
        // CPAL - if we wish to do multiconditional training we need another term for this
        //        accumulated expectation
        if (usingMultiConditionalTraining) {
          // need something analogous to this
          // this.theClassifier.getClassificationScores (instance, scores);
          // this.theClassifier.getFeatureDistributions (instance,

          // Note: li is the "label" for this instance
          // Get the sum of the feature vector,
          // which is the number of counts for the document if we use that as input
          double Ncounts = MatrixOps.sum(fv);

          // CPAL - get the additional term for the value of our -log probability
          //      - this computation amounts to the dot product of the feature vector and the probability vector
          cachedValue -= (instanceWeight * fv.dotProduct(lprobs[li]));

          // CPAL - get the model expectation over features for the given class
          for (int fi = 0; fi < numFeatures; fi++) {
            //if (parameters[numFeatures*li + fi] != 0) {
            //  MatrixOps.rowPlusEquals(cachedGradient, numFeatures,li,fv,))
            cachedGradient[numFeatures*li + fi] += (-instanceWeight * Ncounts * probs[li][fi]);
            //}
          }
        }
      }
      //logger.info ("-Expectations:"); cachedGradient.print();

      // Incorporate prior on parameters
      if (usingHyperbolicPrior) {
        for (int li = 0; li < numLabels; li++)
          for (int fi = 0; fi < numFeatures; fi++)
            cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness
                            * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li*numFeatures + fi])));
      } else {
        for (int li = 0; li < numLabels; li++)
          for (int fi = 0; fi < numFeatures; fi++) {
            double param = parameters[li*numFeatures + fi];
            cachedValue += param * param / (2 * gaussianPriorVariance);
          }
      }
      cachedValue *= -1.0; // MAXIMIZE, NOT MINIMIZE
      cachedValueStale = false;
      progressLogger.info ("Value (loglikelihood) = " + cachedValue);
    }
    return cachedValue;
  }

  // CPAL first get value, then gradient
  public void getValueGradient (double[] buffer) {
    // Gradient is (constraint - expectation - parameters/gaussianPriorVariance)
    if (cachedGradientStale) {
      numGetValueGradientCalls++;
      if (cachedValueStale)
        // This will fill in the cachedGradient with the "-expectation"
        getValue ();
      // cachedGradient contains the negative expectations;
      // expectations are model expectations and constraints are
      // empirical expectations
      MatrixOps.plusEquals (cachedGradient, constraints);
      // CPAL - we need a second copy of the constraints
      //      - actually, we only want this for the feature values
      //      - I've moved this up into getValue
      //if (usingMultiConditionalTraining){
      //  MatrixOps.plusEquals(cachedGradient, constraints);
      //}

      // Incorporate prior on parameters
      if (usingHyperbolicPrior) {
        throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented.");
      } else {
        MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance);
      }

      // A parameter may be set to -infinity by an external user.
      // We set gradient to 0 because the parameter's value can
      // never change anyway and it will mess up future calculations
      // on the matrix, such as norm().
      MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0);

      // Set to zero all the gradient dimensions that are not among the selected features
      if (perLabelFeatureSelection == null) {
        for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
          MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0, featureSelection, false);
      } else {
        for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
          MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0,
                               perLabelFeatureSelection[labelIndex], false);
      }
      cachedGradientStale = false;
    }
    assert (buffer != null && buffer.length == parameters.length);
    System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length);
  }

  public double sumNegLogProb (double a, double b) {
    if (a == Double.POSITIVE_INFINITY && b == Double.POSITIVE_INFINITY)
      return Double.POSITIVE_INFINITY;
    else if (a > b)
      return b - Math.log (1 + Math.exp(b-a));
    else
      return a - Math.log (1 + Math.exp(a-b));
  }
}
}
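A note on the last helper: sumNegLogProb combines two negative log-probabilities in a numerically stable way. Given a = -log p and b = -log q, it returns -log(p + q), computed as min(a, b) - log(1 + exp(-|a - b|)) so that nothing underflows even when p and q are tiny.

For context, here is a minimal usage sketch of the trainer that owns this Maximizable. It is written against the MALLET 2.x package layout (cc.mallet.*); older builds of this file live under edu.umass.cs.mallet.base.* instead, and the input path "training.instances" and the example class name are hypothetical.

import java.io.File;

import cc.mallet.classify.MCMaxEnt;
import cc.mallet.classify.MCMaxEntTrainer;
import cc.mallet.classify.Trial;
import cc.mallet.types.InstanceList;

public class TrainMCMaxEntExample {
  public static void main (String[] args) throws Exception {
    // Load a previously built, serialized InstanceList (hypothetical path).
    InstanceList training = InstanceList.load (new File ("training.instances"));

    // The trainer wraps the value/gradient code above in a Maximizable and hands it
    // to a numerical optimizer, which calls getValue() and getValueGradient()
    // repeatedly until convergence.
    MCMaxEntTrainer trainer = new MCMaxEntTrainer ();
    MCMaxEnt classifier = trainer.train (training);

    // Evaluate on the training data itself, just to show the call pattern.
    Trial trial = new Trial (classifier, training);
    System.out.println ("Training accuracy = " + trial.getAccuracy ());
  }
}

Because the optimizer queries the objective and gradient many times per iteration, the class caches both and only recomputes them after setParameter/setParameters flips the cachedValueStale and cachedGradientStale flags.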