📄 MaxEntTrainer.java
// NOTE: this listing begins mid-file, inside the constructor of the trainer's
// inner maximizable class, which takes (InstanceList ilist, MaxEnt initialClassifier).
this.trainingList = ilist;
Alphabet fd = ilist.getDataAlphabet ();
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet ();
// Don't fd.stopGrowth, because someone might want to do feature induction
ld.stopGrowth ();
// Add one feature for the "default feature".
this.numLabels = ld.size ();
this.numFeatures = fd.size () + 1;
this.defaultFeatureIndex = numFeatures - 1;
// Parameters, constraints and gradient are flat arrays laid out row-major:
// the entry for (label li, feature fi) lives at index li * numFeatures + fi.
this.parameters = new double[numLabels * numFeatures];
this.constraints = new double[numLabels * numFeatures];
this.cachedGradient = new double[numLabels * numFeatures];
Arrays.fill (parameters, 0.0);
Arrays.fill (constraints, 0.0);
Arrays.fill (cachedGradient, 0.0);
this.featureSelection = ilist.getFeatureSelection ();
this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection ();
// Add the default feature index to the selection
if (featureSelection != null)
    featureSelection.add (defaultFeatureIndex);
if (perLabelFeatureSelection != null)
    for (int i = 0; i < perLabelFeatureSelection.length; i++)
        perLabelFeatureSelection[i].add (defaultFeatureIndex);
// xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
assert (featureSelection == null || perLabelFeatureSelection == null);
if (initialClassifier != null) {
    this.theClassifier = initialClassifier;
    this.parameters = theClassifier.parameters;
    this.featureSelection = theClassifier.featureSelection;
    this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
    this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
    assert (initialClassifier.getInstancePipe () == ilist.getPipe ());
} else if (this.theClassifier == null) {
    this.theClassifier = new MaxEnt (ilist.getPipe (), parameters, featureSelection, perLabelFeatureSelection);
}
cachedValueStale = true;
cachedGradientStale = true;

// Initialize the constraints: for each training instance, accumulate its
// (weighted) feature counts into the row of its observed label.  These are
// the empirical expectations that the gradient is measured against.
InstanceList.Iterator iter = trainingList.iterator ();
logger.fine ("Number of instances in training list = " + trainingList.size ());
while (iter.hasNext ()) {
    double instanceWeight = iter.getInstanceWeight ();
    Instance inst = iter.nextInstance ();
    Labeling labeling = inst.getLabeling ();
    //logger.fine ("Instance "+ii+" labeling="+labeling);
    FeatureVector fv = (FeatureVector) inst.getData ();
    Alphabet fdict = fv.getAlphabet ();
    assert (fv.getAlphabet () == fd);
    int li = labeling.getBestIndex ();
    MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, instanceWeight);
    assert (!Double.isNaN (instanceWeight)) : "instanceWeight is NaN";
    assert (!Double.isNaN (li)) : "bestIndex is NaN";
    // Warn about any NaN feature values, which would poison training.
    boolean hasNaN = false;
    for (int i = 0; i < fv.numLocations (); i++) {
        if (Double.isNaN (fv.valueAtLocation (i))) {
            logger.info ("NaN for feature " + fdict.lookupObject (fv.indexAtLocation (i)).toString ());
            hasNaN = true;
        }
    }
    if (hasNaN)
        logger.info ("NaN in instance: " + inst.getName ());
    // For the default feature, whose value is always 1.0
    constraints[li * numFeatures + defaultFeatureIndex] += 1.0 * instanceWeight;
}
//TestMaximizable.testValueAndGradientCurrentParameters (this);
}

public MaxEnt getClassifier () {
    return theClassifier;
}

public double getParameter (int index) {
    return parameters[index];
}

public void setParameter (int index, double v) {
    cachedValueStale = true;
    cachedGradientStale = true;
    parameters[index] = v;
}

public int getNumParameters () {
    return parameters.length;
}

public void getParameters (double[] buff) {
    // Note: if buff is null or the wrong length, the freshly allocated array is
    // local to this method and never reaches the caller; callers must pass a
    // buffer of length getNumParameters() to actually receive the values.
    if (buff == null || buff.length != parameters.length)
        buff = new double[parameters.length];
    System.arraycopy (parameters, 0, buff, 0, parameters.length);
}

public void setParameters (double[] buff) {
    assert (buff != null);
    cachedValueStale = true;
    cachedGradientStale = true;
    if (buff.length != parameters.length)
        parameters = new double[buff.length];
    System.arraycopy (buff, 0, parameters, 0, buff.length);
}

// The value is the log probability of the training labels, minus the prior
// penalty; it is what the optimizer maximizes.
public double getValue () {
    if (cachedValueStale) {
        numGetValueCalls++;
        cachedValue = 0;
        // We'll store the expectation values in "cachedGradient" for now
        cachedGradientStale = true;
        MatrixOps.setAll (cachedGradient, 0.0);
        // Incorporate likelihood of data
        double[] scores = new double[trainingList.getTargetAlphabet ().size ()];
        double value = 0.0;
        InstanceList.Iterator iter = trainingList.iterator ();
        int ii = 0;
        while (iter.hasNext ()) {
            ii++;
            double instanceWeight = iter.getInstanceWeight ();
            Instance instance = iter.nextInstance ();
            Labeling labeling = instance.getLabeling ();
            this.theClassifier.getClassificationScores (instance, scores);
            FeatureVector fv = (FeatureVector) instance.getData ();
            int li = labeling.getBestIndex ();
            // Weighted negative log probability of the true label.
            value = - (instanceWeight * Math.log (scores[li]));
            if (Double.isNaN (value)) {
                logger.fine ("MaxEntTrainer: Instance " + instance.getName ()
                    + " has NaN value. log(scores)= " + Math.log (scores[li])
                    + " scores = " + scores[li]
                    + " has instance weight = " + instanceWeight);
            }
            if (Double.isInfinite (value)) {
                logger.warning ("Instance " + instance.getSource ()
                    + " has infinite value; skipping value and gradient");
                cachedValue -= value;
                cachedValueStale = false;
                return -value;
                // continue;
            }
            cachedValue += value;
            // CPAL - this is a loop over classes and their scores;
            // we compute the gradient by taking the dot product of the feature
            // values and the probability of the class.
            for (int si = 0; si < scores.length; si++) {
                if (scores[si] == 0)
                    continue;
                assert (!Double.isInfinite (scores[si]));
                // CPAL - accumulating the current classifier's expectation of the
                // feature vector counts for this class label.
                MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv,
                                         -instanceWeight * scores[si]);
                cachedGradient[numFeatures * si + defaultFeatureIndex] +=
                    (-instanceWeight * scores[si]);
            }
            // CPAL - if we wish to do multiconditional training, we need another
            // term for this accumulated expectation.
            if (usingMultiConditionalTraining) {
                // need something analogous to this:
                //   this.theClassifier.getClassificationScores (instance, scores);
                //   this.theClassifier.getFeatureDistributions (instance,
                // Note: li is the "label" for this instance
                for (int fi = 0; fi < numFeatures; fi++) {
                    cachedGradient[numFeatures * li + fi] +=
                        (-instanceWeight * Math.exp (parameters[numFeatures * li + fi]));
                }
            }
        }
        //logger.info ("-Expectations:"); cachedGradient.print();

        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
            for (int li = 0; li < numLabels; li++)
                for (int fi = 0; fi < numFeatures; fi++)
                    cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness
                        * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li * numFeatures + fi])));
        } else {
            // Gaussian prior: penalize each parameter by param^2 / (2 * variance).
            for (int li = 0; li < numLabels; li++)
                for (int fi = 0; fi < numFeatures; fi++) {
                    double param = parameters[li * numFeatures + fi];
                    cachedValue += param * param / (2 * gaussianPriorVariance);
                }
        }
        cachedValue *= -1.0; // MAXIMIZE, NOT MINIMIZE
        cachedValueStale = false;
        progressLogger.info ("Value (loglikelihood) = " + cachedValue);
    }
    return cachedValue;
}

// CPAL - first get the value, then the gradient.
// Gradient is (constraint - expectation - parameters/gaussianPriorVariance)
public void getValueGradient (double[] buffer) {
    if (cachedGradientStale) {
        numGetValueGradientCalls++;
        if (cachedValueStale)
            // This will fill in the cachedGradient with the "-expectation"
            getValue ();
        // cachedGradient contains the negated model expectations;
        // constraints are the empirical expectations.
        MatrixOps.plusEquals (cachedGradient, constraints);
        // CPAL - we need a second copy of the constraints
        if (usingMultiConditionalTraining) {
            MatrixOps.plusEquals (cachedGradient, constraints);
        }
        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
            throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented.");
        } else {
            MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance);
        }
        // A parameter may be set to -infinity by an external user.
        // We set its gradient to 0 because the parameter's value can never
        // change anyway, and it would mess up future calculations on the
        // matrix, such as norm().
        MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0);
        // Set to zero all the gradient dimensions that are not among the selected features
        if (perLabelFeatureSelection == null) {
            for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
                MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0,
                                     featureSelection, false);
        } else {
            for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
                MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0,
                                     perLabelFeatureSelection[labelIndex], false);
        }
        cachedGradientStale = false;
    }
    assert (buffer != null && buffer.length == parameters.length);
    System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length);
}
} // end of the inner maximizable-trainer class
} // end of MaxEntTrainer
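The class above exposes exactly what MALLET's gradient-based optimizer consumes: getValue() returns the Gaussian-prior-penalized conditional log-likelihood, and getValueGradient() fills a buffer with (constraints - expectations - parameters/gaussianPriorVariance). For orientation, here is a minimal usage sketch of the surrounding trainer. It is an illustration, not part of the file: it assumes the edu.umass.cs.mallet.base package layout this listing comes from and the ClassifierTrainer.train(InstanceList) convenience method, and buildTrainingSet() is a hypothetical stand-in for your own Pipe-based data loading.

import edu.umass.cs.mallet.base.classify.Classification;
import edu.umass.cs.mallet.base.classify.Classifier;
import edu.umass.cs.mallet.base.classify.MaxEntTrainer;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;

public class MaxEntUsageSketch {
    public static void main (String[] args) {
        // Hypothetical helper: must return an InstanceList whose Pipe produces
        // FeatureVector data and Label targets, as the constructor above expects.
        InstanceList training = buildTrainingSet ();

        // Train with the default Gaussian prior; the optimizer repeatedly calls
        // getValue()/getValueGradient() on the inner maximizable class above.
        MaxEntTrainer trainer = new MaxEntTrainer ();
        Classifier maxent = trainer.train (training);

        // Classify one training instance, using the iterator style from the listing.
        InstanceList.Iterator iter = training.iterator ();
        Instance inst = iter.nextInstance ();
        Classification c = maxent.classify (inst);
        System.out.println (inst.getName () + " => " + c.getLabeling ().getBestLabel ());
    }

    private static InstanceList buildTrainingSet () {
        // Application-specific; typically assembled from a SerialPipes chain.
        throw new UnsupportedOperationException ("supply your own data loading");
    }
}

If your MALLET version exposes it, the gaussianPriorVariance used in getValue() can also be set through the MaxEntTrainer constructor that takes a prior-variance argument; smaller variance means stronger regularization toward zero weights.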