
📄 MCMaxEntTrainer.java

📁 MALLET is an open-source project for natural language processing and machine learning.
💻 JAVA
📖 Page 1 of 3
     * including those taken during feature induction.
     * @param numIterationsBetweenFeatureInductions How many iterations to train
     * between one round of feature induction and the next; this should usually
     * be fairly small, like 5 or 10, to avoid overfitting with current features.
     * @param numFeatureInductions How many rounds of feature induction to run
     * before beginning normal training.
     * @param numFeaturesPerFeatureInduction The maximum number of features to
     * choose during each round of featureInduction.
     *
     * @return The trained <code>MaxEnt</code> classifier
     */
    // added - cjmaloof@linc.cis.upenn.edu
    public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                                 InstanceList validationData,
                                                 InstanceList testingData,
                                                 ClassifierEvaluating evaluator,
                                                 int totalIterations,
                                                 int numIterationsBetweenFeatureInductions,
                                                 int numFeatureInductions,
                                                 int numFeaturesPerFeatureInduction) {
        return trainWithFeatureInduction (trainingData,
                                          validationData,
                                          testingData,
                                          evaluator,
                                          null,
                                          totalIterations,
                                          numIterationsBetweenFeatureInductions,
                                          numFeatureInductions,
                                          numFeaturesPerFeatureInduction,
                                          EXP_GAIN);
    }

    /**
     * <p>Like the other version of <code>trainWithFeatureInduction</code>, but
     * allows some default options to be changed.</p>
     *
     * @param maxent An initial partially-trained classifier (default <code>null</code>).
     * This classifier may be modified during training.
     * @param gainName The estimate of gain (log-likelihood increase) we want our chosen
     * features to maximize.
     * Should be one of <code>MaxEntTrainer.EXP_GAIN</code>,
     * <code>MaxEntTrainer.GRADIENT_GAIN</code>, or
     * <code>MaxEntTrainer.INFORMATION_GAIN</code> (default <code>EXP_GAIN</code>).
     *
     * @return The trained <code>MaxEnt</code> classifier
     */
    public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                                 InstanceList validationData,
                                                 InstanceList testingData,
                                                 ClassifierEvaluating evaluator,
                                                 MCMaxEnt maxent,
                                                 int totalIterations,
                                                 int numIterationsBetweenFeatureInductions,
                                                 int numFeatureInductions,
                                                 int numFeaturesPerFeatureInduction,
                                                 String gainName) {

        // XXX This ought to be a parameter, except that setting it to true can
        // crash training ("Jump too small").
        boolean saveParametersDuringFI = false;

        Alphabet inputAlphabet = trainingData.getDataAlphabet();
        Alphabet outputAlphabet = trainingData.getTargetAlphabet();

        if (maxent == null)
            maxent = new MCMaxEnt(trainingData.getPipe(),
                                  new double[(1+inputAlphabet.size()) * outputAlphabet.size()]);

        int trainingIteration = 0;
        int numLabels = outputAlphabet.size();

        // Initialize feature selection
        FeatureSelection globalFS = trainingData.getFeatureSelection();
        if (globalFS == null) {
            // Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
            globalFS = new FeatureSelection (trainingData.getDataAlphabet());
            trainingData.setFeatureSelection (globalFS);
        }
        if (validationData != null) validationData.setFeatureSelection (globalFS);
        if (testingData != null) testingData.setFeatureSelection (globalFS);
        maxent = new MCMaxEnt(maxent.getInstancePipe(), maxent.getParameters(), globalFS);

        // Run feature induction
        for (int featureInductionIteration = 0;
             featureInductionIteration < numFeatureInductions;
             featureInductionIteration++) {

            // Print out some feature information
            logger.info ("Feature induction iteration "+featureInductionIteration);

            // Train the model a little bit.  We don't care whether it converges; we
            // execute all feature induction iterations no matter what.
            if (featureInductionIteration != 0) {
                // Don't train until we have added some features
                setNumIterations(numIterationsBetweenFeatureInductions);
                maxent = (MCMaxEnt)this.train (trainingData, validationData, testingData, evaluator,
                                               maxent);
            }
            trainingIteration += numIterationsBetweenFeatureInductions;

            logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+
                         " features over "+numLabels+" labels.");

            // Create the list of error tokens
            InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
                                                            trainingData.getTargetAlphabet());
            // This errorInstances.featureSelection will get examined by FeatureInducer,
            // so it can know how to add "new" singleton features
            errorInstances.setFeatureSelection (globalFS);
            List errorLabelVectors = new ArrayList();    // these are length-1 vectors
            for (int i = 0; i < trainingData.size(); i++) {
                Instance instance = trainingData.getInstance(i);
                FeatureVector inputVector = (FeatureVector) instance.getData();
                Label trueLabel = (Label) instance.getTarget();

                // Having trained using just the current features, see how we classify
                // the training data now.
                Classification classification = maxent.classify(instance);
                if (!classification.bestLabelIsCorrect()) {
                    errorInstances.add(inputVector, trueLabel, null, null);
                    errorLabelVectors.add(classification.getLabelVector());
                }
            }
            logger.info ("Error instance list size = "+errorInstances.size());
            int s = errorLabelVectors.size();
            LabelVector[] lvs = new LabelVector[s];
            for (int i = 0; i < s; i++) {
                lvs[i] = (LabelVector)errorLabelVectors.get(i);
            }

            RankedFeatureVector.Factory gainFactory = null;
            if (gainName.equals (EXP_GAIN))
                gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
            else if (gainName.equals(GRADIENT_GAIN))
                gainFactory = new GradientGain.Factory (lvs);
            else if (gainName.equals(INFORMATION_GAIN))
                gainFactory = new InfoGain.Factory ();
            else
                throw new IllegalArgumentException("Unsupported gain name: "+gainName);

            FeatureInducer klfi =
                new FeatureInducer (gainFactory,
                                    errorInstances,
                                    numFeaturesPerFeatureInduction,
                                    2*numFeaturesPerFeatureInduction,
                                    2*numFeaturesPerFeatureInduction);

            // Note that this adds features globally, but not on a per-transition basis
            klfi.induceFeaturesFor (trainingData, false, false);
            if (testingData != null) klfi.induceFeaturesFor (testingData, false, false);
            logger.info ("MCMaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
            klfi = null;

            double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];

            // XXX (Executing this block often causes an error during training; I don't know why.)
            if (saveParametersDuringFI) {
                // Keep current parameter values
                // XXX This relies on the implementation detail that the most recent features
                // added to an Alphabet get the highest indices.

                // Count parameters per output label
                int oldParamCount = maxent.parameters.length / outputAlphabet.size();
                int newParamCount = 1+inputAlphabet.size();
                // Copy params into the proper locations
                for (int i=0; i<outputAlphabet.size(); i++) {
                    System.arraycopy(maxent.parameters, i*oldParamCount,
                                     newParameters, i*newParamCount,
                                     oldParamCount);
                }
                for (int i=0; i<oldParamCount; i++)
                    if (maxent.parameters[i] != newParameters[i]) {
                        System.out.println(maxent.parameters[i]+" "+newParameters[i]);
                        System.exit(0);
                    }
            }

            maxent.parameters = newParameters;
            maxent.defaultFeatureIndex = inputAlphabet.size();
        }

        // Finished feature induction
        logger.info("Ended with "+globalFS.cardinality()+" features.");
        setNumIterations(totalIterations - trainingIteration);
        return this.train (trainingData, validationData, testingData,
                           evaluator, maxent);
    }

    // XXX Should these really be public?  Why?

    /** Counts how many times this trainer has computed the gradient of the
     * log probability of training labels. */
    public int getValueGradientCalls() {return numGetValueGradientCalls;}

    /** Counts how many times this trainer has computed the
     * log probability of training labels. */
    public int getValueCalls() {return numGetValueCalls;}

//  public int getIterations() {return maximizerByGradient.getIterations();}

    public String toString()
    {
        return "MCMaxEntTrainer"
        //  + "("+maximizerClass.getName()+") "
               + ",numIterations=" + numIterations
               + (usingHyperbolicPrior
                  ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope+
                     ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness)
                  : (",gaussianPriorVariance="+gaussianPriorVariance));
    }

    // A private inner class that wraps up a MCMaxEnt classifier and its training data.
    // The result is a maximize.Maximizable function.
    private class MaximizableTrainer implements Maximizable.ByGradient
    {
        double[] parameters, constraints, cachedGradient;
        MCMaxEnt theClassifier;
        InstanceList trainingList;
        // The expectations are (temporarily) stored in the cachedGradient
        double cachedValue;
        boolean cachedValueStale;
        boolean cachedGradientStale;
        int numLabels;
        int numFeatures;
        int defaultFeatureIndex;        // just for clarity
        FeatureSelection featureSelection;
        FeatureSelection[] perLabelFeatureSelection;

        public MaximizableTrainer (){}

        public MaximizableTrainer (InstanceList ilist, MCMaxEnt initialClassifier)
        {
            this.trainingList = ilist;
            Alphabet fd = ilist.getDataAlphabet();
            LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
            // Don't fd.stopGrowth, because someone might want to do feature induction
            ld.stopGrowth();
            // Add one feature for the "default feature".
            this.numLabels = ld.size();
            this.numFeatures = fd.size() + 1;
            this.defaultFeatureIndex = numFeatures-1;
            this.parameters = new double [numLabels * numFeatures];
            this.constraints = new double [numLabels * numFeatures];
            this.cachedGradient = new double [numLabels * numFeatures];
            Arrays.fill (parameters, 0.0);
            Arrays.fill (constraints, 0.0);
            Arrays.fill (cachedGradient, 0.0);
            this.featureSelection = ilist.getFeatureSelection();
            this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection();
            // Add the default feature index to the selection
            if (featureSelection != null)
                featureSelection.add (defaultFeatureIndex);
            if (perLabelFeatureSelection != null)
                for (int i = 0; i < perLabelFeatureSelection.length; i++)
                    perLabelFeatureSelection[i].add (defaultFeatureIndex);
            // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
            assert (featureSelection == null || perLabelFeatureSelection == null);
            if (initialClassifier != null) {
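The listing is cut off here and continues on page 2. For orientation, below is a minimal usage sketch of the two trainWithFeatureInduction overloads shown above. It is not part of MCMaxEntTrainer.java: the no-argument MCMaxEntTrainer constructor, the pre-built training/validation/testing InstanceLists, and the assumption that the gain-name constants are public statics on MCMaxEntTrainer are illustrative guesses based only on how those names are used in this page.

    // Hypothetical usage sketch; assumes `training`, `validation`, and `testing`
    // are InstanceLists built elsewhere through a shared Pipe, and that the
    // trainer has a no-argument constructor (not shown on this page).
    MCMaxEntTrainer trainer = new MCMaxEntTrainer();

    // Short overload: defaults to a null initial classifier and EXP_GAIN.
    // Train for 100 total iterations, pausing every 10 iterations for one of
    // 5 feature-induction rounds, each adding at most 50 ranked features.
    Classifier c1 = trainer.trainWithFeatureInduction(
            training, validation, testing,
            null,  // ClassifierEvaluating evaluator: none
            100,   // totalIterations, including those taken during induction
            10,    // numIterationsBetweenFeatureInductions (small, per the Javadoc)
            5,     // numFeatureInductions
            50);   // numFeaturesPerFeatureInduction

    // Long overload: also choose the initial classifier and the gain criterion
    // (EXP_GAIN, GRADIENT_GAIN, or INFORMATION_GAIN).
    Classifier c2 = trainer.trainWithFeatureInduction(
            training, validation, testing, null,
            null,  // MCMaxEnt maxent: start from an untrained model
            100, 10, 5, 50,
            MCMaxEntTrainer.INFORMATION_GAIN);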
