// MaxEntTrainer.java
/**
 * <p>Trains a maximum entropy model using feature selection and feature induction
 * (adding conjunctions of features as new features).</p>
 *
 * @param trainingData A list of <code>Instance</code>s whose <code>data</code>
 * fields are binary, augmentable <code>FeatureVector</code>s,
 * and whose <code>target</code> fields are <code>Label</code>s.
 * @param validationData [not currently used] As <code>trainingData</code>,
 * or <code>null</code>.
 * @param testingData As <code>trainingData</code>, or <code>null</code>.
 * @param evaluator The evaluator to track training progress and decide whether
 * to continue, or <code>null</code>.
 * @param totalIterations The maximum total number of training iterations,
 * including those taken during feature induction.
 * @param numIterationsBetweenFeatureInductions How many iterations to train
 * between one round of feature induction and the next; this should usually
 * be fairly small, like 5 or 10, to avoid overfitting with current features.
 * @param numFeatureInductions How many rounds of feature induction to run
 * before beginning normal training.
 * @param numFeaturesPerFeatureInduction The maximum number of features to
 * choose during each round of feature induction.
 *
 * @return The trained <code>MaxEnt</code> classifier
 */
// added - cjmaloof@linc.cis.upenn.edu
public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                             InstanceList validationData,
                                             InstanceList testingData,
                                             ClassifierEvaluating evaluator,
                                             int totalIterations,
                                             int numIterationsBetweenFeatureInductions,
                                             int numFeatureInductions,
                                             int numFeaturesPerFeatureInduction) {
    return trainWithFeatureInduction (trainingData,
                                      validationData,
                                      testingData,
                                      evaluator,
                                      null,
                                      totalIterations,
                                      numIterationsBetweenFeatureInductions,
                                      numFeatureInductions,
                                      numFeaturesPerFeatureInduction,
                                      EXP_GAIN);
}
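// Example call (a sketch, not part of the original source; the InstanceList
// variable and the iteration counts below are hypothetical, chosen to match
// the "fairly small, like 5 or 10" guidance in the Javadoc above):
//
//   MaxEntTrainer trainer = new MaxEntTrainer ();
//   Classifier classifier = trainer.trainWithFeatureInduction (
//       trainingInstances,  // InstanceList of binary, augmentable FeatureVectors
//       null,               // validationData (not currently used)
//       null,               // testingData
//       null,               // evaluator
//       100,                // totalIterations
//       10,                 // numIterationsBetweenFeatureInductions
//       5,                  // numFeatureInductions
//       50);                // numFeaturesPerFeatureInduction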
/**
 * <p>Like the other version of <code>trainWithFeatureInduction</code>, but
 * allows some default options to be changed.</p>
 *
 * @param maxent An initial partially-trained classifier (default <code>null</code>).
 * This classifier may be modified during training.
 * @param gainName The estimate of gain (log-likelihood increase) we want our chosen
 * features to maximize.
 * Should be one of <code>MaxEntTrainer.EXP_GAIN</code>,
 * <code>MaxEntTrainer.GRADIENT_GAIN</code>, or
 * <code>MaxEntTrainer.INFORMATION_GAIN</code> (default <code>EXP_GAIN</code>).
 *
 * @return The trained <code>MaxEnt</code> classifier
 */
public Classifier trainWithFeatureInduction (InstanceList trainingData,
                                             InstanceList validationData,
                                             InstanceList testingData,
                                             ClassifierEvaluating evaluator,
                                             MaxEnt maxent,
                                             int totalIterations,
                                             int numIterationsBetweenFeatureInductions,
                                             int numFeatureInductions,
                                             int numFeaturesPerFeatureInduction,
                                             String gainName) {

    // XXX This ought to be a parameter, except that setting it to true can
    // crash training ("Jump too small").
    boolean saveParametersDuringFI = false;

    Alphabet inputAlphabet = trainingData.getDataAlphabet();
    Alphabet outputAlphabet = trainingData.getTargetAlphabet();

    if (maxent == null)
        maxent = new MaxEnt(trainingData.getPipe(),
                            new double[(1+inputAlphabet.size()) * outputAlphabet.size()]);

    int trainingIteration = 0;
    int numLabels = outputAlphabet.size();

    // Initialize feature selection
    FeatureSelection globalFS = trainingData.getFeatureSelection();
    if (globalFS == null) {
        // Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
        globalFS = new FeatureSelection (trainingData.getDataAlphabet());
        trainingData.setFeatureSelection (globalFS);
    }
    if (validationData != null) validationData.setFeatureSelection (globalFS);
    if (testingData != null) testingData.setFeatureSelection (globalFS);
    maxent = new MaxEnt(maxent.getInstancePipe(), maxent.getParameters(), globalFS);

    // Run feature induction
    for (int featureInductionIteration = 0;
         featureInductionIteration < numFeatureInductions;
         featureInductionIteration++) {

        // Print out some feature information
        logger.info ("Feature induction iteration "+featureInductionIteration);

        // Train the model a little bit.  We don't care whether it converges; we
        // execute all feature induction iterations no matter what.
        if (featureInductionIteration != 0) {
            // Don't train until we have added some features
            setNumIterations(numIterationsBetweenFeatureInductions);
            maxent = (MaxEnt)this.train (trainingData, validationData, testingData,
                                         evaluator, maxent);
        }
        trainingIteration += numIterationsBetweenFeatureInductions;

        logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+
                     " features over "+numLabels+" labels.");

        // Create the list of error tokens
        InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
                                                        trainingData.getTargetAlphabet());

        // This errorInstances.featureSelection will get examined by FeatureInducer,
        // so it can know how to add "new" singleton features
        errorInstances.setFeatureSelection (globalFS);
        List errorLabelVectors = new ArrayList();    // these are length-1 vectors

        for (int i = 0; i < trainingData.size(); i++) {
            Instance instance = trainingData.getInstance(i);
            FeatureVector inputVector = (FeatureVector) instance.getData();
            Label trueLabel = (Label) instance.getTarget();

            // Having trained using just the current features, see how we classify
            // the training data now.
            Classification classification = maxent.classify(instance);

            if (!classification.bestLabelIsCorrect()) {
                errorInstances.add(inputVector, trueLabel, null, null);
                errorLabelVectors.add(classification.getLabelVector());
            }
        }
        logger.info ("Error instance list size = "+errorInstances.size());

        int s = errorLabelVectors.size();
        LabelVector[] lvs = new LabelVector[s];
        for (int i = 0; i < s; i++) {
            lvs[i] = (LabelVector)errorLabelVectors.get(i);
        }

        RankedFeatureVector.Factory gainFactory = null;
        if (gainName.equals (EXP_GAIN))
            gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
        else if (gainName.equals(GRADIENT_GAIN))
            gainFactory = new GradientGain.Factory (lvs);
        else if (gainName.equals(INFORMATION_GAIN))
            gainFactory = new InfoGain.Factory ();
        else
            throw new IllegalArgumentException("Unsupported gain name: "+gainName);

        FeatureInducer klfi = new FeatureInducer (gainFactory,
                                                  errorInstances,
                                                  numFeaturesPerFeatureInduction,
                                                  2*numFeaturesPerFeatureInduction,
                                                  2*numFeaturesPerFeatureInduction);

        // Note that this adds features globally, but not on a per-transition basis
        klfi.induceFeaturesFor (trainingData, false, false);
        if (testingData != null) klfi.induceFeaturesFor (testingData, false, false);
        logger.info ("MaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
        klfi = null;
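        // Parameter layout (a sketch inferred from the indexing below; this
        // comment is not in the original source): the weights live in one flat
        // array holding a contiguous block of (1 + numFeatures) doubles per
        // label, with the last slot of each block reserved for the default
        // (bias) feature, i.e.
        //
        //   weight(label, feature) = parameters[label*(1+numFeatures) + feature]
        //   weight(label, default) = parameters[label*(1+numFeatures) + numFeatures]
        //
        // so after the alphabet grows, each label's old block must be copied
        // into a wider array at a new offset.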
        double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];

        // XXX (Executing this block often causes an error during training; I don't know why.)
        if (saveParametersDuringFI) {
            // Keep current parameter values
            // XXX This relies on the implementation detail that the most recent features
            // added to an Alphabet get the highest indices.

            // Count parameters per output label
            int oldParamCount = maxent.parameters.length / outputAlphabet.size();
            int newParamCount = 1+inputAlphabet.size();
            // Copy params into the proper locations
            for (int i=0; i<outputAlphabet.size(); i++) {
                System.arraycopy(maxent.parameters, i*oldParamCount,
                                 newParameters, i*newParamCount,
                                 oldParamCount);
            }
            // Sanity check: label 0's block is copied to offset 0, so its old
            // and new values must agree element-by-element.
            for (int i=0; i<oldParamCount; i++)
                if (maxent.parameters[i] != newParameters[i]) {
                    System.out.println(maxent.parameters[i]+" "+newParameters[i]);
                    System.exit(0);
                }
        }

        maxent.parameters = newParameters;
        maxent.defaultFeatureIndex = inputAlphabet.size();
    }

    // Finished feature induction
    logger.info("Ended with "+globalFS.cardinality()+" features.");
    setNumIterations(totalIterations - trainingIteration);
    return this.train (trainingData, validationData, testingData,
                       evaluator, maxent);
}

// XXX Should these really be public?  Why?

/** Counts how many times this trainer has computed the gradient of the
 * log probability of training labels. */
public int getValueGradientCalls() {return numGetValueGradientCalls;}

/** Counts how many times this trainer has computed the
 * log probability of training labels. */
public int getValueCalls() {return numGetValueCalls;}

// public int getIterations() {return maximizerByGradient.getIterations();}

public String toString() {
    return "MaxEntTrainer"
        // + "("+maximizerClass.getName()+") "
        + ",numIterations=" + numIterations
        + (usingHyperbolicPrior
           ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope+
              ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness)
           : (",gaussianPriorVariance="+gaussianPriorVariance));
}

// A private inner class that wraps up a MaxEnt classifier and its training data.
// The result is a maximize.Maximizable function.
private class MaximizableTrainer implements Maximizable.ByGradient {
    double[] parameters, constraints, cachedGradient;
    MaxEnt theClassifier;
    InstanceList trainingList;
    // The expectations are (temporarily) stored in the cachedGradient
    double cachedValue;
    boolean cachedValueStale;
    boolean cachedGradientStale;
    int numLabels;
    int numFeatures;
    int defaultFeatureIndex;    // just for clarity
    FeatureSelection featureSelection;
    FeatureSelection[] perLabelFeatureSelection;

    public MaximizableTrainer () {}

    public MaximizableTrainer (InstanceList ilist, MaxEnt initialClassifier) {