// crf4.java
// (removed: code-hosting site page header accidentally captured with this source)
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.fst;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.maximize.*;import edu.umass.cs.mallet.base.maximize.tests.*;import edu.umass.cs.mallet.base.util.Maths;import edu.umass.cs.mallet.base.util.MalletLogger;import edu.umass.cs.mallet.base.util.ArrayUtils;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Arrays;import java.util.BitSet;import java.util.Random;import java.util.regex.*;import java.util.logging.*;import java.io.*;import java.lang.reflect.Constructor;import java.text.DecimalFormat;/* Changes from CRF3: - converted MinimizableTrainer to a MaximizableTrainer - getCost is now getValue - getCostGradient is now getValueGradient *//* There are several different kinds of numeric values: "weights" range from -Inf to Inf. High weights make a path more likely. These don't appear directly in Transducer.java, but appear as parameters to many subclasses, such as CRFs. Weights are also often summed, or combined in a dot product with feature vectors. "unnormalized costs" range from -Inf to Inf. High costs make a path less likely. Unnormalized costs can be obtained from negated weights or negated sums of weights. These are often returned by a TransitionIterator's getValue() method. The LatticeNode.alpha values are unnormalized costs. "normalized costs" range from 0 to Inf. High costs make a path less likely. 
Normalized costs can safely be considered as the -log(probability) of some
event.  They can be obtained by subtracting a (negative) normalizer from
unnormalized costs, for example, subtracting the total cost of a lattice.
Typically initialCosts and finalCosts are examples of normalized costs, but
they are also allowed to be unnormalized costs.  The gammas[][],
stateGammas[], and transitionXis[][] are all normalized costs, as well as
the return value of Lattice.getValue().

"probabilities" range from 0 to 1.  High probabilities make a path more
likely.  They are obtained from normalized costs by negating and taking
the exp, i.e. p = exp(-normalizedCost).

"sums of probabilities" range from 0 to positive numbers.  They are the
sum of several probabilities.  These are passed to the incrementCount()
methods.  */

/** A (linear-chain) Conditional Random Field transducer, version 4. */
public class CRF4 extends Transducer implements Serializable
{
	// NOTE(review): previously MalletLogger.getLogger(CRF.class.getName()),
	// which named this class's logger after the unrelated CRF class.
	private static Logger logger = MalletLogger.getLogger (CRF4.class.getName());

	static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1.0;
	static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
	static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0;
	static final String LABEL_SEPARATOR = ",";

	// The length of each weights[i] Vector is the size of the input
	// dictionary plus one; the additional column at the end is for the
	// "default feature".
Alphabet inputAlphabet;                      // feature (observation) dictionary
	Alphabet outputAlphabet;                     // label dictionary
	ArrayList states = new ArrayList ();         // all State objects, indexed by state index
	ArrayList initialStates = new ArrayList ();  // states whose initialCost < INFINITE_COST
	HashMap name2state = new HashMap ();         // state name -> State
	SparseVector[] weights, constraints, expectations;
	double[] defaultWeights, defaultConstraints, defaultExpectations;	// parameters for default feature
	BitSet[] weightsPresent;				// Only used in setWeightsDimensionAsIn()
	// FeatureInduction can fill this in
	FeatureSelection globalFeatureSelection;
	// "featureSelections" is on a per- weights[i] basis, and over-rides
	// (permanently disabling) FeatureInducer's and
	// setWeightsDimensionsAsIn() from using these features on these transitions
	FeatureSelection[] featureSelections;
	boolean[] weightsFrozen;
	Alphabet weightAlphabet = new Alphabet ();
	boolean trainable = false;
	boolean gatheringConstraints = false;
	boolean gatheringWeightsPresent = false;
	//int defaultFeatureIndex;
	boolean usingHyperbolicPrior = false;
	double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
	double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
	double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
	boolean useSparseWeights = true;
	private transient boolean useSomeUnsupportedTrick = true;
	private boolean cachedValueStale = true;     // caches invalidated when parameters change
	private boolean cachedGradientStale = true;
	protected boolean someTrainingDone = false;
	private int transductionType = 0;            // one of the VITERBI* constants declared below
	ArrayList featureInducers = new ArrayList();

	// xxx temporary hack.  This is quite useful to have, though!! -cas
	public boolean printGradient = false;

	/** Construct a CRF whose input/output alphabets come from the input pipe. */
	public CRF4 (Pipe inputPipe, Pipe outputPipe)
	{
		this.inputPipe = inputPipe;
		this.outputPipe = outputPipe;
		this.inputAlphabet = inputPipe.getDataAlphabet();
		this.outputAlphabet = inputPipe.getTargetAlphabet();
		//this.defaultFeatureIndex = inputAlphabet.size();
		//inputAlphabet.stopGrowth();
	}

	/** Construct a CRF directly from alphabets; freezes the input alphabet. */
	public CRF4 (Alphabet inputAlphabet, Alphabet outputAlphabet)
	{
		inputAlphabet.stopGrowth();
		logger.info ("CRF input dictionary size = "+inputAlphabet.size());
		//xxx outputAlphabet.stopGrowth();
		this.inputAlphabet = inputAlphabet;
		this.outputAlphabet = outputAlphabet;
		//this.defaultFeatureIndex = inputAlphabet.size();
	}

	/**
	 * Create a CRF whose states and weights are a copy of ones from another CRF.
	 */
	public CRF4 (CRF4 other)
	{
		this (other.getInputPipe (), other.getOutputPipe ());
		copyStatesAndWeightsFrom (other);
		assertWeightsLength ();
	}

	// Deep-copies the state structure and weight parameters of initialCRF into this CRF.
	private void copyStatesAndWeightsFrom (CRF4 initialCRF)
	{
		//this.defaultFeatureIndex = initialCRF.defaultFeatureIndex;
		weightAlphabet = (Alphabet) initialCRF.weightAlphabet.clone ();
		weights = new SparseVector [initialCRF.weights.length];
		states.clear ();
		for (int i = 0; i < initialCRF.states.size(); i++) {
			State s = (State) initialCRF.getState (i);
			String[][] weightNames = new String[s.weightsIndices.length][];
			for (int j = 0; j < weightNames.length; j++) {
				int[] thisW = s.weightsIndices[j];
				// recover the weight names so addState re-interns them in this CRF's alphabet
				weightNames[j] = (String[]) initialCRF.weightAlphabet.lookupObjects(thisW, new String [s.weightsIndices[j].length]);
			}
			addState (s.name, s.initialCost, s.finalCost, s.destinationNames, s.labels, weightNames);
		}
		assert (weights.length > 0);
		defaultWeights = (double[]) initialCRF.defaultWeights.clone();
		for (int i = 0; i < weights.length; i++) {
			// weight indices may differ between the two alphabets; map by name
			Object wname = weightAlphabet.lookupObject (i);
			int otherIndex = initialCRF.weightAlphabet.lookupIndex (wname);
			weights[i] = (SparseVector) initialCRF.weights [otherIndex].cloneMatrix();
		}
		featureSelections = (FeatureSelection[]) initialCRF.featureSelections.clone ();
weightsFrozen = (boolean[]) initialCRF.weightsFrozen.clone();
	}

	public Alphabet getInputAlphabet () { return inputAlphabet; }
	public Alphabet getOutputAlphabet () { return outputAlphabet; }

	// Prior hyper-parameter accessors.
	public void setUseHyperbolicPrior (boolean f) { usingHyperbolicPrior = f; }
	public void setHyperbolicPriorSlope (double p) { hyperbolicPriorSlope = p; }
	public void setHyperbolicPriorSharpness (double p) { hyperbolicPriorSharpness = p; }
	// NOTE(review): oddly named -- these are plain getters for the hyperbolic
	// prior slope/sharpness; the name is kept for caller compatibility.
	public double getUseHyperbolicPriorSlope () { return hyperbolicPriorSlope; }
	public double getUseHyperbolicPriorSharpness () { return hyperbolicPriorSharpness; }
	public void setGaussianPriorVariance (double p) { gaussianPriorVariance = p; }
	public double getGaussianPriorVariance () { return gaussianPriorVariance; }
	//public int getDefaultFeatureIndex () { return defaultFeatureIndex;}

	public void setUseSparseWeights (boolean b) { useSparseWeights = b; }
	public boolean getUseSparseWeights () { return useSparseWeights; }

	/** Sets whether to use the 'some unsupported trick.' This trick is, if training a CRF
	 * where some training has been done and sparse weights are used, to add a few weights
	 * for features that do not occur in the training data.
	 * <p>
	 * This generally leads to better accuracy at only a small memory cost.
	 * @param b Whether to use the trick
	 */
	public void setUseSomeUnsupportedTrick (boolean b) { useSomeUnsupportedTrick = b; }

	// Types of transduction support
	public static final int VITERBI = 0;
	// CPAL - some new beam based "transducers"
	// - Here is a forward viterbi "beam search"
	public static final int VITERBI_FBEAM = 1;
	// CPAL - backward beam transducer
	public static final int VITERBI_BBEAM = 2;
	// CPAL - forward backward beam transducer
	public static final int VITERBI_FBBEAM = 3;
	// CPAL - adaptive KL divergence forward beam
	public static final int VITERBI_FBEAMKL = 4;

	public int getTransductionType () { return transductionType; }
	public void setTransductionType (int transductionType) { this.transductionType = transductionType; }

	// Factory method for State; subclasses may override to supply a specialized State.
	protected State newState (String name, int index,
	                          double initialCost, double finalCost,
	                          String[] destinationNames,
	                          String[] labelNames,
	                          String[][] weightNames,
	                          CRF4 crf)
	{
		return new State (name, index, initialCost, finalCost,
		                  destinationNames, labelNames, weightNames, crf);
	}

	/** Add a state whose transitions may each be parameterized by several
	 *  named weight sets.
	 *  @throws IllegalArgumentException if a state with this name already exists */
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames,
	                      String[][] weightNames)
	{
		assert (weightNames.length == destinationNames.length);
		assert (labelNames.length == destinationNames.length);
		setTrainable (false);	// structure change invalidates any trainable setup
		if (name2state.get(name) != null)
			throw new IllegalArgumentException ("State with name `"+name+"' already exists.");
		State s = newState (name, states.size(), initialCost, finalCost,
		                    destinationNames, labelNames, weightNames, this);
		s.print ();	// NOTE(review): debug dump on every addState -- consider removing
		states.add (s);
		if (initialCost < INFINITE_COST)
			initialStates.add (s);
		name2state.put (name, s);
	}

	/** Add a state where each transition has exactly one named weight set. */
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames,
	                      String[] weightNames)
	{
		String[][] newWeightNames = new String[weightNames.length][1];
		for (int i = 0; i < weightNames.length; i++)
			newWeightNames[i][0] = weightNames[i];
		this.addState (name, initialCost, finalCost,
destinationNames, labelNames, newWeightNames);
	}

	// Default gives separate parameters to each transition
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames)
	{
		assert (destinationNames.length == labelNames.length);
		// name each weight set after its transition, e.g. "S1->S2:LABEL"
		String[] weightNames = new String[labelNames.length];
		for (int i = 0; i < labelNames.length; i++)
			weightNames[i] = name + "->" + destinationNames[i] + ":" + labelNames[i];
		this.addState (name, initialCost, finalCost, destinationNames, labelNames, weightNames);
	}

	// Add a state with parameters equal zero, and labels on out-going arcs
	// the same name as their destination state names.
	public void addState (String name, String[] destinationNames)
	{
		this.addState (name, 0, 0, destinationNames, destinationNames);
	}

	// Add a group of states that are fully connected with each other,
	// with parameters equal zero, and labels on their out-going arcs
	// the same name as their destination state names.
	public void addFullyConnectedStates (String[] stateNames)
	{
		for (int i = 0; i < stateNames.length; i++)
			addState (stateNames[i], stateNames);
	}

	public void addFullyConnectedStatesForLabels ()
	{
		String[] labels = new String[outputAlphabet.size()];
		// This is assuming that the entries in the outputAlphabet are Strings!
for (int i = 0; i < outputAlphabet.size(); i++) {
			logger.info ("CRF: outputAlphabet.lookup class = "+
			             outputAlphabet.lookupObject(i).getClass().getName());
			labels[i] = (String) outputAlphabet.lookupObject(i);
		}
		addFullyConnectedStates (labels);
	}

	/** Add a start state named "&lt;START&gt;"; see {@link #addStartState(String)}. */
	public void addStartState ()
	{
		addStartState ("<START>");
	}

	/** Add a new state with the given name that becomes the unique start state:
	 *  every existing state's initial cost is set to INFINITE_COST, and the new
	 *  state (initial cost 0, final cost INFINITE_COST) transitions to all of them. */
	public void addStartState (String name)
	{
		for (int i = 0; i < numStates (); i++)
			getState(i).initialCost = INFINITE_COST;
		String[] dests = new String [numStates ()];
		for (int i = 0; i < dests.length; i++)
			dests[i] = getState(i).getName();
		addState (name, 0, INFINITE_COST, dests, dests);
	}

	/** Make the given (already added) state the unique start state by giving it
	 *  initial cost 0 and every other state initial cost INFINITE_COST. */
	public void setAsStartState (State state)
	{
		for (int i = 0; i < numStates(); i++) {
			Transducer.State other = getState (i);
			if (other == state) {
				other.setInitialCost (0);
			} else {
				other.setInitialCost (INFINITE_COST);
			}
		}
	}
// (removed: code-viewer keyboard-shortcut help text accidentally captured with this source)