// crf4.java
// (removed: code-hosting site page header accidentally captured with this source)
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.fst;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.maximize.*;import edu.umass.cs.mallet.base.maximize.tests.*;import edu.umass.cs.mallet.base.util.Maths;import edu.umass.cs.mallet.base.util.MalletLogger;import edu.umass.cs.mallet.base.util.ArrayUtils;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Arrays;import java.util.BitSet;import java.util.Random;import java.util.regex.*;import java.util.logging.*;import java.io.*;import java.lang.reflect.Constructor;import java.text.DecimalFormat;/* Changes from CRF3: - converted MinimizableTrainer to a MaximizableTrainer - getCost is now getValue - getCostGradient is now getValueGradient *//* There are several different kinds of numeric values: "weights" range from -Inf to Inf. High weights make a path more likely. These don't appear directly in Transducer.java, but appear as parameters to many subclasses, such as CRFs. Weights are also often summed, or combined in a dot product with feature vectors. "unnormalized costs" range from -Inf to Inf. High costs make a path less likely. Unnormalized costs can be obtained from negated weights or negated sums of weights. These are often returned by a TransitionIterator's getValue() method. The LatticeNode.alpha values are unnormalized costs. "normalized costs" range from 0 to Inf. High costs make a path less likely. 
Normalized costs can safely be considered as the -log(probability) of some
event.  They can be obtained by subtracting a (negative) normalizer from
unnormalized costs, for example, subtracting the total cost of a lattice.
Typically initialCosts and finalCosts are examples of normalized costs, but
they are also allowed to be unnormalized costs.  The gammas[][],
stateGammas[], and transitionXis[][] are all normalized costs, as well as
the return value of Lattice.getValue().

"probabilities" range from 0 to 1.  High probabilities make a path more
likely.  They are obtained from normalized costs by negating and taking
the exp, i.e. p = exp(-normalizedCost).

"sums of probabilities" range from 0 to positive numbers.  They are the
sum of several probabilities.  These are passed to the incrementCount()
methods.  */

/** A (linear-chain) Conditional Random Field transducer, version 4. */
public class CRF4 extends Transducer implements Serializable
{
	// NOTE(review): previously MalletLogger.getLogger(CRF.class.getName()),
	// which named this class's logger after the unrelated CRF class.
	private static Logger logger = MalletLogger.getLogger (CRF4.class.getName());

	static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1.0;
	static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
	static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0;
	static final String LABEL_SEPARATOR = ",";

	// The length of each weights[i] Vector is the size of the input
	// dictionary plus one; the additional column at the end is for the
	// "default feature".
Alphabet inputAlphabet;                      // feature (observation) dictionary
	Alphabet outputAlphabet;                     // label dictionary
	ArrayList states = new ArrayList ();         // all State objects, indexed by state index
	ArrayList initialStates = new ArrayList ();  // states whose initialCost < INFINITE_COST
	HashMap name2state = new HashMap ();         // state name -> State
	SparseVector[] weights, constraints, expectations;
	double[] defaultWeights, defaultConstraints, defaultExpectations;	// parameters for default feature
	BitSet[] weightsPresent;				// Only used in setWeightsDimensionAsIn()
	// FeatureInduction can fill this in
	FeatureSelection globalFeatureSelection;
	// "featureSelections" is on a per- weights[i] basis, and over-rides
	// (permanently disabling) FeatureInducer's and
	// setWeightsDimensionsAsIn() from using these features on these transitions
	FeatureSelection[] featureSelections;
	boolean[] weightsFrozen;
	Alphabet weightAlphabet = new Alphabet ();
	boolean trainable = false;
	boolean gatheringConstraints = false;
	boolean gatheringWeightsPresent = false;
	//int defaultFeatureIndex;
	boolean usingHyperbolicPrior = false;
	double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
	double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
	double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
	boolean useSparseWeights = true;
	private transient boolean useSomeUnsupportedTrick = true;
	private boolean cachedValueStale = true;     // caches invalidated when parameters change
	private boolean cachedGradientStale = true;
	protected boolean someTrainingDone = false;
	private int transductionType = 0;            // one of the VITERBI* constants declared below
	ArrayList featureInducers = new ArrayList();

	// xxx temporary hack.  This is quite useful to have, though!! -cas
	public boolean printGradient = false;

	/** Construct a CRF whose input/output alphabets come from the input pipe. */
	public CRF4 (Pipe inputPipe, Pipe outputPipe)
	{
		this.inputPipe = inputPipe;
		this.outputPipe = outputPipe;
		this.inputAlphabet = inputPipe.getDataAlphabet();
		this.outputAlphabet = inputPipe.getTargetAlphabet();
		//this.defaultFeatureIndex = inputAlphabet.size();
		//inputAlphabet.stopGrowth();
	}

	/** Construct a CRF directly from alphabets; freezes the input alphabet. */
	public CRF4 (Alphabet inputAlphabet, Alphabet outputAlphabet)
	{
		inputAlphabet.stopGrowth();
		logger.info ("CRF input dictionary size = "+inputAlphabet.size());
		//xxx outputAlphabet.stopGrowth();
		this.inputAlphabet = inputAlphabet;
		this.outputAlphabet = outputAlphabet;
		//this.defaultFeatureIndex = inputAlphabet.size();
	}

	/**
	 * Create a CRF whose states and weights are a copy of ones from another CRF.
	 */
	public CRF4 (CRF4 other)
	{
		this (other.getInputPipe (), other.getOutputPipe ());
		copyStatesAndWeightsFrom (other);
		assertWeightsLength ();
	}

	// Deep-copies the state structure and weight parameters of initialCRF into this CRF.
	private void copyStatesAndWeightsFrom (CRF4 initialCRF)
	{
		//this.defaultFeatureIndex = initialCRF.defaultFeatureIndex;
		weightAlphabet = (Alphabet) initialCRF.weightAlphabet.clone ();
		weights = new SparseVector [initialCRF.weights.length];
		states.clear ();
		for (int i = 0; i < initialCRF.states.size(); i++) {
			State s = (State) initialCRF.getState (i);
			String[][] weightNames = new String[s.weightsIndices.length][];
			for (int j = 0; j < weightNames.length; j++) {
				int[] thisW = s.weightsIndices[j];
				// recover the weight names so addState re-interns them in this CRF's alphabet
				weightNames[j] = (String[]) initialCRF.weightAlphabet.lookupObjects(thisW, new String [s.weightsIndices[j].length]);
			}
			addState (s.name, s.initialCost, s.finalCost, s.destinationNames, s.labels, weightNames);
		}
		assert (weights.length > 0);
		defaultWeights = (double[]) initialCRF.defaultWeights.clone();
		for (int i = 0; i < weights.length; i++) {
			// weight indices may differ between the two alphabets; map by name
			Object wname = weightAlphabet.lookupObject (i);
			int otherIndex = initialCRF.weightAlphabet.lookupIndex (wname);
			weights[i] = (SparseVector) initialCRF.weights [otherIndex].cloneMatrix();
		}
		featureSelections = (FeatureSelection[]) initialCRF.featureSelections.clone ();
weightsFrozen = (boolean[]) initialCRF.weightsFrozen.clone();
	}

	public Alphabet getInputAlphabet () { return inputAlphabet; }
	public Alphabet getOutputAlphabet () { return outputAlphabet; }

	// Prior hyper-parameter accessors.
	public void setUseHyperbolicPrior (boolean f) { usingHyperbolicPrior = f; }
	public void setHyperbolicPriorSlope (double p) { hyperbolicPriorSlope = p; }
	public void setHyperbolicPriorSharpness (double p) { hyperbolicPriorSharpness = p; }
	// NOTE(review): oddly named -- these are plain getters for the hyperbolic
	// prior slope/sharpness; the name is kept for caller compatibility.
	public double getUseHyperbolicPriorSlope () { return hyperbolicPriorSlope; }
	public double getUseHyperbolicPriorSharpness () { return hyperbolicPriorSharpness; }
	public void setGaussianPriorVariance (double p) { gaussianPriorVariance = p; }
	public double getGaussianPriorVariance () { return gaussianPriorVariance; }
	//public int getDefaultFeatureIndex () { return defaultFeatureIndex;}

	public void setUseSparseWeights (boolean b) { useSparseWeights = b; }
	public boolean getUseSparseWeights () { return useSparseWeights; }

	/** Sets whether to use the 'some unsupported trick.' This trick is, if training a CRF
	 * where some training has been done and sparse weights are used, to add a few weights
	 * for features that do not occur in the training data.
	 * <p>
	 * This generally leads to better accuracy at only a small memory cost.
	 * @param b Whether to use the trick
	 */
	public void setUseSomeUnsupportedTrick (boolean b) { useSomeUnsupportedTrick = b; }

	// Types of transduction support
	public static final int VITERBI = 0;
	// CPAL - some new beam based "transducers"
	// - Here is a forward viterbi "beam search"
	public static final int VITERBI_FBEAM = 1;
	// CPAL - backward beam transducer
	public static final int VITERBI_BBEAM = 2;
	// CPAL - forward backward beam transducer
	public static final int VITERBI_FBBEAM = 3;
	// CPAL - adaptive KL divergence forward beam
	public static final int VITERBI_FBEAMKL = 4;

	public int getTransductionType () { return transductionType; }
	public void setTransductionType (int transductionType) { this.transductionType = transductionType; }

	// Factory method for State; subclasses may override to supply a specialized State.
	protected State newState (String name, int index,
	                          double initialCost, double finalCost,
	                          String[] destinationNames,
	                          String[] labelNames,
	                          String[][] weightNames,
	                          CRF4 crf)
	{
		return new State (name, index, initialCost, finalCost,
		                  destinationNames, labelNames, weightNames, crf);
	}

	/** Add a state whose transitions may each be parameterized by several
	 *  named weight sets.
	 *  @throws IllegalArgumentException if a state with this name already exists */
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames,
	                      String[][] weightNames)
	{
		assert (weightNames.length == destinationNames.length);
		assert (labelNames.length == destinationNames.length);
		setTrainable (false);	// structure change invalidates any trainable setup
		if (name2state.get(name) != null)
			throw new IllegalArgumentException ("State with name `"+name+"' already exists.");
		State s = newState (name, states.size(), initialCost, finalCost,
		                    destinationNames, labelNames, weightNames, this);
		s.print ();	// NOTE(review): debug dump on every addState -- consider removing
		states.add (s);
		if (initialCost < INFINITE_COST)
			initialStates.add (s);
		name2state.put (name, s);
	}

	/** Add a state where each transition has exactly one named weight set. */
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames,
	                      String[] weightNames)
	{
		String[][] newWeightNames = new String[weightNames.length][1];
		for (int i = 0; i < weightNames.length; i++)
			newWeightNames[i][0] = weightNames[i];
		this.addState (name, initialCost, finalCost,
destinationNames, labelNames, newWeightNames);
	}

	// Default gives separate parameters to each transition
	public void addState (String name, double initialCost, double finalCost,
	                      String[] destinationNames,
	                      String[] labelNames)
	{
		assert (destinationNames.length == labelNames.length);
		// name each weight set after its transition, e.g. "S1->S2:LABEL"
		String[] weightNames = new String[labelNames.length];
		for (int i = 0; i < labelNames.length; i++)
			weightNames[i] = name + "->" + destinationNames[i] + ":" + labelNames[i];
		this.addState (name, initialCost, finalCost, destinationNames, labelNames, weightNames);
	}

	// Add a state with parameters equal zero, and labels on out-going arcs
	// the same name as their destination state names.
	public void addState (String name, String[] destinationNames)
	{
		this.addState (name, 0, 0, destinationNames, destinationNames);
	}

	// Add a group of states that are fully connected with each other,
	// with parameters equal zero, and labels on their out-going arcs
	// the same name as their destination state names.
	public void addFullyConnectedStates (String[] stateNames)
	{
		for (int i = 0; i < stateNames.length; i++)
			addState (stateNames[i], stateNames);
	}

	public void addFullyConnectedStatesForLabels ()
	{
		String[] labels = new String[outputAlphabet.size()];
		// This is assuming that the entries in the outputAlphabet are Strings!
for (int i = 0; i < outputAlphabet.size(); i++) {
			logger.info ("CRF: outputAlphabet.lookup class = "+
			             outputAlphabet.lookupObject(i).getClass().getName());
			labels[i] = (String) outputAlphabet.lookupObject(i);
		}
		addFullyConnectedStates (labels);
	}

	/** Add a start state named "&lt;START&gt;"; see {@link #addStartState(String)}. */
	public void addStartState ()
	{
		addStartState ("<START>");
	}

	/** Add a new state with the given name that becomes the unique start state:
	 *  every existing state's initial cost is set to INFINITE_COST, and the new
	 *  state (initial cost 0, final cost INFINITE_COST) transitions to all of them. */
	public void addStartState (String name)
	{
		for (int i = 0; i < numStates (); i++)
			getState(i).initialCost = INFINITE_COST;
		String[] dests = new String [numStates ()];
		for (int i = 0; i < dests.length; i++)
			dests[i] = getState(i).getName();
		addState (name, 0, INFINITE_COST, dests, dests);
	}

	/** Make the given (already added) state the unique start state by giving it
	 *  initial cost 0 and every other state initial cost INFINITE_COST. */
	public void setAsStartState (State state)
	{
		for (int i = 0; i < numStates(); i++) {
			Transducer.State other = getState (i);
			if (other == state) {
				other.setInitialCost (0);
			} else {
				other.setInitialCost (INFINITE_COST);
			}
		}
	}
// (removed: code-viewer keyboard-shortcut help text accidentally captured with this source)