📄 crf3.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */

package edu.umass.cs.mallet.base.fst;

import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.minimize.*;
import edu.umass.cs.mallet.base.util.Maths;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;
import java.util.regex.*;
import java.util.logging.*;
import java.io.*;
import java.lang.reflect.Constructor;

/* There are several different kinds of numeric values:

   "weights" range from -Inf to Inf.  High weights make a path more likely.
   These don't appear directly in Transducer.java, but appear as parameters
   to many subclasses, such as CRFs.  Weights are also often summed, or
   combined in a dot product with feature vectors.

   "unnormalized costs" range from -Inf to Inf.  High costs make a path less
   likely.  Unnormalized costs can be obtained from negated weights or
   negated sums of weights.  These are often returned by a
   TransitionIterator's getCost() method.  The LatticeNode.alpha values are
   unnormalized costs.

   "normalized costs" range from 0 to Inf.  High costs make a path less
   likely.  Normalized costs can safely be considered as the
   -log(probability) of some event.  They can be obtained by subtracting a
   (negative) normalizer from unnormalized costs, for example, subtracting
   the total cost of a lattice.  Typically initialCosts and finalCosts are
   examples of normalized costs, but they are also allowed to be
   unnormalized costs.  The gammas[][], stateGammas[], and transitionXis[][]
   are all normalized costs, as well as the return value of
   Lattice.getCost().

   "probabilities" range from 0 to 1.  High probabilities make a path more
   likely.  They are obtained from normalized costs by negating and
   exponentiating, e.g. exp(-cost).

   "sums of probabilities" range from 0 to positive numbers.  They are the
   sum of several probabilities.  These are passed to the incrementCount()
   methods.
*/

public class CRF3 extends Transducer implements Serializable
{
    private static Logger logger = MalletLogger.getLogger(CRF.class.getName());

    static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 0.5;  //Double.POSITIVE_INFINITY; //10.0;
    static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
    //<<<<<<< CRF3.java
    static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 15.0;  //10.0;
    //=======
    static final String LABEL_SEPARATOR = ",";
    //>>>>>>> 1.6
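    // A minimal numeric sketch of the conversions described in the long
    // comment above (illustrative only; the values are made up):
    //
    //     double cost = 2.3;               // a normalized cost, i.e. -log(probability)
    //     double prob = Math.exp (-cost);  // ~0.10, the corresponding probability
    //     double back = -Math.log (prob);  // ~2.3, recovering the normalized cost
    //
    // An unnormalized cost becomes normalized by subtracting a normalizer,
    // for example the total cost of a lattice:
    //     normalized = unnormalized - latticeTotalCost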
    // The length of each weights[i] Vector is the size of the input
    // dictionary plus one; the additional column at the end is for the
    // "default feature".
    Alphabet inputAlphabet;
    Alphabet outputAlphabet;
    ArrayList states = new ArrayList ();
    ArrayList initialStates = new ArrayList ();
    HashMap name2state = new HashMap ();
    SparseVector[] weights, constraints, expectations;
    double[] defaultWeights, defaultConstraints, defaultExpectations;  // parameters for default feature
    BitSet[] weightsPresent;  // Only used in setWeightsDimensionAsIn()
    // FeatureInduction can fill this in
    FeatureSelection globalFeatureSelection;
    // "featureSelections" is on a per-weights[i] basis, and overrides
    // (permanently disabling) FeatureInducer's and
    // setWeightsDimensionsAsIn() from using these features on these transitions
    FeatureSelection[] featureSelections;
    Alphabet weightAlphabet = new Alphabet ();
    boolean trainable = false;
    boolean gatheringConstraints = false;
    boolean gatheringWeightsPresent = false;
    //int defaultFeatureIndex;
    boolean usingHyperbolicPrior = false;
    double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
    double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
    double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
    private boolean cachedCostStale = true;
    private boolean cachedGradientStale = true;
    private boolean someTrainingDone = false;
    ArrayList featureInducers = new ArrayList();
    double[] priorInitialCost;
    double[] priorFinalCost;

    // xxx temporary hack
    public boolean printGradient = false;

    public CRF3 (Pipe inputPipe, Pipe outputPipe)
    {
        this.inputPipe = inputPipe;
        this.outputPipe = outputPipe;
        this.inputAlphabet = inputPipe.getDataAlphabet();
        this.outputAlphabet = inputPipe.getTargetAlphabet();
        //this.defaultFeatureIndex = inputAlphabet.size();
        //inputAlphabet.stopGrowth();
    }

    public CRF3 (Alphabet inputAlphabet, Alphabet outputAlphabet)
    {
        inputAlphabet.stopGrowth();
        logger.info ("CRF input dictionary size = "+inputAlphabet.size());
        //xxx outputAlphabet.stopGrowth();
        this.inputAlphabet = inputAlphabet;
        this.outputAlphabet = outputAlphabet;
        //this.defaultFeatureIndex = inputAlphabet.size();
    }
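    // Illustrative usage sketch (not a complete program; `p' and `training'
    // are hypothetical, and the null output pipe is possible here because the
    // two-Pipe constructor only reads the input pipe's alphabets):
    //
    //     Pipe p = ...;                  // a configured input pipe
    //     InstanceList training = ...;   // training data produced by that pipe
    //     CRF3 crf = new CRF3 (p, null);
    //     crf.addStatesForLabelsConnectedAsIn (training);  // builds label states and prior costs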
Old weight size = "+initialCRF.weights[0].singleSize()+ " New weight size = "+initialCRF.inputAlphabet.size()); this.inputAlphabet = initialCRF.inputAlphabet; this.outputAlphabet = initialCRF.outputAlphabet; this.states = new ArrayList (initialCRF.states.size()); this.inputPipe = initialCRF.inputPipe; this.outputPipe = initialCRF.outputPipe; //this.defaultFeatureIndex = initialCRF.defaultFeatureIndex; for (int i = 0; i < initialCRF.states.size(); i++) { State s = (State) initialCRF.getState (i); String[] weightNames = new String[s.weightsIndices.length]; for (int j = 0; j < weightNames.length; j++) weightNames[j] = (String) initialCRF.weightAlphabet.lookupObject(s.weightsIndices[i]); addState (s.name, s.initialCost, s.finalCost, s.destinationNames, s.labels, weightNames); } assert (weights.length > 0); for (int i = 0; i < weights.length; i++) weights[i].arrayCopyFrom (0, initialCRF.getWeights ((String)weightAlphabet.lookupObject(i))); } */ public Alphabet getInputAlphabet () { return inputAlphabet; } public Alphabet getOutputAlphabet () { return outputAlphabet; } public void setUseHyperbolicPrior (boolean f) { usingHyperbolicPrior = f; } public void setHyperbolicPriorSlope (double p) { hyperbolicPriorSlope = p; } public void setHyperbolicPriorSharpness (double p) { hyperbolicPriorSharpness = p; } public double getUseHyperbolicPriorSlope () { return hyperbolicPriorSlope; } public double getUseHyperbolicPriorSharpness () { return hyperbolicPriorSharpness; } public void setGaussianPriorVariance (double p) { gaussianPriorVariance = p; } public double getGaussianPriorVariance () { return gaussianPriorVariance; } //public int getDefaultFeatureIndex () { return defaultFeatureIndex;} public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames, String[][] weightNames) { assert (weightNames.length == destinationNames.length); assert (labelNames.length == destinationNames.length); setTrainable (false); if (name2state.get(name) != null) throw new IllegalArgumentException ("State with name `"+name+"' already exists."); State s = new State (name, states.size(), initialCost, finalCost, destinationNames, labelNames, weightNames, this); s.print (); states.add (s); if (initialCost < INFINITE_COST) initialStates.add (s); name2state.put (name, s); } public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames, String[] weightNames) { String[][] newWeightNames = new String[weightNames.length][1]; for (int i = 0; i < weightNames.length; i++) newWeightNames[i][0] = weightNames[i]; this.addState (name, initialCost, finalCost, destinationNames, labelNames, newWeightNames); } // Default gives separate parameters to each transition public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames) { assert (destinationNames.length == labelNames.length); String[] weightNames = new String[labelNames.length]; for (int i = 0; i < labelNames.length; i++) weightNames[i] = name + "->" + destinationNames[i] + ":" + labelNames[i]; this.addState (name, initialCost, finalCost, destinationNames, labelNames, weightNames); } // Add a state with parameters equal zero, and labels on out-going arcs // the same name as their destination state names. 
    // Add a state with parameters equal zero, and labels on out-going arcs
    // the same name as their destination state names.
    public void addState (String name, String[] destinationNames)
    {
        // == changed by Fuchun Peng to use non-zero prior cost
        int index = outputAlphabet.lookupIndex(name);
        this.addState (name, priorInitialCost[index], priorFinalCost[index],
                       destinationNames, destinationNames);
        // this.addState (name, 0, 0, destinationNames, destinationNames);
    }

    // Add a group of states that are fully connected with each other,
    // with parameters equal zero, and labels on their out-going arcs
    // the same name as their destination state names.
    public void addFullyConnectedStates (String[] stateNames)
    {
        for (int i = 0; i < stateNames.length; i++)
            addState (stateNames[i], stateNames);
    }

    public void addFullyConnectedStatesForLabels ()
    {
        String[] labels = new String[outputAlphabet.size()];
        // This is assuming that the entries in the outputAlphabet are Strings!
        for (int i = 0; i < outputAlphabet.size(); i++) {
            logger.info ("CRF: outputAlphabet.lookup class = "+
                         outputAlphabet.lookupObject(i).getClass().getName());
            labels[i] = (String) outputAlphabet.lookupObject(i);
        }
        addFullyConnectedStates (labels);
    }

    private boolean[][] labelConnectionsIn (InstanceList trainingSet)
    {
        int numLabels = outputAlphabet.size();
        boolean[][] connections = new boolean[numLabels][numLabels];
        for (int i = 0; i < trainingSet.size(); i++) {
            Instance instance = trainingSet.getInstance(i);
            FeatureSequence output = (FeatureSequence) instance.getTarget();
            for (int j = 1; j < output.size(); j++) {
                int sourceIndex = outputAlphabet.lookupIndex (output.get(j-1));
                int destIndex = outputAlphabet.lookupIndex (output.get(j));
                assert (sourceIndex >= 0 && destIndex >= 0);
                connections[sourceIndex][destIndex] = true;
            }
        }
        return connections;
    }

    // added by Fuchun Peng
    public void priorCost (InstanceList trainingSet)
    {
        int numLabels = outputAlphabet.size();
        priorInitialCost = new double[numLabels];
        priorFinalCost = new double[numLabels];
        int initialNum = 0;
        int finalNum = 0;
        for (int i = 0; i < trainingSet.size(); i++) {
            Instance instance = trainingSet.getInstance(i);
            FeatureSequence output = (FeatureSequence) instance.getTarget();
            int initialIndex = outputAlphabet.lookupIndex (output.get(0));
            int finalIndex = outputAlphabet.lookupIndex (output.get(output.size()-1));
            assert (initialIndex >= 0 && finalIndex >= 0);
            priorInitialCost[initialIndex]++;
            priorFinalCost[finalIndex]++;
            initialNum++;
            finalNum++;
        }
        for (int i = 0; i < numLabels; i++) {
            priorInitialCost[i] = (priorInitialCost[i]+1)/(initialNum+numLabels);
            priorInitialCost[i] = -Math.log(priorInitialCost[i]);
            priorFinalCost[i] /= finalNum;
            priorFinalCost[i] = -Math.log(priorFinalCost[i]);
            System.out.println((String)outputAlphabet.lookupObject(i) + " "
                               + priorInitialCost[i] + " " + priorFinalCost[i]);
        }
    }
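    // Worked example for priorCost() above, with hypothetical counts: given
    // 3 labels and 100 training sequences, a label that starts 60 of them gets
    //     priorInitialCost = -log((60 + 1) / (100 + 3)) = -log(0.592) ~ 0.524
    // (add-one smoothing over the labels), while a label that ends 60 of them gets
    //     priorFinalCost   = -log(60 / 100) = -log(0.6) ~ 0.511
    // (unsmoothed relative frequency, so a label never observed in final
    // position receives an infinite final cost).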
    /** Add states to create a first-order Markov model on labels, adding only
        those transitions that occur in the given trainingSet. */
    public void addStatesForLabelsConnectedAsIn (InstanceList trainingSet)
    {
        priorCost(trainingSet);
        int numLabels = outputAlphabet.size();
        boolean[][] connections = labelConnectionsIn (trainingSet);
        for (int i = 0; i < numLabels; i++) {
            int numDestinations = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j]) numDestinations++;
            String[] destinationNames = new String[numDestinations];
            int destinationIndex = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j])
                    destinationNames[destinationIndex++] = (String)outputAlphabet.lookupObject(j);
            addState ((String)outputAlphabet.lookupObject(i), destinationNames);
        }
    }

    /** Add as many states as there are labels, but don't create separate
        weights for each source-destination pair of states.  Instead have all
        the incoming transitions to a state share the same weights. */
    public void addStatesForHalfLabelsConnectedAsIn (InstanceList trainingSet)
    {
        priorCost(trainingSet);
        int numLabels = outputAlphabet.size();
        boolean[][] connections = labelConnectionsIn (trainingSet);
        for (int i = 0; i < numLabels; i++) {
            int numDestinations = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j]) numDestinations++;
            String[] destinationNames = new String[numDestinations];
            int destinationIndex = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j])
                    destinationNames[destinationIndex++] = (String)outputAlphabet.lookupObject(j);
            // addState ((String)outputAlphabet.lookupObject(i), 0.0, 0.0,
            //           destinationNames, destinationNames, destinationNames);
            addState ((String)outputAlphabet.lookupObject(i), priorInitialCost[i], priorFinalCost[i],
                      destinationNames, destinationNames, destinationNames);
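            // Note on the addState call above: destinationNames is passed as
            // both the labelNames and the weightNames argument, so every
            // transition into a given destination state shares one weight
            // group named after that destination, regardless of the source state.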