📄 crf.java
字号:
pi = parameters.arrayCopyFrom (pi, weights[i]); return parameters; }
    // NOTE(review): the line above is the tail of a method whose beginning lies
    // before this excerpt; it is reproduced unchanged.

    /**
     * Overwrites every trainable parameter from a flat vector.
     *
     * Layout read by this method: for each state, in index order, the value
     * stored as the negated initial cost followed by the value stored as the
     * negated final cost; after the state entries, the contents of each
     * weights[i] in order.  Both caches are marked stale.
     *
     * @param m must be a DenseVector whose singleSize() equals numParameters
     *          (checked only by assertion).
     */
    public void setParameters (Matrix m) {
      assert (m instanceof DenseVector && ((DenseVector)m).singleSize() == numParameters);
      cachedCostStale = cachedGradientStale = true;
      DenseVector parameters = (DenseVector)m;
      int pi = 0;
      for (int i = 0; i < numStates(); i++) {
        State s = (State) getState (i);
        // Parameters are stored with the opposite sign of the costs.
        s.initialCost = -parameters.value (pi++);
        s.finalCost = -parameters.value (pi++);
      }
      // presumably arrayCopyTo fills weights[i] starting at offset pi and
      // returns the next unread offset -- TODO confirm against DenseVector
      for (int i = 0; i < weights.length; i++)
        pi = parameters.arrayCopyTo (pi, weights[i]);
    }

    /**
     * Reads one parameter by its position in the flat layout.
     *
     * Indices below 2*numStates() address state parameters (even: negated
     * initial cost, odd: negated final cost).  Higher indices address the
     * weight vectors, using (defaultFeatureIndex+1) as the stride.
     * NOTE(review): that stride assumes every weights[i] has exactly
     * defaultFeatureIndex+1 entries -- confirm.
     *
     * @param indices a one-element array holding the flat index (checked only
     *                by assertion).
     * @return the parameter value.
     */
    public double getParameter (int[] indices) {
      assert (indices.length == 1);
      int index = indices[0];
      int numStateParms = 2 * numStates();
      if (index < numStateParms) {
        State s = (State)getState(index / 2);
        if (index % 2 == 0)
          return -s.initialCost;
        else
          return -s.finalCost;
      } else {
        index -= numStateParms;
        Vector v = weights[index / (defaultFeatureIndex+1)];
        return v.singleValue (index % (defaultFeatureIndex+1));
      }
    }

    /**
     * Writes one parameter by its position in the flat layout (same indexing
     * as getParameter) and marks both caches stale.
     *
     * @param indices a one-element array holding the flat index (checked only
     *                by assertion).
     * @param value   the new parameter value; state costs receive -value.
     */
    public void setParameter (int[] indices, double value) {
      cachedCostStale = cachedGradientStale = true;
      assert (indices.length == 1);
      int index = indices[0];
      int numStateParms = 2 * numStates();
      if (index < numStateParms) {
        State s = (State)getState(index / 2);
        if (index % 2 == 0)
          s.initialCost = -value;
        else
          s.finalCost = -value;
      } else {
        index -= numStateParms;
        DenseVector v = weights[index / (defaultFeatureIndex+1)];
        v.setSingleValue (index % (defaultFeatureIndex+1), value);
      }
    }

    /**
     * Minus log probability of the training sequence labels, plus the prior
     * term of every finite parameter.
     *
     * The value is cached; it is recomputed only while cachedCostStale is
     * set.  Recomputing zeroes the state and weight expectations, runs
     * forwardBackward twice per training instance and marks the gradient
     * cache stale.
     *
     * @return the (possibly cached) cost.
     * @throws IllegalStateException if an instance has infinite cost now but
     *         was not recorded as infinite when infiniteCosts was first
     *         filled.
     */
    public double getCost () {
      if (cachedCostStale) {
        cachedCost = 0;
        cachedGradientStale = true;
        // Instance costs must either always or never be included in
        // the total costs; we can't just sometimes skip a cost
        // because it is infinite, this throws off the total costs.
        boolean initializingInfiniteCosts = false;
        if (infiniteCosts == null) {
          infiniteCosts = new BitSet ();
          initializingInfiniteCosts = true;
        }
        // Clear the sufficient statistics that we are about to fill
        for (int i = 0; i < numStates(); i++) {
          State s = (State)getState(i);
          s.initialExpectation = 0;
          s.finalExpectation = 0;
        }
        for (int i = 0; i < weights.length; i++)
          expectations[i].setAll (0.0);
        // Calculate the cost of each instance, and also fill in expectations
        double unlabeledCost, labeledCost, cost;
        for (int ii = 0; ii < trainingSet.size(); ii++) {
          Instance instance = trainingSet.getInstance(ii);
          FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
          FeatureSequence output = (FeatureSequence) instance.getTarget();
          labeledCost = forwardBackward (input, output, false).getCost();
          if (Double.isInfinite (labeledCost))
            logger.warning (instance.getName().toString() + " has infinite labeled cost.\n"
                            +(instance.getSource() != null ? instance.getSource() : ""));
          unlabeledCost = forwardBackward (input, true).getCost ();
          if (Double.isInfinite (unlabeledCost))
            logger.warning (instance.getName().toString() + " has infinite unlabeled cost.\n"
                            +(instance.getSource() != null ? instance.getSource() : ""));
          // Here cost is -log(conditional probability correct label sequence)
          cost = labeledCost - unlabeledCost;
          if (Double.isInfinite(cost)) {
            logger.warning (instance.getName().toString() + " has infinite cost; skipping.");
            if (initializingInfiniteCosts)
              infiniteCosts.set (ii);
            else if (!infiniteCosts.get(ii))
              // Report which instance changed; the message used to contain
              // only the literal text "Instance i".
              throw new IllegalStateException ("Instance " + ii + " (" + instance.getName()
                                               + ") used to have non-infinite cost, "
                                               +"but now it has infinite cost.");
            continue;
          }
          cachedCost += cost;
        }
        // Incorporate prior on parameters.  Infinite parameters are left out.
        for (int i = 0; i < numStates(); i++) {
          State s = (State) getState (i);
          if (!Double.isInfinite(s.initialCost))
            cachedCost += priorCostTerm (-s.initialCost);
          if (!Double.isInfinite(s.finalCost))
            cachedCost += priorCostTerm (-s.finalCost);
        }
        for (int i = 0; i < weights.length; i++) {
          DenseVector weightVector = weights[i];
          int singleSize = weightVector.singleSize();
          for (int j = 0; j < singleSize; j++) {
            double w = weightVector.singleValue (j);
            if (!Double.isInfinite (w))
              cachedCost += priorCostTerm (w);
          }
        }
        cachedCostStale = false;
        logger.info ("getCost() (-loglikelihood) = "+cachedCost);
        logger.fine ("getCost() (-loglikelihood) = "+cachedCost);
      }
      return cachedCost;
    }

    /**
     * Prior contribution of a single parameter to the cost:
     * slope/sharpness * log(cosh(sharpness * p)) under the hyperbolic prior,
     * p^2 / (2 * variance) under the Gaussian prior.
     */
    private double priorCostTerm (double parameter) {
      if (usingHyperbolicPrior)
        return (hyperbolicPriorSlope / hyperbolicPriorSharpness
                * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameter)));
      return parameter * parameter / (2 * gaussianPriorVariance);
    }

    /**
     * Derivative of priorCostTerm with respect to the parameter.
     *
     * For the hyperbolic prior, d/dp [slope/sharpness * log cosh(sharpness*p)]
     * is slope * tanh(sharpness * p).  The sharpness belongs inside tanh;
     * multiplying tanh(p) by the sharpness only agrees with the cost to first
     * order around zero.
     */
    private double priorGradientTerm (double parameter) {
      if (usingHyperbolicPrior)
        return hyperbolicPriorSlope * Maths.tanh (hyperbolicPriorSharpness * parameter);
      return parameter / gaussianPriorVariance;
    }

    /**
     * Asserts that no weight, constraint, expectation or state statistic is
     * NaN.  Always returns true so it can itself be wrapped in an assert and
     * cost nothing when assertions are disabled.
     */
    private boolean checkForNaN () {
      for (int i = 0; i < weights.length; i++) {
        assert (!weights[i].isNaN());
        assert (constraints == null || !constraints[i].isNaN());
        assert (expectations == null || !expectations[i].isNaN());
      }
      for (int i = 0; i < numStates(); i++) {
        State s = (State) getState (i);
        assert (!Double.isNaN (s.initialExpectation));
        assert (!Double.isNaN (s.initialConstraint));
        assert (!Double.isNaN (s.initialCost));
        assert (!Double.isNaN (s.finalExpectation));
        assert (!Double.isNaN (s.finalConstraint));
        assert (!Double.isNaN (s.finalCost));
      }
      return true;
    }

    /**
     * Copies the gradient of the cost into m and returns m.
     *
     * The gradient is cached in cachedGradient and rebuilt only while
     * cachedGradientStale is set, calling getCost() first if the cost cache
     * is stale too.  Entries follow the flat parameter layout: two per state
     * (initial, final), then every entry of each weights[i].  Parameters that
     * are infinite get a zero gradient.  printGradient is cleared on every
     * call.
     *
     * @param m destination; receives the cached gradient via m.set(...).
     * @return m.
     */
    public Matrix getCostGradient (Matrix m) {
      // Gradient is -(constraint - expectation - parameters/gaussianPriorVariance)
      // == (expectation + parameters/gaussianPriorVariance - constraint)
      // This might be opposite from what you are used to seeing, this
      // is because this is the gradient of the "cost" and the
      // gradient should point "up-hill", which is actually away from
      // the direction we want to parameters to go.
      if (cachedGradientStale) {
        if (cachedCostStale)
          // This will fill in the this.expectation
          getCost ();
        assert (checkForNaN());
        int gi = 0;
        for (int i = 0; i < numStates(); i++) {
          State s = (State) getState (i);
          cachedGradient.setValue (gi++,
                                   Double.isInfinite (s.initialCost)
                                   ? 0.0
                                   : (s.initialExpectation
                                      + priorGradientTerm (-s.initialCost)
                                      - s.initialConstraint));
          cachedGradient.setValue (gi++,
                                   Double.isInfinite (s.finalCost)
                                   ? 0.0
                                   : (s.finalExpectation
                                      + priorGradientTerm (-s.finalCost)
                                      - s.finalConstraint));
        }
        for (int i = 0; i < weights.length; i++)
          for (int j = 0; j < weights[i].singleSize(); j++) {
            double w = weights[i].singleValue (j);
            cachedGradient.setValue (gi++,
                                     Double.isInfinite (w)
                                     ? 0.0
                                     : (expectations[i].singleValue(j)
                                        + priorGradientTerm (w)
                                        - constraints[i].singleValue(j)));
            if (printGradient)
              System.out.println ("CRF gradient["+crf.getWeightsName(i)+"]["+(j!=defaultFeatureIndex?inputAlphabet.lookupObject(j):"<DEFAULT_FEATURE>")+"]="+cachedGradient.value(gi-1));
          }
        // xxx Show the feature with maximum gradient
        cachedGradientStale = false;
        assert (!cachedGradient.isNaN());
      }
      m.set (cachedGradient);
      printGradient = false;
      return m;
    }

    // Accessors for constraints and expectations....
// ....useful for unit tests
    // These return copies, so they aren't for "real" code

    /** Returns a new array holding a clone of each constraints vector. */
    public DenseVector[] getConstraints () {
      return constExpCopy (constraints);
    }

    /** Returns a new array holding a clone of each expectations vector. */
    public DenseVector[] getExpectations () {
      return constExpCopy (expectations);
    }

    /**
     * Builds a new array of the same length as stuff in which every element
     * is the result of cloneMatrix() on the corresponding input element.
     */
    private DenseVector[] constExpCopy (DenseVector[] stuff) {
      DenseVector[] values = new DenseVector [stuff.length];
      for (int i = 0; i < values.length; i++) {
        values [i] = (DenseVector) stuff[i].cloneMatrix ();
      }
      return values;
    }

    //Serialization of MinimizableCRF
    private static final long serialVersionUID = 1;
    private static final int CURRENT_SERIAL_VERSION = 0;

    /**
     * Writes, in this order: the serial version, trainingSet, cachedCost,
     * cachedGradient, infiniteCosts, numParameters and crf.
     * NOTE(review): only the fields listed here are written; any other
     * instance state is not saved by this method -- confirm that is intended.
     */
    private void writeObject (ObjectOutputStream out) throws IOException {
      out.writeInt (CURRENT_SERIAL_VERSION);
      out.writeObject(trainingSet);
      out.writeDouble(cachedCost);
      out.writeObject(cachedGradient);
      out.writeObject((BitSet)infiniteCosts);
      out.writeInt(numParameters);
      out.writeObject(crf);
    }

    /**
     * Reads the fields back in exactly the order writeObject emits them.
     * The version number is consumed from the stream but not otherwise used.
     */
    private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
      int version = in.readInt ();
      trainingSet = (InstanceList) in.readObject();
      cachedCost = in.readDouble();
      cachedGradient = (DenseVector) in.readObject();
      infiniteCosts = (BitSet) in.readObject();
      numParameters = in.readInt();
      crf = (CRF)in.readObject();
    }
  }

  /**
   * A CRF state.  Besides the fields declared here, the constructor below
   * assigns initialCost and finalCost, which are not declared in this class
   * (presumably inherited from Transducer.State -- confirm).
   */
  public class State extends Transducer.State implements Serializable {
    // Parameters indexed by destination state, feature index
    double initialConstraint, initialExpectation;
    double finalConstraint, finalExpectation;
    String name;
    int index;
    String[] destinationNames;
    State[] destinations;
    int[] weightsIndices; // contains indices into CRF.weights[],
    String[] labels;
    CRF crf;

    // No arg constructor so serialization works
    protected State() {
    }

    // The three name arrays must have equal length (checked only by
    // assertion).  destinations, weightsIndices and labels are allocated with
    // that length here.
    // NOTE(review): the rest of this constructor lies beyond this excerpt.
    protected State (String name, int index, double initialCost, double finalCost, String[] destinationNames, String[] labelNames, String[] weightNames, CRF crf) {
      assert (destinationNames.length == labelNames.length);
      assert (destinationNames.length == weightNames.length);
      this.name = name;
      this.index = index;
      this.initialCost = initialCost;
      this.finalCost = finalCost;
      this.destinationNames = destinationNames;
      this.destinations = new State[labelNames.length];
      this.weightsIndices = new int[labelNames.length];
      this.labels = new String[labelNames.length];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -