📄 crf3.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */

package edu.umass.cs.mallet.base.fst;

import edu.umass.cs.mallet.base.types.*;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.minimize.*;
import edu.umass.cs.mallet.base.util.Maths;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;
import java.util.regex.*;
import java.util.logging.*;
import java.io.*;
import java.lang.reflect.Constructor;

/* There are several different kinds of numeric values:

   "weights" range from -Inf to Inf.  High weights make a path more likely.
   These don't appear directly in Transducer.java, but appear as parameters
   to many subclasses, such as CRFs.  Weights are also often summed, or
   combined in a dot product with feature vectors.

   "unnormalized costs" range from -Inf to Inf.  High costs make a path less
   likely.  Unnormalized costs can be obtained from negated weights or
   negated sums of weights.  These are often returned by a
   TransitionIterator's getCost() method.  The LatticeNode.alpha values are
   unnormalized costs.

   "normalized costs" range from 0 to Inf.  High costs make a path less
   likely.  Normalized costs can safely be considered as the
   -log(probability) of some event.  They can be obtained by subtracting a
   (negative) normalizer from unnormalized costs, for example, subtracting
   the total cost of a lattice.  Typically initialCosts and finalCosts are
   examples of normalized costs, but they are also allowed to be
   unnormalized costs.  The gammas[][], stateGammas[], and transitionXis[][]
   are all normalized costs, as well as the return value of
   Lattice.getCost().

   "probabilities" range from 0 to 1.  High probabilities make a path more
   likely.  They are obtained from normalized costs by negating and
   exponentiating, e.g. exp(-cost).

   "sums of probabilities" range from 0 to positive numbers.  They are the
   sum of several probabilities.  These are passed to the incrementCount()
   methods.
*/

public class CRF3 extends Transducer implements Serializable
{
    private static Logger logger = MalletLogger.getLogger(CRF.class.getName());

    static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 0.5;  //Double.POSITIVE_INFINITY; //10.0;
    static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
    //<<<<<<< CRF3.java
    static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 15.0;  //10.0;
    //=======
    static final String LABEL_SEPARATOR = ",";
    //>>>>>>> 1.6
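    // A minimal numeric sketch of the conversions described in the long
    // comment above (illustrative only; the values are made up):
    //
    //     double cost = 2.3;               // a normalized cost, i.e. -log(probability)
    //     double prob = Math.exp (-cost);  // ~0.10, the corresponding probability
    //     double back = -Math.log (prob);  // ~2.3, recovering the normalized cost
    //
    // An unnormalized cost becomes normalized by subtracting a normalizer,
    // for example the total cost of a lattice:
    //     normalized = unnormalized - latticeTotalCost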
    // The length of each weights[i] Vector is the size of the input
    // dictionary plus one; the additional column at the end is for the
    // "default feature".
    Alphabet inputAlphabet;
    Alphabet outputAlphabet;
    ArrayList states = new ArrayList ();
    ArrayList initialStates = new ArrayList ();
    HashMap name2state = new HashMap ();
    SparseVector[] weights, constraints, expectations;
    double[] defaultWeights, defaultConstraints, defaultExpectations;  // parameters for default feature
    BitSet[] weightsPresent;  // Only used in setWeightsDimensionAsIn()
    // FeatureInduction can fill this in
    FeatureSelection globalFeatureSelection;
    // "featureSelections" is on a per-weights[i] basis, and overrides
    // (permanently disabling) FeatureInducer's and
    // setWeightsDimensionsAsIn() from using these features on these transitions
    FeatureSelection[] featureSelections;
    Alphabet weightAlphabet = new Alphabet ();
    boolean trainable = false;
    boolean gatheringConstraints = false;
    boolean gatheringWeightsPresent = false;
    //int defaultFeatureIndex;
    boolean usingHyperbolicPrior = false;
    double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
    double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
    double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
    private boolean cachedCostStale = true;
    private boolean cachedGradientStale = true;
    private boolean someTrainingDone = false;
    ArrayList featureInducers = new ArrayList();
    double[] priorInitialCost;
    double[] priorFinalCost;

    // xxx temporary hack
    public boolean printGradient = false;

    public CRF3 (Pipe inputPipe, Pipe outputPipe)
    {
        this.inputPipe = inputPipe;
        this.outputPipe = outputPipe;
        this.inputAlphabet = inputPipe.getDataAlphabet();
        this.outputAlphabet = inputPipe.getTargetAlphabet();
        //this.defaultFeatureIndex = inputAlphabet.size();
        //inputAlphabet.stopGrowth();
    }

    public CRF3 (Alphabet inputAlphabet, Alphabet outputAlphabet)
    {
        inputAlphabet.stopGrowth();
        logger.info ("CRF input dictionary size = "+inputAlphabet.size());
        //xxx outputAlphabet.stopGrowth();
        this.inputAlphabet = inputAlphabet;
        this.outputAlphabet = outputAlphabet;
        //this.defaultFeatureIndex = inputAlphabet.size();
    }
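    // Illustrative usage sketch (not a complete program; `p' and `training'
    // are hypothetical, and the null output pipe is possible here because the
    // two-Pipe constructor only reads the input pipe's alphabets):
    //
    //     Pipe p = ...;                  // a configured input pipe
    //     InstanceList training = ...;   // training data produced by that pipe
    //     CRF3 crf = new CRF3 (p, null);
    //     crf.addStatesForLabelsConnectedAsIn (training);  // builds label states and prior costs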
Old weight size = "+initialCRF.weights[0].singleSize()+ " New weight size = "+initialCRF.inputAlphabet.size()); this.inputAlphabet = initialCRF.inputAlphabet; this.outputAlphabet = initialCRF.outputAlphabet; this.states = new ArrayList (initialCRF.states.size()); this.inputPipe = initialCRF.inputPipe; this.outputPipe = initialCRF.outputPipe; //this.defaultFeatureIndex = initialCRF.defaultFeatureIndex; for (int i = 0; i < initialCRF.states.size(); i++) { State s = (State) initialCRF.getState (i); String[] weightNames = new String[s.weightsIndices.length]; for (int j = 0; j < weightNames.length; j++) weightNames[j] = (String) initialCRF.weightAlphabet.lookupObject(s.weightsIndices[i]); addState (s.name, s.initialCost, s.finalCost, s.destinationNames, s.labels, weightNames); } assert (weights.length > 0); for (int i = 0; i < weights.length; i++) weights[i].arrayCopyFrom (0, initialCRF.getWeights ((String)weightAlphabet.lookupObject(i))); } */ public Alphabet getInputAlphabet () { return inputAlphabet; } public Alphabet getOutputAlphabet () { return outputAlphabet; } public void setUseHyperbolicPrior (boolean f) { usingHyperbolicPrior = f; } public void setHyperbolicPriorSlope (double p) { hyperbolicPriorSlope = p; } public void setHyperbolicPriorSharpness (double p) { hyperbolicPriorSharpness = p; } public double getUseHyperbolicPriorSlope () { return hyperbolicPriorSlope; } public double getUseHyperbolicPriorSharpness () { return hyperbolicPriorSharpness; } public void setGaussianPriorVariance (double p) { gaussianPriorVariance = p; } public double getGaussianPriorVariance () { return gaussianPriorVariance; } //public int getDefaultFeatureIndex () { return defaultFeatureIndex;} public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames, String[][] weightNames) { assert (weightNames.length == destinationNames.length); assert (labelNames.length == destinationNames.length); setTrainable (false); if (name2state.get(name) != null) throw new IllegalArgumentException ("State with name `"+name+"' already exists."); State s = new State (name, states.size(), initialCost, finalCost, destinationNames, labelNames, weightNames, this); s.print (); states.add (s); if (initialCost < INFINITE_COST) initialStates.add (s); name2state.put (name, s); } public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames, String[] weightNames) { String[][] newWeightNames = new String[weightNames.length][1]; for (int i = 0; i < weightNames.length; i++) newWeightNames[i][0] = weightNames[i]; this.addState (name, initialCost, finalCost, destinationNames, labelNames, newWeightNames); } // Default gives separate parameters to each transition public void addState (String name, double initialCost, double finalCost, String[] destinationNames, String[] labelNames) { assert (destinationNames.length == labelNames.length); String[] weightNames = new String[labelNames.length]; for (int i = 0; i < labelNames.length; i++) weightNames[i] = name + "->" + destinationNames[i] + ":" + labelNames[i]; this.addState (name, initialCost, finalCost, destinationNames, labelNames, weightNames); } // Add a state with parameters equal zero, and labels on out-going arcs // the same name as their destination state names. 
    // Add a state with parameters equal zero, and labels on out-going arcs
    // the same name as their destination state names.
    public void addState (String name, String[] destinationNames)
    {
        // == changed by Fuchun Peng to use non-zero prior cost
        int index = outputAlphabet.lookupIndex(name);
        this.addState (name, priorInitialCost[index], priorFinalCost[index],
                       destinationNames, destinationNames);
        // this.addState (name, 0, 0, destinationNames, destinationNames);
    }

    // Add a group of states that are fully connected with each other,
    // with parameters equal zero, and labels on their out-going arcs
    // the same name as their destination state names.
    public void addFullyConnectedStates (String[] stateNames)
    {
        for (int i = 0; i < stateNames.length; i++)
            addState (stateNames[i], stateNames);
    }

    public void addFullyConnectedStatesForLabels ()
    {
        String[] labels = new String[outputAlphabet.size()];
        // This is assuming that the entries in the outputAlphabet are Strings!
        for (int i = 0; i < outputAlphabet.size(); i++) {
            logger.info ("CRF: outputAlphabet.lookup class = "+
                         outputAlphabet.lookupObject(i).getClass().getName());
            labels[i] = (String) outputAlphabet.lookupObject(i);
        }
        addFullyConnectedStates (labels);
    }

    private boolean[][] labelConnectionsIn (InstanceList trainingSet)
    {
        int numLabels = outputAlphabet.size();
        boolean[][] connections = new boolean[numLabels][numLabels];
        for (int i = 0; i < trainingSet.size(); i++) {
            Instance instance = trainingSet.getInstance(i);
            FeatureSequence output = (FeatureSequence) instance.getTarget();
            for (int j = 1; j < output.size(); j++) {
                int sourceIndex = outputAlphabet.lookupIndex (output.get(j-1));
                int destIndex = outputAlphabet.lookupIndex (output.get(j));
                assert (sourceIndex >= 0 && destIndex >= 0);
                connections[sourceIndex][destIndex] = true;
            }
        }
        return connections;
    }

    // added by Fuchun Peng
    public void priorCost (InstanceList trainingSet)
    {
        int numLabels = outputAlphabet.size();
        priorInitialCost = new double[numLabels];
        priorFinalCost = new double[numLabels];
        int initialNum = 0;
        int finalNum = 0;
        for (int i = 0; i < trainingSet.size(); i++) {
            Instance instance = trainingSet.getInstance(i);
            FeatureSequence output = (FeatureSequence) instance.getTarget();
            int initialIndex = outputAlphabet.lookupIndex (output.get(0));
            int finalIndex = outputAlphabet.lookupIndex (output.get(output.size()-1));
            assert (initialIndex >= 0 && finalIndex >= 0);
            priorInitialCost[initialIndex]++;
            priorFinalCost[finalIndex]++;
            initialNum++;
            finalNum++;
        }
        for (int i = 0; i < numLabels; i++) {
            priorInitialCost[i] = (priorInitialCost[i]+1)/(initialNum+numLabels);
            priorInitialCost[i] = -Math.log(priorInitialCost[i]);
            priorFinalCost[i] /= finalNum;
            priorFinalCost[i] = -Math.log(priorFinalCost[i]);
            System.out.println((String)outputAlphabet.lookupObject(i) + " "
                               + priorInitialCost[i] + " " + priorFinalCost[i]);
        }
    }
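    // Worked example for priorCost() above, with hypothetical counts: given
    // 3 labels and 100 training sequences, a label that starts 60 of them gets
    //     priorInitialCost = -log((60 + 1) / (100 + 3)) = -log(0.592) ~ 0.524
    // (add-one smoothing over the labels), while a label that ends 60 of them gets
    //     priorFinalCost   = -log(60 / 100) = -log(0.6) ~ 0.511
    // (unsmoothed relative frequency, so a label never observed in final
    // position receives an infinite final cost).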
    /** Add states to create a first-order Markov model on labels, adding only
        those transitions that occur in the given trainingSet. */
    public void addStatesForLabelsConnectedAsIn (InstanceList trainingSet)
    {
        priorCost(trainingSet);
        int numLabels = outputAlphabet.size();
        boolean[][] connections = labelConnectionsIn (trainingSet);
        for (int i = 0; i < numLabels; i++) {
            int numDestinations = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j]) numDestinations++;
            String[] destinationNames = new String[numDestinations];
            int destinationIndex = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j])
                    destinationNames[destinationIndex++] = (String)outputAlphabet.lookupObject(j);
            addState ((String)outputAlphabet.lookupObject(i), destinationNames);
        }
    }

    /** Add as many states as there are labels, but don't create separate
        weights for each source-destination pair of states.  Instead have all
        the incoming transitions to a state share the same weights. */
    public void addStatesForHalfLabelsConnectedAsIn (InstanceList trainingSet)
    {
        priorCost(trainingSet);
        int numLabels = outputAlphabet.size();
        boolean[][] connections = labelConnectionsIn (trainingSet);
        for (int i = 0; i < numLabels; i++) {
            int numDestinations = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j]) numDestinations++;
            String[] destinationNames = new String[numDestinations];
            int destinationIndex = 0;
            for (int j = 0; j < numLabels; j++)
                if (connections[i][j])
                    destinationNames[destinationIndex++] = (String)outputAlphabet.lookupObject(j);
            // addState ((String)outputAlphabet.lookupObject(i), 0.0, 0.0,
            //           destinationNames, destinationNames, destinationNames);
            addState ((String)outputAlphabet.lookupObject(i), priorInitialCost[i], priorFinalCost[i],
                      destinationNames, destinationNames, destinationNames);
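            // Note on the addState call above: destinationNames is passed as
            // both the labelNames and the weightNames argument, so every
            // transition into a given destination state shares one weight
            // group named after that destination, regardless of the source state.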