expgain.java
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/**
   The "gain" obtained by adding a feature to a conditional exponential model.
   Based on the joint exponential model in Della Pietra, Della Pietra & Lafferty, 1997.
   We smooth using a Gaussian prior.
   Note that we use Math.log(), not log-base-2, so the units are not "bits".

   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/

package edu.umass.cs.mallet.base.types;

import edu.umass.cs.mallet.base.classify.Classification;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.util.logging.*;
import java.io.*;

public class ExpGain extends RankedFeatureVector
{
    private static Logger logger = MalletLogger.getLogger(ExpGain.class.getName());

    // ExpGain of a feature, f, is defined in terms of MaxEnt-type feature+class "Feature"s, F,
    //   F = f,c
    // ExpGain of a Feature, F, is
    //   G(F) = KL(p~[C]||q[C]) - KL(p~[C]||q_F[C])
    // where p~[] is the empirical distribution, according to the true class label distribution,
    // q[] is the distribution from the (imperfect) classifier,
    // and q_F[] is the distribution from the (imperfect) classifier with F added
    // and F's weight adjusted (but none of the other weights adjusted).
    // ExpGain of a feature, f, is
    //   G(f) = sum_c G(f,c)
    // It would be more accurate to return a gain number for each "feature/class" combination,
    // but here we simply return gain(feature) = \sum_{class} gain(feature,class).

    // xxx Never used.  Remove them.
    boolean usingHyperbolicPrior = false;
    double hyperbolicSlope = 0.2;
    double hyperbolicSharpness = 10.0;

    private static double[] calcExpGains (InstanceList ilist, LabelVector[] classifications,
                                          double gaussianPriorVariance)
    {
        int numInstances = ilist.size();
        int numClasses = ilist.getTargetAlphabet().size();
        int numFeatures = ilist.getDataAlphabet().size();
        assert (ilist.size() > 0);

        // Notation from Della Pietra & Lafferty 1997, p.4
        double[][] p = new double[numClasses][numFeatures];       // "p~"
        double[][] q = new double[numClasses][numFeatures];       // "q"
        double[][] alphas = new double[numClasses][numFeatures];  // "alpha", the weight of the new feature
        int fli;     // feature location index
        double flv;  // feature location value
        logger.info ("Starting klgains, #instances="+numInstances);
        double trueLabelWeightSum = 0;
        double modelLabelWeightSum = 0;

        // Calculate p~[f] and q[f]
        for (int i = 0; i < numInstances; i++) {
            assert (classifications[i].getLabelAlphabet() == ilist.getTargetAlphabet());
            Instance inst = ilist.getInstance(i);
            Labeling labeling = inst.getLabeling ();
            FeatureVector fv = (FeatureVector) inst.getData ();
            //double instanceWeight = ilist.getInstanceWeight(i);
            // The code below relies on labelWeights summing to 1 over all labels!
            double perInstanceModelLabelWeight = 0;
            for (int li = 0; li < numClasses; li++) {
                double trueLabelWeight = labeling.value (li);
                double modelLabelWeight = classifications[i].value(li);
                trueLabelWeightSum += trueLabelWeight;
                modelLabelWeightSum += modelLabelWeight;
                perInstanceModelLabelWeight += modelLabelWeight;
                //if (i < 500) System.out.println ("i="+i+" li="+li+" true="+trueLabelWeight+" model="+modelLabelWeight);
                if (trueLabelWeight == 0 && modelLabelWeight == 0)
                    continue;
                for (int fl = 0; fl < fv.numLocations(); fl++) {
                    fli = fv.indexAtLocation(fl);
                    assert (fv.valueAtLocation(fl) == 1.0);
                    // xxx Note that we are not attending to instanceWeight here!
                    //p[li][fli] += trueLabelWeight * instanceWeight / (numInstances+1);
                    //q[li][fli] += modelLabelWeight * instanceWeight / (numInstances+1);
                    p[li][fli] += trueLabelWeight;
                    q[li][fli] += modelLabelWeight;
                }
            }
            assert (Math.abs (perInstanceModelLabelWeight - 1.0) < 0.001);
        }
        assert (Math.abs (trueLabelWeightSum/numInstances - 1.0) < 0.001)
            : "trueLabelWeightSum should be 1.0, it was "+trueLabelWeightSum;
        assert (Math.abs (modelLabelWeightSum/numInstances - 1.0) < 0.001)
            : "modelLabelWeightSum should be 1.0, it was "+modelLabelWeightSum;
        /*
        double psum = 0;
        double qsum = 0;
        for (int i = 0; i < numClasses; i++)
            for (int j = 0; j < numFeatures; j++) {
                psum += p[i][j];
                qsum += q[i][j];
            }
        assert (Math.abs(psum - 1.0) < 0.0001) : "psum not 1.0!  psum="+psum+" qsum="+qsum;
        assert (Math.abs(qsum - 1.0) < 0.0001) : "qsum not 1.0!  psum="+psum+" qsum="+qsum;
        */

        // Determine the alphas.
        // We can't do it in closed form as in the Della Pietra paper, because here we have
        // a conditional MaxEnt model.  So we do it by Newton-Raphson...
        // ...initializing by the broken, inappropriate joint-case closed-form solution:
        //for (int i = 0; i < numClasses; i++)
        //  for (int j = 0; j < numFeatures; j++)
        //    alphas[i][j] = Math.log ( (p[i][j]*(1.0-q[i][j])) / (q[i][j]*(1.0-p[i][j])) );
        double[][] dalphas = new double[numClasses][numFeatures];        // first derivative
        double[][] alphaChangeOld = new double[numClasses][numFeatures]; // change in alpha, last iteration
        double[][] alphaMax = new double[numClasses][numFeatures];       // upper bound on alpha
        double[][] alphaMin = new double[numClasses][numFeatures];       // lower bound on alpha
        double[][] ddalphas = new double[numClasses][numFeatures];       // second derivative
        for (int i = 0; i < numClasses; i++)
            for (int j = 0; j < numFeatures; j++) {
                alphaMax[i][j] = Double.POSITIVE_INFINITY;
                alphaMin[i][j] = Double.NEGATIVE_INFINITY;
            }
        double maxAlphachange = 0;
        double maxDalpha = 99;
        int maxNewtonSteps = 50; // xxx Change to more?
        // alphas[][] are initialized to zero
        for (int newton = 0; maxDalpha > 1.0E-8 && newton < maxNewtonSteps; newton++) {
            //System.out.println ("Newton iteration "+newton);
            if (false /*usingHyperbolicPrior*/) {
                for (int i = 0; i < numClasses; i++)
                    for (int j = 0; j < numFeatures; j++) {
                        dalphas[i][j] = p[i][j] - (alphas[i][j] / gaussianPriorVariance);
                        ddalphas[i][j] = -1 / gaussianPriorVariance;
                    }
            } else {
                // Gaussian prior
                for (int i = 0; i < numClasses; i++)
                    for (int j = 0; j < numFeatures; j++) {
                        dalphas[i][j] = p[i][j] - (alphas[i][j] / gaussianPriorVariance);
                        ddalphas[i][j] = -1 / gaussianPriorVariance;
                    }
            }
            for (int i = 0; i < ilist.size(); i++) {
                assert (classifications[i].getLabelAlphabet() == ilist.getTargetAlphabet());
                Instance inst = ilist.getInstance(i);
                Labeling labeling = inst.getLabeling ();
                FeatureVector fv = (FeatureVector) inst.getData ();
                // xxx This assumes binary-valued features.  What about "tied" weights?
                for (int fl = 0; fl < fv.numLocations(); fl++) {
                    fli = fv.indexAtLocation(fl);
                    for (int li = 0; li < numClasses; li++) {
                        double modelLabelWeight = classifications[i].value(li);
                        double expalpha = Math.exp (alphas[li][fli]);
                        double numerator = modelLabelWeight * expalpha;
                        double denominator = numerator + (1.0 - modelLabelWeight);
                        dalphas[li][fli] -= numerator / denominator;
                        ddalphas[li][fli] += ((numerator*numerator) / (denominator*denominator)
                                              - (numerator/denominator));
                    }
                }
            }
            // We now have the first and second derivatives for this Newton step.
            // Run tests on the alphas and their derivatives, and do a Newton step.
            double alphachange, newalpha, oldalpha;
            maxAlphachange = maxDalpha = 0;
            for (int i = 0; i < numClasses; i++)
                for (int j = 0; j < numFeatures; j++) {
                    alphachange = - (dalphas[i][j] / ddalphas[i][j]);
                    if (p[i][j] == 0 && q[i][j] == 0)
                        continue;
                    else if (false && (i*numFeatures+j) % (numClasses*numFeatures/2000) == 0
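For reference, the quantity the comments in ExpGain define, G(F) = KL(p~[C]||q[C]) - KL(p~[C]||q_F[C]) for one candidate Feature F (with G(f) = sum_c G(f,c)), can be computed in isolation once the three per-class distributions are known. The following is a minimal standalone sketch, not part of MALLET; the class and array names (ExpGainSketch, pTilde, q, qF) are hypothetical, and qF is assumed to be the model distribution after adding F with its tuned weight, as calcExpGains does. As in the header comment, Math.log() gives the gain in nats rather than bits.

// Illustrative sketch only: G(F) = KL(p~||q) - KL(p~||q_F) over the class variable.
// Names are hypothetical and not MALLET's.
public final class ExpGainSketch {
    // Natural-log KL term for one class; zero empirical mass contributes nothing.
    private static double klTerm(double pTrue, double pModel) {
        return (pTrue == 0.0) ? 0.0 : pTrue * Math.log(pTrue / pModel);
    }

    // Gain of adding Feature F: KL(p~||q) - KL(p~||qF), summed over classes.
    public static double gain(double[] pTilde, double[] q, double[] qF) {
        double g = 0.0;
        for (int c = 0; c < pTilde.length; c++)
            g += klTerm(pTilde[c], q[c]) - klTerm(pTilde[c], qF[c]);
        return g;
    }

    public static void main(String[] args) {
        double[] pTilde = {0.7, 0.3};    // empirical class distribution
        double[] q      = {0.5, 0.5};    // current model
        double[] qF     = {0.65, 0.35};  // model after adding F with its tuned weight
        System.out.println("G(F) = " + gain(pTilde, q, qF));  // positive: adding F helps
    }
}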
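The Newton-Raphson loop in calcExpGains adjusts only the weight alpha of each candidate Feature F = (f,c), holding every other classifier weight fixed. Below is a self-contained sketch of one such update for a single (class, feature) pair; it mirrors the derivative terms accumulated in that loop (empirical count, Gaussian-prior term, and the expected firing rate of F under the adjusted model), but the method and variable names are illustrative assumptions, not MALLET's.

// Illustrative sketch of one Newton step for a candidate Feature's weight alpha.
// pEmpirical: summed true-label weight of class c over instances containing f ("p~" above).
// qModel[i]:  the current classifier's probability of class c on the i-th instance containing f.
public final class AlphaNewtonStep {
    public static double newtonStep(double alpha, double pEmpirical,
                                    double[] qModel, double gaussianPriorVariance) {
        double dalpha = pEmpirical - alpha / gaussianPriorVariance;  // first derivative (prior term)
        double ddalpha = -1.0 / gaussianPriorVariance;               // second derivative (prior term)
        double expalpha = Math.exp(alpha);
        for (double qic : qModel) {
            double numerator = qic * expalpha;
            double denominator = numerator + (1.0 - qic);
            dalpha -= numerator / denominator;
            ddalpha += (numerator * numerator) / (denominator * denominator)
                     - numerator / denominator;
        }
        return alpha - dalpha / ddalpha;  // same update as alphachange = -dalphas/ddalphas
    }

    public static void main(String[] args) {
        double alpha = 0.0;                  // alphas start at zero, as in calcExpGains
        double[] qModel = {0.4, 0.55, 0.3};  // model probabilities on three instances containing f
        double pEmpirical = 2.0;             // e.g., f co-occurs with class c in two of them
        for (int step = 0; step < 10; step++)
            alpha = newtonStep(alpha, pEmpirical, qModel, 1.0);
        System.out.println("converged alpha = " + alpha);
    }
}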