⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classification2confidencepredictingfeaturevector.java

📁 这是 MALLET（MAchine Learning for LanguagE Toolkit）的 Java 源码，里面有许多内容，请大家慢慢琢磨。
💻 JAVA
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**    @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */package edu.umass.cs.mallet.base.pipe;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.classify.evaluate.*;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.util.PropertyList;import java.util.ArrayList;import java.util.logging.*;  /** Pipe features from underlying classifier to   * the confidence prediction instance list   */public class Classification2ConfidencePredictingFeatureVector extends Pipe{	public Classification2ConfidencePredictingFeatureVector ()	{		super (Alphabet.class, LabelAlphabet.class);	}		public Instance pipe (Instance carrier)	{		Classification classification = (Classification) carrier.getData();		PropertyList features = null;		LabelVector lv = classification.getLabelVector();		Label bestLabel = lv.getBestLabel();		Instance inst = (Instance)classification.getInstance();		FeatureVector fv = (FeatureVector)inst.getData();		Alphabet fdict = fv.getAlphabet();				double winningThreshold = .990;		double varianceThreshold = .15;		double secondThreshold = .03;				double winningScore = lv.getValueAtRank(0);		double marginOfVictory = winningScore - lv.getValueAtRank(1);				// attempts to use the confusion matrix of the training list		// as some prior knowledge in training				features = PropertyList.add ("winningScore", winningScore, features);		features = PropertyList.add ("secondScore", lv.getValueAtRank(1), features);		for(int i=0; i<lv.numLocations(); i++) {//			
features = PropertyList.add (lv.getLabelAtRank(i).toString() +"HasRank"+i, 1.0, features);			features = PropertyList.add (lv.getLabelAtRank(i).toString() +"HasValue", lv.valueAtLocation (i), features);			}				features = PropertyList.add ("MarginOfVictory", marginOfVictory, features);		features = PropertyList.add("numFeatures", ((double)fv.numLocations()/fdict.size()), features);		features = PropertyList.add (bestLabel.toString() + "IsFirst-" + lv.getLabelAtRank(1).toString()+"IsSecond", 1.0, features);		features = PropertyList.add ("Range", winningScore - lv.getValueAtRank(lv.numLocations()-1), features);		features = PropertyList.add (bestLabel.toString()+"IsFirst", 1.0, features);		features = PropertyList.add (lv.getLabelAtRank(1).toString() + "IsSecond", 1.0, features);					// loop through original feature vector		// and add each feature to PropertyList//		features = PropertyList.add ("winningScore", winningScore, features);//		features = PropertyList.add ("secondScore", lv.getValueAtRank(1), features);//		features = PropertyList.add (bestLabel.toString()+"IsFirst", 1.0, features);//		features = PropertyList.add (lv.getLabelAtRank(1).toString() + "IsSecond", 1.0, features);					// xxx this hurt performance. 
is this correct function call?//			for(int loc = 0; loc < fv.numLocations(); loc++) //				features = PropertyList.add(fdict.lookupObject(loc).toString(), 1.0, features);					//features = PropertyList.add ("winningClassPrecision", confusionMatrix.getPrecision(lv.getBestIndex()) , features);			//			features = PropertyList.add ("confusionBetweenTop2", confusionMatrix.getConfusionBetween(lv.getBestIndex(), lv.getIndexAtRank(1)) , features);			//features = PropertyList.add ("Variance",getScoreVariance(lv), features);						// use cutoffs of some metrics/*	if(winningScore < winningThreshold){	features = PropertyList.add ("WinningScoreBelowX", 1.0, features);	bestScoreLessThanX++;	if(classification.bestLabelIsCorrect()) {	reallyWrong++;	}				}							if(marginOfVictory < .9)				features = PropertyList.add ("MarginOfVictoryBelow.9", 1.0, features);				if(getScoreVariance(lv) < varianceThreshold) {				features = PropertyList.add ("VarianceBelowX", 1.0, features);				varianceLessThanX++;				}				if(lv.getValueAtRank(1) > secondThreshold) {				features = PropertyList.add ("SecondScoreAboveX", 1.0, features);				secondScoreGreaterThanX++;			    				}*/									/*			// all the confidence predicting features			features = PropertyList.add ("winningScore", winningScore, features);						features = PropertyList.add(bestLabel.toString()+"IsFirst", 1.0, features);			features = PropertyList.add (lv.getLabelAtRank(1).toString() + "IsSecond", 1.0, features);									features = PropertyList.add ("secondScore", lv.getValueAtRank(1), features);			for(int i=0; i<lv.numLocations(); i++) {				features = PropertyList.add (lv.getLabelAtRank(i).toString() +"HasRank"+i, lv.getValueAtRank(i), features);			}			if(marginOfVictory < .9)			 	features = PropertyList.add ("MarginOfVictoryBelow.9", 1.0, features);			if(winningScore < winningThreshold){			 	features = PropertyList.add ("WinningScoreBelowX", 1.0, features);				bestScoreLessThanX++;			}			if(getScoreVariance(lv) < varianceThreshold) {			 	
features = PropertyList.add ("VarianceBelowX", 1.0, features);				varianceLessThanX++;			}			if(lv.getValueAtRank(1) > secondThreshold) {			        features = PropertyList.add ("SecondScoreAboveX", 1.0, features);				secondScoreGreaterThanX++;			    			}			LabelAlphabet vocab = lv.getLabelAlphabet(); 			for(int i=0; i<vocab.size(); i++) {			 	features = PropertyList.add(vocab.lookupObject(i).toString()+"'sScore", lv.valueAtLocation(i), features);			}			features = PropertyList.add("numFeatures", ((double)fv.numLocations()/fdict.size()), features);			features = PropertyList.add (bestLabel.toString() + "IsFirst-" + lv.getLabelAtRank(1).toString()+"IsSecond", 1.0, features);								features = PropertyList.add("marginOfVictory", lv.getBestValue() - lv.getValueAtRank(1), features);*//*	// xxx these features either had 0 info gain or had a negative	// impact on performance					features = PropertyList.add ("scoreVariance", getScoreVariance(lv), features);					features = PropertyList.add ("scoreMean", getScoreMean(lv), features);*/			// loop through original feature vector			// and add each feature to PropertyList			// xxx this hurt performance. is this correct function call?			//for(int loc = 0; loc < fv.numLocations(); loc++) 			//	features = PropertyList.add(fdict.lookupObject(loc).toString(), 1.0, features);									// ...			// ...				carrier.setTarget(((LabelAlphabet)getTargetAlphabet()).lookupLabel(classification.bestLabelIsCorrect() ? 
"correct" : "incorrect"));		carrier.setData(new FeatureVector ((Alphabet) getDataAlphabet(), features, false));		carrier.setName(inst.getName());		carrier.setSource(inst.getSource());		return carrier;	}		private double getScoreMean(LabelVector lv)	{		double sum = 0.0;		for(int i=0; i<lv.numLocations(); i++) {			sum += lv.getValueAtRank(i);		}		return sum / lv.numLocations();	}		private double getScoreVariance(LabelVector lv)	{		double mean = getScoreMean(lv);			double squaredDifference = 0.0;			for(int i=0; i<lv.numLocations(); i++) {				squaredDifference += (mean - lv.getValueAtRank(i)) * (mean - lv.getValueAtRank(i));			}			return squaredDifference / lv.numLocations();	}}	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -