📄 attributeselection.java
字号:
CvString.append("\n\n=== Attribute selection " + m_numFolds
+ " fold cross-validation ");
if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
!(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator) &&
(m_trainInstances.classAttribute().isNominal())) {
CvString.append("(stratified), seed: ");
CvString.append(m_seed+" ===\n\n");
}
else {
CvString.append("seed: "+m_seed+" ===\n\n");
}
if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) {
CvString.append("average merit average rank attribute\n");
// calculate means and std devs
for (int i = 0; i < m_rankResults[0].length; i++) {
m_rankResults[0][i] /= m_numFolds; // mean merit
double var = m_rankResults[0][i]*m_rankResults[0][i]*m_numFolds;
var = (m_rankResults[2][i] - var);
var /= m_numFolds;
if (var <= 0.0) {
var = 0.0;
m_rankResults[2][i] = 0;
}
else {
m_rankResults[2][i] = Math.sqrt(var);
}
m_rankResults[1][i] /= m_numFolds; // mean rank
var = m_rankResults[1][i]*m_rankResults[1][i]*m_numFolds;
var = (m_rankResults[3][i] - var);
var /= m_numFolds;
if (var <= 0.0) {
var = 0.0;
m_rankResults[3][i] = 0;
}
else {
m_rankResults[3][i] = Math.sqrt(var);
}
}
// now sort them by mean rank
int[] s = Utils.sort(m_rankResults[1]);
for (int i=0; i<s.length; i++) {
if (m_rankResults[1][s[i]] > 0) {
CvString.append(Utils.doubleToString(Math.
abs(m_rankResults[0][s[i]]),
6, 3)
+ " +-"
+ Utils.doubleToString(m_rankResults[2][s[i]], 6, 3)
+ " "
+ Utils.doubleToString(m_rankResults[1][s[i]],
fieldWidth+2, 1)
+ " +-"
+ Utils.doubleToString(m_rankResults[3][s[i]], 5, 2)
+" "
+ Utils.doubleToString(((double)(s[i] + 1)),
fieldWidth, 0)
+ " "
+ m_trainInstances.attribute(s[i]).name()
+ "\n");
}
}
}
else {
CvString.append("number of folds (%) attribute\n");
for (int i = 0; i < m_subsetResults.length; i++) {
if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) ||
(i != m_trainInstances.classIndex())) {
CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0)
+ "("
+ Utils.doubleToString((m_subsetResults[i] /
m_numFolds * 100.0)
, 3, 0)
+ " %) "
+ Utils.doubleToString(((double)(i + 1)),
fieldWidth, 0)
+ " "
+ m_trainInstances.attribute(i).name()
+ "\n");
}
}
}
return CvString.toString();
}
/**
 * Runs attribute selection on a single split of the data and adds the
 * outcome to the accumulated cross-validation statistics. A summary of
 * all calls made so far is produced by CVResultsString(). Every split
 * is assumed to come from the same dataset, i.e. to have the same
 * number and types of attributes as the splits seen before.
 *
 * @param split the instances to select attributes from
 * @exception Exception if an error occurs
 */
public void selectAttributesCVSplit(Instances split) throws Exception {
  // No training data registered yet: adopt this split. In that situation
  // the caller is most likely outside this class collecting CV statistics,
  // and m_trainInstances is only needed for attribute names, types etc.
  if (m_trainInstances == null) {
    m_trainInstances = split;
  }

  // Allocate the accumulators the first time through.
  if (m_rankResults == null && m_subsetResults == null) {
    final int numAttributes = split.numAttributes();
    m_subsetResults = new double[numAttributes];
    // rows: 0 = summed merit, 1 = summed rank,
    //       2 = summed squared merit, 3 = summed squared rank
    m_rankResults = new double[4][numAttributes];
  }

  m_ASEvaluator.buildEvaluator(split);

  // Run the search, then let the evaluator post-process what it found.
  final int[] searched = m_searchMethod.search(m_ASEvaluator, split);
  final int[] selected = m_ASEvaluator.postProcess(searched);

  final boolean ranked =
    (m_searchMethod instanceof RankedOutputSearch) && m_doRank;

  if (!ranked) {
    // Subset mode: count how often each attribute was selected.
    for (int k = 0; k < selected.length; k++) {
      m_subsetResults[selected[k]]++;
    }
  } else {
    // Ranking mode: each row holds {attribute index, merit}; the row's
    // position (1-based) is the attribute's rank for this split.
    final double[][] ranking =
      ((RankedOutputSearch) m_searchMethod).rankedAttributes();
    for (int pos = 0; pos < ranking.length; pos++) {
      final int attIndex = (int) ranking[pos][0];
      final double merit = ranking[pos][1];
      final int rank = pos + 1;

      m_rankResults[0][attIndex] += merit;
      m_rankResults[2][attIndex] += merit * merit;
      m_rankResults[1][attIndex] += rank;
      m_rankResults[3][attIndex] += rank * rank;
    }
  }

  m_trials++;
}
/**
 * Perform a cross validation for attribute selection. With subset
 * evaluators the number of times each attribute is selected over
 * the cross validation is reported. For attribute evaluators, the
 * average merit and average ranking + std deviation is reported for
 * each attribute.
 *
 * The folds are drawn from a new Instances object built from the
 * training instances, which is randomized with the configured seed
 * and, for supervised evaluators with a nominal class, stratified.
 *
 * @return the results of cross validation as a String
 * @exception Exception if an error occurs during cross validation
 * (an IllegalStateException if no training instances have been set)
 */
public String CrossValidateAttributes () throws Exception {
  // Fail with a descriptive message instead of an anonymous failure
  // further down when there is no data to build the folds from.
  if (m_trainInstances == null) {
    throw new IllegalStateException("Can't cross validate: no training "
                                    + "instances have been set.");
  }

  // Randomize/stratify a new Instances object rather than
  // m_trainInstances itself.
  Instances cvData = new Instances(m_trainInstances);
  cvData.randomize(new Random(m_seed));

  // Stratification looks at the class attribute, so it is only attempted
  // for evaluators that are not unsupervised.
  boolean supervised =
    !(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
    !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator);
  if (supervised && cvData.classAttribute().isNominal()) {
    cvData.stratify(m_numFolds);
  }

  for (int i = 0; i < m_numFolds; i++) {
    // Perform attribute selection on the training portion of fold i
    Instances train = cvData.trainCV(m_numFolds, i);
    selectAttributesCVSplit(train);
  }

  return CVResultsString();
}
/**
* Perform attribute selection on the supplied training instances.
*
* @param data the instances to select attributes from
* @exception Exception if there is a problem during selection
*/
public void SelectAttributes (Instances data) throws Exception {
int [] attributeSet;
m_transformer = null;
m_attributeFilter = null;
m_trainInstances = data;
if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) {
throw new Exception("Can't cross validate an attribute transformer.");
}
if (m_ASEvaluator instanceof SubsetEvaluator &&
m_searchMethod instanceof Ranker) {
throw new Exception(m_ASEvaluator.getClass().getName()
+" must use a search method other than Ranker");
}
if (m_ASEvaluator instanceof AttributeEvaluator &&
!(m_searchMethod instanceof Ranker)) {
// System.err.println("AttributeEvaluators must use a Ranker search "
// +"method. Switching to Ranker...");
// m_searchMethod = new Ranker();
throw new Exception("AttributeEvaluators must use the Ranker search "
+ "method");
}
if (m_searchMethod instanceof RankedOutputSearch) {
m_doRank = ((RankedOutputSearch)m_searchMethod).getGenerateRanking();
}
if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator ||
m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) {
// unset the class index
m_trainInstances.setClassIndex(-1);
} else {
// check that a class index has been set
if (m_trainInstances.classIndex() < 0) {
m_trainInstances.setClassIndex(m_trainInstances.numAttributes()-1);
}
}
// Initialize the attribute evaluator
m_ASEvaluator.buildEvaluator(m_trainInstances);
if (m_ASEvaluator instanceof AttributeTransformer) {
m_trainInstances =
((AttributeTransformer)m_ASEvaluator).transformedHeader();
m_transformer = (AttributeTransformer)m_ASEvaluator;
}
int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0);
// Do the search
attributeSet = m_searchMethod.search(m_ASEvaluator,
m_trainInstances);
// try and determine if the search method uses an attribute transformer---
// this is a bit of a hack to make things work properly with RankSearch
// using PrincipalComponents as its attribute ranker
try {
BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass());
PropertyDescriptor properties[];
MethodDescriptor methods[];
// methods = bi.getMethodDescriptors();
properties = bi.getPropertyDescriptors();
for (int i=0;i<properties.length;i++) {
String name = properties[i].getDisplayName();
Method meth = properties[i].getReadMethod();
Object retType = meth.getReturnType();
if (retType.equals(ASEvaluation.class)) {
Class args [] = { };
ASEvaluation tempEval = (ASEvaluation)(meth.invoke(m_searchMethod,
args));
if (tempEval instanceof AttributeTransformer) {
// grab the transformed data header
m_trainInstances =
((AttributeTransformer)tempEval).transformedHeader();
m_transformer = (AttributeTransformer)tempEval;
}
}
}
} catch (IntrospectionException ex) {
System.err.println("AttributeSelection: Couldn't "
+"introspect");
}
// Do any postprocessing that an attribute selection method might require
attributeSet = m_ASEvaluator.postProcess(attributeSet);
if (!m_doRank) {
m_selectionResults.append(printSelectionResults());
}
if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) {
m_attributeRanking =
((RankedOutputSearch)m_searchMethod).rankedAttributes();
m_selectionResults.append(printSelectionResults());
m_selectionResults.append("Ranked attributes:\n");
// retrieve the number of attributes to retain
m_numToSelect =
((RankedOutputSearch)m_searchMethod).getCalculatedNumToSelect();
// determine fieldwidth for merit
int f_p=0;
int w_p=0;
for (int i = 0; i < m_numToSelect; i++) {
double precision = (Math.abs(m_attributeRanking[i][1]) -
(int)(Math.abs(m_attributeRanking[i][1])));
if (precision > 0) {
precision = Math.abs((Math.log(Math.abs(precision)) /
Math.log(10)))+3;
}
if (precision > f_p) {
f_p = (int)precision;
}
if ((Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
/ Math.log(10)))+1) > w_p) {
if (m_attributeRanking[i][1] > 0) {
w_p = (int)Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
/ Math.log(10)))+1;
}
}
}
for (int i = 0; i < m_numToSelect; i++) {
m_selectionResults.
append(Utils.doubleToString(m_attributeRanking[i][1],
f_p+w_p+1,f_p)
+ Utils.doubleToString((m_attributeRanking[i][0] + 1),
fieldWidth+1,0)
+ " "
+ m_trainInstances.
attribute((int)m_attributeRanking[i][0]).name()
+ "\n");
}
// set up the selected attributes array - usable by a filter or
// whatever
if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
&& !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
{
// one more for the class
m_selectedAttributeSet = new int[m_numToSelect + 1];
m_selectedAttributeSet[m_numToSelect] =
m_trainInstances.classIndex();
}
else {
m_selectedAttributeSet = new int[m_numToSelect];
}
m_selectionResults.append("\nSelected attributes: ");
for (int i = 0; i < m_numToSelect; i++) {
m_selectedAttributeSet[i] = (int)m_attributeRanking[i][0];
if (i == m_numToSelect - 1) {
m_selectionResults.append(((int)m_attributeRanking[i][0] + 1)
+ " : "
+ (i + 1)
+ "\n");
}
else {
m_selectionResults.append(((int)m_attributeRanking[i][0] + 1));
m_selectionResults.append(",");
}
}
} else {
// set up the selected attributes array - usable by a filter or
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -