📄 bayesianboosting.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.operator.learner.meta;
import java.io.IOException;
import java.util.List;
import java.util.Vector;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.SplittedExampleSet;
import edu.udo.cs.yale.operator.IOContainer;
import edu.udo.cs.yale.operator.IODescription;
import edu.udo.cs.yale.operator.IOObject;
import edu.udo.cs.yale.operator.IllegalInputException;
import edu.udo.cs.yale.operator.MissingIOObjectException;
import edu.udo.cs.yale.operator.OperatorChain;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.operator.Value;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.operator.parameter.ParameterType;
import edu.udo.cs.yale.operator.parameter.ParameterTypeBoolean;
import edu.udo.cs.yale.operator.parameter.ParameterTypeDouble;
import edu.udo.cs.yale.operator.parameter.ParameterTypeFile;
import edu.udo.cs.yale.operator.parameter.ParameterTypeInt;
import edu.udo.cs.yale.tools.LogService;
/**
* @author scholz
*/
public class BayesianBoosting extends OperatorChain {
/** Name of the parameter specifying the maximal number of iterations of the learner. */
public static final String NUM_OF_ITERATIONS = "iterations";
/** Name of the parameter giving the fraction of examples used for training (internal bootstrapping). */
public static final String INTERNAL_BOOTSTRAP = "ratio_internal_bootstrap";
/** Name of the parameter giving the fraction of examples not used for training, but to estimate the quality of intermediate classifiers. */
public static final String VALIDATION_SET = "ratio_validation_set";
/** Name of the parameter specifying the model file. */
public static final String MODEL_FILE = "model_file";
/** Boolean parameter to specify whether the output is expected to be a soft or crisp classifier. */
public static final String USE_DISTRIBUTION = "use_distribution";
/** Boolean parameter to specify whether the label priors should be equally likely after first iteration. */
public static final String EQUALLY_PROB_LABELS = "rescale_label_priors";
/** Discard models with an advantage of less than the specified value. */
public static final double MIN_ADVANTAGE = 0.001;
/**
 * Name of the internal special attribute for preliminary predictions.
 * NOTE(review): the literal ends with a blank; presumably a suffix is appended
 * where it is used - confirm against the code outside this section.
 */
public static final String PRELIM_PRED_ATTRIBUTE = "preliminary prediction ";
// A model to initialise the example weights. Assigned in readOptionalParameters()
// when the operator input contains a model.
private Model startModel;
// A performance measure to be visualized. Not yet implemented!
// Exposed through the "performance" value registered in the constructor.
private double performance = 0;
// Exposed through the "iteration" value registered in the constructor.
private int currentIteration;
/**
 * Creates the operator and registers two values via <code>addValue</code>:
 * "performance" (backed by the <code>performance</code> field) and
 * "iteration" (backed by the <code>currentIteration</code> field).
 */
public BayesianBoosting() {
    super();

    // Each value reads the corresponding field at the time it is queried.
    Value performanceValue = new Value("performance", "The performance.") {
        public double getValue() {
            return performance;
        }
    };
    addValue(performanceValue);

    Value iterationValue = new Value("iteration", "The current iteration.") {
        public double getValue() {
            return currentIteration;
        }
    };
    addValue(iterationValue);
}
/**
 * Delegates the IO check to the inner operator and verifies that its
 * output contains a <code>Model</code>.
 *
 * @param input the classes available as input
 * @return an array holding only <code>Model.class</code>
 * @throws IllegalInputException if the inner operator's output classes do
 *         not contain <code>Model</code>
 * @see OperatorChain#checkIO(Class[])
 */
public Class[] checkIO(Class[] input) throws IllegalInputException {
    Class[] innerOutput = getOperator(0).checkIO(input);
    boolean deliversModel = IODescription.containsClass(Model.class, innerOutput);
    if (deliversModel) {
        return new Class[] { Model.class };
    }
    throw new IllegalInputException(this, this.getOperator(0), Model.class);
}
/**
 * Extends the inherited parameter list by the parameters of this operator:
 * the model file, the distribution flag, the label prior rescaling flag,
 * the internal bootstrap ratio, the validation set ratio and the maximum
 * number of iterations.
 *
 * @return the list of parameter types, starting with the inherited ones
 */
public List getParameterTypes() {
    List types = super.getParameterTypes();

    ParameterType type = new ParameterTypeFile(MODEL_FILE, "If this parameter is set, the model is written to a file.", true);
    type.setExpert(false);
    types.add(type);

    types.add(new ParameterTypeBoolean(USE_DISTRIBUTION, "Specifies whether the output should be a probability estimator (supported for boolean prediction tasks, only). Please note, that probability estimation is only supported for boolean target labels.", false));
    types.add(new ParameterTypeBoolean(EQUALLY_PROB_LABELS, "Specifies whether the proportion of labels should be equal by construction after first iteration .", false));
    types.add(new ParameterTypeDouble(INTERNAL_BOOTSTRAP, "Fraction of examples used for training (internal bootstrapping). If activated (value < 1) only the rest is used to estimate the biases.", 0, 1, 0.7));
    // The numeric arguments are 0, 1, 0 (presumably min, max, default), so the
    // feature can only be switched on by a value greater than 0; the former
    // text "(value > 1)" described a setting that cannot occur.
    types.add(new ParameterTypeDouble(VALIDATION_SET, "Fraction of examples not used for training, but to estimate the quality of intermediate classifiers. If activated (value > 0) then this fraction is used to decide when to stop adding new base classifiers to the ensemble.", 0, 1, 0));
    types.add(new ParameterTypeInt(NUM_OF_ITERATIONS, "The maximum number of iterations.", 1, Integer.MAX_VALUE, 10));
    return types;
}
/**
 * This chain accepts at most one inner operator.
 *
 * @return always 1
 * @see OperatorChain#getMaxNumberOfInnerOperators()
 */
public int getMaxNumberOfInnerOperators() {
    final int maxInnerOperators = 1;
    return maxInnerOperators;
}
/**
 * This chain requires at least one inner operator.
 *
 * @return always 1
 * @see OperatorChain#getMinNumberOfInnerOperators()
 */
public int getMinNumberOfInnerOperators() {
    final int minInnerOperators = 1;
    return minInnerOperators;
}
/**
 * Reports the number of steps of this operator.
 *
 * @return always 1
 * @see OperatorChain#getNumberOfSteps()
 */
public int getNumberOfSteps() {
    // A single step is reported; the NUM_OF_ITERATIONS parameter is
    // deliberately not consulted here.
    final int steps = 1;
    return steps;
}
/**
 * Declares the input of this operator: an <code>ExampleSet</code>.
 *
 * @return a one-element array holding <code>ExampleSet.class</code>
 * @see edu.udo.cs.yale.operator.Operator#getInputClasses()
 */
public Class[] getInputClasses() {
    Class[] required = { ExampleSet.class };
    return required;
}
/**
 * Declares the output of this operator: a <code>Model</code>.
 *
 * @return a one-element array holding <code>Model.class</code>
 * @see edu.udo.cs.yale.operator.Operator#getOutputClasses()
 */
public Class[] getOutputClasses() {
    Class[] produced = { Model.class };
    return produced;
}
/**
 * Obtains the weight attribute from
 * <code>exampleSet.createWeightAttribute()</code> and sets the weight of
 * every example in the set to the initial value of 1.
 *
 * @param exampleSet the example set to be prepared
 */
private void prepareWeights(ExampleSet exampleSet) {
    Attribute weightAttr = exampleSet.createWeightAttribute();
    ExampleReader reader = exampleSet.getExampleReader();
    while (reader.hasNext()) {
        Example example = reader.next();
        example.setValue(weightAttr, 1);
    }
    // The number of label values was formerly computed here but never used;
    // the lookup was dropped so that weight initialisation no longer depends
    // on the label attribute.
}
/**
 * Runs the "embedded" learner (inner operator 0) on the example set,
 * lets the resulting model create its predicted label on that set,
 * applies the model to the set and returns it.
 *
 * @param exampleSet an <code>ExampleSet</code> to train a model for
 * @return the <code>Model</code> delivered by the inner operator
 * @throws OperatorException if thrown while applying the inner operator
 *         or while handling the model
 */
private Model trainModel(ExampleSet exampleSet) throws OperatorException {
    IOObject[] learnerInput = new IOObject[] { exampleSet };
    IOContainer learnerOutput = getOperator(0).apply(new IOContainer(learnerInput));

    Model learned = (Model) learnerOutput.getInput(Model.class);
    // The predicted label is created before the model is applied.
    learned.createPredictedLabel(exampleSet);
    learned.apply(exampleSet);
    return learned;
}
/**
 * Helper method that looks for a <code>Model</code> in the operator input.
 * If one is present it is stored as the start model; otherwise a status
 * message is logged and the start model field is left unchanged.
 */
private void readOptionalParameters() {
    Model inputModel;
    try {
        inputModel = (Model) getInput(Model.class);
    } catch (MissingIOObjectException e) {
        // A missing model is not an error: the start model is optional.
        LogService.logMessage(getName() + ": No model found in input.", LogService.STATUS);
        return;
    }
    this.startModel = inputModel;
}
/**
* Constructs a <code>Model</code> repeatedly running a weak learner,
* reweighting the training example set accordingly, and combining the
* hypotheses using the available weighted performance values.
* If the input contains a model, then this model is used as a starting
* point for weighting the examples.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -