📄 id3learner.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.operator.learner.decisiontree;
import edu.udo.cs.yale.operator.learner.AbstractLearner;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.SplittedExampleSet;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Tools;
import edu.udo.cs.yale.tools.ParameterService;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import java.util.*;
/** ID3Learner is an internal (i.e. pure Java) classification machine learning algorithm based on the
* ID3 algorithm by Quinlan. In each step the most promising attribute is determined by calculating
* the information gain. Then the example set is partitioned according to the values of this attribute
* and the algorithm is applied recursively on the partitions. The trees resulting from the recursive
* calls are attached as children together with their respective attribute values. Recursion stops
* when all examples of a subset have the same label or the subset becomes empty.
*
* @yale.xmlclass ID3Learner
* @author Ingo
* @version $Id: ID3Learner.java,v 2.3 2004/08/27 11:57:38 ingomierswa Exp $
*/
public class ID3Learner extends AbstractLearner {

    /**
     * Helper class counting the occurrences of a single label value.
     * Natural ordering is by count so that {@link Collections#max} on a
     * collection of counters yields the counter of the most frequent label.
     * Static: it never touches the enclosing learner instance.
     */
    private static class LabelCounter implements Comparable {

        private int label;
        private int count;

        LabelCounter(int label) {
            this.label = label;
            this.count = 0;
        }

        /** Increments the occurrence count by one. */
        void inc() {
            count++;
        }

        int getCount() {
            return count;
        }

        int getValue() {
            return label;
        }

        /**
         * Orders counters by count. Written with explicit comparisons
         * instead of {@code this.count - other.count} to avoid integer
         * overflow for very large counts.
         */
        public int compareTo(Object o) {
            int otherCount = ((LabelCounter) o).count;
            if (count < otherCount)
                return -1;
            if (count > otherCount)
                return 1;
            return 0;
        }
    }

    /** The names of the parameters known by this operator. */
    static final String[] PARAMETER = { "gain_ratio" };

    /**
     * Trains a decision tree model for a labelled example set.
     * The example set is cloned first so that the attribute removals
     * performed during tree construction do not affect the caller's set.
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();
        boolean ratioGain = getParameterAsBoolean("gain_ratio");
        // Default classification for empty partitions: the most frequent
        // label of the complete training set.
        int defaultGoal = getMostProbableClassification(exampleSet);
        return makeDecisionTree(clonedExampleSet, ratioGain, defaultGoal);
    }

    /**
     * Creates a decision tree by recursively selecting the most informative
     * attribute and splitting the example set according to its values.
     * Recursion stops when all examples share the same label or no attribute
     * is left; in the latter (possibly inconsistent) case the most probable
     * classification is used, falling back to {@code defaultGoal}.
     */
    Tree makeDecisionTree(ExampleSet exampleSet, boolean ratioGain, int defaultGoal) throws OperatorException {
        int classification = getClassification(exampleSet);
        if (classification != -1) {
            // all examples carry the same label --> leaf node
            return new Tree(exampleSet.getLabel(), classification);
        }
        // find the attribute with maximum information gain (or gain ratio)
        Attribute bestAttribute = Tools.getMostInformativeAttribute(exampleSet, ratioGain);
        if (bestAttribute != null) {
            // the chosen attribute must not be reused further down the tree
            exampleSet.removeAttribute(bestAttribute);
            return createNewDecisionTree((ExampleSet) exampleSet.clone(), bestAttribute, ratioGain, defaultGoal);
        }
        // no attribute left: return the most frequent classification,
        // or the global default if this partition is empty
        classification = getMostProbableClassification(exampleSet);
        if (classification == -1) {
            classification = defaultGoal;
        }
        return new Tree(exampleSet.getLabel(), classification);
    }

    /**
     * Creates a decision tree node using the given attribute as the decision
     * and attaches one recursively built child per attribute value.
     *
     * @throws UserError if the attribute is not nominal (ID3 cannot split
     *         on numerical attributes)
     */
    Tree createNewDecisionTree(ExampleSet exampleSet, Attribute bestAttribute, boolean ratioGain, int defaultGoal) throws OperatorException {
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(bestAttribute.getValueType(), Ontology.NOMINAL)) {
            throw new UserError(this, 103, new Object[] { "ID3", bestAttribute.getName() });
        }
        if (exampleSet.getSize() == 0)
            return new Tree(exampleSet.getLabel(), defaultGoal);
        SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, bestAttribute);
        Tree decisionTree = new Tree(exampleSet.getLabel(), bestAttribute);
        // build one subtree per value of the splitting attribute
        for (int i = 0; i < bestAttribute.getValues().size(); i++) {
            splittedES.selectSingleSubset(i);
            Premise premise = new SimplePremise(bestAttribute, "=", i + Attribute.FIRST_CLASS_INDEX);
            Tree child = makeDecisionTree(splittedES, ratioGain, defaultGoal);
            decisionTree.addChild(premise, child);
        }
        return decisionTree;
    }

    // ================================================================================

    /**
     * Returns the classification shared by all examples, or -1 if the
     * example set is null or empty or the examples disagree on the label.
     */
    int getClassification(ExampleSet exampleSet) {
        if ((exampleSet == null) || (exampleSet.getSize() == 0))
            return -1;
        int label = -1;
        boolean read = false;
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            if (!read) {
                label = currentLabel;
                read = true;
            }
            if (label != currentLabel)
                return -1;
        }
        return label;
    }

    /**
     * Returns the most frequent classification, or -1 if the example set is
     * empty. Fix: the first occurrence of each label is now counted too;
     * previously a freshly created counter was stored without being
     * incremented, leaving every count off by one.
     */
    int getMostProbableClassification(ExampleSet exampleSet) {
        if (exampleSet.getSize() == 0)
            return -1;
        Map labelCounters = new HashMap();
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            Integer key = new Integer(currentLabel);
            LabelCounter labelCounter = (LabelCounter) labelCounters.get(key);
            if (labelCounter == null) {
                labelCounter = new LabelCounter(currentLabel);
                labelCounters.put(key, labelCounter);
            }
            labelCounter.inc();
        }
        return ((LabelCounter) Collections.max(labelCounters.values())).getValue();
    }

    /** Returns the parameter types of the superclass plus the gain_ratio switch. */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean("gain_ratio", "If set to true, the gain ratio criterion is used.", true));
        return types;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -