ID3Learner.java
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *      Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund, Germany
 *      email: yale@ls8.cs.uni-dortmund.de
 *      web: http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator.learner;

import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.SplittedExampleSet;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Tools;
import edu.udo.cs.yale.tools.ParameterService;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.operator.learner.decisiontree.Tree;
import edu.udo.cs.yale.operator.learner.decisiontree.Premise;
import edu.udo.cs.yale.operator.learner.decisiontree.SimplePremise;

import java.util.*;

/**
 *  ID3Learner is an internal (i.e. pure Java) classification learning algorithm based on
 *  Quinlan's ID3. In each step the most promising attribute is determined by calculating
 *  the information gain. The example set is then partitioned according to the values of
 *  this attribute, and the algorithm is applied recursively to the partitions. The trees
 *  resulting from the recursive calls are attached as children together with their
 *  respective attribute values. Recursion stops when all examples of a subset have the
 *  same label or the subset becomes empty.
 *
 *  @yale.xmlclass ID3Learner
 *  @author Ingo
 *  @version $Id: ID3Learner.java,v 2.5 2003/06/18 17:11:18 fischer Exp $
 */
public class ID3Learner extends Learner {

    /** Helper class for counting label occurrences. */
    private class LabelCounter implements Comparable {

        int label;
        int count;

        LabelCounter(int label) {
            this.label = label;
            count = 0;
        }

        void inc() {
            count++;
        }

        int getCount() {
            return count;
        }

        int getValue() {
            return label;
        }

        public int compareTo(Object o) {
            return this.count - ((LabelCounter) o).count;
        }
    }

    static final String[] PARAMETER = { "gain_ratio" };

    /** Trains a model for a labelled example set. */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        LogService.logMessage("ID3 learner '" + getName() + "': starts learning.",
                              LogService.TASK);
        ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();

        // init
        boolean ratioGain = getParameterAsBoolean("gain_ratio");
        int defaultGoal = getMostProbableClassification(exampleSet);

        // create the decision tree
        Tree decisionTree = makeDecisionTree(clonedExampleSet, ratioGain, defaultGoal);
        LogService.logMessage("ID3 learner '" + getName()
                              + "': ID3 has successfully learned a decision tree.",
                              LogService.TASK);
        return decisionTree;
    }

    /** Creates a new decision tree by selecting the most informative attribute and
     *  splitting the example set according to the attribute values. This process is
     *  repeated recursively until no examples are left or all examples have the same
     *  classification. In case of inconsistency the most probable classification is
     *  selected. */
    Tree makeDecisionTree(ExampleSet exampleSet, boolean ratioGain, int defaultGoal)
            throws OperatorException {
        int classification = getClassification(exampleSet);
        if (classification != -1) {
            // all examples have the same label
            return new Tree(exampleSet.getLabel(), classification);
        } else {
            // labels differ: find the attribute with maximum information gain
            Attribute bestAttribute = Tools.getMostInformativeAttribute(exampleSet, ratioGain);
            if (bestAttribute != null) {
                // if such an attribute remains, continue splitting ...
                exampleSet.removeAttribute(bestAttribute);
                return createNewDecisionTree((ExampleSet) exampleSet.clone(),
                                             bestAttribute, ratioGain, defaultGoal);
            } else {
                // ... otherwise return the most probable (i.e. most frequent) classification
                classification = getMostProbableClassification(exampleSet);
                if (classification == -1) {
                    classification = defaultGoal;
                }
                return new Tree(exampleSet.getLabel(), classification);
            }
        }
    }

    /** Creates a decision tree using the given attribute as the first decision. */
    Tree createNewDecisionTree(ExampleSet exampleSet, Attribute bestAttribute,
                               boolean ratioGain, int defaultGoal) throws OperatorException {
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(bestAttribute.getValueType(), Ontology.NOMINAL)) {
            throw new UserError(this, 103, new Object[] { "ID3", bestAttribute.getName() });
        }
        if (exampleSet.getSize() == 0)
            return new Tree(exampleSet.getLabel(), defaultGoal);

        SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, bestAttribute);

        // make new decision tree
        Tree decisionTree = new Tree(bestAttribute);

        // recurse for every attribute value
        for (int i = 0; i < bestAttribute.getNumberOfClasses(); i++) {
            splittedES.selectSingleSubset(i);
            Premise premise = new SimplePremise(bestAttribute, "=", i + Attribute.FIRST_CLASS_INDEX);
            Tree child = makeDecisionTree(splittedES, ratioGain, defaultGoal);
            decisionTree.addChild(premise, child);
        }
        return decisionTree;
    }

    // ================================================================================

    /** Returns the classification if it is equal for all examples, or -1 otherwise. */
    int getClassification(ExampleSet exampleSet) {
        if ((exampleSet == null) || (exampleSet.getSize() == 0))
            return -1;
        int label = -1;
        boolean read = false;
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            if (!read) {
                label = currentLabel;
                read = true;
            }
            if (label != currentLabel)
                return -1;
        }
        return label;
    }

    /** Returns the most probable classification or -1 if exampleSet is empty. */
    int getMostProbableClassification(ExampleSet exampleSet) {
        if (exampleSet.getSize() == 0)
            return -1;
        Map labelCounters = new HashMap();
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            LabelCounter labelCounter = (LabelCounter) labelCounters.get(new Integer(currentLabel));
            if (labelCounter == null) {
                labelCounter = new LabelCounter(currentLabel);
                labelCounters.put(new Integer(currentLabel), labelCounter);
            }
            labelCounter.inc(); // count every occurrence, including the first one
        }
        return ((LabelCounter) Collections.max(labelCounters.values())).getValue();
    }

    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean("gain_ratio",
                "If set to true, the gain ratio criterion is used.", true));
        return types;
    }
}
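The split criterion itself lives in Tools.getMostInformativeAttribute and is not part of this file. As a rough orientation, the following self-contained sketch shows the standard ID3/C4.5 criteria that method is presumably built on: entropy, information gain, and (when gain_ratio is set) the gain ratio. All class and method names in the sketch are illustrative and are not YALE API.

import java.util.HashMap;
import java.util.Map;

public class InformationGainSketch {

    /** Entropy H = -sum_c p_c * log2(p_c) over the class distribution of the labels. */
    static double entropy(int[] labels) {
        Map<Integer, Integer> counts = new HashMap<>();
        for (int label : labels)
            counts.merge(label, 1, Integer::sum);
        double h = 0.0;
        for (int count : counts.values()) {
            double p = (double) count / labels.length;
            h -= p * (Math.log(p) / Math.log(2));
        }
        return h;
    }

    /**
     * Information gain of a nominal attribute: the entropy of the whole set minus the
     * size-weighted entropies of the partitions induced by the attribute values.
     * values[i] is the attribute value of example i, labels[i] its class.
     */
    static double informationGain(int[] values, int[] labels) {
        double gain = entropy(labels);
        Map<Integer, Integer> valueCounts = new HashMap<>();
        for (int value : values)
            valueCounts.merge(value, 1, Integer::sum);
        for (Map.Entry<Integer, Integer> e : valueCounts.entrySet()) {
            // collect the labels of the partition with this attribute value
            int[] subset = new int[e.getValue()];
            int k = 0;
            for (int i = 0; i < values.length; i++)
                if (values[i] == e.getKey())
                    subset[k++] = labels[i];
            gain -= ((double) subset.length / labels.length) * entropy(subset);
        }
        return gain;
    }

    /** Gain ratio: information gain normalized by the entropy of the split itself. */
    static double gainRatio(int[] values, int[] labels) {
        double splitInfo = entropy(values); // entropy of the attribute-value distribution
        return splitInfo == 0.0 ? 0.0 : informationGain(values, labels) / splitInfo;
    }

    public static void main(String[] args) {
        // Toy data: a binary attribute that separates two binary classes perfectly,
        // so the gain equals the full label entropy (1 bit).
        int[] values = { 0, 0, 1, 1 };
        int[] labels = { 0, 0, 1, 1 };
        System.out.println(informationGain(values, labels)); // 1.0
        System.out.println(gainRatio(values, labels));       // 1.0
    }
}

Dividing by the split entropy penalizes attributes with many distinct values, which plain information gain systematically favors; this bias correction is presumably why the operator exposes gain_ratio as a parameter and defaults it to true.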