ID3Learner.java
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *      Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund, Germany
 *      email: yale@ls8.cs.uni-dortmund.de
 *      web: http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator.learner;

import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.SplittedExampleSet;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Tools;
import edu.udo.cs.yale.tools.ParameterService;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.operator.learner.decisiontree.Tree;
import edu.udo.cs.yale.operator.learner.decisiontree.Premise;
import edu.udo.cs.yale.operator.learner.decisiontree.SimplePremise;

import java.util.*;

/**
 *  ID3Learner is an internal (i.e. pure Java) classification learning algorithm based on
 *  Quinlan's ID3. In each step the most promising attribute is determined by calculating
 *  the information gain. The example set is then partitioned according to the values of
 *  this attribute, and the algorithm is applied recursively to the partitions. The trees
 *  resulting from the recursive calls are attached as children together with their
 *  respective attribute values. Recursion stops when all examples of a subset have the
 *  same label or the subset becomes empty.
 *
 *  @yale.xmlclass ID3Learner
 *  @author Ingo
 *  @version $Id: ID3Learner.java,v 2.5 2003/06/18 17:11:18 fischer Exp $
 */
public class ID3Learner extends Learner {

    /** Helper class for counting label occurrences. */
    private class LabelCounter implements Comparable {

        int label;
        int count;

        LabelCounter(int label) {
            this.label = label;
            count = 0;
        }

        void inc() {
            count++;
        }

        int getCount() {
            return count;
        }

        int getValue() {
            return label;
        }

        public int compareTo(Object o) {
            return this.count - ((LabelCounter) o).count;
        }
    }

    static final String[] PARAMETER = { "gain_ratio" };

    /** Trains a model for a labelled example set. */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        LogService.logMessage("ID3 learner '" + getName() + "': starts learning.",
                              LogService.TASK);
        ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();

        // init
        boolean ratioGain = getParameterAsBoolean("gain_ratio");
        int defaultGoal = getMostProbableClassification(exampleSet);

        // create the decision tree
        Tree decisionTree = makeDecisionTree(clonedExampleSet, ratioGain, defaultGoal);
        LogService.logMessage("ID3 learner '" + getName()
                              + "': ID3 has successfully learned a decision tree.",
                              LogService.TASK);
        return decisionTree;
    }

    /** Creates a new decision tree by selecting the most informative attribute and
     *  splitting the example set according to the attribute values. This process is
     *  repeated recursively until no examples are left or all examples have the same
     *  classification. In case of inconsistency the most probable classification is
     *  selected. */
    Tree makeDecisionTree(ExampleSet exampleSet, boolean ratioGain, int defaultGoal)
            throws OperatorException {
        int classification = getClassification(exampleSet);
        if (classification != -1) {
            // all examples have the same label
            return new Tree(exampleSet.getLabel(), classification);
        } else {
            // labels differ: find the attribute with maximum information gain
            Attribute bestAttribute = Tools.getMostInformativeAttribute(exampleSet, ratioGain);
            if (bestAttribute != null) {
                // if such an attribute remains, continue splitting ...
                exampleSet.removeAttribute(bestAttribute);
                return createNewDecisionTree((ExampleSet) exampleSet.clone(),
                                             bestAttribute, ratioGain, defaultGoal);
            } else {
                // ... otherwise return the most probable (i.e. most frequent) classification
                classification = getMostProbableClassification(exampleSet);
                if (classification == -1) {
                    classification = defaultGoal;
                }
                return new Tree(exampleSet.getLabel(), classification);
            }
        }
    }

    /** Creates a decision tree using the given attribute as the first decision. */
    Tree createNewDecisionTree(ExampleSet exampleSet, Attribute bestAttribute,
                               boolean ratioGain, int defaultGoal) throws OperatorException {
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(bestAttribute.getValueType(), Ontology.NOMINAL)) {
            throw new UserError(this, 103, new Object[] { "ID3", bestAttribute.getName() });
        }
        if (exampleSet.getSize() == 0)
            return new Tree(exampleSet.getLabel(), defaultGoal);

        SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, bestAttribute);

        // make new decision tree
        Tree decisionTree = new Tree(bestAttribute);

        // recurse for every attribute value
        for (int i = 0; i < bestAttribute.getNumberOfClasses(); i++) {
            splittedES.selectSingleSubset(i);
            Premise premise = new SimplePremise(bestAttribute, "=", i + Attribute.FIRST_CLASS_INDEX);
            Tree child = makeDecisionTree(splittedES, ratioGain, defaultGoal);
            decisionTree.addChild(premise, child);
        }
        return decisionTree;
    }

    // ================================================================================

    /** Returns the classification if it is equal for all examples, or -1 otherwise. */
    int getClassification(ExampleSet exampleSet) {
        if ((exampleSet == null) || (exampleSet.getSize() == 0))
            return -1;
        int label = -1;
        boolean read = false;
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            if (!read) {
                label = currentLabel;
                read = true;
            }
            if (label != currentLabel)
                return -1;
        }
        return label;
    }

    /** Returns the most probable classification or -1 if exampleSet is empty. */
    int getMostProbableClassification(ExampleSet exampleSet) {
        if (exampleSet.getSize() == 0)
            return -1;
        Map labelCounters = new HashMap();
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            LabelCounter labelCounter = (LabelCounter) labelCounters.get(new Integer(currentLabel));
            if (labelCounter == null) {
                labelCounter = new LabelCounter(currentLabel);
                labelCounters.put(new Integer(currentLabel), labelCounter);
            }
            labelCounter.inc(); // count every occurrence, including the first one
        }
        return ((LabelCounter) Collections.max(labelCounters.values())).getValue();
    }

    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean("gain_ratio",
                "If set to true, the gain ratio criterion is used.", true));
        return types;
    }
}
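The split criterion itself lives in Tools.getMostInformativeAttribute and is not part of this file. As a rough orientation, the following self-contained sketch shows the standard ID3/C4.5 criteria that method is presumably built on: entropy, information gain, and (when gain_ratio is set) the gain ratio. All class and method names in the sketch are illustrative and are not YALE API.

import java.util.HashMap;
import java.util.Map;

public class InformationGainSketch {

    /** Entropy H = -sum_c p_c * log2(p_c) over the class distribution of the labels. */
    static double entropy(int[] labels) {
        Map<Integer, Integer> counts = new HashMap<>();
        for (int label : labels)
            counts.merge(label, 1, Integer::sum);
        double h = 0.0;
        for (int count : counts.values()) {
            double p = (double) count / labels.length;
            h -= p * (Math.log(p) / Math.log(2));
        }
        return h;
    }

    /**
     * Information gain of a nominal attribute: the entropy of the whole set minus the
     * size-weighted entropies of the partitions induced by the attribute values.
     * values[i] is the attribute value of example i, labels[i] its class.
     */
    static double informationGain(int[] values, int[] labels) {
        double gain = entropy(labels);
        Map<Integer, Integer> valueCounts = new HashMap<>();
        for (int value : values)
            valueCounts.merge(value, 1, Integer::sum);
        for (Map.Entry<Integer, Integer> e : valueCounts.entrySet()) {
            // collect the labels of the partition with this attribute value
            int[] subset = new int[e.getValue()];
            int k = 0;
            for (int i = 0; i < values.length; i++)
                if (values[i] == e.getKey())
                    subset[k++] = labels[i];
            gain -= ((double) subset.length / labels.length) * entropy(subset);
        }
        return gain;
    }

    /** Gain ratio: information gain normalized by the entropy of the split itself. */
    static double gainRatio(int[] values, int[] labels) {
        double splitInfo = entropy(values); // entropy of the attribute-value distribution
        return splitInfo == 0.0 ? 0.0 : informationGain(values, labels) / splitInfo;
    }

    public static void main(String[] args) {
        // Toy data: a binary attribute that separates two binary classes perfectly,
        // so the gain equals the full label entropy (1 bit).
        int[] values = { 0, 0, 1, 1 };
        int[] labels = { 0, 0, 1, 1 };
        System.out.println(informationGain(values, labels)); // 1.0
        System.out.println(gainRatio(values, labels));       // 1.0
    }
}

Dividing by the split entropy penalizes attributes with many distinct values, which plain information gain systematically favors; this bias correction is presumably why the operator exposes gain_ratio as a parameter and defaults it to true.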