📄 id3learner.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.operator.learner.decisiontree;
import edu.udo.cs.yale.operator.learner.AbstractLearner;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.SplittedExampleSet;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Tools;
import edu.udo.cs.yale.tools.ParameterService;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import java.util.*;
/** ID3Learner is an internal (i.e. pure Java) classification machine learning algorithm based on the
* ID3 algorithm by Quinlan. In each step the most promising attribute is determined by calculating
* the information gain. Then the example set is partitioned according to the values of this attribute
* and the algorithm is applied recursively on the partitions. The trees resulting from the recursive
* calls are attached as children together with their respective attribute values. Recursion stops
* when all examples of a subset have the same label or the subset becomes empty.
*
* @yale.xmlclass ID3Learner
* @author Ingo
* @version $Id: ID3Learner.java,v 2.3 2004/08/27 11:57:38 ingomierswa Exp $
*/
public class ID3Learner extends AbstractLearner {

    /**
     * Helper class counting the occurrences of a single label value.
     * Natural ordering is by count so that {@link Collections#max} on a
     * collection of counters yields the counter of the most frequent label.
     * Static: it never touches the enclosing learner instance.
     */
    private static class LabelCounter implements Comparable {

        private int label;
        private int count;

        LabelCounter(int label) {
            this.label = label;
            this.count = 0;
        }

        /** Increments the occurrence count by one. */
        void inc() {
            count++;
        }

        int getCount() {
            return count;
        }

        int getValue() {
            return label;
        }

        /**
         * Orders counters by count. Written with explicit comparisons
         * instead of {@code this.count - other.count} to avoid integer
         * overflow for very large counts.
         */
        public int compareTo(Object o) {
            int otherCount = ((LabelCounter) o).count;
            if (count < otherCount)
                return -1;
            if (count > otherCount)
                return 1;
            return 0;
        }
    }

    /** The names of the parameters known by this operator. */
    static final String[] PARAMETER = { "gain_ratio" };

    /**
     * Trains a decision tree model for a labelled example set.
     * The example set is cloned first so that the attribute removals
     * performed during tree construction do not affect the caller's set.
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();
        boolean ratioGain = getParameterAsBoolean("gain_ratio");
        // Default classification for empty partitions: the most frequent
        // label of the complete training set.
        int defaultGoal = getMostProbableClassification(exampleSet);
        return makeDecisionTree(clonedExampleSet, ratioGain, defaultGoal);
    }

    /**
     * Creates a decision tree by recursively selecting the most informative
     * attribute and splitting the example set according to its values.
     * Recursion stops when all examples share the same label or no attribute
     * is left; in the latter (possibly inconsistent) case the most probable
     * classification is used, falling back to {@code defaultGoal}.
     */
    Tree makeDecisionTree(ExampleSet exampleSet, boolean ratioGain, int defaultGoal) throws OperatorException {
        int classification = getClassification(exampleSet);
        if (classification != -1) {
            // all examples carry the same label --> leaf node
            return new Tree(exampleSet.getLabel(), classification);
        }
        // find the attribute with maximum information gain (or gain ratio)
        Attribute bestAttribute = Tools.getMostInformativeAttribute(exampleSet, ratioGain);
        if (bestAttribute != null) {
            // the chosen attribute must not be reused further down the tree
            exampleSet.removeAttribute(bestAttribute);
            return createNewDecisionTree((ExampleSet) exampleSet.clone(), bestAttribute, ratioGain, defaultGoal);
        }
        // no attribute left: return the most frequent classification,
        // or the global default if this partition is empty
        classification = getMostProbableClassification(exampleSet);
        if (classification == -1) {
            classification = defaultGoal;
        }
        return new Tree(exampleSet.getLabel(), classification);
    }

    /**
     * Creates a decision tree node using the given attribute as the decision
     * and attaches one recursively built child per attribute value.
     *
     * @throws UserError if the attribute is not nominal (ID3 cannot split
     *         on numerical attributes)
     */
    Tree createNewDecisionTree(ExampleSet exampleSet, Attribute bestAttribute, boolean ratioGain, int defaultGoal) throws OperatorException {
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(bestAttribute.getValueType(), Ontology.NOMINAL)) {
            throw new UserError(this, 103, new Object[] { "ID3", bestAttribute.getName() });
        }
        if (exampleSet.getSize() == 0)
            return new Tree(exampleSet.getLabel(), defaultGoal);
        SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, bestAttribute);
        Tree decisionTree = new Tree(exampleSet.getLabel(), bestAttribute);
        // build one subtree per value of the splitting attribute
        for (int i = 0; i < bestAttribute.getValues().size(); i++) {
            splittedES.selectSingleSubset(i);
            Premise premise = new SimplePremise(bestAttribute, "=", i + Attribute.FIRST_CLASS_INDEX);
            Tree child = makeDecisionTree(splittedES, ratioGain, defaultGoal);
            decisionTree.addChild(premise, child);
        }
        return decisionTree;
    }

    // ================================================================================

    /**
     * Returns the classification shared by all examples, or -1 if the
     * example set is null or empty or the examples disagree on the label.
     */
    int getClassification(ExampleSet exampleSet) {
        if ((exampleSet == null) || (exampleSet.getSize() == 0))
            return -1;
        int label = -1;
        boolean read = false;
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            if (!read) {
                label = currentLabel;
                read = true;
            }
            if (label != currentLabel)
                return -1;
        }
        return label;
    }

    /**
     * Returns the most frequent classification, or -1 if the example set is
     * empty. Fix: the first occurrence of each label is now counted too;
     * previously a freshly created counter was stored without being
     * incremented, leaving every count off by one.
     */
    int getMostProbableClassification(ExampleSet exampleSet) {
        if (exampleSet.getSize() == 0)
            return -1;
        Map labelCounters = new HashMap();
        ExampleReader i = exampleSet.getExampleReader();
        while (i.hasNext()) {
            int currentLabel = (int) i.next().getLabel();
            Integer key = new Integer(currentLabel);
            LabelCounter labelCounter = (LabelCounter) labelCounters.get(key);
            if (labelCounter == null) {
                labelCounter = new LabelCounter(currentLabel);
                labelCounters.put(key, labelCounter);
            }
            labelCounter.inc();
        }
        return ((LabelCounter) Collections.max(labelCounters.values())).getValue();
    }

    /** Returns the parameter types of the superclass plus the gain_ratio switch. */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean("gain_ratio", "If set to true, the gain ratio criterion is used.", true));
        return types;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -