📄 id3.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.classifiers;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.Vector;
import org.agentacademy.modules.dataminer.classifiers.evaluation.DistributionClassifier;
import org.agentacademy.modules.dataminer.core.FastVector;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Utils;
import org.jdom.DocType;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.XMLOutputter;
import org.apache.log4j.Logger;
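/**
* Class implementing an Id3 decision tree classifier. For more
* information, see R. Quinlan (1986). <i>Induction of decision
* trees</i>. Machine Learning. Vol.1, No.1, pp. 81-106.<p>
*
*/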
public class Id3 extends DistributionClassifier {
/** log4j logger for this class. */
public static Logger log = Logger.getLogger(Id3.class);
// Disabled field: a String form of the PMML document.
// public static String pmmlDocumentString = null;
/**
* The PMML document as a JDOM {@link Document}; starts out as null.
* Static, so a single reference is shared by every Id3 node.
* NOTE(review): presumably filled in by pmmlDocument(int) - confirm.
*/
public static Document pmmlDocument = null;
/**
* The node's successors, one per value of {@link #m_Attribute}, indexed by
* the value's index. Only assigned when this node is an inner node.
*/
private Id3[] m_Successors;
/** Attribute used for splitting; null when this node is a leaf. */
private org.agentacademy.modules.dataminer.core.Attribute m_Attribute;
/**
* Class value if node is leaf: the index of the most frequent class, or the
* "missing" marker value when no training instance reached the node.
*/
private double m_ClassValue;
/**
* Class distribution if node is leaf: normalized class frequencies, or an
* all-zero array when no training instance reached the node.
*/
private double[] m_Distribution;
/**
* Class attribute of dataset. Only assigned on leaves that were reached by
* at least one training instance.
*/
private org.agentacademy.modules.dataminer.core.Attribute m_ClassAttribute;
/**
* A temp Element node for traversing.
* NOTE(review): not used in the visible part of the file - presumably used
* while building the PMML tree; confirm.
*/
protected Element currentNodeElement;
/**
* The train Instances: the working copy of the data handed to
* buildClassifier, after instances with a missing class were deleted.
*/
protected Instances m_train;
// Disabled helper elements, kept for reference:
// protected Element nextElement;
// protected Element simplePredicateElement;
// protected Element leafElement;
// protected Element tempElement;
// Static, so one Vector is shared by all Id3 nodes.
// NOTE(review): contents are not visible here - presumably extracted rules; confirm.
protected static Vector ruleVector = new Vector();
// NOTE(review): presumably a cursor into ruleVector - confirm against its users.
protected int vectorIndex = 0;
/**
* Builds Id3 decision tree classifier.
*
* @param data the training data
* @exception Exception if classifier can't be built successfully
*/
public void buildClassifier(Instances data) throws Exception {
if (!data.classAttribute().isNominal()) {
throw new Exception("Id3: nominal class, please.");
}
Enumeration enumAtt = data.enumerateAttributes();
while (enumAtt.hasMoreElements()) {
org.agentacademy.modules.dataminer.core.Attribute attr = (org.agentacademy.modules.dataminer.core.Attribute) enumAtt.nextElement();
if (!attr.isNominal()) {
throw new Exception("Id3: only nominal attributes, please.");
}
Enumeration enum = data.enumerateInstances();
while (enum.hasMoreElements()) {
if (((Instance) enum.nextElement()).isMissing(attr)) {
throw new Exception("Id3: no missing values, please.");
}
}
}
data = new Instances(data);
data.deleteWithMissingClass();
m_train = data;
makeTree(data);
}
/**
 * Grows the Id3 (sub)tree rooted at this node.
 *
 * <p>With no instances the node becomes a leaf with a missing class value and
 * an all-zero distribution. Otherwise the attribute with the largest
 * information gain is selected; if that gain equals zero the node becomes a
 * leaf carrying the normalized class distribution, else one successor per
 * attribute value is created and built from the matching data subset.
 *
 * @param data the training data that reached this node
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Nothing reached this node: leaf without a class.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Score every candidate attribute and keep the best one.
    double[] gainByAttribute = new double[data.numAttributes()];
    for (Enumeration candidates = data.enumerateAttributes(); candidates.hasMoreElements();) {
        org.agentacademy.modules.dataminer.core.Attribute candidate =
                (org.agentacademy.modules.dataminer.core.Attribute) candidates.nextElement();
        gainByAttribute[candidate.index()] = computeInfoGain(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainByAttribute));

    // A positive gain means the split is useful: create one child per value.
    if (!Utils.eq(gainByAttribute[m_Attribute.index()], 0)) {
        Instances[] subsets = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
            Id3 child = new Id3();
            m_Successors[branch] = child;
            child.buildClassifier(subsets[branch]);
        }
        return;
    }

    // Zero gain: turn this node into a leaf holding the class frequencies.
    m_Attribute = null;
    m_Distribution = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        m_Distribution[(int) row.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();
}
/**
 * Classifies a given test instance using the decision tree.
 *
 * <p>Walks down from this node, at each inner node following the successor
 * whose index is the instance's value for the split attribute, and returns
 * the class value stored in the leaf that is reached.
 *
 * @param instance the instance to be classified
 * @return the classification
 */
public double classifyInstance(Instance instance) {
    Id3 node = this;
    // A null split attribute marks a leaf.
    while (node.m_Attribute != null) {
        int branch = (int) instance.value(node.m_Attribute);
        node = node.m_Successors[branch];
    }
    return node.m_ClassValue;
}
/**
 * Computes class distribution for instance using decision tree.
 *
 * <p>A leaf returns its stored distribution array itself (not a copy); an
 * inner node delegates to the successor selected by the instance's value for
 * the split attribute.
 *
 * @param instance the instance for which distribution is to be computed
 * @return the class distribution for the given instance
 */
public double[] distributionForInstance(Instance instance) {
    if (m_Attribute != null) {
        int branch = (int) instance.value(m_Attribute);
        Id3 successor = m_Successors[branch];
        return successor.distributionForInstance(instance);
    }
    return m_Distribution;
}
/**
 * Returns a textual description of the classifier, produced by the private
 * toString(int) method below.
 *
 * @return a fixed notice when neither a distribution nor successors exist
 *         yet, otherwise the header "Id3" followed by the rendered tree
 */
public String toString() {
    boolean modelBuilt = (m_Distribution != null) || (m_Successors != null);
    return modelBuilt
            ? "Id3\n\n" + toString(0)
            : "Id3: No model built yet.";
}
/**
 * Computes information gain for an attribute.
 *
 * <p>The gain is the entropy of the whole data set minus, for every
 * non-empty subset produced by splitting on the attribute, that subset's
 * entropy weighted by its share of the instances.
 *
 * @param data the data for which info gain is to be computed
 * @param att the attribute
 * @return the information gain for the given attribute and data
 */
private double computeInfoGain(Instances data, org.agentacademy.modules.dataminer.core.Attribute att)
        throws Exception {
    double gain = computeEntropy(data);
    Instances[] partitions = splitData(data, att);
    for (int value = 0; value < att.numValues(); value++) {
        Instances partition = partitions[value];
        // Empty partitions contribute nothing and have no defined entropy.
        if (partition.numInstances() <= 0) {
            continue;
        }
        double weight = (double) partition.numInstances() / (double) data.numInstances();
        gain -= weight * computeEntropy(partition);
    }
    return gain;
}
/**
 * Computes the entropy of a dataset.
 *
 * <p>With N instances and per-class counts c_j the value returned is
 * log2(N) - (sum over classes with c_j &gt; 0 of c_j * log2(c_j)) / N.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 */
private double computeEntropy(Instances data) throws Exception {
    // Tally how many instances fall into each class.
    double[] tally = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        tally[(int) row.classValue()]++;
    }

    double result = 0;
    for (int cls = 0; cls < data.numClasses(); cls++) {
        double count = tally[cls];
        // Classes without instances are skipped (log2 of zero is undefined).
        if (count > 0) {
            result -= count * Utils.log2(count);
        }
    }
    result /= (double) data.numInstances();
    return result + Utils.log2(data.numInstances());
}
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * <p>One result set is created per attribute value; every instance is added
 * to the set whose index equals the instance's value for the attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split, indexed by value
 */
private Instances[] splitData(Instances data, org.agentacademy.modules.dataminer.core.Attribute att) {
    Instances[] buckets = new Instances[att.numValues()];
    for (int value = 0; value < att.numValues(); value++) {
        // NOTE(review): presumably an empty set modelled on data with room
        // for all of its instances - confirm against the Instances constructor.
        buckets[value] = new Instances(data, data.numInstances());
    }
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        int target = (int) row.value(att);
        buckets[target].add(row);
    }
    return buckets;
}
/**
 * Outputs a tree at a certain level.
 *
 * <p>For an inner node the PMML document is (re)generated first via
 * {@code pmmlDocument(0)}, as before. The textual rendering itself is
 * delegated entirely to {@code dumpTree}, which also handles leaves; the
 * previous extra leaf branch here made a leaf root appear twice in the
 * output (": X: X").
 *
 * @param level the level at which the tree is to be printed
 * @return the rendered tree, or a fixed error message when rendering failed
 */
private String toString(int level) {
    try {
        StringBuffer text = new StringBuffer();
        // Only inner nodes trigger PMML generation; braces make the scope of
        // this condition explicit.
        if (m_Attribute != null) {
            pmmlDocument(0);
        }
        dumpTree(level, text);
        return text.toString();
    } catch (Exception e) {
        // Report through the class logger instead of printing to stderr.
        log.error("Cannot print classification tree", e);
        return "Cannot print classification tree";
    }
}
/**
 * Help method for printing the tree structure.
 *
 * <p>A leaf appends ": " followed by its class label ("null" when the class
 * value is missing). An inner node appends, for every value of its split
 * attribute, a new line made of {@code level} copies of "| ", then
 * "name = value", and then the output of the matching successor one level
 * deeper.
 *
 * @param level the nesting depth used for indentation
 * @param text the buffer the description is appended to
 * @exception Exception if a node cannot be rendered
 */
private void dumpTree (int level, StringBuffer text) throws Exception {
    // Leaf: print the class label and stop.
    if (m_Attribute == null) {
        text.append(": " + (Instance.isMissingValue(m_ClassValue)
                ? "null"
                : m_ClassAttribute.value((int) m_ClassValue)));
        return;
    }

    // The indentation is identical for every branch of this node.
    StringBuffer indent = new StringBuffer();
    for (int depth = 0; depth < level; depth++) {
        indent.append("| ");
    }
    String prefix = indent.toString();

    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
        text.append("\n");
        text.append(prefix);
        text.append(m_Attribute.name() + " = " + m_Attribute.value(branch));
        m_Successors[branch].dumpTree(level + 1, text);
    }
}
/**
* Main method.
*
* @param args the options for the classifier
*/
/*
public static void main(String[] args) {
try {
System.out.println(Evaluation.evaluateModel(new Id3(), args));
} catch (Exception e) {
System.err.println(e.getMessage());
}
}
*/
/**
* Creates the PMML Document
* @param depth that specified the depth of the decision tree we are currently traversing
* @exception e if the PMML document is not correctly constructed
*/
private void pmmlDocument (int depth) {
try{
// OutputStream fout = new FileOutputStream ("edw_to_onoma.xml");
// OutputStream bout = new BufferedOutputStream (fout);
Element versionElement = pmmlIntro();
Element headerElement = header ();
Element dataDictionaryElement = dataDictionary () ;
Element treeModelElement = treeModel();
Element miningSchemaElement = miningSchema ();
versionElement.addContent(headerElement);
versionElement.addContent(dataDictionaryElement);
versionElement.addContent(treeModelElement);
treeModelElement.addContent(miningSchemaElement);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -