// DT.java
import java.util.Vector;
// A DT is one node in a decision tree.
// A DT is one node in a decision tree.  A leaf node carries a
// classification; an interior node branches on the values of one
// attribute, with one subtree per observed value.
public class DT
{
    private static String indent = " "; // one unit of indentation for printOut()

    // Most common class in the entire training set, recorded once at
    // the root.  classify() falls back on it when it meets an attribute
    // value never seen during training; the "*" prefix marks the answer
    // as a fallback in the output.
    private static String overallMostCommonClass = "UNASSIGNED";

    private int attribute; // index into DecisionTree.attributeName[]

    // If classification is non-empty, this DT is a leaf node and
    // classification holds the class of the examples that reach it.
    // If classification is the empty string, this DT is an interior node.
    private String classification = "";
    private int numberWithThisClassification;
    private int numberWithOtherClassifications;

    // Parallel arrays with one entry per child of this node.
    // Both are null for leaf nodes.
    private String[] value;  // attribute value that leads to subtree[i]
    private DT[] subtree;

    /**
     * Builds a decision tree from the full training set (root
     * constructor) and reports how many records were used.
     */
    public DT(TrainingData data)
    {
        this(data, 0); // invoke the recursive constructor
        System.out.println("Built decision tree based on " +
            data.numRecords() + " records.");
    }

    /**
     * Recursive constructor; depth counts levels below the root.
     * The logic follows the decision-tree-learning function in
     * Figure 18.5 (page 658) of the textbook.
     */
    public DT(TrainingData data, int depth)
    {
        // At the root we can look at all the training data, find the
        // most frequent class, and save it for use when no better
        // choice is available during classification.
        if (depth == 0)
        {
            TrainingData.MostFrequentClassInfo freqInfo =
                data.getMostFrequentClass();
            overallMostCommonClass = "*" + freqInfo.name;
        }

        // Depth cutoff: stop growing the tree and label this node
        // with the local majority class.  maxDepth == -1 means no limit.
        if (DecisionTree.maxDepth != -1 && depth >= DecisionTree.maxDepth)
        {
            becomeMajorityLeaf(data);
            return;
        }

        AttributeChooser ac = new AttributeChooser(data);
        if (ac.dataIsEmpty())
        {
            // BUG FIX: this node used to be left half-built here (empty
            // classification but null subtrees), which made classify()
            // throw a NullPointerException.  Instead, make it a leaf
            // that answers with the overall majority class.
            System.out.println("Data is empty");
            classification = overallMostCommonClass;
            numberWithThisClassification = 0;
            numberWithOtherClassifications = 0;
        }
        else if (ac.allExamplesHaveTheSameClassification())
        {
            // Pure node: every remaining example agrees, so make a leaf.
            classification = ac.thatClassification();
            numberWithThisClassification = data.numRecords();
            numberWithOtherClassifications = 0;
        }
        else
        {
            attribute = ac.chooseAttribute();
            if (attribute == -1)
            {
                // No attribute left to split on, but the classes still
                // disagree: fall back to the local majority class.
                becomeMajorityLeaf(data);
            }
            else
            {
                // Split: build one child subtree per observed value of
                // the chosen attribute.
                value = ac.getValues();
                subtree = new DT[value.length];
                for (int i = 0; i < value.length; i++)
                {
                    subtree[i] =
                        new DT(data.selectRecords(attribute, value[i]),
                               depth + 1);
                }
            }
        }
    }

    // Turns this node into a leaf labeled with the most frequent class
    // among the given records (shared by the depth-cutoff and
    // no-attribute-left cases).
    private void becomeMajorityLeaf(TrainingData data)
    {
        TrainingData.MostFrequentClassInfo freqInfo =
            data.getMostFrequentClass();
        classification = freqInfo.name;
        numberWithThisClassification = freqInfo.recordCountInClass;
        numberWithOtherClassifications = freqInfo.recordCountNotInClass;
    }

    // A node is a leaf exactly when it carries a classification.
    private boolean isLeaf()
    {
        return classification.length() > 0;
    }

    /**
     * Classifies a single example, given as an array of attribute
     * values indexed the same way as the training data.
     *
     * @param example one attribute value per attribute index
     * @return the predicted class; prefixed with "*" when the tree had
     *         to fall back on the overall majority class
     */
    public String classify(String[] example)
    {
        if (isLeaf())
            return classification;

        // Find which branch matches this example's value for the
        // attribute this node splits on.
        String attrValue = example[attribute];
        for (int i = 0; i < subtree.length; i++)
        {
            if (value[i].equals(attrValue))
                return subtree[i].classify(example);
        }

        // The example has an attribute value we never saw in the
        // training data; let the overall majority rule.
        return overallMostCommonClass; // it's got "*" at the front
    }

    /** Prints the whole tree, starting at the root. */
    public void printOut()
    {
        printOut(0);
    }

    /**
     * Recursively prints this subtree, indenting one unit per level.
     * The output might look like this:
     * <pre>
     * a1=yes
     *  a2=big
     *   a5=loud class=P (4)
     *   a5=quiet class=N (5)
     *  a2=small class=P (2)
     *  a2=tiny class=N (12)
     * a1=no
     *  a4=hot class=N (8)
     *  a4=cold
     *   a7=red class=N (32)
     *   a7=blue class=P (1)
     *   a7=green class=P (5)
     * </pre>
     */
    public void printOut(int level)
    {
        String indentation = "";
        for (int q = 0; q < level; q++)
            indentation += indent;

        if (value == null)
        {
            // This node itself is a leaf (a single-node tree).
            // BUG FIX: indent by the computed level, not by one fixed
            // unit (the old code printed `indent` instead of
            // `indentation`).
            System.out.println(indentation + "class: " + classification);
            return;
        }

        // Print each branch, and recurse into non-leaf children.
        for (int i = 0; i < value.length; i++)
        {
            System.out.print(indentation +
                DecisionTree.attributeName.elementAt(attribute) + "=" +
                value[i]);
            if (subtree[i].isLeaf())
            {
                // Leaf children print inline: class, then the count of
                // matching records (plus non-matching count, if any).
                System.out.print(indent + "class: " +
                    subtree[i].classification);
                System.out.print(" (" + subtree[i].numberWithThisClassification);
                if (subtree[i].numberWithOtherClassifications == 0)
                    System.out.println(")");
                else
                    System.out.println(", " +
                        subtree[i].numberWithOtherClassifications + ")");
            }
            else
            {
                System.out.println("");
                subtree[i].printOut(level + 1);
            }
        }
    }
}