📄 decisiontree.java
字号:
package id3;
import java.io.*;
import shared.*;
import shared.Error;
/** DecisionTrees are RootedCatGraphs where each node other than the root has
* exactly one parent. The root has no parents.
* @author James Louis 5/29/2001 Ported to Java.
* @author Eric Eros 4/18/96 Added delete_subtree
* @author Ronny Kohavi 4/16/96 Added treeviz display
* @author Richard Long 9/02/93 Initial revision (.c,.h)
*/
public class DecisionTree extends RootedCatGraph {
/** Indicates if this DecisionTree is sparsely populated.
* Package-private and initialised to {@code false}.
* NOTE(review): nothing in the visible part of this class reads or assigns
* this field; the constructors pass a literal {@code false} to the
* superclass instead of using it -- confirm whether it is still needed.
*/
boolean isGraphSparse = false;
/** Constructor.
* Delegates to the single-argument superclass constructor, passing
* {@code false}.
* NOTE(review): the argument is presumably the superclass' sparse-graph
* flag -- confirm against RootedCatGraph.
*/
public DecisionTree() {
super(false);
}
/** Constructor.
* Delegates to the two-argument superclass constructor, passing the given
* graph and {@code false}.
* NOTE(review): the boolean is presumably the superclass' sparse-graph
* flag -- confirm against RootedCatGraph.
* @param grph The CGraph object to be used to maintain the DecisionTree.
*/
public DecisionTree(CGraph grph) {
super(grph, false);
}
/** Distribute instances to a subtree without preserving the original
* distribution. This function is used whenever we replace a node with its
* child. The distributions of the child include only the instances there
* while if we replace, we must update all the counts. This function is also
* the backfitting function for decision trees. <P>
* Convenience overload: forwards every argument unchanged to the
* eight-argument version with {@code saveOriginalDistr} set to
* {@code false}.
* @param subtree The subtree over which Instances will be distributed.
* @param il InstanceList to be distributed over the DecisionTree.
* @param pruningFactor The amount of pruning to be done on this tree.
* @param pessimisticErrors Number of errors estimated for the new distribution.
* @param ldType Leaf Distribution Type.
* @param leafDistParameter Parameter of the leaf distribution, passed through unchanged.
* @param parentWeightDist The weight distribution of the parent node.
*/
public void distribute_instances(Node subtree,
        InstanceList il,
        double pruningFactor,
        DoubleRef pessimisticErrors,
        int ldType, // TDDTInducer.LeafDistType
        double leafDistParameter,
        double[] parentWeightDist) {
    // This overload never keeps the original distribution.
    final boolean keepOriginalDistribution = false;
    distribute_instances(subtree, il, pruningFactor, pessimisticErrors,
            ldType, leafDistParameter, parentWeightDist,
            keepOriginalDistribution);
}
/** Distribute instances to a subtree. This function is used whenever we
* replace a node with its child. The distributions of the child include
* only the instances there while if we replace, we must update all the
* counts. This function is also the backfitting function for decision trees.
* <P>
* The work itself is done by the categorizer stored in the NodeInfo of
* {@code subtree}: it is looked up in the underlying graph, a level-3 log
* message is emitted, and the categorizer's own
* {@code distribute_instances} is invoked with the remaining arguments.
* @param subtree The subtree over which Instances will be distributed.
* @param il InstanceList to be distributed over the DecisionTree.
* @param pruningFactor The amount of pruning to be done on this tree.
* @param pessimisticErrors Number of errors estimated for the new distribution.
* @param ldType Leaf Distribution Type.
* @param leafDistParameter Parameter of the leaf distribution, passed through unchanged.
* @param parentWeightDist The weight distribution of the parent node.
* @param saveOriginalDistr TRUE if the original instance distribution should be preserved, FALSE otherwise.
*/
public void distribute_instances(Node subtree,
        InstanceList il,
        double pruningFactor,
        DoubleRef pessimisticErrors,
        int ldType, // TDDTInducer.LeafDistType
        double leafDistParameter,
        double[] parentWeightDist,
        boolean saveOriginalDistr) {
    // DBGSLOW(check_node_in_graph(subtree, TRUE));

    // Fetch the categorizer attached to the root of the subtree.
    NodeInfo subtreeInfo = (NodeInfo) cGraph.inf(subtree);
    NodeCategorizer rootCategorizer = subtreeInfo.get_categorizer();

    // Build the trace message first, then hand it to the logger.
    String traceMessage = "Distributing instances: " + il + '\n'
            + "categorizer is " + rootCategorizer.description() + '\n';
    logOptions.LOG(3, traceMessage);

    // The categorizer performs the actual (recursive) distribution.
    rootCategorizer.distribute_instances(il,
            pruningFactor,
            pessimisticErrors,
            ldType,
            leafDistParameter,
            parentWeightDist,
            saveOriginalDistr);
}
/** Removes a subtree recursively. This is used to <BR>
* a) Remove a node and all nodes below it if the second parameter is NULL,<BR>
* b) Remove just the nodes under a particular node, if both parameters are
* the same (the named node remains in the graph). <BR>
* c) Replace the subtree rooted at the first parameter with the subtree
* rooted at the second parameter, if the two parameters are not equal, and
* are non-null. <P>
* We allow replacing node X with a child of node X (or a node related
* through common ancestors) or, in general, replacing a subtree with
* another subtree. In both cases, we disconnect the parents of the new
* node from the new node. <P>
* We do not allow replacing node X with an ancestor (parent, etc.) of
* node X, as this would make no sense. Note that the ancestor check is
* currently commented out in the body, so this restriction is not enforced
* here; only "new node is the root" is rejected. <P>
* The method is as follows: <P>
* 1) If 'node' is to be deleted, delete the edges connecting it to its
* parents. <BR>
* 2) If 'node' is to be replaced by 'newNode', delete the edges connecting
* 'newNode' to its parents. <BR>
* 3) Delete the edges from 'node' to all its children. <BR>
* 4) If 'node' is to be deleted, since it's now completely disconnected,
* delete it. <BR>
* 5) If 'node' is to be replaced by 'newNode', <BR>
* 5a) Connect all of 'newNode's children to 'node' (adding edges), <BR>
* 5b) Delete all the edges from 'newNode' to its children. <BR>
* 5c) Since 'newNode' is now completely disconnected, delete it. <BR>
* 6) For all the children discovered in step 3, recurse to delete them.
* <P>
* The numeric prefixes in the level-5 log messages (" 1.", " 2.", " 3.",
* " 4.", " 7.", " 8.", " 9.") are labels of their own and do not correspond
* to the step numbers above.
* @param node Node to be replaced. Must not be null; a fatal error is
* reported otherwise.
* @param newNode New Node to be used for replacement. Null deletes 'node'
* together with everything below it; passing 'node' itself keeps 'node' and
* deletes only its descendants. Unless it is 'node' itself, it must not be
* the root of this tree (fatal error otherwise).
*/
public void delete_subtree(Node node, Node newNode) {
if (node == null)
Error.fatalErr("DecisionTree::delete_subtree: node is NULL");
// Delete a subtree, given the starting node. The second parameter
// NULL means the top-most node is deleted--it is set NULL for all
// recursive calls from here, so that all children are deleted. If the
// second parameter is non-NULL, it needs to point to a node in the
// same cGraph as the first.
// Exactly one of the three flags below is true; they select cases
// a), b) and c) of the method description respectively:
// deleteNode -> a), replaceWithSelf -> b), replaceWithOther -> c).
boolean deleteNode = (newNode == null);
boolean replaceWithSelf = (node == newNode);
boolean replaceWithOther = !deleteNode && !replaceWithSelf;
// We can extend this routine to support the new node being the root,
// but it seems very strange to do so, since we usually delete the
// newNode.
// One would need to set the new node to be the root. For safety
// it's better to abort in this case, which can't happen right now.
// Note that replacing the root with a child is OK and the root
// will be the new node because it's the categorizer that's replaced,
// and the root reference remains valid.
// NOTE(review): in the delete case newNode is null, so this compares
// null against get_root(). If get_root() can return null for a tree
// without a root, a plain delete would trip this fatal error -- confirm
// against RootedCatGraph.get_root().
if (!replaceWithSelf && newNode == get_root())
Error.fatalErr("DecisionTree::delete_subtree: new node cannot be root");
if (deleteNode)
logOptions.LOG(5, " 1. Deleting the node " + node + '\n');
else
logOptions.LOG(5, " 2. Removing the subtree from node " + node + '\n');
// Same condition as replaceWithOther.
if (!deleteNode && !replaceWithSelf)
logOptions.LOG(5, " 3. Replacing it with the node " + newNode + '\n');
// Ensure specified node(s) in graph (check_node_in_graph(node, TRUE)
// aborts when node isn't in graph.)
// The check for 'node' itself is disabled; only 'newNode' is checked.
// DBGSLOW(check_node_in_graph(node, TRUE));
if (replaceWithOther) {
check_node_in_graph(newNode, true);
// 'node' is to be replaced with 'newNode'. This is only legal when
// 'newNode' is NOT an ancestor of 'node'.
// The following function is only called once, as newNode is NULL in
// all recursive calls.
// (Disabled debug check carried over from the C++ version; the
// ancestor restriction is therefore not enforced.)
// DBG(if (check_node_reachable(newNode, node))
// err << "DecisionTree::delete_subtree: attempt to replace a "
// "node with its own ancestor" << fatal_error);
}
// Shared cursors for all edge walks below. In every loop the successor
// is fetched from the current edge BEFORE that edge is deleted.
Edge iterEdge;
Edge oldEdge;
if (deleteNode) {
// If 'node' is to be deleted, remove the edges from its parent(s).
iterEdge = node.first_in_edge();
while (iterEdge != null) {
oldEdge = iterEdge;
// Advance first; oldEdge is deleted right after.
iterEdge = oldEdge.in_succ(oldEdge);
// oldEdge.entry() = null;
cGraph.del_edge(oldEdge);
}
MLJ.ASSERT(node.indeg() == 0,"DecisionTree.delete_subtree: node.indeg() != 0");
}
// 'node' is to be replaced with 'newNode'. That means that the
// current incoming edges to 'newNode' are extraneous, and need
// to be removed.
// This happens before the children of 'node' are counted below, so an
// edge from 'node' straight to 'newNode' (newNode being a direct child)
// is removed here and 'newNode' is not collected into children[].
if (replaceWithOther) {
iterEdge = newNode.first_in_edge();
while (iterEdge != null) {
oldEdge = iterEdge;
// Advance first; oldEdge is deleted at the end of the iteration.
iterEdge = oldEdge.in_succ(oldEdge);
Node parentNode = oldEdge.source();
logOptions.LOG(5, " 4. Removing parent " + parentNode + " from " + newNode
+ " (deleting edge " + oldEdge + ")" + '\n');
// cGraph[oldEdge] = null;
cGraph.del_edge(oldEdge);
}
MLJ.ASSERT(newNode.indeg() == 0,"DecisionTree.delete_subtree: newNode.indeg() != 0");
}
// Disconnect 'node' (the old node) from all outgoing edges. Save references
// to the targets of these edges so we can (effectively) follow them
// in the recursion at the end of this method.
int numChildren = node.outdeg();
int currentChild = 0;
MLJ.ASSERT(numChildren >= 0,"DecisionTree.delete_subtree: numChildren < 0");
// Declared before the loop because we use it after the if
// for replaceWithSelf.
Node[] children = new Node[numChildren];
if (numChildren > 0) {
// We're not a leaf, we've got children to delete.
// a) Copy the (references to the) children nodes.
// b) Delete the edges.
iterEdge = node.first_adj_edge();
while (iterEdge != null) {
logOptions.LOG(5, " 7. Disconnecting edge " + iterEdge
+ " from node " + node + " to its child " + '\n'
+ iterEdge.target() + '\n');
oldEdge = iterEdge;
// Advance first; oldEdge is deleted at the end of the iteration.
iterEdge = oldEdge.adj_succ();
// Save the other node attached to this edge.
Node childNode = oldEdge.target();
children[currentChild++] = childNode;
// Delete the connection.
// cGraph[oldEdge] = null;
cGraph.del_edge(oldEdge);
}
}
// Every outgoing edge must have been visited exactly once and removed.
MLJ.ASSERT(currentChild == numChildren,"DecisionTree.delete_subtree: currentChild != numChildren");
MLJ.ASSERT(node.outdeg() == 0,"DecisionTree.delete_subtree: node.outdeg() != 0");
// Delete the node.
// In the replaceWithSelf case neither branch below runs: 'node' stays in
// the graph, now without children.
if (deleteNode) {
logOptions.LOG(5, " 8. Deleting the node " + node + '\n');
MLJ.ASSERT(node.indeg() == 0,"DecisionTree.delete_subtree: node.indeg() != 0");
MLJ.ASSERT(node.outdeg() == 0,"DecisionTree.delete_subtree: node.outdeg() != 0");
// cGraph[node] = null;
cGraph.del_node(node);
}
else if (replaceWithOther) {
// Delete 'newNode' after moving all its children over to 'node',
// and assigning its categorizer to 'node'.
cGraph.assign_categorizer(node, newNode);
iterEdge = newNode.first_adj_edge();
while (iterEdge != null) {
oldEdge = iterEdge;
// Advance first; oldEdge is deleted at the end of the iteration.
iterEdge = oldEdge.adj_succ();
Node childNode = oldEdge.target();
// Re-create the edge label (number and description) for the new
// edge from 'node', rather than reusing the old edge's info object.
AugCategory aug = new
AugCategory(cGraph.edge_info(oldEdge).num(),
cGraph.edge_info(oldEdge).description());
cGraph.new_edge(node, childNode, aug);
// cGraph[oldEdge] = null;
cGraph.del_edge(oldEdge);
}
MLJ.ASSERT(newNode.indeg() == 0,"DecisionTree.delete_subtree: newNode.indeg() != 0");
MLJ.ASSERT(newNode.outdeg() == 0,"DecisionTree.delete_subtree: newNode.outdeg() != 0");
// cGraph.entry(newNode) = null;
cGraph.del_node(newNode);
// Re-assign the levels of each node in the subtree we just moved.
// Skipped when 'node' still carries CGraph.DEFAULT_LEVEL.
if (get_graph().node_info(node).level() != CGraph.DEFAULT_LEVEL)
assign_subtree_levels(node, get_graph().node_info(node).level());
}
// Recurse--all children must delete themselves.
// These are the ORIGINAL children of 'node' saved above (not the children
// moved over from 'newNode'); passing null deletes each one together with
// its whole subtree.
for (currentChild = 0; currentChild < numChildren; currentChild++) {
logOptions.LOG(5, " 9. Now to delete child " + currentChild + " of "
+ numChildren + " children" + '\n');
delete_subtree(children[currentChild], null);
}
}
/** Creates NodeInfo objects for every Node in the branch starting at the
* given Node and assigns each NodeInfo its appropriate level in the tree.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -