% id3.m
%
% tree.m
%
% created by Sunghwan Yoo
%
%
% =========== Description of Algorithm ================
%
% ID3 (Examples, Target_Attribute, Attributes)
%
% * Create a root node for the tree
% * If all examples are positive, Return the single-node tree Root, with label = +.
% * If all examples are negative, Return the single-node tree Root, with label = -.
% * If number of predicting attributes is empty, then Return the single node tree Root,
% with label = most common value of the target attribute in the examples.
% * Otherwise Begin
% o A = The Attribute that best classifies examples.
% o Decision Tree attribute for Root = A.
% o For each possible value, vi, of A,
% + Add a new tree branch below Root, corresponding to the test A = vi.
% + Let Examples(vi), be the subset of examples that have the value vi for A
% + If Examples(vi) is empty
% # Then below this new branch add a leaf node with label = most common target value in the examples
% + Else below this new branch add the subtree ID3 (Examples(vi), Target_Attribute, Attributes – {A})
% * End
% * Return Root
%
%
% Data import and series of experiments will be taken care by the main.m
function [tree, training_acc, test_acc] = id3(training_set, test_set, depth, cutoff)
% ID3 decision-tree driver: learns a tree from TRAINING_SET and reports
% classification accuracy on both the training and the test data.
%
% Inputs:
%   training_set - numeric matrix, one example per row; last column is the class label
%   test_set     - numeric matrix with the same column layout as training_set
%   depth        - NOTE(review): unused in this function; presumably a depth
%                  limit intended for create_tree — confirm against create_tree
%   cutoff       - forwarded to create_tree (presumably an info-gain cutoff — TODO confirm)
%
% Outputs:
%   tree         - root node of the learned decision tree
%   training_acc - fraction of training examples classified correctly
%   test_acc     - fraction of test examples classified correctly

% Create the root node / full tree (create_tree is defined elsewhere).
tree = create_tree(training_set, 0, cutoff);
% Dead initializations (training_acc = 0; test_acc = 0) removed: both
% outputs are assigned unconditionally below.
training_acc = test_id3( tree, training_set );
test_acc = test_id3( tree, test_set );
function [test_acc] = test_id3( tree, dataset )
% Classify every row of DATASET with TREE and return the fraction of rows
% whose prediction matches the label stored in the last column.
[nrows, ncols] = size( dataset );
correct = 0;
for row = 1:nrows
    predicted = test_id3_case( tree, dataset(row,:) );
    if predicted == dataset(row, ncols)
        correct = correct + 1;
    end
end
% Every row is either correct or not, so correct/nrows equals the original
% success/(success+failure) ratio (including NaN for an empty dataset).
test_acc = correct / nrows;
function [class_res] = test_id3_case( node, dataset )
% Recursively classify a single example (row vector DATASET) with the
% decision tree rooted at NODE.  Returns the class label of the leaf
% reached, or -1 when the example's attribute value has no matching branch.
if node.split_attribute == 0
    % Leaf node: the stored class is the prediction.
    class_res = node.classvar;
    return;
end
% Internal node: follow the branch matching this example's value for the
% split attribute (ternary attributes: values 0, 1, 2).
switch dataset(node.split_attribute)
    case 0
        branch = node.subnode0;
    case 1
        branch = node.subnode1;
    case 2
        branch = node.subnode2;
    otherwise
        branch = [];
end
if isempty(branch)
    class_res = -1;   % no branch grown for this value
else
    class_res = test_id3_case(branch, dataset);
end
function [] = disp_tree( node, tab_level )
% Pretty-print the subtree rooted at NODE, indented by TAB_LEVEL tab stops.
tab = repmat(sprintf('\t'), 1, tab_level);
if node.split_attribute == 0
    % Leaf: show its class label.
    disp(sprintf('%sClassify : %d\n', tab, node.classvar));
else
    % Internal node: show the split, then recurse into each non-empty branch.
    disp(sprintf('%sSplit_attr : %d / Infogain : %f\n', tab, node.split_attribute, node.info_gain));
    for v = 0:2
        child = node.(sprintf('subnode%d', v));
        if ~isempty(child)
            disp(sprintf('%s- case %d:\n', tab, v));
            disp_tree(child, tab_level + 1);
        end
    end
end