% id3.m
%
% tree.m
%
% created by Sunghwan Yoo
%
%
% =========== Description of Algorithm ================
%
% ID3 (Examples, Target_Attribute, Attributes)
%
% * Create a root node for the tree
% * If all examples are positive, Return the single-node tree Root, with label = +.
% * If all examples are negative, Return the single-node tree Root, with label = -.
% * If number of predicting attributes is empty, then Return the single node tree Root,
% with label = most common value of the target attribute in the examples.
% * Otherwise Begin
% o A = The Attribute that best classifies examples.
% o Decision Tree attribute for Root = A.
% o For each possible value, vi, of A,
% + Add a new tree branch below Root, corresponding to the test A = vi.
% + Let Examples(vi), be the subset of examples that have the value vi for A
% + If Examples(vi) is empty
% # Then below this new branch add a leaf node with label = most common target value in the examples
% + Else below this new branch add the subtree ID3 (Examples(vi), Target_Attribute, Attributes – {A})
% * End
% * Return Root
%
%
% Data import and series of experiments will be taken care by the main.m
function [tree, training_acc, test_acc] = id3(training_set, test_set, depth, cutoff)
% ID3 decision-tree driver: learns a tree from TRAINING_SET and reports
% classification accuracy on both the training and the test data.
%
% Inputs:
%   training_set - numeric matrix, one example per row; last column is the class label
%   test_set     - numeric matrix with the same column layout as training_set
%   depth        - NOTE(review): unused in this function; presumably a depth
%                  limit intended for create_tree — confirm against create_tree
%   cutoff       - forwarded to create_tree (presumably an info-gain cutoff — TODO confirm)
%
% Outputs:
%   tree         - root node of the learned decision tree
%   training_acc - fraction of training examples classified correctly
%   test_acc     - fraction of test examples classified correctly

% Create the root node / full tree (create_tree is defined elsewhere).
tree = create_tree(training_set, 0, cutoff);
% Dead initializations (training_acc = 0; test_acc = 0) removed: both
% outputs are assigned unconditionally below.
training_acc = test_id3( tree, training_set );
test_acc = test_id3( tree, test_set );
function [test_acc] = test_id3( tree, dataset )
% Classify every row of DATASET with TREE and return the fraction of rows
% whose prediction matches the label stored in the last column.
[nrows, ncols] = size( dataset );
correct = 0;
for row = 1:nrows
    predicted = test_id3_case( tree, dataset(row,:) );
    if predicted == dataset(row, ncols)
        correct = correct + 1;
    end
end
% Every row is either correct or not, so correct/nrows equals the original
% success/(success+failure) ratio (including NaN for an empty dataset).
test_acc = correct / nrows;
function [class_res] = test_id3_case( node, dataset )
% Recursively classify a single example (row vector DATASET) with the
% decision tree rooted at NODE.  Returns the class label of the leaf
% reached, or -1 when the example's attribute value has no matching branch.
if node.split_attribute == 0
    % Leaf node: the stored class is the prediction.
    class_res = node.classvar;
    return;
end
% Internal node: follow the branch matching this example's value for the
% split attribute (ternary attributes: values 0, 1, 2).
switch dataset(node.split_attribute)
    case 0
        branch = node.subnode0;
    case 1
        branch = node.subnode1;
    case 2
        branch = node.subnode2;
    otherwise
        branch = [];
end
if isempty(branch)
    class_res = -1;   % no branch grown for this value
else
    class_res = test_id3_case(branch, dataset);
end
function [] = disp_tree( node, tab_level )
% Pretty-print the subtree rooted at NODE, indented by TAB_LEVEL tab stops.
tab = repmat(sprintf('\t'), 1, tab_level);
if node.split_attribute == 0
    % Leaf: show its class label.
    disp(sprintf('%sClassify : %d\n', tab, node.classvar));
else
    % Internal node: show the split, then recurse into each non-empty branch.
    disp(sprintf('%sSplit_attr : %d / Infogain : %f\n', tab, node.split_attribute, node.info_gain));
    for v = 0:2
        child = node.(sprintf('subnode%d', v));
        if ~isempty(child)
            disp(sprintf('%s- case %d:\n', tab, v));
            disp_tree(child, tab_level + 1);
        end
    end
end