%
% info_gain.m
%
%  Algorithm :
%
%  IG(Y|X) = H(Y) - H(Y | X)
%  H(Y) = - p1 log2(p1) - p2 log2(p2) - ... - pn log2(pn) = - SUM_{i=1}^n p_i log2(p_i)

function [res] = info_gain(dataset)
% INFO_GAIN  Information gain of each attribute w.r.t. a binary target.
%
%   res = info_gain(dataset)
%
% dataset : nrows-by-ncols matrix. Columns 1..ncols-1 are discrete
%           attribute values; column ncols is a 0/1 class label.
% res     : 1-by-(ncols-1) row vector; res(i) = IG(Y|X_i) = H(Y) - H(Y|X_i).

	[nrows, ncols] = size(dataset);

	target_attr_1 = dataset(:,ncols);        % indicator of class 1
	target_attr_0 = 1 - dataset(:,ncols);    % indicator of class 0

	% H(Y): entropy of the target column before any split.
	% nnz(v) counts nonzeros, replacing length(find(v)).
	en_before = ent(nnz(target_attr_1), nnz(target_attr_0));

	% en_after(i) accumulates -H(Y|X_i), i.e. minus the weighted
	% entropy of the target within each value group of attribute i.
	en_after = zeros(1,ncols-1);

	for i = 1:ncols-1					% select attribute
		selected_attr = dataset(:,i);
		uval = unique(selected_attr);
		for j = 1:length(uval)			% evaluate each unique value in it
			mask = select_only(selected_attr, uval(j));
			% Group-by counts of each class among rows holding this value.
			n0 = nnz(mask .* target_attr_0);
			n1 = nnz(mask .* target_attr_1);
			en_after(i) = en_after(i) - (n0 + n1) / nrows * ent(n0, n1);
		end
	end

	% en_after(i) == -H(Y|X_i), so the sum below is H(Y) - H(Y|X_i).
	res = en_before + en_after;


function [attr] = select_only( attr, j )
% SELECT_ONLY  Turn column 1 of ATTR into a 0/1 indicator of equality with J.
%
% attr : matrix (in practice a column vector); only column 1 is rewritten,
%        any further columns are returned untouched.
% j    : scalar value to match against.
%
% Vectorized relational comparison replaces the original element-wise loop.
	attr(:,1) = double(attr(:,1) == j);


% calculate entropy
% calculate entropy
function [res] = ent(val1, val2)
% ENT  Binary Shannon entropy of a two-class split given class counts.
%
% val1, val2 : counts of the two classes (nonnegative integers).
% res        : -p1*log2(p1) - p2*log2(p2) with p_k = val_k/(val1+val2);
%              a zero count contributes nothing, and ent(0,0) is 0.
	total = val1 + val2;
	res = 0;
	if( val1 ~= 0 )
		p = val1 / total;
		res = res - p * log2( p );
	end
	if( val2 ~= 0 )
		p = val2 / total;
		res = res - p * log2( p );
	end
