%
% info_gain.m
%
% Algorithm:
%
%   IG(Y|X) = H(Y) - H(Y|X)
%   H(Y|X)  = SUM_v P(X = v) * H(Y | X = v)
%   H(X)    = - p1 log2(p1) - p2 log2(p2) - ... - pn log2(pn)
%           = - SUM(i=1)^n p_i log2(p_i)
function [res] = info_gain(dataset)
% INFO_GAIN  Information gain of each attribute column with respect to
% the binary (0/1) target attribute stored in the last column of dataset.
[nrows, ncols] = size(dataset);
target_attr_1 = dataset(:,ncols);      % indicator of class 1
target_attr_0 = 1 - dataset(:,ncols);  % indicator of class 0
% Calculate H(Y), the entropy of the target before any split
en_before = ent(length(find(target_attr_1)), length(find(target_attr_0)));
en_after = zeros(1, ncols-1);
% Calculate the conditional entropy H(Y|X) for each attribute
for i = 1:ncols-1                      % select attribute
    selected_attr = dataset(:,i);
    uval = unique(selected_attr);
    for j = 1:length(uval)             % evaluate each unique value in it
        val = uval(j);
        % group-by count: rows with attribute value val, split by class
        subsum0 = length(find(select_only(selected_attr, val) .* target_attr_0));
        subsum1 = length(find(select_only(selected_attr, val) .* target_attr_1));
        en_after(i) = en_after(i) + (subsum0 + subsum1) / nrows * ent(subsum0, subsum1);
    end
end
% IG(Y|X) = H(Y) - H(Y|X), one value per attribute column
res = en_before - en_after;
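
% Example usage (a minimal sketch; the toy dataset below is hypothetical):
% each row is an instance, columns 1..end-1 are binary attributes, and the
% last column is the 0/1 class label.
%
%   data = [1 0 1;
%           1 1 1;
%           0 1 0;
%           0 0 0];
%   ig = info_gain(data)   % one gain value per attribute column
%
% Here attribute 1 predicts the class perfectly, so ig(1) = 1 bit, while
% attribute 2 carries no information about the class, so ig(2) = 0.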
% Map the attribute column to an indicator vector: 1 where the value
% equals j, 0 elsewhere.
function [attr] = select_only( attr, j )
nrows = size(attr, 1);
for i = 1:nrows
    if( attr(i,1) == j )
        attr(i,1) = 1;
    else
        attr(i,1) = 0;
    end
end
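
% Note: an equivalent vectorized form (same behavior, assuming attr is a
% numeric column vector) would be:
%
%   attr = double(attr == j);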
% Calculate the binary entropy from raw counts: ent(val1, val2) = H(p)
% with p = val1/(val1+val2); the 0*log2(0) terms are taken as 0.
function [res] = ent(val1, val2)
if( val1 == 0 )
    if( val2 == 0 )
        res = 0;
    else
        res = - (val2/(val1+val2)) * log2( val2/(val1+val2) );
    end
else
    if( val2 == 0 )
        res = - (val1/(val1+val2)) * log2( val1/(val1+val2) );
    else
        res = - (val1/(val1+val2)) * log2( val1/(val1+val2) ) - (val2/(val1+val2)) * log2( val2/(val1+val2) );
    end
end
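
% Worked example (for reference): ent(3, 1) computes
%   -(3/4)*log2(3/4) - (1/4)*log2(1/4) ≈ 0.3113 + 0.5 = 0.8113 bits.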