📄 decisiontreetestcon.m
字号:
function Result=DecisionTreeTestCon(DataBase,DataName,WhereSen,ForecastSen,attributName)
%DECISIONTREETESTCON Choose the attribute with the largest information gain.
%
%   Result = DecisionTreeTestCon(DataBase,DataName,WhereSen,ForecastSen,attributName)
%
%   Inputs
%     DataBase     - name of the database selected with a "use" statement.
%     DataName     - name of the table that is queried.
%     WhereSen     - filter clause inserted verbatim after the table name
%                    (later queries append " and ..." to it). When it is
%                    empty or blank, a clause matching every distinct value
%                    of ForecastSen is generated instead.
%     ForecastSen  - name of the column to be predicted.
%     attributName - 1-by-N cell array of column names; empty cells are
%                    skipped. Column 1 is never evaluated (the original
%                    notes say an ID column is expected there) and the
%                    prediction column is expected in the last cell, so
%                    that its entropy is known before any gain is computed.
%
%   Output
%     Result - name of the attribute with the highest information gain, or
%              the number 0 when the best gain is below 0.2.
%
%   The function connects to the data source 'DecisionTreeTest' and closes
%   the connection and every cursor it opens before returning.

minGain = 0.2;   % best gain below this threshold yields Result = 0

logintimeout(15);
conn = database('DecisionTreeTest', '', '');
if ~isempty(conn.Message)
    error('DecisionTreeTestCon:connectionFailed', ...
        'Could not connect to data source DecisionTreeTest: %s', conn.Message);
end
% Release the connection on every exit path, including errors.
connGuard = onCleanup(@() closeQuietly(conn)); %#ok<NASGU>

closeQuietly(exec(conn,['use',' ',DataBase]));

% Build the filter clause. An empty or blank WhereSen means "no filter".
if isempty(WhereSen) || all(isspace(WhereSen))
    flagdiv = fetchData(conn, ...
        ['select',' ',ForecastSen,' from',' ',DataName,' group by',' ',ForecastSen], ...
        'cellarray');
    if ~iscell(flagdiv) || isempty(flagdiv)
        % No class values were returned, so nothing can be split.
        Result = 0;
        return;
    end
    WhereSenList = 'where(';
    for I = 1:size(flagdiv,1)
        if I > 1
            WhereSenList = [WhereSenList,' or']; %#ok<AGROW>
        end
        WhereSenList = [WhereSenList,' ',ForecastSen,'=''',sqlValue(flagdiv{I,1}),'''']; %#ok<AGROW>
    end
    WhereSenList = [WhereSenList,')'];
else
    WhereSenList = WhereSen;
end

IS = 0;   % entropy of the prediction column
C  = 0;   % best information gain found so far
k  = 0;   % index of the attribute that produced C

% Walk the columns from last to second; column 1 is never evaluated.
for j = size(attributName,2):-1:2
    attrName = attributName{1,j};
    if isempty(attrName)
        continue;   % this attribute has been blanked out by the caller
    end

    % Both GROUP BY queries share the same ORDER BY so that the counts in
    % AA and the values in CharLowName line up row by row.
    groupTail = [' from',' ',DataName,' ',WhereSenList, ...
        ' group by',' ',attrName,' order by',' ',attrName];

    % Number of rows for each distinct value of this attribute.
    AA = fetchData(conn, ['select count( ',attrName,' ) as num',groupTail], 'numeric');
    if ~isnumeric(AA) || isempty(AA)
        continue;   % the fetch did not return usable counts
    end
    AA = double(AA(:));
    B = sum(AA);
    if B <= 0
        continue;
    end

    if strcmp(attrName,ForecastSen)
        % Prediction column: store its unconditional entropy.
        IS = entropyOfCounts(AA);
        continue;
    end

    % Ordinary attribute: fetch its distinct values, then the class
    % distribution inside each value, to get the conditional entropy.
    CharLowName = fetchData(conn, ['select ',' ',attrName,groupTail], 'cellarray');
    if ~iscell(CharLowName) || size(CharLowName,1) ~= numel(AA)
        continue;   % values and counts cannot be paired up
    end

    E = 0;   % expected entropy after splitting on this attribute
    for I = 1:numel(AA)
        CharLowNameDiv = fetchData(conn, ...
            ['select count(',ForecastSen,') from',' ',DataName,' ',WhereSenList, ...
             ' and',' ',attrName,'=''',sqlValue(CharLowName{I,1}), ...
             ''' group by',' ',ForecastSen,',',attrName], ...
            'numeric');
        if isnumeric(CharLowNameDiv) && ~isempty(CharLowNameDiv)
            E = E + (AA(I)/B)*entropyOfCounts(CharLowNameDiv);
        end
    end

    gain = IS - E;   % information gain of splitting on this attribute
    if C < gain
        C = gain;
        k = j;
    end
end

if C < minGain
    Result = 0;
else
    Result = attributName{1,k};
end

function data = fetchData(conn, sql, returnFormat)
% Run one query, return the fetched Data and close the cursor.
curs = exec(conn, sql);
% The return format is set between exec and fetch, as the original code did.
setdbprefs('DataReturnFormat', returnFormat);
curs = fetch(curs);
data = curs.Data;
closeQuietly(curs);

function H = entropyOfCounts(counts)
% Base-2 entropy of a vector of counts; zero counts contribute nothing.
counts = double(counts(:));
p = counts / sum(counts);
p = p(p > 0);
H = -sum(p .* log2(p));

function txt = sqlValue(v)
% Text of a fetched value for use between single quotes in a SQL literal.
if isnumeric(v) || islogical(v)
    txt = sprintf('%.15g', double(v));
else
    % Double any embedded single quote so the literal stays well formed.
    txt = strrep(char(v), '''', '''''');
end

function closeQuietly(h)
% Best-effort close of a cursor or connection; a failure while releasing
% the handle must not hide the result or the original error.
try
    close(h);
catch %#ok<CTCH>
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -