decisiontreetestcon.m

来自「ID3+C4.5的源程序。用于数据挖掘决策算法的一个实例。」· M 代码 · 共 112 行

112 行

function Result=DecisionTreeTestCon(DataBase,DataName,WhereSen,ForecastSen,attributName)
%DECISIONTREETESTCON Choose the split attribute with the largest information gain.
%   Result = DecisionTreeTestCon(DataBase,DataName,WhereSen,ForecastSen,attributName)
%   connects to the data source 'DecisionTreeTest', switches to the database
%   DataBase and, for every non-empty entry of attributName except the first,
%   queries table DataName to compute the information gain of splitting on
%   that attribute with respect to the forecast (class) column.
%
%   Inputs
%     DataBase     - database name used in the "use <DataBase>" statement.
%     DataName     - table name.
%     WhereSen     - SQL filter clause (including the "where" keyword). When
%                    empty or blank, a filter enumerating every distinct value
%                    of ForecastSen is generated instead.
%     ForecastSen  - name of the forecast (class) column.
%     attributName - 1-by-N cell array of column names. Per the original
%                    author's note the ID column must come first and the
%                    forecast column last; entries already used may be empty.
%
%   Output
%     Result       - name of the attribute with the highest gain, or 0 when
%                    the best gain is below 0.2.
%
%   Notes
%     * The attributes are visited from last to first, so the forecast column
%       (expected last) is seen first and its entropy is available when the
%       gains of the other attributes are computed.
%     * The function changes the global 'DataReturnFormat' database preference
%       via setdbprefs and does not restore it.
%     * SQL text is assembled by concatenation; table and column names are
%       trusted and must not come from untrusted input.

logintimeout(15);
conn = database('DecisionTreeTest', '', '');
% Close the connection on every exit path, including errors.
connGuard = onCleanup(@() close(conn)); %#ok<NASGU>
exec(conn,['use',' ',DataBase]);

% Build the filter clause. An empty or all-blank WhereSen means "no filter
% supplied": enumerate the distinct forecast values so that later queries can
% always append " and <condition>" to an existing where clause.
if isempty(WhereSen) || (ischar(WhereSen) && all(isspace(WhereSen(:))))
    cursflag=exec(conn,['select',' ',ForecastSen, ' from',' ',DataName,' group by',' ', ForecastSen]);
    setdbprefs('DataReturnFormat','cellarray');
    cursflag=fetch(cursflag);
    flagdiv=cursflag.Data;
    close(cursflag);
    WhereSenList='where(';
    for I=1:length(flagdiv)
        if I~=length(flagdiv)
            WhereSenList=[WhereSenList,' ',ForecastSen,'=''',sqlValueText(flagdiv{I,1}),''' or']; %#ok<AGROW>
        else
            WhereSenList=[WhereSenList,' ',ForecastSen,'=''',sqlValueText(flagdiv{I,1}),''')']; %#ok<AGROW>
        end
    end
else
    WhereSenList=WhereSen;
end

priorEntropy=0;   % entropy of the forecast column (set when that column is visited)
bestGain=0;       % largest information gain found so far
bestIdx=0;        % index into attributName of the attribute achieving bestGain

% Walk the columns from last to first; index 1 (the ID column) is skipped.
for j=length(attributName):-1:2
    attrName=attributName{1,j};
    if isempty(attrName)   % attribute already consumed: skip it
        continue;
    end

    % Number of rows in each distinct value of this attribute.
    curshigh=exec(conn,['select count( ',attrName,' ) as num from',' ',DataName,' ',WhereSenList, ' group by',' ', attrName]);
    setdbprefs('DataReturnFormat','numeric');
    curshigh=fetch(curshigh);
    groupCounts=curshigh.Data;
    close(curshigh);
    totalCount=sum(groupCounts);

    if strcmp(attrName,ForecastSen)
        % Forecast column: unconditional entropy of the class distribution.
        priorEntropy=localEntropy(groupCounts);
    else
        % Ordinary attribute: fetch its distinct values (same group-by as the
        % counting query above, so rows are expected to line up with groupCounts).
        curslow=exec(conn,['select ',' ',attrName,' from',' ',DataName,' ',WhereSenList, ' group by',' ', attrName]);
        setdbprefs('DataReturnFormat','cellarray');
        curslow=fetch(curslow);
        groupValues=curslow.Data;
        close(curslow);

        % Fresh buffer per attribute so values from a previous attribute can
        % never leak into this one's conditional entropy.
        groupEntropy=zeros(1,numel(groupCounts));
        for I=1:length(groupValues)
            % Class distribution inside this attribute value.
            curslowdiv=exec(conn,['select count(',ForecastSen,') from',' ',DataName,' ',WhereSenList,' and',' ',attrName,'=''',sqlValueText(groupValues{I,1}),''' group by',' ', ForecastSen,',',attrName]);
            setdbprefs('DataReturnFormat','numeric');
            curslowdiv=fetch(curslowdiv);
            classCounts=curslowdiv.Data;
            close(curslowdiv);
            groupEntropy(I)=localEntropy(classCounts);
        end

        % Conditional entropy: per-value entropies weighted by value frequency.
        condEntropy=0;
        for I=1:length(groupCounts)
            condEntropy=condEntropy+(groupCounts(I)/totalCount)*groupEntropy(I);
        end

        gain=priorEntropy-condEntropy;   % information gain of this split
        if bestGain<gain
            bestGain=gain;
            bestIdx=j;
        end
    end
end

% Gains below 0.2 are treated as "no attribute worth splitting on".
if bestGain<0.2
    Result=0;
else
    Result=attributName{1,bestIdx};
end


function H=localEntropy(counts)
%LOCALENTROPY Entropy (base 2) of the distribution given by a vector of counts.
%   Returns 0 for an empty vector.
p=counts/sum(counts);
H=-sum(p.*log2(p));


function txt=sqlValueText(value)
%SQLVALUETEXT Text of a fetched cell value for use inside a quoted SQL literal.
%   Numeric/logical values are converted to their decimal text; character
%   values have embedded single quotes doubled.
if isnumeric(value) || islogical(value)
    txt=num2str(value);
else
    txt=strrep(value,'''','''''');
end

decisiontreetestcon.m - 源码说明

本页面展示了「ID3+C4.5的源程序。用于数据挖掘决策算法的一个实例。」中的 decisiontreetestcon.m 源码文件，采用 M 编程语言编写，共 112 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与4.5相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?