⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 build_tree.m

📁 决策树
💻 M
📖 第 1 页 / 共 4 页
字号:

clear all;
clc;
% num=700;

% ---- tree-growing parameters ----
tree_level     = 4;     % level budget; decremented after each level is processed
attribute_num  = 6;     % number of candidate attributes (columns of the data matrix)
class_split    = 0.9;   % minimum purity: a node is split only while split() returns less than this
element_num    = 2;     % minimum number of elements a node needs before a split is attempted
attribute_test = 5;     % column of the result sheet used as the class label

% ---- bookkeeping of attributes already consumed along each path ----
attribute_num_use = attribute_num;
attribute_use     = zeros(attribute_num_use, 1);   % 1 marks an attribute as used
attribute_use_0   = attribute_use;                 % mask for the subtree under node 0
attribute_use_1   = attribute_use;                 % mask for the subtree under node 1

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pre-create one placeholder variable per tree node so that every node
% variable exists even when the corresponding split is never performed.
% Naming: class_<path>, where each digit of <path> is the branch taken at
% that level (the section comments in this script call branch 0 the left
% node and branch 1 the right node).
% Each statement writes 0 into column 1 of a variable that does not exist
% yet (the workspace was cleared at the top of the script).
% NOTE(review): the split code that follows overwrites these variables with
% the outputs of classificate_root / classificate when a node is split.

% level 1 (children of the root)
class_0(:,1)=0;
class_1(:,1)=0;

% level 2
class_0_0(:,1)=0;
class_0_1(:,1)=0;
class_1_0(:,1)=0;
class_1_1(:,1)=0;

% level 3
class_0_0_0(:,1)=0;
class_0_0_1(:,1)=0;
class_0_1_0(:,1)=0;
class_0_1_1(:,1)=0;
class_1_0_0(:,1)=0;
class_1_0_1(:,1)=0;
class_1_1_0(:,1)=0;
class_1_1_1(:,1)=0;

% level 4
class_0_0_0_0(:,1)=0;
class_0_0_0_1(:,1)=0;
class_0_0_1_0(:,1)=0;
class_0_0_1_1(:,1)=0;
class_0_1_0_0(:,1)=0;
class_0_1_0_1(:,1)=0;
class_0_1_1_0(:,1)=0;
class_0_1_1_1(:,1)=0;
class_1_0_0_0(:,1)=0;
class_1_0_0_1(:,1)=0;
class_1_0_1_0(:,1)=0;
class_1_0_1_1(:,1)=0;
class_1_1_0_0(:,1)=0;
class_1_1_0_1(:,1)=0;
class_1_1_1_0(:,1)=0;
class_1_1_1_1(:,1)=0;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Read the attribute columns (I..N, rows 2..994) of the worksheet.
datarow=xlsread('1.xls','血常规','i2:n994');

%load datarow;
% Read columns A..H of the same rows; one of these columns is later picked
% (via attribute_test) as the class label.
data_ret=xlsread('1.xls','血常规','a2:h994');

% Normalization
% min-max normalization: rescale every column of datarow to [0,1].
% The size is taken from the data actually returned by xlsread instead of
% being hard-coded, because xlsread may return fewer rows than the
% requested range when leading/trailing rows are non-numeric.
[row_count,col_count]=size(datarow);
data_min_max=zeros(row_count,col_count);
for i=1:col_count
   minA=min(datarow(:,i));
   maxA=max(datarow(:,i));
   rangeA=maxA-minA;
   if rangeA==0
       % Constant column: avoid 0/0 = NaN and map every value to 0.
       data_min_max(:,i)=0;
   else
       data_min_max(:,i)=(datarow(:,i)-minA)/rangeA;
   end
end


%z-score normalization
% data_z_score=zeros(993,6);
% for i=1:6
%     meanA=mean(datarow(:,i));
%     sum=0;
%     for j=1:993
%         sum=sum+(datarow(j,i)-meanA)^2;
%         meanB=sum/(993-1);
%         xigmad=sqrt(meanB);
%         data_z_score(j,i)=(datarow(j,i)-meanA)/xigmad;
%     end
% end


% normalization by decimal scaling 
% data_decimal=zeros(993,6);
% for i=1:6
%    maxA=max(datarow(:,i));
%    if maxA/10<1
%        for j=1:993
%            data_decimal(j,i)=datarow(j,i)/10;
%        end
%    else if maxA/100<1
%            for j=1:993
%               data_decimal(j,i)=datarow(j,i)/100;
%            end
%        else
%            for j=1:993
%                data_decimal(j,i)=datarow(j,i)/1000;
%            end
%        end
%    end
% end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Randomly partition the normalized rows into a training set and a test set.
% Each row draws one uniform random number; rows whose draw exceeds rand_a
% go to the training set, the rest to the test set (so about 75% / 25% in
% expectation with rand_a = 0.25).
rand_a=0.25;
[m_temp,n_temp]=size(data_min_max);
num11=0;   % number of training rows collected so far
num22=0;   % number of test rows collected so far
for i=1:m_temp
    if (rand>rand_a)
        % training row: keep the normalized attributes and the matching result row
        num11=num11+1;
        data(num11,:)=data_min_max(i,:);
        class_ret(num11,:)=data_ret(i,:);
    else
        % test row
        num22=num22+1;
        data_test(num22,:)=data_min_max(i,:);
        class_test(num22,:)=data_ret(i,:);
    end
end
num=num11;   % training-set size used by the tree-building code
% NOTE(review): "save class_test" with a single argument writes the whole
% workspace to class_test.mat, not just the variable class_test - confirm
% this is what the test script expects.
save class_test;
% Column 1 of class_train is the class label taken from column
% attribute_test of the result rows; column 2 is all ones.
% NOTE(review): the meaning of the ones column is defined by the helpers
% that consume class_train (mingini / classificate_root) - confirm there.
class_train(:,1)=class_ret(:,attribute_test);
class_train(:,2)=ones(num,1);

% Split the root node
%class=class_train(:,1);
% split() is defined elsewhere; its result is compared against class_split
% (presumably a purity measure of the labels - confirm in split.m).
class_split_a=split(class_train(:,1),num);
if class_split_a<class_split
    classsplit=1;
else
    classsplit=0;
end
% The product is nonzero only when the node is not pure enough, unused
% attributes remain, and the level budget is not exhausted.
classificate_condition=classsplit*attribute_num_use*tree_level;
if classificate_condition~=0
    flag=1;
    for i=1:attribute_num
        if attribute_use(i,1)==0
            all_gini(1,flag)=mingini(num,i,class_train,data(:,i));
            flag=flag+1;
        end
    end
    % Pick the attribute with the smallest value returned by mingini.
    % The minimum is stored in min_gini rather than in a variable named
    % "min", which would shadow the builtin min function.
    [min_gini,attribute]=min(all_gini);
    attribute_use(attribute,1)=1;
    attribute_use_0=attribute_use;
    attribute_use_1=attribute_use;
    cla_point=attribute;
    disp('................................................the root !!')
    [class_num ,class_value_root,class_0,class_1]=classificate_root(num,attribute,class_train,data(:,attribute));
    clear min_gini all_gini;
else
    % Without a root split, "attribute" and "class_num" are never defined
    % and the code below fails with an undefined-variable error; stop here
    % with an explicit message instead.
    error('build_tree:rootNotSplit', ...
          'The root node was not split (purity, attribute or level condition not met); the tree cannot be built.');
end
tree_level=tree_level-1;
% Split the left node of the first level (node 0)

%data_1=zeros(num,12);
% chongpai_1 is defined elsewhere.
% NOTE(review): the indexing below assumes that in data_1 column (2*i-1)
% holds attribute i, with the rows of node 0 first (1:class_num) and the
% rows of node 1 after them - confirm in chongpai_1.m.
data_1=chongpai_1(attribute,num,data);
%clear data;
attribute_num_use=attribute_num_use-1;

if class_num>=element_num
    class_split_a=split(class_0(:,1),class_num);
    if class_split_a<class_split
        classsplit=1;
    else
        classsplit=0;
    end
    classificate_condition=classsplit*attribute_num_use*tree_level;
    if classificate_condition~=0
        flag=1;
        for i=1:attribute_num
            if attribute_use(i,1)==0
                all_gini(1,flag)=mingini(class_num,i,class_0,data_1(1:class_num,(2*i-1)));
                attri(1,flag)=i;   % map position in all_gini back to the attribute index
                flag=flag+1;
            end
        end
        % Keep the minimum in min_gini so the builtin min is not shadowed.
        [min_gini,attribute_0]=min(all_gini);
        attribute_use_0(attri(1,attribute_0),1)=1;
        cla_point_0=attri(1,attribute_0);
        disp('................................................the classification 0 !!')
        [class_num_0_0 ,class_value_0,class_0_0,class_0_1]=classificate(class_num,attri(1,attribute_0),class_0,data_1(1:class_num,(attri(1,attribute_0)*2-1)));
        clear  all_gini  min_gini flag attri;
        % Both children of node 0 start from the attributes used so far.
        attribute_use_0_0=attribute_use_0;
        attribute_use_0_1=attribute_use_0;
    else
        % Node 0 is not split: zero sentinels mark it as a leaf.
        class_value_0=0;
        cla_point_0=0;
        class_num_0_0=0;
    end
else
    % Too few elements to split.
    class_value_0=0;
    cla_point_0=0;
    class_num_0_0=0;
end

% Split the right node of the first level (node 1)

if num-class_num>=element_num
    class_split_a=split(class_1(:,1),num-class_num);
    if class_split_a<class_split
        classsplit=1;
    else
        classsplit=0;
    end
    classificate_condition=classsplit*attribute_num_use*tree_level;
    if classificate_condition~=0
        flag=1;
        for i=1:attribute_num
            if attribute_use(i,1)==0
                % Rows class_num+1..num of data_1 are the rows used for node 1.
                all_gini(1,flag)=mingini(num-class_num,i,class_1,data_1((class_num+1):num,(2*i-1)));
                attri(1,flag)=i;   % map position in all_gini back to the attribute index
                flag=flag+1;
            end
        end
        % Keep the minimum in min_gini so the builtin min is not shadowed.
        [min_gini,attribute_1]=min(all_gini);
        attribute_use_1(attri(1,attribute_1),1)=1;
        cla_point_1=attri(1,attribute_1);
        disp('................................................the classification 1 !!')
        [class_num_1_0 ,class_value_1,class_1_0,class_1_1]=classificate((num-class_num),attri(1,attribute_1),class_1,data_1((class_num+1):num,(attri(1,attribute_1)*2-1)));
        % Both children of node 1 start from the attributes used so far.
        attribute_use_1_0=attribute_use_1;
        attribute_use_1_1=attribute_use_1;
    else
        % Node 1 is not split: zero sentinels mark it as a leaf.
        class_value_1=0;
        cla_point_1=0;
        class_num_1_0=0;
    end
else
    % Too few elements to split.
    class_value_1=0;
    cla_point_1=0;
    class_num_1_0=0;
end

% Drop the per-node temporaries before moving on to the next level.
clear  all_gini   min_gini flag attri;
tree_level=tree_level-1;

% Split the left child of the second-level left node (node 0_0),
% then its right sibling (node 0_1).

% chongpai_2 is defined elsewhere.
% NOTE(review): the indexing below assumes that in data_2 column (2*i-1)
% holds attribute i, with the rows of node 0_0 first (1:class_num_0_0)
% followed by the rows of node 0_1 (up to class_num) - confirm in chongpai_2.m.
data_2=chongpai_2(class_num,(num-class_num),data_1,cla_point_0,cla_point_1);
clear data_1;
attribute_num_use=attribute_num_use-1;

%all_gini=zeros(attribute_num_use,1);
% class_num_0_0 is 0 when node 0 was not split, in which case neither
% second-level node under it exists.
if class_num_0_0~=0
    % ---- node 0_0 ----
    if class_num_0_0>=element_num
        class_split_a=split(class_0_0(:,1),class_num_0_0);
        if class_split_a<class_split
            classsplit=1;
        else
            classsplit=0;
        end
        classificate_condition=classsplit*attribute_num_use*tree_level;
        if classificate_condition~=0
            flag=1;
            for i=1:attribute_num
                if attribute_use_0(i,1)==0
                    all_gini(1,flag)=mingini(class_num_0_0,i,class_0_0,data_2(1:class_num_0_0,(2*i-1)));
                    attri(1,flag)=i;   % map position in all_gini back to the attribute index
                    flag=flag+1;
                end
            end
            % Keep the minimum in min_gini so the builtin min is not shadowed.
            [min_gini,attribute_0_0]=min(all_gini);
            % Mark the chosen attribute as used on this path, as node 0_1
            % does below; without this the masks copied to 0_0_0 / 0_0_1
            % would still allow the same attribute to be picked again.
            attribute_use_0_0(attri(1,attribute_0_0),1)=1;
            cla_point_0_0=attri(1,attribute_0_0);
            disp('................................................the classification 0_0 !!')
            [class_num_0_0_0 ,class_value_0_0,class_0_0_0,class_0_0_1]=classificate(class_num_0_0,attri(1,attribute_0_0),class_0_0,data_2(1:class_num_0_0,(attri(1,attribute_0_0)*2-1)));
            clear  all_gini min_gini flag attri;
            attribute_use_0_0_0=attribute_use_0_0;
            attribute_use_0_0_1=attribute_use_0_0;
        else
            % Node 0_0 is not split: zero sentinels mark it as a leaf.
            class_value_0_0=0;
            class_num_0_0_0=0;
            cla_point_0_0=0;
        end
    else
        % Too few elements to split.
        class_value_0_0=0;
        class_num_0_0_0=0;
        cla_point_0_0=0;
    end

    % ---- node 0_1: right child of the second-level left node ----

    if class_num-class_num_0_0>=element_num
        class_split_a=split(class_0_1(:,1),class_num-class_num_0_0);
        if class_split_a<class_split
            classsplit=1;
        else
            classsplit=0;
        end
        classificate_condition=classsplit*attribute_num_use*tree_level;
        if classificate_condition~=0
            flag=1;
            for i=1:attribute_num
                if attribute_use_0(i,1)==0
                    all_gini(1,flag)=mingini((class_num-class_num_0_0),i,class_0_1,data_2((1+class_num_0_0):class_num,(2*i-1)));
                    attri(1,flag)=i;   % map position in all_gini back to the attribute index
                    flag=flag+1;
                end
            end
            % Keep the minimum in min_gini so the builtin min is not shadowed.
            [min_gini,attribute_0_1]=min(all_gini);
            attribute_use_0_1(attri(1,attribute_0_1),1)=1;
            cla_point_0_1=attri(1,attribute_0_1);
            disp('................................................the classification 0_1 !!')
            [class_num_0_1_0 ,class_value_0_1,class_0_1_0,class_0_1_1]=classificate((class_num-class_num_0_0),attri(1,attribute_0_1),class_0_1,data_2((1+class_num_0_0):class_num,(attri(1,attribute_0_1)*2-1)));
            clear  all_gini  min_gini flag attri;
            attribute_use_0_1_0=attribute_use_0_1;
            attribute_use_0_1_1=attribute_use_0_1;
        else
            % Node 0_1 is not split: zero sentinels mark it as a leaf.
            class_value_0_1=0;
            cla_point_0_1=0;
            class_num_0_1_0=0;
        end
    else
        % Too few elements to split.
        class_value_0_1=0;
        cla_point_0_1=0;
        class_num_0_1_0=0;
    end
else
    % Node 0 was a leaf: both second-level nodes under it get zero sentinels.
    class_value_0_1=0;
    class_value_0_0=0;
    cla_point_0_1=0;
    class_num_0_1_0=0;
    class_num_0_0_0=0;
    cla_point_0_0=0;
end
%分第二层右节点的左节点

if class_num_1_0~=0
    if class_num_1_0>=element_num
        class_split_a=split(class_1_0(:,1),class_num_1_0);
        if class_split_a<class_split
            classsplit=1;
        else
            classsplit=0;
        end
        classificate_condition=classsplit*attribute_num_use*tree_level;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -