⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tcmknn_pima.m

📁 matlab编写的SVM分类器源代码
💻 M
字号:
% Start from a clean session: close every figure and clear the workspace.
close('all');
clear('all');

% ----------------------------- Prepare the samples -----------------------------
% Loading the .dat file creates the matrix pima_indians_diabetes_data in the
% workspace. Each row is one sample; the last column is taken as the label.
load('pima_indians_diabetes_data.dat');
m = size(pima_indians_diabetes_data, 1);   % number of samples (rows)
n = size(pima_indians_diabetes_data, 2);   % number of columns, label column included

% Draw a single random permutation of the rows and apply it to the feature
% columns and to the label column, so every sample keeps its own label.
order = randperm(m).';
data  = pima_indians_diabetes_data(order, 1:n-1);   % features only
label = pima_indians_diabetes_data(order, n);       % matching labels

% ------------------- Build the initial labelled set L -------------------
% The first p rows (10% of the shuffled samples) form the initial labelled
% set. Rows whose label equals 1 go to normalL; every other row goes to
% unnormalL. The label columns are kept alongside in label_normalL and
% label_unnormalL.
p = round(m*0.1);

seedData  = data(1:p, :);
seedLabel = label(1:p, 1);
isNormal  = (seedLabel == 1);

% Logical indexing keeps the original row order inside each class and always
% creates both matrices, even when a class has no member among the seed rows.
normalL         = seedData(isNormal, :);
label_normalL   = seedLabel(isNormal, 1);
unnormalL       = seedData(~isNormal, :);
label_unnormalL = seedLabel(~isNormal, 1);

% t and r hold the number of normal / abnormal samples currently in L.
t = size(normalL, 1);
r = size(unnormalL, 1);

% Fail early with a readable message instead of an "undefined variable"
% error further down when one class is missing from the seed rows.
if t == 0 || r == 0
    error('tcmknn:emptySeedClass', ...
        'The initial labelled set must contain both classes (normal: %d, abnormal: %d).', ...
        t, r);
end

% From here on n is the number of feature columns (the label column excluded).
n = size(seedData, 2);
% Remove the p seed rows that went into L; the remaining rows are divided below.
% No reshuffle is done here: the rows keep the order of the earlier permutation.
data  = data(p+1:end, :);
label = label(p+1:end, :);
m = size(data, 1);   % number of remaining samples
n = size(data, 2);   % number of feature columns

% Unlabelled pool UL and its labels label_UL: the leading 80% of the
% remaining rows. max_row_UL remembers the initial pool size.
row_UL     = round(m*0.8);
max_row_UL = row_UL;
UL         = data(1:row_UL, :);
label_UL   = label(1:row_UL, :);

% Test set testdata and its labels label_testdata: the trailing 20%.
testdata       = data(row_UL+1:m, :);
label_testdata = label(row_UL+1:m, :);
row_testdata   = size(testdata, 1);
%%%%%%%以上准备好了所有需要的数据集及其标签,L、UL、testdata和它们的标签%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%开始训练、采样、分类%%%%%%%%%%%%%%%%
% ======================= Train, sample and classify =======================
% Active-learning loop. Every pass
%   1. computes a strangeness value (SV_*) for each labelled sample: the sum
%      of the distances to its k nearest neighbours of the same class divided
%      by the sum of the distances to its k nearest neighbours of the other
%      class;
%   2. classifies testdata by comparing, per class, the fraction of labelled
%      samples that are stranger than the test sample (P_SV_*), and stores
%      the accuracy in Accuracy(number);
%   3. moves the pool sample whose two P values are closest (the most
%      uncertain one) from UL into the labelled set of its true class.
% The loop ends when the accuracy reaches targetAccuracy or when the pool has
% been used up.
%
% The sample queried in step 3 is first used by the NEXT pass. If the loop
% stops before that, the code after the loop relies on row_UL, temp_UL,
% temp_label_UL, biaoji, row_normalL and row_unnormalL to hand it back.
%
% NOTE(review): the P value here counts labelled samples with a strictly
% larger strangeness and divides by (class size + 1); the sample itself is
% not counted in the numerator. Confirm this matches the intended TCM-KNN
% definition.

k              = 10;     % nearest neighbours used in every strangeness value
targetAccuracy = 0.85;   % accuracy at which the loop stops

% Leave-one-out inside a class needs k other members, so both classes must
% hold more than k labelled samples before the first pass.
if size(normalL,1) <= k || size(unnormalL,1) <= k
    error('tcmknn:tooFewLabelled', ...
        'Each class needs more than %d labelled samples (normal: %d, abnormal: %d).', ...
        k, size(normalL,1), size(unnormalL,1));
end

% Helpers, written as anonymous functions so the script stays one plain file.
% rowDist   : Euclidean distance from row vector q to every row of R.
% knnSum    : sum of the k smallest of those distances.
% knnSumAll : knnSum for every row of Q against R, returned as a row vector.
% knnSumLOO : knnSum for every row of X against the other rows of X.
% pValue    : per entry of sNew, share of sClass entries that are strictly
%             larger, with (numel(sClass)+1) as denominator.
rowDist   = @(q, R) sqrt(sum(bsxfun(@minus, R, q).^2, 2));
firstK    = @(v) v(1:k);
knnSum    = @(q, R) sum(firstK(sort(rowDist(q, R))));
knnSumAll = @(Q, R) arrayfun(@(i) knnSum(Q(i,:), R), 1:size(Q,1));
knnSumLOO = @(X) arrayfun(@(i) knnSum(X(i,:), X([1:i-1, i+1:size(X,1)],:)), 1:size(X,1));
pValue    = @(sNew, sClass) sum(bsxfun(@lt, sNew(:), sClass(:).'), 2).' / (numel(sClass)+1);

Accuracy = 0.05;   % placeholder so the loop starts; replaced by the first measured accuracy
number   = 1;      % index of the current pass (samples queried so far + 1)
while Accuracy(end) < targetAccuracy
    % Sizes of the labelled sets as used by this pass. They are intentionally
    % not refreshed after the query step below.
    row_normalL   = size(normalL, 1);
    row_unnormalL = size(unnormalL, 1);

    % ---- strangeness of every labelled sample --------------------------------
    SV_normalL   = knnSumLOO(normalL)   ./ knnSumAll(normalL,   unnormalL);
    SV_unnormalL = knnSumLOO(unnormalL) ./ knnSumAll(unnormalL, normalL);

    % ---- classify the test set with the current labelled sets ----------------
    row_testdata  = size(testdata, 1);
    testToNormal   = knnSumAll(testdata, normalL);
    testToUnnormal = knnSumAll(testdata, unnormalL);
    % The two strangeness values are reciprocal ratios of the same two sums,
    % so each distance sum is computed only once.
    SV_normal_testdata     = testToNormal   ./ testToUnnormal;
    SV_unnormal_testdata   = testToUnnormal ./ testToNormal;
    P_SV_normal_testdata   = pValue(SV_normal_testdata,   SV_normalL);
    P_SV_unnormal_testdata = pValue(SV_unnormal_testdata, SV_unnormalL);

    % Predict 1 when the normal-class P value is strictly larger, otherwise 0.
    pre_label_testdata = double(P_SV_normal_testdata(:) > P_SV_unnormal_testdata(:));
    Accuracy(number,1) = sum(pre_label_testdata == label_testdata(:,1)) / row_testdata;

    % ---- uncertainty sampling from the unlabelled pool ------------------------
    row_UL = size(UL, 1);
    if row_UL ~= 0
        poolToNormal   = knnSumAll(UL, normalL);
        poolToUnnormal = knnSumAll(UL, unnormalL);
        P_SV_normal_UL   = pValue(poolToNormal   ./ poolToUnnormal, SV_normalL);
        P_SV_unnormal_UL = pValue(poolToUnnormal ./ poolToNormal,   SV_unnormalL);

        % Selection score: gap between the two P values. The smallest gap is
        % the least certain sample; min returns the first one on ties.
        C = abs(P_SV_normal_UL - P_SV_unnormal_UL);
        [smallestGap, pick] = min(C);
        newL     = UL(pick, :);
        newlabel = label_UL(pick, :);   % the stored label plays the role of the human annotator

        % Snapshot of the pool before removal, used after the loop to return
        % the sample if it never gets used.
        temp_UL       = UL;
        temp_label_UL = label_UL;

        if newlabel == 1
            normalL       = [normalL; newL];
            label_normalL = [label_normalL; newlabel];
            biaoji = 1;   % flag: the last queried sample went to normalL
        else
            unnormalL       = [unnormalL; newL];
            label_unnormalL = [label_unnormalL; newlabel];
            biaoji = 0;   % flag: the last queried sample went to unnormalL
        end
        UL(pick, :)       = [];
        label_UL(pick, :) = [];
    end

    % number stays two ahead of the count of samples that have been used.
    number = number + 1;
    if number == max_row_UL + 2
        break;   % whole pool consumed without reaching the target accuracy
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%分类结束%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% ------------------------------ Show the results ------------------------------
% Both statements below are left without a semicolon on purpose so that the
% values are printed to the command window.
Accuracy
number = number - 2   % number of queried samples that were actually used
x = 0:number;
plot(x,Accuracy,'-ro')
xlabel('Number of samples selected for active learning (UBS)');
ylabel('Accuracy');
title('Learning curve on pima Indians Diabetes');

% If the last pass still queried a sample, that sample was never used for a
% classification. Give it back to the pool and drop it from the labelled set.
if row_UL > 0
    UL       = temp_UL;         % pool as it was before the last query
    label_UL = temp_label_UL;
    if biaoji ~= 1
        % the last queried sample had been appended to the abnormal set
        unnormalL(end,:)       = [];
        label_unnormalL(end,:) = [];
    else
        % the last queried sample had been appended to the normal set
        normalL(end,:)       = [];
        label_normalL(end,:) = [];
    end
end
% -------------------------------- End of script --------------------------------
%%%%%%%%%%%%%%%%%%%%%%程序结束%%%%%%%%%%%%%%%%%%%%%%%%%   
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -