// svmfenlei.cpp
// SVM classifier: SMO training with an RBF kernel and grid-search cross-validation.
#include "iostream.h"
#include "fstream.h"
#include "ctype.h"
#include "math.h"
#include "stdlib.h"
#include "time.h"
#include "stdio.h"
// ---- Compile-time problem sizes ----
// NOTE: these are macros; in particular `d` is a macro, so the identifier `d`
// cannot be used for anything else in this file.
#define NN 51             // total number of samples held in memory (rows of the data arrays)
#define train_sample 27   // number of leading rows used as the labelled training set
#define predict_sample 24 // presumably the number of rows to predict (NN - train_sample); not referenced below
#define d 11              // number of features per sample (columns of the data arrays)

// ---- File names entered by the user in main() ----
char shuru[100];      // path of the input data file read by setX()
char parameter[100];  // path of the file that receives the trained parameters
char result[100];     // path of the file that receives the prediction results

// ---- Working-set layout ----
int N;                // number of rows of sample[]/target[] currently in use
int end_support_i;    // rows [0, end_support_i) are the ones being trained on
int first_test_i;     // rows [first_test_i, N) are the ones being evaluated

// ---- Model parameters and state ----
float C;                  // upper bound of every multiplier in alph[]
float opti_C;             // best C found by cross_validation()
float kernel_gamma;       // gamma of the RBF kernel, see kernel_func()
float opti_kernel_gamma;  // best gamma found by cross_validation()
float b;                  // threshold subtracted by learned_func()
float tolerance=0.001f;   // tolerance used when checking the KKT conditions
float eps=1e-3f;          // numerical tolerance used inside takeStep()
float alph[train_sample];         // Lagrange multipliers of the training rows
float error_cache[train_sample];  // cached errors of rows whose multiplier is strictly between 0 and C

// Labels of the working set. This array is written for every row up to NN-1
// by main() and read for rows [first_test_i, N) by the evaluation functions,
// so it needs NN elements (it was previously sized train_sample, which made
// those accesses run past the end of the array).
int target[NN];
int or_target[NN];          // labels exactly as read from the input file (+1 / -1)
float dense_points[NN][d];  // raw feature values read from the input file
float points[NN][d];        // feature values after scale()
float sample[NN][d];        // working copy of the features used by the kernel
// ---- Function declarations ----

// SMO optimisation: choosing and updating pairs of multipliers
int examineExample(int i1);
int examineFirstChoice(int i1,float E1);
int examineNonBound(int i1);
int examineBound(int i1);
int takeStep(int i1,int i2);

// Decision function and kernel
float learned_func(int k);
float kernel_func(int i1,int i2);
float dot_product_func(int i1,int i2);

// Data loading and preparation
void setX();
void scale();
void initialize();   // declared only; no definition is visible in this file

// Training, model selection and evaluation
void train_predict();
void cross_validation(int n);
float error_rate();
void total_error_rate();
///////////////////////////////////////////////////////
// Program entry point.
//
// 1. Reads three file names (input data, parameter output, prediction output).
// 2. Loads and scales the data, then runs cross_validation() to pick C and gamma.
// 3. Trains on the first train_sample rows with the selected parameters,
//    evaluates the remaining rows with total_error_rate(), and writes the
//    trained parameters to the parameter file.
//
// Returns 0 on success and 1 when the input is unusable or the parameter file
// cannot be opened.
int main()
{
    // Declared once up front so the loops below compile under both the old
    // and the standard scoping rules for variables declared in a for-init.
    int i,j;
    int n=0;   // number of cross-validation subsets

    cout<<"文件名及其路径,例如e:\\data\\total.txt"<<endl;
    cout<<"请输入进行预测和训练的样本存放的路径"<<endl;
    cin.width(sizeof(shuru));       // never read more than the buffer can hold
    cin>>shuru;
    cout<<"请输入所取的存放训练参数结果的文件名及其路径"<<endl;
    cin.width(sizeof(parameter));
    cin>>parameter;
    cout<<"请输入所取的存放预测结果的文件名及其路径"<<endl;
    cin.width(sizeof(result));
    cin>>result;

    setX();    // load labels and raw features
    scale();   // rescale the features into points[][]

    cout<<"请输入进行交叉验证时所划分的子集的个数n=";
    cin>>n;
    // Keep asking until the value is usable (a single retry could still let
    // an invalid n through to cross_validation()).
    while(!cin.fail()&&((n<=0)||(n>=train_sample)))
    {
        cout<<"交叉验证子集个数不正确,n应该大于0小于训练样本个数"<<endl;
        cout<<"请输入进行交叉验证时划分子集的个数n=";
        cin>>n;
    }
    if(cin.fail())
    {
        cout<<"invalid input for n"<<endl;
        return 1;
    }

    cross_validation(n);   // sets opti_C and opti_kernel_gamma

    // Restore the full data set in its original order. target[] must have
    // room for all NN labels here, because the prediction pass reads the
    // labels of rows first_test_i .. N-1.
    for(i=0;i<NN;i++)
    {
        for(j=0;j<d;j++)
        {
            sample[i][j]=points[i][j];
        }
        target[i]=or_target[i];
    }

    // Train on the training rows with the selected parameters, then predict
    // the remaining rows.
    N=NN;
    C=opti_C;
    kernel_gamma=opti_kernel_gamma;
    end_support_i=train_sample;
    first_test_i=train_sample;
    cout<<"*****************************************************************"<<endl;
    cout<<"opti_C="<<C<<" opti_kernel_gamma="<<kernel_gamma<<endl;
    train_predict();
    total_error_rate();

    // Store the trained parameters.
    ofstream os(parameter);
    if(!os)
    {
        cout<<"can't open the file "<<parameter<<endl;
        return 1;
    }
    os<<"样本数据的维数 d="<<d<<endl;
    os<<"判别函数中的阈值 b="<<b<<endl;
    os<<"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"<<endl;
    os<<"交叉验证的所得惩罚参数的最优结果 C="<<C<<endl;
    os<<"交叉验证所得核函数的最优结果 gamma="<<kernel_gamma<<endl;

    // A row counts as a support vector when its multiplier is positive.
    int n_support_vectors=0;
    for(i=0;i<end_support_i;i++)
        if(alph[i]>0)
            n_support_vectors++;
    os<<"n_support_vectors是支持向量的个数"<<endl;
    os<<"n_support_vectors="<<n_support_vectors<<endl;
    os<<"rate是支持向量在训练样本中所占的比例"<<endl;
    os<<"rate="<<(float)n_support_vectors/first_test_i<<endl;

    // List the non-zero multipliers, five per line. The line break is only
    // considered when a multiplier is actually printed; otherwise every
    // skipped row would add another empty line.
    int k=0;
    os<<"非零拉格朗日乘子的值分别为:"<<endl;
    for(i=0;i<end_support_i;i++)
    {
        if(alph[i]>0)
        {
            if(k%5==0)
            {
                os<<endl;
            }
            os<<"alph["<<i<<"]="<<alph[i]<<" ";
            k++;
        }
    }
    os<<endl;
    return 0;
}
// Trains the classifier on rows [0, end_support_i).
//
// Resets b and the multipliers to zero, then alternates between a sweep over
// every training row and sweeps over the rows whose multiplier is neither 0
// nor C, until a full sweep changes nothing.
void train_predict()
{
    int idx;

    b=0.0;
    for(idx=0;idx<end_support_i;++idx)
        alph[idx]=0.0;

    int changed=0;
    int sweepAll=1;   // the first pass always looks at every row
    do
    {
        changed=0;
        for(idx=0;idx<end_support_i;++idx)
        {
            const bool nonBound=!(alph[idx]==0||alph[idx]==C);
            if(sweepAll||nonBound)
                changed+=examineExample(idx);
        }

        if(sweepAll)
            sweepAll=0;        // after a full sweep, go back to non-bound rows
        else if(changed==0)
            sweepAll=1;        // non-bound rows are stable: re-check everything
    }
    while(changed>0||sweepAll);
}
/////////examineExample程序
//
//
int examineExample(int i1)
{
float y1,alph1,E1,r1;
y1=target[i1];
alph1=alph[i1];
if(alph1>0&&alph1<C)
E1=error_cache[i1];
else
E1=learned_func(i1)-y1;//learned_func为计算输出函数
r1=y1*E1;
//////违反KKT条件的判断
if((r1>tolerance&&alph1>0)||(r1<-tolerance&&alph1<C))
{
/////////////使用三种方法选择第二个乘子
//1:
//2:
//3:
if (examineFirstChoice(i1,E1))//1
{
return 1;
}
if (examineNonBound(i1))//2
{
return 1;
}
if (examineBound(i1))//3
{
return 1;
}
}
///没有进展
return 0;
}
// Strategy 1: among the rows whose multiplier is strictly between 0 and C,
// pick the one whose cached error differs most from E1 and try a step with it.
// Returns 1 if takeStep() made progress, 0 otherwise (including when no such
// row exists).
int examineFirstChoice(int i1,float E1)
{
    int best=-1;
    float bestGap=0.0;

    for(int idx=0;idx<end_support_i;++idx)
    {
        if(!(alph[idx]>0&&alph[idx]<C))
            continue;

        const float gap=fabs(E1-error_cache[idx]);
        if(gap>bestGap)
        {
            bestGap=gap;
            best=idx;
        }
    }

    if(best<0)
        return 0;
    return takeStep(i1,best)?1:0;
}
// Strategy 2: walk over all training rows, starting at a random position, and
// try a step with each row whose multiplier is strictly between 0 and C.
// Returns 1 as soon as takeStep() makes progress, 0 if none does.
int examineNonBound(int i1)
{
    const int start=rand()%end_support_i;
    int offset=0;

    while(offset<end_support_i)
    {
        const int i2=(start+offset)%end_support_i;
        ++offset;

        if(alph[i2]>0.0&&alph[i2]<C&&takeStep(i1,i2))
            return 1;
    }
    return 0;
}
// Strategy 3: walk over all training rows, starting at a random position, and
// try a step with every one of them (no filter on the multiplier value; an
// earlier filter on bound multipliers was disabled by the author).
// Returns 1 as soon as takeStep() makes progress, 0 if none does.
int examineBound(int i1)
{
    const int start=rand()%end_support_i;

    for(int offset=0;offset<end_support_i;++offset)
    {
        const int i2=(start+offset)%end_support_i;
        if(takeStep(i1,i2))
            return 1;
    }
    return 0;
}
// takeStep: jointly re-optimises the two multipliers alph[i1] and alph[i2].
//
// Computes new values a1/a2 for the pair, keeps both inside [0, C], then
// updates the threshold b, the error cache and the alph[] array.
// Returns 1 if the multipliers were changed, 0 if no (sufficient) progress
// was possible for this pair.
int takeStep(int i1,int i2)
{
int y1,y2,s;
float alph1,alph2;// old values of the two multipliers
float a1,a2;// new values of the two multipliers
float E1,E2,L,H,k11,k22,k12,eta,Lobj,Hobj,delta_b;
if(i1==i2) return 0;// never optimise a row against itself
// load the current multipliers and labels of both rows
alph1=alph[i1];
alph2=alph[i2];
y1=target[i1];
y2=target[i2];
// errors: cached for a multiplier strictly between 0 and C, otherwise recomputed
if(alph1>0&&alph1<C)
E1=error_cache[i1];
else
E1=learned_func(i1)-y1;
if(alph2>0&&alph2<C)
E2=error_cache[i2];
else
E2=learned_func(i2)-y2;
s=y1*y2;
// lower bound L and upper bound H for the new alph2
if(y1==y2)
{
float gamma=alph1+alph2;
if(gamma>C)
{
L=gamma-C;
H=C;
}
else
{
L=0;
H=gamma;
}
}
else
{
float gamma=alph1-alph2;
if(gamma>0)
{
L=0;
H=C-gamma;
}
else
{
L=-gamma;
H=C;
}
}
// the interval is a single point: alph2 cannot move
if(L==H) return 0;
// eta is built from the three kernel values of the pair
k11=kernel_func(i1,i1);// kernel_func(int,int) is the kernel function
k22=kernel_func(i2,i2);
k12=kernel_func(i1,i2);
eta=2*k12-k11-k22;
if(eta<0)
{
a2=alph2+y2*(E2-E1)/eta;// new alph2 before clipping
// clip a2 into the feasible interval [L, H]
if(a2<L) a2=L;
if(a2>H) a2=H;
}
else// eta>=0: compare the two interval ends and pick one, or keep alph2
{
float c1=eta/2;
float c2=y2*(E2-E1)-eta*alph2;
Lobj=c1*L*L+c2*L;
Hobj=c1*H*H+c2*H;
if(Lobj>Hobj+eps)// the two values must differ by more than eps
a2=L;
else if(Lobj<Hobj-eps)
a2=H;
else
a2=alph2;// no clear winner: leave alph2 unchanged
}
// give up when the change of alph2 is negligible
if(fabs(a2-alph2)<eps*(a2+alph2+eps))
return 0;
a1=alph1-s*(a2-alph2);// new alph1 that goes with the new alph2
// push a1 back into [0, C] and move a2 by the amount that was cut off
if(a1<0)
{
a2+=s*a1;
a1=0;
}
else if(a1>C)
{
float t=a1-C;
a2+=s*t;
a1=C;
}
// update the threshold b
{
float b1,b2,bnew;
// use the formula of whichever new multiplier lies strictly inside (0, C);
// if neither does, take the average of both candidates
if(a1>0&&a1<C)
bnew=b+E1+y1*(a1-alph1)*k11+y2*(a2-alph2)*k12;
else{
if(a2>0&&a2<C)
bnew=b+E2+y1*(a1-alph1)*k12+y2*(a2-alph2)*k22;
else{
b1=b+E1+y1*(a1-alph1)*k11+y2*(a2-alph2)*k12;
b2=b+E2+y1*(a1-alph1)*k12+y2*(a2-alph2)*k22;
bnew=(b1+b2)/2;
}
}
delta_b=bnew-b;
b=bnew;
}
// update the error cache; alph[] still holds the old values at this point,
// so the filter below uses the multipliers from before this step
{
float t1=y1*(a1-alph1);
float t2=y2*(a2-alph2);
for(int i=0;i<end_support_i;i++)
if(0<alph[i]&&alph[i]<C)
error_cache[i]+=t1*kernel_func(i1,i)+t2*(kernel_func(i2,i))-delta_b;
// the cached errors of the two optimised rows are reset to zero
error_cache[i1]=0.0;
error_cache[i2]=0.0;
}
alph[i1]=a1;// store a1,a2 in the alpha array
alph[i2]=a2;
return 1;// progress was made
}
// learned_func: value of the decision function for row k of sample[].
//
// Sums alph[i]*target[i]*kernel(i,k) over the training rows with a positive
// multiplier and subtracts the threshold b.
float learned_func(int k)
{
    float sum=0.0;
    for(int idx=0;idx<end_support_i;++idx)
    {
        if(!(alph[idx]>0))
            continue;   // rows without a positive multiplier contribute nothing
        sum+=alph[idx]*target[idx]*kernel_func(idx,k);
    }
    return sum-b;
}
// dot_product_func: dot product of rows i1 and i2 of sample[] over all d features.
float dot_product_func(int i1,int i2)
{
    const float *lhs=sample[i1];
    const float *rhs=sample[i2];
    float acc=0.0;
    int col=0;
    while(col<d)
    {
        acc+=lhs[col]*rhs[col];
        ++col;
    }
    return acc;
}
// kernel_func: RBF kernel between rows i1 and i2 of sample[].
//
// The squared distance is assembled from three dot products,
// x1.x1 + x2.x2 - 2*x1.x2, and the result is exp(-distance * kernel_gamma).
float kernel_func(int i1,int i2)
{
    const float cross=dot_product_func(i1,i2)*-2;
    const float norms=dot_product_func(i1,i1)+dot_product_func(i2,i2);
    const float squaredDistance=cross+norms;
    return exp(-squaredDistance*kernel_gamma);
}
// error_rate: fraction of correctly classified rows among rows
// [first_test_i, N). Despite its name the value returned is the accuracy
// (correct / counted), which is what the caller maximises.
//
// A row is only counted when both the decision value and the label have a
// sign; rows with an output of exactly 0 (or a label of 0) are skipped.
// Returns 0 when no row could be counted, instead of dividing 0 by 0 and
// handing a NaN back to the cross-validation average.
float error_rate()
{
    int correct=0;   // true positives + true negatives
    int counted=0;   // every row that fell into one of the four classes

    for(int i=first_test_i;i<N;i++)
    {
        const float out=learned_func(i);
        const bool predictedPositive=out>0;
        const bool predictedNegative=out<0;
        const bool labelPositive=target[i]>0;
        const bool labelNegative=target[i]<0;

        // no decision (0 or NaN output) or no usable label: not counted
        if(!(predictedPositive||predictedNegative))
            continue;
        if(!(labelPositive||labelNegative))
            continue;

        ++counted;
        if(predictedPositive==labelPositive)
            ++correct;
    }

    if(counted==0)
        return 0;
    return (float)correct/(float)counted;
}
//计算样本X[]
void setX()
{
FILE *fp=fopen(shuru,"r");
if(fp == NULL)
{
cout<<"can't open the file "<<shuru<<endl;
exit(1);
}
int i=0,dd=0;
float value;
for(i=0;i<NN;i++)
{
int j=0;
int c=fgetc(fp);
if(c==-1)break;
if(char(c)=='+')
or_target[i]=+1;
if(char(c)=='-')
or_target[i]=-1;
c=fgetc(fp);
while(1)
{
do
{
c=fgetc(fp);
if(c=='\n')goto out2;
}while(isspace(c));
ungetc(c,fp);
fscanf(fp,"%d:%f",&dd,&value);
dense_points[i][j]=value;
j++;
}
out2:continue;
}
fclose(fp);
}
// scale: normalises the data column by column.
//
// For every feature the minimum and maximum over all NN rows of
// dense_points are determined and the values are mapped linearly onto
// [-1, 1] into points[][]. A feature that has the same value in every row
// has no range to map; it is set to 0 for all rows (the plain formula would
// divide by zero and fill the column with NaN).
//
// The scaled data is also written, as "label index:value ..." lines with
// 1-based indices, to the fixed path e:\data\total_heartscale.txt. If that
// file cannot be opened a message is printed and scaling continues.
void scale()
{
    ofstream to("e:\\data\\total_heartscale.txt");
    if(!to)
        cout<<"can't open the file e:\\data\\total_heartscale.txt"<<endl;

    int i,j;
    for(j=0;j<d;j++)
    {
        float hi=dense_points[0][j];
        float lo=dense_points[0][j];
        for(i=1;i<NN;i++)
        {
            if(dense_points[i][j]>hi)
                hi=dense_points[i][j];
            else if(dense_points[i][j]<lo)
                lo=dense_points[i][j];
        }

        const float range=hi-lo;
        for(i=0;i<NN;i++)
        {
            if(range>0)
                points[i][j]=2*(dense_points[i][j]-lo)/range-1;
            else
                points[i][j]=0;   // constant feature
        }
    }

    for(i=0;i<NN;i++)
    {
        to<<or_target[i]<<" ";
        for(j=0;j<d;j++)
            to<<j+1<<":"<<points[i][j]<<" ";
        to<<endl;
    }
}
// Asks for the lower and upper exponent of a search range until the upper
// one is not smaller than the lower one. Exits when the input is unreadable.
static void cv_read_exponent_range(const char *lowPrompt,const char *highPrompt,float &low,float &high)
{
    for(;;)
    {
        cout<<lowPrompt;
        cin>>low;
        cout<<highPrompt;
        cin>>high;
        if(cin.fail())
        {
            cout<<endl<<"invalid numeric input"<<endl;
            exit(1);
        }
        if(!(high<low))
            return;
        cout<<"输入的进行交叉验证上下限错误,请重新输入"<<endl;
    }
}

// Asks for a step width until a value greater than 0 is entered. Exits when
// the input is unreadable.
static float cv_read_positive_step(const char *prompt)
{
    float step=0;
    for(;;)
    {
        cout<<prompt;
        cin>>step;
        if(cin.fail())
        {
            cout<<endl<<"invalid numeric input"<<endl;
            exit(1);
        }
        if(step>0)
            return step;
        cout<<"step must be greater than 0"<<endl;
    }
}

// cross_validation: grid search for C and kernel_gamma on the first
// train_sample rows.
//
// The training rows are split into n subsets of m = train_sample/n rows. For
// every grid point (C = 2^r, gamma = 2^q) and every subset j, the rows are
// copied into sample[]/target[] rotated by k0 + m*(j+1), so that the last m
// rows of the working set form the held-out subset; the model is trained on
// the first N-m rows and scored with error_rate(). The grid point with the
// highest mean score is stored in opti_C / opti_kernel_gamma.
//
// Changes compared with the earlier version:
//  - n <= 0 is rejected instead of dividing by zero;
//  - ranges are re-requested until valid (not just once), and the gamma
//    re-prompt now shows the gamma labels;
//  - a step <= 0, which made the grid loops run forever, is rejected;
//  - the best score starts below every possible score, so the first grid
//    point is selected even when all scores are 0.
void cross_validation(int n)
{
    if(n<=0)
    {
        cout<<"cross_validation: n must be greater than 0"<<endl;
        return;
    }

    int m,k0;
    N=train_sample;
    m=N/n;
    end_support_i=N-m;   // rows [0, N-m) are trained on
    first_test_i=N-m;    // rows [N-m, N) are held out
    k0=rand()%N;         // random rotation offset, fixed for the whole search

    float l1,l2,u1,u2;
    float step1,step2;
    float accuracy=-1;   // best mean score so far

    cv_read_exponent_range("请输入进行交叉验证时C的区间下限2的L次方L1=",
                           "请输入进行交叉验证时C的区间上限2的U次方U1=",l1,u1);
    cv_read_exponent_range("请输入进行交叉验证时gamma的区间下限2的L次方L2=",
                           "请输入进行交叉验证时gamma的区间上限2的U次方U2=",l2,u2);
    cout<<"输入的步长为0.5的整数次幂"<<endl;
    step1=cv_read_positive_step("请输入进行C迭代的步长step1=");
    step2=cv_read_positive_step("请输入进行gamma迭代的步长step2=");

    float r,q;
    for(r=l1;r<=u1;r=r+step1)
    {
        C=pow(2,r);
        for(q=l2;q<=u2;q=q+step2)
        {
            float cv_accuracy=0;
            kernel_gamma=pow(2,q);
            for(int j=0;j<n;j++)
            {
                // Rotate the training rows so that subset j ends up last.
                const int ii=k0+m*(j+1);
                for(int i=0;i<N;i++)
                {
                    const int src=(ii+i)%N;   // ii+i is never negative
                    target[i]=or_target[src];
                    for(int k=0;k<d;k++)
                    {
                        sample[i][k]=points[src][k];
                    }
                }
                cout<<"C="<<C<<" gamma="<<kernel_gamma<<" ";
                train_predict();
                float rate=error_rate();
                cout<<"预测精度"<<rate<<endl;
                cv_accuracy=rate+cv_accuracy;
            }
            cv_accuracy=cv_accuracy/n;
            if(cv_accuracy>accuracy)
            {
                accuracy=cv_accuracy;
                opti_C=C;
                opti_kernel_gamma=kernel_gamma;
            }
        }
    }
}
// total_error_rate: evaluates the trained model on rows [first_test_i, N),
// writes one line per row plus a summary to the file named by result, and
// prints the overall accuracy to the console.
//
// Counters: o_right / o_error are rows with a positive label that were
// predicted positive / negative; w_right / w_error are rows with a negative
// label that were predicted negative / positive.
//
// The decision (+1 / -1) is derived from the sign of the output for every
// row on its own; a row whose output is exactly 0 gets no decision text but
// its line is still terminated. (Previously the decision of the preceding
// row was printed again in that case.) The accuracy is reported as 0 when no
// row could be counted, instead of dividing 0 by 0.
//
// NOTE(review): the per-row text labels a -1 decision "1" and a +1 decision
// "2", while the summary reports the positive-label counters under "1" and
// the negative-label counters under "2". The strings are kept as they were;
// confirm which naming is intended.
void total_error_rate()
{
    ofstream to(result);
    if(!to)
        cout<<"can't open the file "<<result<<endl;   // the console summary is still produced

    int o_right=0,w_right=0,w_error=0,o_error=0;
    for(int i=first_test_i;i<N;i++)
    {
        const float temp=learned_func(i);

        int tempp=0;   // decision for this row: +1, -1, or 0 when undecided
        if(temp>0)
            tempp=+1;
        else if(temp<0)
            tempp=-1;

        if(tempp>0&&target[i]>0)
            o_right++;
        else if(tempp>0&&target[i]<0)
            w_error++;
        else if(tempp<0&&target[i]>0)
            o_error++;
        else if(tempp<0&&target[i]<0)
            w_right++;

        to<<"进行预测的第"<<i-first_test_i+1<<"个样本"<<" 目标值 "<<target[i]<<" 预测的实际输出值"<<temp;
        if(tempp==-1)
            to<<" 判别结果 "<<tempp<<" 为1"<<endl;
        else if(tempp==1)
            to<<" 判别结果 "<<tempp<<" 为2"<<endl;
        else
            to<<endl;
    }

    // overall accuracy on the predicted rows
    const int counted=w_right+o_right+w_error+o_error;
    float total_q=0;
    if(counted>0)
        total_q=(float)(o_right+w_right)/(float)counted;

    cout<<"预测精度总rate="<<total_q<<endl;
    to<<"---------------测试结果-----------------"<<endl;
    to<<"1预测正确个数为:"<<o_right<<"个"<<endl;
    to<<"1预测错误个数为:"<<o_error<<"个"<<endl;
    to<<"2预测正确个数为:"<<w_right<<"个"<<endl;
    to<<"2预测错误个数为:"<<w_error<<"个"<<endl<<endl;
    to<<"预测的总精度为:"<<total_q<<endl;
}
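/*
 * Residue of the web page this file was copied from (keyboard-shortcut help
 * of the code viewer); not part of the program:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */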