📄 svm.cpp

📁 一个计算线性支持向量机的matlab源代码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
	{		alpha2[i] = alpha2[i+l] = min(sum,C);		sum -= alpha2[i];		linear_term[i] = - prob->y[i];		y[i] = 1;		linear_term[i+l] = prob->y[i];		y[i+l] = -1;	}	Solver_NU s;	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,		alpha2, C, C, param->eps, si, param->shrinking);	info("epsilon = %f\n",-si->r);	for(i=0;i<l;i++)		alpha[i] = alpha2[i] - alpha2[i+l];	delete[] alpha2;	delete[] linear_term;	delete[] y;}//// decision_function//struct decision_function{	double *alpha;	double rho;	};decision_function svm_train_one(	const svm_problem *prob, const svm_parameter *param,	double Cp, double Cn){	double *alpha = Malloc(double,prob->l);	Solver::SolutionInfo si;	switch(param->svm_type)	{		case C_SVC:			solve_c_svc(prob,param,alpha,&si,Cp,Cn);			break;		case NU_SVC:			solve_nu_svc(prob,param,alpha,&si);			break;		case ONE_CLASS:			solve_one_class(prob,param,alpha,&si);			break;		case EPSILON_SVR:			solve_epsilon_svr(prob,param,alpha,&si);			break;		case NU_SVR:			solve_nu_svr(prob,param,alpha,&si);			break;	}	info("obj = %f, rho = %f\n",si.obj,si.rho);	// output SVs	int nSV = 0;	int nBSV = 0;	for(int i=0;i<prob->l;i++)	{		if(fabs(alpha[i]) > 0)		{			++nSV;			if(prob->y[i] > 0)			{				if(fabs(alpha[i]) >= si.upper_bound_p)					++nBSV;			}			else			{				if(fabs(alpha[i]) >= si.upper_bound_n)					++nBSV;			}		}	}	info("nSV = %d, nBSV = %d\n",nSV,nBSV);	decision_function f;	f.alpha = alpha;	f.rho = si.rho;	return f;}//// svm_model//struct svm_model{	svm_parameter param;	// parameter	int nr_class;		// number of classes, = 2 in regression/one class svm	int l;			// total #SV	svm_node **SV;		// SVs (SV[l])	double **sv_coef;	// coefficients for SVs in decision functions (sv_coef[n-1][l])	double *rho;		// constants in decision functions (rho[n*(n-1)/2])	double *probA;          // pariwise probability information	double *probB;	// for classification only	int *label;		// label of each class (label[n])	int *nSV;		// number of SVs for each class (nSV[n])				// nSV[0] + nSV[1] + ... + nSV[n-1] = l	// XXX	int free_sv;		// 1 if svm_model is created by svm_load_model				// 0 if svm_model is created by svm_train};// Platt's binary SVM Probablistic Output: an improvement from Lin et al.void sigmoid_train(	int l, const double *dec_values, const double *labels, 	double& A, double& B){	double prior1=0, prior0 = 0;	int i;	for (i=0;i<l;i++)		if (labels[i] > 0) prior1+=1;		else prior0+=1;		int max_iter=100; 	// Maximal number of iterations	double min_step=1e-10;	// Minimal step taken in line search	double sigma=1e-3;	// For numerically strict PD of Hessian	double eps=1e-5;	double hiTarget=(prior1+1.0)/(prior1+2.0);	double loTarget=1/(prior0+2.0);	double *t=Malloc(double,l);	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;	double newA,newB,newf,d1,d2;	int iter; 		// Initial Point and Initial Fun Value	A=0.0; B=log((prior0+1.0)/(prior1+1.0));	double fval = 0.0;	for (i=0;i<l;i++)	{		if (labels[i]>0) t[i]=hiTarget;		else t[i]=loTarget;		fApB = dec_values[i]*A+B;		if (fApB>=0)			fval += t[i]*fApB + log(1+exp(-fApB));		else			fval += (t[i] - 1)*fApB +log(1+exp(fApB));	}	for (iter=0;iter<max_iter;iter++)	{		// Update Gradient and Hessian (use H' = H + sigma I)		h11=sigma; // numerically ensures strict PD		h22=sigma;		h21=0.0;g1=0.0;g2=0.0;		for (i=0;i<l;i++)		{			fApB = dec_values[i]*A+B;			if (fApB >= 0)			{				p=exp(-fApB)/(1.0+exp(-fApB));				q=1.0/(1.0+exp(-fApB));			}			else			{				p=1.0/(1.0+exp(fApB));				q=exp(fApB)/(1.0+exp(fApB));			}			d2=p*q;			h11+=dec_values[i]*dec_values[i]*d2;			h22+=d2;			h21+=dec_values[i]*d2;			d1=t[i]-p;			g1+=dec_values[i]*d1;			g2+=d1;		}		// Stopping Criteria		if (fabs(g1)<eps && fabs(g2)<eps)			break;		// Finding Newton direction: -inv(H') * g		det=h11*h22-h21*h21;		dA=-(h22*g1 - h21 * g2) / det;		dB=-(-h21*g1+ h11 * g2) / det;		gd=g1*dA+g2*dB;		stepsize = 1; 		// Line Search		while (stepsize >= min_step)		{			newA = A + stepsize * dA;			newB = B + stepsize * dB;			// New function value			newf = 0.0;			for (i=0;i<l;i++)			{				fApB = dec_values[i]*newA+newB;				if (fApB >= 0)					newf += t[i]*fApB + log(1+exp(-fApB));				else					newf += (t[i] - 1)*fApB +log(1+exp(fApB));			}			// Check sufficient decrease			if (newf<fval+0.0001*stepsize*gd)			{				A=newA;B=newB;fval=newf;				break;			}			else				stepsize = stepsize / 2.0;		}		if (stepsize < min_step)		{			info("Line search fails in two-class probability estimates\n");			break;		}	}	if (iter>=max_iter)		info("Reaching maximal iterations in two-class probability estimates\n");	free(t);}double sigmoid_predict(double decision_value, double A, double B){	double fApB = decision_value*A+B;	if (fApB >= 0)		return exp(-fApB)/(1.0+exp(-fApB));	else		return 1.0/(1+exp(fApB)) ;}// Method 2 from the multiclass_prob paper by Wu, Lin, and Wengvoid multiclass_probability(int k, double **r, double *p){	int t,j;	int iter = 0, max_iter=100;	double **Q=Malloc(double *,k);	double *Qp=Malloc(double,k);	double pQp, eps=0.005/k;		for (t=0;t<k;t++)	{		p[t]=1.0/k;  // Valid if k = 1		Q[t]=Malloc(double,k);		Q[t][t]=0;		for (j=0;j<t;j++)		{			Q[t][t]+=r[j][t]*r[j][t];			Q[t][j]=Q[j][t];		}		for (j=t+1;j<k;j++)		{			Q[t][t]+=r[j][t]*r[j][t];			Q[t][j]=-r[j][t]*r[t][j];		}	}	for (iter=0;iter<max_iter;iter++)	{		// stopping condition, recalculate QP,pQP for numerical accuracy		pQp=0;		for (t=0;t<k;t++)		{			Qp[t]=0;			for (j=0;j<k;j++)				Qp[t]+=Q[t][j]*p[j];			pQp+=p[t]*Qp[t];		}		double max_error=0;		for (t=0;t<k;t++)		{			double error=fabs(Qp[t]-pQp);			if (error>max_error)				max_error=error;		}		if (max_error<eps) break;				for (t=0;t<k;t++)		{			double diff=(-Qp[t]+pQp)/Q[t][t];			p[t]+=diff;			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);			for (j=0;j<k;j++)			{				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);				p[j]/=(1+diff);			}		}	}	if (iter>=max_iter)		info("Exceeds max_iter in multiclass_prob\n");	for(t=0;t<k;t++) free(Q[t]);	free(Q);	free(Qp);}// Cross-validation decision values for probability estimatesvoid svm_binary_svc_probability(	const svm_problem *prob, const svm_parameter *param,	double Cp, double Cn, double& probA, double& probB){	int i;	int nr_fold = 5;	int *perm = Malloc(int,prob->l);	double *dec_values = Malloc(double,prob->l);	// random shuffle	for(i=0;i<prob->l;i++) perm[i]=i;	for(i=0;i<prob->l;i++)	{		int j = i+rand()%(prob->l-i);		swap(perm[i],perm[j]);	}	for(i=0;i<nr_fold;i++)	{		int begin = i*prob->l/nr_fold;		int end = (i+1)*prob->l/nr_fold;		int j,k;		struct svm_problem subprob;		subprob.l = prob->l-(end-begin);		subprob.x = Malloc(struct svm_node*,subprob.l);		subprob.y = Malloc(double,subprob.l);					k=0;		for(j=0;j<begin;j++)		{			subprob.x[k] = prob->x[perm[j]];			subprob.y[k] = prob->y[perm[j]];			++k;		}		for(j=end;j<prob->l;j++)		{			subprob.x[k] = prob->x[perm[j]];			subprob.y[k] = prob->y[perm[j]];			++k;		}		int p_count=0,n_count=0;		for(j=0;j<k;j++)			if(subprob.y[j]>0)				p_count++;			else				n_count++;		if(p_count==0 && n_count==0)			for(j=begin;j<end;j++)				dec_values[perm[j]] = 0;		else if(p_count > 0 && n_count == 0)			for(j=begin;j<end;j++)				dec_values[perm[j]] = 1;		else if(p_count == 0 && n_count > 0)			for(j=begin;j<end;j++)				dec_values[perm[j]] = -1;		else		{			svm_parameter subparam = *param;			subparam.probability=0;			subparam.C=1.0;			subparam.nr_weight=2;			subparam.weight_label = Malloc(int,2);			subparam.weight = Malloc(double,2);			subparam.weight_label[0]=+1;			subparam.weight_label[1]=-1;			subparam.weight[0]=Cp;			subparam.weight[1]=Cn;			struct svm_model *submodel = svm_train(&subprob,&subparam);			for(j=begin;j<end;j++)			{				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 				// ensure +1 -1 order; reason not using CV subroutine				dec_values[perm[j]] *= submodel->label[0];			}					svm_destroy_model(submodel);			svm_destroy_param(&subparam);			free(subprob.x);			free(subprob.y);		}	}			sigmoid_train(prob->l,dec_values,prob->y,probA,probB);	free(dec_values);	free(perm);}// Return parameter of a Laplace distribution double svm_svr_probability(	const svm_problem *prob, const svm_parameter *param){	int i;	int nr_fold = 5;	double *ymv = Malloc(double,prob->l);	double mae = 0;	svm_parameter newparam = *param;	newparam.probability = 0;	svm_cross_validation(prob,&newparam,nr_fold,ymv);	for(i=0;i<prob->l;i++)	{		ymv[i]=prob->y[i]-ymv[i];		mae += fabs(ymv[i]);	}			mae /= prob->l;	double std=sqrt(2*mae*mae);	int count=0;	mae=0;	for(i=0;i<prob->l;i++)	        if (fabs(ymv[i]) > 5*std)                         count=count+1;		else 		        mae+=fabs(ymv[i]);	mae /= (prob->l-count);	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);	free(ymv);	return mae;}// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data// perm, length l, must be allocated before calling this subroutinevoid svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm){	int l = prob->l;	int max_nr_class = 16;	int nr_class = 0;	int *label = Malloc(int,max_nr_class);	int *count = Malloc(int,max_nr_class);	int *data_label = Malloc(int,l);		int i;	for(i=0;i<l;i++)	{		int this_label = (int)prob->y[i];		int j;		for(j=0;j<nr_class;j++)		{			if(this_label == label[j])			{				++count[j];				break;			}		}		data_label[i] = j;		if(j == nr_class)		{			if(nr_class == max_nr_class)			{				max_nr_class *= 2;				label = (int *)realloc(label,max_nr_class*sizeof(int));				count = (int *)realloc(count,max_nr_class*sizeof(int));			}			label[nr_class] = this_label;			count[nr_class] = 1;			++nr_class;		}	}	int *start = Malloc(int,nr_class);	start[0] = 0;	for(i=1;i<nr_class;i++)		start[i] = start[i-1]+count[i-1];	for(i=0;i<l;i++)	{		perm[start[data_label[i]]] = i;		++start[data_label[i]];	}	start[0] = 0;	for(i=1;i<nr_class;i++)		start[i] = start[i-1]+count[i-1];	*nr_class_ret = nr_class;	*label_ret = label;	*start_ret = start;	*count_ret = count;	free(data_label);}//// Interface functions//svm_model *svm_train(const svm_problem *prob, const svm_parameter *param){	svm_model *model = Malloc(svm_model,1);	model->param = *param;	model->free_sv = 0;	// XXX	if(param->svm_type == ONE_CLASS ||	   param->svm_type == EPSILON_SVR ||	   param->svm_type == NU_SVR)	{		// regression or one-class-svm		model->nr_class = 2;		model->label = NULL;		model->nSV = NULL;		model->probA = NULL; model->probB = NULL;		model->sv_coef = Malloc(double *,1);		if(param->probability && 		   (param->svm_type == EPSILON_SVR ||		    param->svm_type == NU_SVR))		{			model->probA = Malloc(double,1);			model->probA[0] = svm_svr_probability(prob,param);		}		decision_function f = svm_train_one(prob,param,0,0);		model->rho = Malloc(double,1);		model->rho[0] = f.rho;		int nSV = 0;		int i;		for(i=0;i<prob->l;i++)			if(fabs(f.alpha[i]) > 0) ++nSV;		model->l = nSV;		model->SV = Malloc(svm_node *,nSV);		model->sv_coef[0] = Malloc(double,nSV);		int j = 0;		for(i=0;i<prob->l;i++)			if(fabs(f.alpha[i]) > 0)			{				model->SV[j] = prob->x[i];				model->sv_coef[0][j] = f.alpha[i];				++j;			}				free(f.alpha);	}	else	{		// classification		int l = prob->l;		int nr_class;		int *label = NULL;		int *start = NULL;		int *count = NULL;		int *perm = Malloc(int,l);		// group training data of the same class		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);				svm_node **x = Malloc(svm_node *,l);		int i;		for(i=0;i<l;i++)			x[i] = prob->x[perm[i]];		// calculate weighted C		double *weighted_C = Malloc(double, nr_class);		for(i=0;i<nr_class;i++)			weighted_C[i] = param->C;		for(i=0;i<param->nr_weight;i++)		{				int j;			for(j=0;j<nr_class;j++)				if(param->weight_label[i] == label[j])					break;			if(j == nr_class)				fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);			else				weighted_C[j] *= param->weight[i];		}		// train k*(k-1)/2 models				bool *nonzero = Malloc(bool,l);		for(i=0;i<l;i++)			nonzero[i] = false;		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);		double *probA=NULL,*probB=NULL;		if (param->probability)		{			probA=Malloc(double,nr_class*(nr_class-1)/2);			probB=Malloc(double,nr_class*(nr_class-1)/2);		}		int p = 0;		for(i=0;i<nr_class;i++)			for(int j=i+1;j<nr_class;j++)			{				svm_problem sub_prob;				int si = start[i], sj = start[j];				int ci = count[i], cj = count[j];				sub_prob.l = ci+cj;				sub_prob.x = Malloc(svm_node *,sub_prob.l);				sub_prob.y = Malloc(double,sub_prob.l);				int k;				for(k=0;k<ci;k++)				{					sub_prob.x[k] = x[si+k];					sub_prob.y[k] = +1;				}				for(k=0;k<cj;k++)				{					sub_prob.x[ci+k] = x[sj+k];					sub_prob.y[ci+k] = -1;				}				if(param->probability)					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);				for(k=0;k<ci;k++)					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)						nonzero[si+k] = true;				for(k=0;k<cj;k++)					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)						nonzero[sj+k] = true;				free(sub_prob.x);				free(sub_prob.y);				++p;			}		// build output		model->nr_class = nr_class;				model->label = Malloc(int,nr_class);		for(i=0;i<nr_class;i++)			model->label[i] = label[i];				model->rho = Malloc(double,nr_class*(nr_class-1)/2);		for(i=0;i<nr_class*(nr_class-1)/2;i++)			model->rho[i] = f[i].rho;		if(param->probability)		{			model->probA = Malloc(double,nr_class*(nr_class-1)/2);			model->probB = Malloc(double,nr_class*(nr_class-1)/2);			for(i=0;i<nr_class*(nr_class-1)/2;i++)			{				model->probA[i] = probA[i];				model->probB[i] = probB[i];			}		}		else		{			model->probA=NULL;			model->probB=NULL;		}		int total_sv = 0;		int *nz_count = Malloc(int,nr_class);		model->nSV = Malloc(int,nr_class);		for(i=0;i<nr_class;i++)		{			int nSV = 0;			for(int j=0;j<count[i];j++)				if(nonzero[start[i]+j])				{						++nSV;					++total_sv;				}			model->nSV[i] = nSV;			nz_count[i] = nSV;		}				info("Total nSV = %d\n",total_sv);		model->l = total_sv;		model->SV = Malloc(svm_node *,total_sv);		p = 0;		for(i=0;i<l;i++)			if(nonzero[i]) model->SV[p++] = x[i];		int *nz_start = Malloc(int,nr_class);		nz_start[0] = 0;		for(i=1;i<nr_class;i++)			nz_start[i] = nz_start[i-1]+nz_count[i-1];		model->sv_coef = Malloc(double *,nr_class-1);		for(i=0;i<nr_class-1;i++)			model->sv_coef[i] = Malloc(double,total_sv);		p = 0;		for(i=0;i<nr_class;i++)			for(int j=i+1;j<nr_class;j++)			{				// classifier (i,j): coefficients with				// i are in sv_coef[j-1][nz_start[i]...],				// j are in sv_coef[i][nz_start[j]...]				int si = start[i];				int sj = start[j];				int ci = count[i];				int cj = count[j];								int q = nz_start[i];				int k;				for(k=0;k<ci;k++)					if(nonzero[si+k])						model->sv_coef[j-1][q++] = f[p].alpha[k];				q = nz_start[j];				for(k=0;k<cj;k++)					if(nonzero[sj+k])						model->sv_coef[i][q++] = f[p].alpha[ci+k];				++p;			}				free(label);		free(probA);		free(probB);		free(count);		free(perm);		free(start);		free(x);		free(weighted_C);		free(nonzero);		for(i=0;i<nr_class*(nr_class-1)/2;i++)			free(f[i].alpha);		free(f);		free(nz_count);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -