📄 svm_hideo.c
字号:
/***********************************************************************//* *//* svm_hideo.c *//* *//* The Hildreth and D'Espo solver specialized for SVMs. *//* *//* Author: Thorsten Joachims *//* Date: 01.11.00 *//* *//* Copyright (c) 2000 Universitaet Dortmund - All rights reserved *//* *//* This software is available for non-commercial use only. It must *//* not be modified and distributed without prior permission of the *//* author. The author is not responsible for implications from the *//* use of this software. *//* *//***********************************************************************/# include <math.h># include "svm_common.h"/* Common Block Declarations */long verbosity;# define PRIMAL_OPTIMAL 1# define DUAL_OPTIMAL 2# define MAXITER_EXCEEDED 3# define NAN_SOLUTION 4# define ONLY_ONE_VARIABLE 5# define LARGEROUND 0# define SMALLROUND 1/* /////////////////////////////////////////////////////////////// */# define DEF_PRECISION 1E-5# define DEF_MAX_ITERATIONS 200# define DEF_LINDEP_SENSITIVITY 1E-8# define EPSILON_HIDEO 1E-20# define EPSILON_EQ 1E-5double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);double *primal=0,*dual=0;long precision_violations=0;double opt_precision=DEF_PRECISION;long maxiter=DEF_MAX_ITERATIONS;double lindep_sensitivity=DEF_LINDEP_SENSITIVITY;double *buffer;long *nonoptimal;long smallroundcount=0;/* /////////////////////////////////////////////////////////////// */void *my_malloc();int optimize_hildreth_despo(long,long,double,double,double,long,long,long,double,double *, double *,double *,double *,double *,double *, double *,double *,double *,long *,double *);int solve_dual(long,long,double,double,long,double *,double *,double *, double *,double *,double *,double *,double *,double *, double *,double *,double *,double *,long);void linvert_matrix(double *, long, double *, double, long *);void lprint_matrix(double *, long);void ladd_matrix(double *, long, double);void lcopy_matrix(double *, long, double *);void lswitch_rows_matrix(double *, long, long, long);void lswitchrk_matrix(double *, long, long, long);double calculate_qp_objective(long, double *, double *, double *);double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm)QP *qp;double *epsilon_crit;long nx; /* Maximum number of variables in QP */double *threshold; LEARN_PARM *learn_parm;/* start the optimizer and return the optimal values *//* The HIDEO optimizer does not necessarily fully solve the problem. *//* Since it requires a strictly positive definite hessian, the solution *//* is restricted to a linear independent subset in case the matrix is *//* only semi-definite. */{ long i,j; int result; double eq; if(!primal) { /* allocate memory at first call */ primal=(double *)my_malloc(sizeof(double)*nx); dual=(double *)my_malloc(sizeof(double)*((nx+1)*2)); nonoptimal=(long *)my_malloc(sizeof(long)*(nx)); buffer=(double *)my_malloc(sizeof(double)*((nx+1)*2*(nx+1)*2+ nx*nx+2*(nx+1)*2+2*nx+1+2*nx+ nx+nx+nx*nx)); (*threshold)=0; for(i=0;i<nx;i++) { primal[i]=0; } } if(verbosity>=4) { /* really verbose */ printf("\n\n"); eq=qp->opt_ce0[0]; for(i=0;i<qp->opt_n;i++) { eq+=qp->opt_xinit[i]*qp->opt_ce[i]; printf("%f: ",qp->opt_g0[i]); for(j=0;j<qp->opt_n;j++) { printf("%f ",qp->opt_g[i*qp->opt_n+j]); } printf(": a=%.30f",qp->opt_xinit[i]); printf(": y=%f\n",qp->opt_ce[i]); } printf("eq-constraint=%.30f opt_ce0=%f\n",eq,qp->opt_ce0[0]); printf("upper_bound=%.30f\n",qp->opt_up[0]); } result=optimize_hildreth_despo(qp->opt_n,qp->opt_m, opt_precision,(*epsilon_crit), learn_parm->epsilon_a,maxiter, /* (long)PRIMAL_OPTIMAL, */ (long)0, (long)0, lindep_sensitivity, qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0, qp->opt_low,qp->opt_up,primal,qp->opt_xinit, dual,nonoptimal,buffer); if(verbosity>=3) { printf("return(%d)...",result); } if(learn_parm->totwords < learn_parm->svm_maxqpsize) { /* larger working sets will be linear dependent anyway */ learn_parm->svm_maxqpsize=maxl(learn_parm->totwords,(long)2); } if(result == NAN_SOLUTION) { lindep_sensitivity*=2; /* throw out linear dependent examples more */ /* generously */ if(learn_parm->svm_maxqpsize>2) { learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */ } precision_violations++; } /* take one round of only two variable to get unstuck */ if(result != PRIMAL_OPTIMAL) { smallroundcount++; result=optimize_hildreth_despo(qp->opt_n,qp->opt_m, opt_precision,(*epsilon_crit), learn_parm->epsilon_a,(long)maxiter, (long)PRIMAL_OPTIMAL,(long)SMALLROUND, lindep_sensitivity, qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0, qp->opt_low,qp->opt_up,primal,qp->opt_xinit, dual,nonoptimal,buffer); if(verbosity>=3) { printf("return_srd(%d)...",result); } if(result != PRIMAL_OPTIMAL) { if(result != ONLY_ONE_VARIABLE) precision_violations++; if(result == MAXITER_EXCEEDED) maxiter+=100; if(result == NAN_SOLUTION) { lindep_sensitivity*=2; /* throw out linear dependent examples more */ /* generously */ /* results not valid, so return inital values */ for(i=0;i<qp->opt_n;i++) { primal[i]=qp->opt_xinit[i]; } } } } if(precision_violations > 50) { precision_violations=0; (*epsilon_crit)*=10.0; if(verbosity>=1) { printf("\nWARNING: Relaxing epsilon on KT-Conditions (%f).\n", (*epsilon_crit)); } } if(verbosity>=4) { /* really verbose */ printf("\n\n"); eq=qp->opt_ce0[0]; for(i=0;i<qp->opt_n;i++) { eq+=primal[i]*qp->opt_ce[i]; printf("%f: ",qp->opt_g0[i]); for(j=0;j<qp->opt_n;j++) { printf("%f ",qp->opt_g[i*qp->opt_n+j]); } printf(": a=%.30f",primal[i]); printf(": nonopti=%ld",nonoptimal[i]); printf(": y=%f\n",qp->opt_ce[i]); } printf("eq-constraint=%.30f\n",eq); printf(" smallroundcount=%ld ",smallroundcount); } if((qp->opt_m>0) && (result != NAN_SOLUTION)) (*threshold)=dual[1]-dual[0]; else (*threshold)=0; return(primal);}int optimize_hildreth_despo(n,m,precision,epsilon_crit,epsilon_a,maxiter,goal, smallround,lindep_sensitivity,g,g0,ce,ce0,low,up, primal,init,dual,lin_dependent,buffer) long n; /* number of variables */ long m; /* number of linear equality constraints [0,1] */ double precision; /* solve at least to this dual precision */ double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */ double epsilon_a; /* precision of alphas at bounds */ long maxiter; /* stop after this many iterations */ long goal; /* keep going until goal fulfilled */ long smallround; /* use only two variables of steepest descent */ double lindep_sensitivity; /* epsilon for detecting linear dependent ex */ double *g; /* hessian of objective */ double *g0; /* linear part of objective */ double *ce,*ce0; /* linear equality constraints */ double *low,*up; /* box constraints */ double *primal; /* primal variables */ double *init; /* initial values of primal */ double *dual; /* dual variables */ long *lin_dependent; double *buffer;{ long i,j,k,from,to,n_indep,changed; double sum,bmin=0,bmax=0; double *d,*d0,*ig,*dual_old,*temp,*start; double *g0_new,*g_new,*ce_new,*ce0_new,*low_new,*up_new; double add,t; int result; double obj_before,obj_after; long b1,b2; g0_new=&(buffer[0]); /* claim regions of buffer */ d=&(buffer[n]); d0=&(buffer[n+(n+m)*2*(n+m)*2]); ce_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2]); ce0_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n]); ig=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m]); dual_old=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n]); low_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2]); up_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n]); start=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n]); g_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n]); temp=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n+n*n]); b1=-1; b2=-1; for(i=0;i<n;i++) { /* get variables with steepest feasible descent */ sum=g0[i]; for(j=0;j<n;j++) sum+=init[j]*g[i*n+j]; sum=sum*ce[i]; if(((b1==-1) || (sum<bmin)) && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]<0.0))) && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]>0.0))) ) { bmin=sum; b1=i; } if(((b2==-1) || (sum>bmax)) && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]>0.0))) && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]<0.0))) ) { bmax=sum; b2=i; } } /* in case of unbiased hyperplane, the previous projection on */ /* equality constraint can lead to b1 or b2 being -1. */ if((b1 == -1) || (b2 == -1)) { b1=0; b2=n-1; } for(i=0;i<n;i++) { start[i]=init[i]; } /* in case both examples are equal */ add=0; changed=0; if((g[b1*n+b2] == g[b1*n+b1]) && (g[b1*n+b2] == g[b2*n+b2])) { if(ce[b1] == ce[b2]) { /* distribute evenly */ changed=1; start[b1]=(init[b1]+init[b2])/2.0; start[b2]=(init[b1]+init[b2])/2.0; if(start[b2] > up[b2]) { t=start[b2]-up[b2]; start[b2]=up[b2]; start[b1]+=t; } if(start[b1] > up[b1]) { t=start[b1]-up[b1]; start[b1]=up[b1]; start[b2]+=t; } } } if((-g[b1*n+b2] == g[b1*n+b1]) && (-g[b1*n+b2] == g[b2*n+b2])) { if(ce[b1] != ce[b2]) { /* set to upper bound */ changed=1; t=up[b1]-init[b1]; if((up[b2]-init[b2]) < t) { t=up[b2]-init[b2]; } start[b1]=init[b1]+t; start[b2]=init[b2]+t; } } /* if we have a biased hyperplane, then adding a constant to the */ /* hessian does not change the solution. So that is done for examples */ /* with zero diagonal entry, since HIDEO cannot handle them. */ else if((fabs(g[b1*n+b1]) < lindep_sensitivity) || (fabs(g[b2*n+b2]) < lindep_sensitivity)) { add+=0.093274; } /* in case both examples are linear dependent */ else if(g[b1*n+b2] != 0 && g[b2*n+b2] != 0 && (fabs(g[b1*n+b1]/g[b1*n+b2] - g[b1*n+b2]/g[b2*n+b2]) < lindep_sensitivity)) { add+=0.078274; }/* printf("b1=%ld,b2=%ld\n",b1,b2); */ lcopy_matrix(g,n,d); if((m==1) && (add>0.0)) { for(j=0;j<n;j++) { for(k=0;k<n;k++) { d[j*n+k]+=add*ce[j]*ce[k]; } } } else { add=0.0; } if(n>2) { /* switch, so that variables are better mixed */ lswitchrk_matrix(d,n,b1,(long)0); if(b2 == 0) lswitchrk_matrix(d,n,b1,(long)1); else lswitchrk_matrix(d,n,b2,(long)1); } if(smallround == SMALLROUND) { for(i=2;i<n;i++) { lin_dependent[i]=1; } lin_dependent[0]=0; lin_dependent[1]=0; } else { for(i=0;i<n;i++) { lin_dependent[i]=0; } } linvert_matrix(d,n,ig,lindep_sensitivity,lin_dependent); if(n>2) { /* now switch back */ if(b2 == 0) { lswitchrk_matrix(ig,n,b1,(long)1); i=lin_dependent[1]; lin_dependent[1]=lin_dependent[b1]; lin_dependent[b1]=i; } else { lswitchrk_matrix(ig,n,b2,(long)1); i=lin_dependent[1]; lin_dependent[1]=lin_dependent[b2]; lin_dependent[b2]=i; } lswitchrk_matrix(ig,n,b1,(long)0); i=lin_dependent[0]; lin_dependent[0]=lin_dependent[b1]; lin_dependent[b1]=i; } /* lprint_matrix(d,n); */ /* lprint_matrix(ig,n); */ lcopy_matrix(g,n,g_new); /* restore g_new matrix */ if(add>0) for(j=0;j<n;j++) { for(k=0;k<n;k++) { g_new[j*n+k]+=add*ce[j]*ce[k]; } } for(i=0;i<n;i++) { /* fix linear dependent vectors */ g0_new[i]=g0[i]+add*ce0[0]*ce[i]; } if(m>0) ce0_new[0]=-ce0[0]; for(i=0;i<n;i++) { /* fix linear dependent vectors */ if(lin_dependent[i]) { for(j=0;j<n;j++) { if(!lin_dependent[j]) { g0_new[j]+=start[i]*g_new[i*n+j]; } } if(m>0) ce0_new[0]-=(start[i]*ce[i]); } } from=0; /* remove linear dependent vectors */ to=0; n_indep=0; for(i=0;i<n;i++) { if(!lin_dependent[i]) { g0_new[n_indep]=g0_new[i]; ce_new[n_indep]=ce[i]; low_new[n_indep]=low[i]; up_new[n_indep]=up[i]; primal[n_indep]=start[i]; n_indep++; } for(j=0;j<n;j++) { if((!lin_dependent[i]) && (!lin_dependent[j])) { ig[to]=ig[from]; g_new[to]=g_new[from]; to++; } from++; } } if(verbosity>=3) { printf("real_qp_size(%ld)...",n_indep); } /* cannot optimize with only one variable */ if((n_indep<=1) && (m>0) && (!changed)) { for(i=n-1;i>=0;i--) { primal[i]=init[i]; } return((int)ONLY_ONE_VARIABLE); } result=solve_dual(n_indep,m,precision,epsilon_crit,maxiter,g_new,g0_new, ce_new,ce0_new,low_new,up_new,primal,d,d0,ig, dual,dual_old,temp,goal);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -