📄 svm_hideo.h
字号:
/***********************************************************************/
/* */
/* svm_hideo.c */
/* */
/* The Hildreth and D'Espo solver specialized for SVMs. */
/* */
/* Author: Thorsten Joachims */
/* Date: 01.11.00 */
/* */
/* Copyright (c) 2000 Universitaet Dortmund - All rights reserved */
/* */
/* This software is available for non-commercial use only. It must */
/* not be modified and distributed without prior permission of the */
/* author. The author is not responsible for implications from the */
/* use of this software. */
/* */
/***********************************************************************/
# include <math.h>
/* Common Block Declarations */
# define PRIMAL_OPTIMAL 1
# define DUAL_OPTIMAL 2
# define MAXITER_EXCEEDED 3
# define NAN_SOLUTION 4
# define ONLY_ONE_VARIABLE 5
# define LARGEROUND 0
# define SMALLROUND 1
/* /////////////////////////////////////////////////////////////// */
# define DEF_PRECISION 1E-5
# define DEF_MAX_ITERATIONS 200
# define DEF_LINDEP_SENSITIVITY 1E-8
# define EPSILON_HIDEO 1E-20
# define EPSILON_EQ 1E-5
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
double *primal=0,*dual=0;
long precision_violations=0;
double opt_precision=DEF_PRECISION;
long maxiter=DEF_MAX_ITERATIONS;
double lindep_sensitivity=DEF_LINDEP_SENSITIVITY;
double *buffer;
long *nonoptimal;
long smallroundcount=0;
static char temstr[200];
/* /////////////////////////////////////////////////////////////// */
void *my_malloc();
int optimize_hildreth_despo(long,long,double,double,double,long,long,long,double,double *,
double *,double *,double *,double *,double *,
double *,double *,double *,long *,double *);
int solve_dual(long,long,double,double,long,double *,double *,double *,
double *,double *,double *,double *,double *,double *,
double *,double *,double *,double *,long);
void linvert_matrix(double *, long, double *, double, long *);
void lprint_matrix(double *, long);
void ladd_matrix(double *, long, double);
void lcopy_matrix(double *, long, double *);
void lswitch_rows_matrix(double *, long, long, long);
void lswitchrk_matrix(double *, long, long, long);
double calculate_qp_objective(long, double *, double *, double *);
double *optimize_qp(QP *qp,
double *epsilon_crit,
long nx, /* Maximum number of variables in QP */
double *threshold,
LEARN_PARM *learn_parm)
/* start the optimizer and return the optimal values */
/* The HIDEO optimizer does not necessarily fully solve the problem. */
/* Since it requires a strictly positive definite hessian, the solution */
/* is restricted to a linear independent subset in case the matrix is */
/* only semi-definite. */
{
long i,j;
int result;
double eq;
if(!primal) { /* allocate memory at first call */
primal=(double *)my_malloc(sizeof(double)*nx);
dual=(double *)my_malloc(sizeof(double)*((nx+1)*2));
nonoptimal=(long *)my_malloc(sizeof(long)*(nx));
buffer=(double *)my_malloc(sizeof(double)*((nx+1)*2*(nx+1)*2+
nx*nx+2*(nx+1)*2+2*nx+1+2*nx+
nx+nx+nx*nx));
(*threshold)=0;
}
eq=qp->opt_ce0[0];
for(i=0;i<qp->opt_n;i++) {
eq+=qp->opt_xinit[i]*qp->opt_ce[i];
}
result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
opt_precision,(*epsilon_crit),
learn_parm->epsilon_a,maxiter,
/* (long)PRIMAL_OPTIMAL, */
(long)0, (long)0,
lindep_sensitivity,
qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
dual,nonoptimal,buffer);
if(learn_parm->totwords < learn_parm->svm_maxqpsize) {
/* larger working sets will be linear dependent anyway */
learn_parm->svm_maxqpsize=maxl(learn_parm->totwords,(long)2);
}
if(result == NAN_SOLUTION) {
lindep_sensitivity*=2; /* throw out linear dependent examples more */
/* generously */
if(learn_parm->svm_maxqpsize>2) {
learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */
}
precision_violations++;
}
/* take one round of only two variable to get unstuck */
if(result != PRIMAL_OPTIMAL) {
smallroundcount++;
result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
opt_precision,(*epsilon_crit),
learn_parm->epsilon_a,(long)maxiter,
(long)PRIMAL_OPTIMAL,(long)SMALLROUND,
lindep_sensitivity,
qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
dual,nonoptimal,buffer);
}
if(result != PRIMAL_OPTIMAL) {
if(result != ONLY_ONE_VARIABLE)
precision_violations++;
if(result == MAXITER_EXCEEDED)
maxiter+=100;
if(result == NAN_SOLUTION) {
lindep_sensitivity*=2; /* throw out linear dependent examples more */
/* generously */
/* results not valid, so return inital values */
for(i=0;i<qp->opt_n;i++) {
primal[i]=qp->opt_xinit[i];
}
}
}
if(precision_violations > 50) {
precision_violations=0;
(*epsilon_crit)*=10.0;
}
if((qp->opt_m>0) && (result != NAN_SOLUTION))
(*threshold)=dual[1]-dual[0];
else
(*threshold)=0;
return(primal);
}
int optimize_hildreth_despo(
long n, /* number of variables */
long m, /* number of linear equality constraints [0,1] */
double precision, /* solve at least to this dual precision */
double epsilon_crit, /* stop, if KT-Conditions approx fulfilled */
double epsilon_a, /* precision of alphas at bounds */
long maxiter, /* stop after this many iterations */
long goal, /* keep going until goal fulfilled */
long smallround, /* use only two variables of steepest descent */
double lindep_sensitivity, /* epsilon for detecting linear dependent ex */
double *g, /* hessian of objective */
double *g0, /* linear part of objective */
double *ce,double *ce0, /* linear equality constraints */
double *low,double *up, /* box constraints */
double *primal, /* primal variables */
double *init, /* initial values of primal */
double *dual, /* dual variables */
long *lin_dependent,
double *buffer)
{
long i,j,k,from,to,n_indep,changed;
double sum,bmin=0,bmax=0;
double *d,*d0,*ig,*dual_old,*temp,*start;
double *g0_new,*g_new,*ce_new,*ce0_new,*low_new,*up_new;
double add,t;
int result;
double obj_before,obj_after;
long b1,b2;
g0_new=&(buffer[0]); /* claim regions of buffer */
d=&(buffer[n]);
d0=&(buffer[n+(n+m)*2*(n+m)*2]);
ce_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2]);
ce0_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n]);
ig=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m]);
dual_old=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n]);
low_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2]);
up_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n]);
start=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n]);
g_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n]);
temp=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n+n*n]);
b1=-1;
b2=-1;
for(i=0;i<n;i++) { /* get variables with steepest feasible descent */
sum=g0[i];
for(j=0;j<n;j++)
sum+=init[j]*g[i*n+j];
sum=sum*ce[i];
if(((b1==-1) || (sum<bmin))
&& (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]<0.0)))
&& (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]>0.0)))
) {
bmin=sum;
b1=i;
}
if(((b2==-1) || (sum>bmax))
&& (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]>0.0)))
&& (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]<0.0)))
) {
bmax=sum;
b2=i;
}
}
for(i=0;i<n;i++) {
start[i]=init[i];
}
/* in case both examples are equal */
add=0;
changed=0;
if((-g[b1*n+b2] == g[b1*n+b1]) && (-g[b1*n+b2] == g[b2*n+b2])) {
changed=1;
if(ce[b1] == ce[b2]) { /* distribute evenly */
start[b1]=(init[b1]+init[b2])/2.0;
start[b2]=(init[b1]+init[b2])/2.0;
if(start[b2] > up[b2]) {
t=start[b2]-up[b2];
start[b2]=up[b2];
start[b1]+=t;
}
if(start[b1] > up[b1]) {
t=start[b1]-up[b1];
start[b1]=up[b1];
start[b2]+=t;
}
}
else { /* set to upper bound */
t=up[b1]-init[b1];
if((up[b2]-init[b2]) < t) {
t=up[b2]-init[b2];
}
start[b1]=init[b1]+t;
start[b2]=init[b2]+t;
}
}
/* if we have a biased hyperplane, then adding a constant to the */
/* hessian does not change the solution. So that is done for examples */
/* with zero diagonal entry, since HIDEO cannot handle them. */
else if((fabs(g[b1*n+b1]) < lindep_sensitivity)
|| (fabs(g[b2*n+b2]) < lindep_sensitivity)) {
add+=0.093274;
}
/* in case both examples are linear dependent */
else if(fabs(g[b1*n+b1]/g[b1*n+b2] - g[b1*n+b2]/g[b2*n+b2])
< lindep_sensitivity) {
add+=0.078274;
}
/* sprintf(temstr,"b1=%ld,b2=%ld\n",b1,b2); */
lcopy_matrix(g,n,d);
if((m==1) && (add>0.0)) {
for(j=0;j<n;j++) {
for(k=0;k<n;k++) {
d[j*n+k]+=add*ce[j]*ce[k];
}
}
}
else {
add=0.0;
}
if(n>2) { /* switch, so that variables are better mixed */
lswitchrk_matrix(d,n,b1,(long)0);
if(b2 == 0)
lswitchrk_matrix(d,n,b1,(long)1);
else
lswitchrk_matrix(d,n,b2,(long)1);
}
if(smallround == SMALLROUND) {
for(i=2;i<n;i++) {
lin_dependent[i]=1;
}
lin_dependent[0]=0;
lin_dependent[1]=0;
}
else {
for(i=0;i<n;i++) {
lin_dependent[i]=0;
}
}
linvert_matrix(d,n,ig,lindep_sensitivity,lin_dependent);
if(n>2) { /* now switch back */
if(b2 == 0) {
lswitchrk_matrix(ig,n,b1,(long)1);
i=lin_dependent[1];
lin_dependent[1]=lin_dependent[b1];
lin_dependent[b1]=i;
}
else {
lswitchrk_matrix(ig,n,b2,(long)1);
i=lin_dependent[1];
lin_dependent[1]=lin_dependent[b2];
lin_dependent[b2]=i;
}
lswitchrk_matrix(ig,n,b1,(long)0);
i=lin_dependent[0];
lin_dependent[0]=lin_dependent[b1];
lin_dependent[b1]=i;
}
/* lprint_matrix(d,n); */
/* lprint_matrix(ig,n); */
lcopy_matrix(g,n,g_new); /* restore g_new matrix */
if(add>0)
for(j=0;j<n;j++) {
for(k=0;k<n;k++) {
g_new[j*n+k]+=add*ce[j]*ce[k];
}
}
for(i=0;i<n;i++) { /* fix linear dependent vectors */
g0_new[i]=g0[i]+add*ce0[0]*ce[i];
}
if(m>0) ce0_new[0]=-ce0[0];
for(i=0;i<n;i++) { /* fix linear dependent vectors */
if(lin_dependent[i]) {
for(j=0;j<n;j++) {
if(!lin_dependent[j]) {
g0_new[j]+=start[i]*g_new[i*n+j];
}
}
if(m>0) ce0_new[0]-=(start[i]*ce[i]);
}
}
from=0; /* remove linear dependent vectors */
to=0;
n_indep=0;
for(i=0;i<n;i++) {
if(!lin_dependent[i]) {
g0_new[n_indep]=g0_new[i];
ce_new[n_indep]=ce[i];
low_new[n_indep]=low[i];
up_new[n_indep]=up[i];
primal[n_indep]=start[i];
n_indep++;
}
for(j=0;j<n;j++) {
if((!lin_dependent[i]) && (!lin_dependent[j])) {
ig[to]=ig[from];
g_new[to]=g_new[from];
to++;
}
from++;
}
}
/* cannot optimize with only one variable */
if((n_indep<=1) && (m>0) && (!changed)) {
for(i=n-1;i>=0;i--) {
primal[i]=init[i];
}
return((int)ONLY_ONE_VARIABLE);
}
result=solve_dual(n_indep,m,precision,epsilon_crit,maxiter,g_new,g0_new,
ce_new,ce0_new,low_new,up_new,primal,d,d0,ig,
dual,dual_old,temp,goal);
j=n_indep;
for(i=n-1;i>=0;i--) {
if(!lin_dependent[i]) {
j--;
primal[i]=primal[j];
}
else if((m==0) && (g[i*n+i]==0)) {
/* if we use a biased hyperplane, each example with a zero diagonal */
/* entry must have an alpha at the upper bound. Doing this */
/* is essential for the HIDEO optimizer, since it cannot handle zero */
/* diagonal entries in the hessian for the unbiased hyperplane case. */
primal[i]=up[i];
}
else {
primal[i]=start[i]; /* leave as is */
}
temp[i]=primal[i];
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -