/* svm_struct_learn.c -- excerpt (web code-viewer page header removed).
   NOTE: the file begins mid-function; the lines below are the tail of a
   function whose header lies before this excerpt. */
/* NOTE(review): this is the TAIL of a function whose beginning lies before
   this excerpt (final statistics reporting and cleanup of a cutting-plane
   structural-SVM learner).  Code below is unchanged; comments only. */
alphasum+=alpha[i]*cset.rhs[i];   /* body of a loop started above: sum over dual objective terms */
modellength=model_length_s(svmModel,kparm);   /* |w| of current model */
/* Upper bound on the duality gap: primal objective estimate
   (0.5|w|^2 + C*(slack sum + n*eps)) minus dual objective (alphasum - 0.5|w|^2). */
dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps))
-(alphasum-0.5*modellength*modellength);
printf("Final epsilon on KKT-Conditions: %.5f\n",
MAX(svmModel->maxdiff,epsilon));
printf("Upper bound on duality gap: %.5f\n", dualitygap);
printf("Dual objective value: dval=%.5f\n",
alphasum-0.5*modellength*modellength);
printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
printf("Number of iterations: %d\n",numIt);
printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
if(sparm->slack_norm == 1) {
printf("Number of SV: %ld \n",svmModel->sv_num-1);
/* NOTE(review): n is printed with %ld -- assumes n is declared long in the
   (unseen) function header; confirm. */
printf("Number of non-zero slack variables: %ld (out of %ld)\n",
svmModel->at_upper_bound,n);
printf("Norm of weight vector: |w|=%.5f\n",
model_length_s(svmModel,kparm));
}
else if(sparm->slack_norm == 2){
printf("Number of SV: %ld (including %ld at upper bound)\n",
svmModel->sv_num-1,svmModel->at_upper_bound);
printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
model_length_s(svmModel,kparm));
}
printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n);
printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
length_of_longest_document_vector(cset.lhs,cset.m,kparm));
/* The /100.0 presumably converts get_runtime()'s 1/100-second units to
   seconds -- TODO confirm against get_runtime()'s definition. */
printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total,
(100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total);
}  /* closes a verbosity guard opened before this excerpt */
if(struct_verbosity>=4)
printW(sm->w,sizePsi,n,lparm->svm_c);
/* Hand the caller a deep copy of the learned model via sm; the local
   svmModel is freed below. */
if(svmModel) {
sm->svm_model=copy_model(svmModel);
sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
}
print_struct_learning_stats(sample,sm,cset,alpha,sparm);
/* ---- cleanup: cached Psi vectors, local model, dual state, working set ---- */
if(fycache) {
for(i=0;i<n;i++)
free_svector(fycache[i]);
free(fycache);
}
if(svmModel)
free_model(svmModel,0);   /* 0: keep support vectors owned elsewhere */
free(alpha);
free(alphahist);
free(opti);
free(cset.rhs);
for(i=0;i<cset.m;i++)
free_example(cset.lhs[i],1);  /* 1: also free the contained feature vector */
free(cset.lhs);
}
/*
 * svm_learn_struct_joint: cutting-plane training of a structural SVM using
 * ONE joint ("1-slack") constraint per iteration -- the average of the
 * per-example most-violated constraints -- instead of n separate ones.
 * Repeatedly finds the most violated joint constraint, adds it to the
 * working set cset, and re-solves the QP, tightening epsilon as it goes.
 *
 * Parameters:
 *   sample   - training examples (sample.n examples in sample.examples)
 *   sparm    - structure-learning parameters (C, epsilon, slack/loss type)
 *   lparm    - parameters handed to the underlying QP solver
 *   kparm    - kernel parameters (LINEAR kernel gets a fast dense path)
 *   sm       - out: learned structural model (sm->svm_model, sm->w)
 *   alg_type - variant selector; DUAL_ALG caches the Gram matrix,
 *              DUAL_CACHE_ALG additionally keeps a constraint cache
 *
 * NOTE(review): this excerpt is TRUNCATED -- the do{...} main loop opened
 * below never closes within the visible text.  Comments only; code is
 * unchanged.
 */
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
LEARN_PARM *lparm, KERNEL_PARM *kparm,
STRUCTMODEL *sm, int alg_type)
{
int i,j;
int numIt=0;                      /* iteration counter (reporting) */
long argmax_count=0;              /* # calls to find_most_violated_constraint_* */
long totconstraints=0;            /* total constraints ever added */
long kernel_type_org;             /* saved kernel type while QP runs on GRAM */
double epsilon,epsilon_cached;    /* current precision targets */
double lossval,factor,dist;
double margin=0;                  /* rhs of the joint constraint being built */
double slack, slacksum, ceps;
double dualitygap,modellength,alphasum;
long sizePsi;
double *alpha=NULL;               /* dual variables of the working-set QP */
long *alphahist=NULL,optcount=0;  /* last QP round each constraint was active */
CONSTSET cset;                    /* working set of joint constraints */
SVECTOR *diff=NULL;
double *diff_n=NULL;              /* dense accumulator (LINEAR kernel only) */
SVECTOR *fy, *fybar, *f, **fycache, *lhs;
MODEL *svmModel=NULL;
LABEL ybar;
DOC *doc;
long n=sample.n;
EXAMPLE *ex=sample.examples;
double rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
double rt1,rt2;
double progress,progress_old;
/*
SVECTOR ***fydelta_cache=NULL;
double **loss_cache=NULL;
int cache_size=0;
*/
CCACHE *ccache=NULL;              /* constraint cache (DUAL_CACHE_ALG only) */
int cached_constraint;            /* 1 if this round's constraint came from cache */
rt1=get_runtime();
init_struct_model(sample,sm,sparm,lparm,kparm);
sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */
/* The joint (1-slack) formulation only supports the L1 slack norm. */
if(sparm->slack_norm == 1) {
lparm->svm_c=sparm->C; /* set upper bound C */
lparm->sharedslack=1;
}
else if(sparm->slack_norm == 2) {
/* NOTE(review): exit(0) reports success to the shell on an error path;
   exit(1) would be conventional (same below). */
printf("ERROR: The joint algorithm does not apply to L2 slack norm!");
fflush(stdout);
exit(0);
}
else {
printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
exit(0);
}
lparm->biased_hyperplane=0; /* set threshold to zero */
epsilon=100.0; /* start with low precision and
increase later */
epsilon_cached=epsilon; /* epsilon to use for iterations
using constraints constructed
from the constraint cache */
/* Seed the working set with any application-supplied initial constraints;
   their alphahist entries are pinned so they are never pruned. */
cset=init_struct_constraints(sample, sm, sparm);
if(cset.m > 0) {
alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
for(i=0; i<cset.m; i++) {
alpha[i]=0;
alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
}
}
/* For the dual variants, precompute the Gram matrix of the working set. */
kparm->gram_matrix=NULL;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
kparm->gram_matrix=init_kernel_matrix(&cset,kparm);
/* set initial model and slack variables */
svmModel=(MODEL *)my_malloc(sizeof(MODEL));
lparm->epsilon_crit=epsilon;
svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
lparm,kparm,NULL,svmModel,alpha);
add_weight_vector_to_linear_model(svmModel);
sm->svm_model=svmModel;
sm->w=svmModel->lin_weights; /* short cut to weight vector */
/* create a cache of the feature vectors for the correct labels */
fycache=(SVECTOR **)malloc(n*sizeof(SVECTOR *));
for(i=0;i<n;i++) {
fy=psi(ex[i].x,ex[i].y,sm,sparm);
if(kparm->kernel_type == LINEAR) {
/* collapse the svector list into one vector up front for speed */
diff=add_list_ss(fy); /* store difference vector directly */
free_svector(fy);
fy=diff;
}
fycache[i]=fy;
}
/* initialize the constraint cache */
if(alg_type == DUAL_CACHE_ALG) {
ccache=create_constraint_cache(sample,sparm);
}
rt_init+=MAX(get_runtime()-rt1,0);
rt_total+=MAX(get_runtime()-rt1,0);
/*****************/
/*** main loop ***/
/*****************/
do { /* iteratively find and add constraints to working set */
if(struct_verbosity>=1) {
printf("Iter %i: ",++numIt);
fflush(stdout);
}
rt1=get_runtime();
/**** compute current slack ****/
slack=0;
for(j=0;j<cset.m;j++)
slack=MAX(slack,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
/**** find a violated joint constraint ****/
lhs=NULL;
dist=0;
if(alg_type == DUAL_CACHE_ALG) {
/* see if it is possible to construct violated constraint from cache */
update_constraint_cache_for_model(ccache, svmModel);
dist=find_most_violated_joint_constraint_in_cache(ccache,&lhs,&margin);
}
rt_total+=MAX(get_runtime()-rt1,0);
/* Is there a sufficiently violated constraint in cache? */
if(dist-slack > MAX(epsilon/10,sparm->epsilon)) {
/* use constraint from cache */
rt1=get_runtime();
cached_constraint=1;
if(kparm->kernel_type == LINEAR) {
diff=add_list_ns(lhs); /* Linear case: compute weighted sum */
free_svector_shallow(lhs);
}
else { /* Non-linear case: make sure we have deep copy for cset */
diff=copy_svector(lhs);
free_svector_shallow(lhs);
}
rt_total+=MAX(get_runtime()-rt1,0);
}
else {
/* do not use constraint from cache */
rt1=get_runtime();
cached_constraint=0;
if(lhs)
free_svector_shallow(lhs);
lhs=NULL;
if(kparm->kernel_type == LINEAR) {
/* dense accumulator for the averaged difference vector */
diff_n=create_nvector(sm->sizePsi);
clear_nvector(diff_n,sm->sizePsi);
}
margin=0;
progress=0;
progress_old=progress;
rt_total+=MAX(get_runtime()-rt1,0);
/**** find most violated joint constraint ***/
for(i=0; i<n; i++) {
rt1=get_runtime();
/* at verbosity 1, print one dot per 10% of examples processed */
progress+=10.0/n;
if((struct_verbosity==1) && (((int)progress_old) != ((int)progress)))
{printf(".");fflush(stdout); progress_old=progress;}
if(struct_verbosity>=2)
{printf("."); fflush(stdout);}
rt2=get_runtime();
argmax_count++;
if(sparm->loss_type == SLACK_RESCALING)
ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
ex[i].y,sm,
sparm);
else
ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
ex[i].y,sm,
sparm);
rt_viol+=MAX(get_runtime()-rt2,0);
if(empty_label(ybar)) {
/* skip this example rather than aborting the whole run */
printf("ERROR: empty label was returned for example (%i)\n",i);
/* exit(1); */
continue;
}
/**** get psi(x,y) and psi(x,ybar) ****/
rt2=get_runtime();
fy=copy_svector(fycache[i]); /*<= fy=psi(ex[i].x,ex[i].y,sm,sparm);*/
fybar=psi(ex[i].x,ybar,sm,sparm);
rt_psi+=MAX(get_runtime()-rt2,0);
lossval=loss(ex[i].y,ybar,sparm);
free_label(ybar);
/**** scale feature vector and margin by loss ****/
if(sparm->loss_type == SLACK_RESCALING)
factor=lossval/n;
else /* do not rescale vector for */
factor=1.0/n; /* margin rescaling loss type */
for(f=fy;f;f=f->next)
f->factor*=factor;
for(f=fybar;f;f=f->next)
f->factor*=-factor;   /* negate so the concatenated list encodes fy-fybar */
append_svector_list(fybar,fy); /* compute fy-fybar */
/**** add current fy-fybar and loss to cache ****/
if(alg_type == DUAL_CACHE_ALG) {
if(kparm->kernel_type == LINEAR)
add_constraint_to_constraint_cache(ccache,svmModel,i,
add_list_ss(fybar),
lossval/n,sparm->ccache_size);
else
add_constraint_to_constraint_cache(ccache,svmModel,i,
copy_svector(fybar),
lossval/n,sparm->ccache_size);
}
/**** add current fy-fybar to constraint and margin ****/
if(kparm->kernel_type == LINEAR) {
add_list_n_ns(diff_n,fybar,1.0); /* add fy-fybar to sum */
free_svector(fybar);
}
else {
append_svector_list(fybar,lhs); /* add fy-fybar to vector list */
lhs=fybar;
}
margin+=lossval/n; /* add loss to rhs */
rt_total+=MAX(get_runtime()-rt1,0);
} /* end of example loop */
rt1=get_runtime();
/* create sparse vector from dense sum */
if(kparm->kernel_type == LINEAR) {
diff=create_svector_n(diff_n,sm->sizePsi,"",1.0);
free_nvector(diff_n);
}
else {
diff=lhs;
}
rt_total+=MAX(get_runtime()-rt1,0);
} /* end of finding most violated joint constraint */
rt1=get_runtime();
/**** if `error', then add constraint and recompute QP ****/
/* NOTE(review): doc is never freed within this excerpt -- confirm a
   free_example(doc,0) follows in the truncated remainder. */
doc=create_example(cset.m,0,1,1,diff);
dist=classify_example(svmModel,doc);
ceps=MAX(0,margin-dist-slack);  /* violation of new constraint beyond current slack */
if(slack > (margin-dist+0.000001)) {
printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
printf("slack=%f, newslack=%f\n",slack,margin-dist);
/* exit(1); */
}
if(ceps > sparm->epsilon) {
/**** resize constraint matrix and add new constraint ****/
cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
if(sparm->slack_norm == 1)
cset.lhs[cset.m]=create_example(cset.m,0,1,1,diff);
else if(sparm->slack_norm == 2)
exit(1);   /* unreachable: L2 norm was rejected at function entry */
cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
cset.rhs[cset.m]=margin;
alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
alpha[cset.m]=0;
alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
alphahist[cset.m]=optcount;
cset.m++;
totconstraints++;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) {
if(struct_verbosity>=1) {
printf(":");fflush(stdout);
}
rt2=get_runtime();
/* extend the cached Gram matrix by the new constraint's row/column */
kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
&cset,kparm);
rt_kernel+=MAX(get_runtime()-rt2,0);
}
/**** get new QP solution ****/
if(struct_verbosity>=1) {
printf("*");fflush(stdout);
}
rt2=get_runtime();
/* set svm precision so that higher than eps of most violated constr */
if(cached_constraint) {
epsilon_cached=MIN(epsilon_cached,MAX(ceps,sparm->epsilon));
lparm->epsilon_crit=epsilon_cached/2;
}
else {
epsilon=MIN(epsilon,MAX(ceps,sparm->epsilon)); /* best eps so far */
lparm->epsilon_crit=epsilon/2;
epsilon_cached=epsilon;
}
free_model(svmModel,0);
svmModel=(MODEL *)my_malloc(sizeof(MODEL));
/* Run the QP solver on cset. */
kernel_type_org=kparm->kernel_type;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
kparm->kernel_type=GRAM; /* use kernel stored in kparm */
svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
lparm,kparm,NULL,svmModel,alpha);
kparm->kernel_type=kernel_type_org;   /* restore real kernel type */
svmModel->kernel_parm.kernel_type=kernel_type_org;
/* Always add weight vector, in case part of the kernel is
linear. If not, ignore the weight vector since its
content is bogus. */
add_weight_vector_to_linear_model(svmModel);
sm->svm_model=svmModel;
sm->w=svmModel->lin_weights; /* short cut to weight vector */
optcount++;
/* keep track of when each constraint was last
active. constraints marked with -1 are not updated */
for(j=0;j<cset.m;j++)
if((alphahist[j]>-1) && (alpha[j] != 0))
alphahist[j]=optcount;
rt_opt+=MAX(get_runtime()-rt2,0);
/* Check if some of the linear constraints have not been
active in a while. Those constraints are then removed to
avoid bloating the working set beyond necessity. */
/* NOTE(review): the unbraced 'if' below governs only the printf; the
   trailing fflush(stdout) runs unconditionally -- probably intended to be
   inside the conditional. */
if(struct_verbosity>=2)
printf("Reducing working set...");fflush(stdout);
remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
if(struct_verbosity>=2)
printf("done. (NumConst=%d) ",cset.m);
} /* NOTE(review): excerpt truncated here -- the enclosing do{...}while
     loop and the remainder of the function continue beyond this point. */
/* [end of excerpt -- web code-viewer keyboard-shortcut help (Chinese UI
   chrome) removed; the function above is truncated mid-body at this point] */