svm_learn.cpp
}
else
{
learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
}
return((long)3);
}
else if(((transductcycle % check_every) < check_every)) /* always true: handles every remaining cycle */
{
model_length=0;
sumalpha=0;
loss=0;
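/* The loop below evaluates the current primal objective: lin[i] holds
sum_j a[j]*label[j]*K(i,j), so summing a[i]*label[i]*lin[i] over all i
gives the squared model length w*w, while loss accumulates the
cost-weighted slack of every example violating the margin. The printout
afterwards reports objective = loss + 0.5*||w||^2. */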
for(i=0;i<totdoc;i++)
{
model_length+=a[i]*label[i]*lin[i];
sumalpha+=a[i];
dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
if((label[i]*dist)<(1.0-learn_parm->epsilon_crit))
{
loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
}
}
model_length=sqrt(model_length);
sprintf(temstr,"Model-length = %f (%f), loss = %f, objective = %f\n",
model_length,sumalpha,loss,loss+0.5*model_length*model_length);
printm(temstr);
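/* Label-switching step of the transductive SVM: among the unlabeled
examples, find the currently-positive one with the smallest margin
(umin) and the currently-negative one with the largest (umax). If the
pair is crossed, i.e. umin < umax up to the switchsens tolerance, their
labels are exchanged (via the unlabeled[] codes 2 and 3) and both are
marked inconsistent so the same pair is not switched again. */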
j1=0;
j2=0;
j3=0;
j4=0;
unsupaddnum1=0;
unsupaddnum2=0;
umin=99999;
umax=-99999;
j4=1;
while(j4)
{
umin=99999;
umax=-99999;
for(i=0;(i<totdoc);i++)
{
dist=(lin[i]-model->b);
if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
&& (dist<umin))
{
umin=dist;
imin=i;
}
if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
&& (dist>umax))
{
umax=dist;
imax=i;
}
}
if((umin < (umax+switchsens-1E-4)))
{
j1++;
j2++;
unsupaddnum1++;
unlabeled[imin]=3;
inconsistent[imin]=1;
unsupaddnum2++;
unlabeled[imax]=2;
inconsistent[imax]=1;
}
else
{
j4=0;
}
j4=0; /* always cleared: the while loop body runs once, so at most one pair is switched per call */
}
for(j=0;(j<totdoc);j++)
{
if(unlabeled[j] && (!inconsistent[j]))
{
if(label[j]>0)
{
unlabeled[j]=2;
}
else if(label[j]<0)
{
unlabeled[j]=3;
}
/* inconsistent[j]=1; */
j3++;
}
}
switchnum+=unsupaddnum1+unsupaddnum2;
/* stop and print out current margin
sprintf(temstr,"switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
if(switchnum == 2*kernel_parm->poly_degree) {
learn_parm->svm_unlabbound=1;
}
*/
if((!unsupaddnum1) && (!unsupaddnum2))
{
if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab))
{
for(j=0;(j<totdoc);j++)
{
inconsistent[j]=0;
if(unlabeled[j]) unlabeled[j]=1;
}
write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
totdoc,learn_parm);
sprintf(temstr,"Number of switches: %ld\n",switchnum);
return((long)0);
switchsens=switchsensorg;
learn_parm->svm_unlabbound*=1.5;
if(learn_parm->svm_unlabbound>1)
{
learn_parm->svm_unlabbound=1;
}
model->at_upper_bound=0; /* since upper bound increased */
sprintf(temstr,"Increasing influence of unlabeled examples to %f%% .",
learn_parm->svm_unlabbound*100.0);
printm(temstr);
learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
for(i=0;i<totdoc;i++)
{ /* set upper bounds on vars */
if(unlabeled[i])
{
if(label[i] == 1)
{
learn_parm->svm_cost[i]=learn_parm->svm_c*
learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
}
else if(label[i] == -1)
{
learn_parm->svm_cost[i]=learn_parm->svm_c*
learn_parm->svm_unlabbound;
}
}
} /* end for: upper bounds on the unlabeled examples updated */
} /* end if: no pair was switched in this round */
return((long)2); /* request retraining */
}
return((long)0);
}
/*************************** Working set selection ***************************/
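/* Both routines below pick the working set by ranking how strongly each
active variable violates the optimality conditions: the first pass
collects variables whose alpha may move in direction s=-y (scored by
lin[j]-label[j]), the second those movable in direction s=+y (scored by
label[j]-lin[j]). The top qp_size/2 candidates of each pass, as returned
by select_top_n(), form the next QP subproblem. */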
long select_next_qp_subproblem_grad(
long *label,long *unlabeled,
double *a,double *lin, /* Use the feasible direction approach to select the */
long totdoc,long qp_size, /* next qp-subproblem (see section 'Selecting a good */
LEARN_PARM *learn_parm, /* working set') */
long *inconsistent,long *active2dnum,long *working2dnum,
double *selcrit,
long *select,
KERNEL_CACHE *kernel_cache,
long *key,long *chosen)
{
long choosenum,i,j,k,activedoc,inum;
double s;
for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
choosenum=0;
activedoc=0;
for(i=0;(j=active2dnum[i])>=0;i++)
{
s=-label[j];
if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
&& (s>0)))
&& (!inconsistent[j])
&& (label[j])
&& (!chosen[j]))
{
selcrit[activedoc]=lin[j]-(double)label[j];
key[activedoc]=j;
activedoc++;
}
}
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++)
{
i=key[select[k]];
chosen[i]=1;
working2dnum[inum+choosenum]=i;
choosenum+=1;
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
/* out of cache */
}
activedoc=0;
for(i=0;(j=active2dnum[i])>=0;i++)
{
s=label[j];
if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
&& (s>0)))
&& (!inconsistent[j])
&& (label[j])
&& (!chosen[j]))
{
selcrit[activedoc]=(double)(label[j])-lin[j];
key[activedoc]=j;
activedoc++;
}
}
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++)
{
i=key[select[k]];
chosen[i]=1;
working2dnum[inum+choosenum]=i;
choosenum+=1;
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
/* out of cache */
}
working2dnum[inum+choosenum]=-1; /* complete index */
return(choosenum);
}
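/* The _cache variant below performs the same two-pass selection but only
considers variables whose kernel row is already resident in the cache
(kernel_cache->index[j]>=0), so refilling the working set does not force
any new kernel evaluations. */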
long select_next_qp_subproblem_grad_cache(
long *label,long *unlabeled,
double *a,double *lin, /* Use the feasible direction approach to select the */
long totdoc,long qp_size, /* next qp-subproblem (see chapter 'Selecting a */
LEARN_PARM *learn_parm, /* good working set') among the variable with */
long *inconsistent,long *active2dnum,long *working2dnum, /* cached kernel */
double *selcrit,
long *select,
KERNEL_CACHE *kernel_cache,
long *key,long *chosen)
{
long choosenum,i,j,k,activedoc,inum;
double s;
for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
choosenum=0;
activedoc=0;
for(i=0;(j=active2dnum[i])>=0;i++)
{
s=-label[j];
if((kernel_cache->index[j]>=0)
&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
&& (s>0)))
&& (!chosen[j])
&& (label[j])
&& (!inconsistent[j]))
{
selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]);
key[activedoc]=j;
activedoc++;
}
}
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++)
{
i=key[select[k]];
chosen[i]=1;
working2dnum[inum+choosenum]=i;
choosenum+=1;
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
/* out of cache */
}
activedoc=0;
for(i=0;(j=active2dnum[i])>=0;i++)
{
s=label[j];
if((kernel_cache->index[j]>=0)
&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
&& (s>0)))
&& (!chosen[j])
&& (label[j])
&& (!inconsistent[j]))
{
selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j]));
key[activedoc]=j;
activedoc++;
}
}
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++)
{
i=key[select[k]];
chosen[i]=1;
working2dnum[inum+choosenum]=i;
choosenum+=1;
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
/* out of cache */
}
working2dnum[inum+choosenum]=-1; /* complete index */
return(choosenum);
}
void select_top_n(
double *selcrit,
long range,long *select,long n)
{
register long i,j;
for(i=0;(i<n) && (i<range);i++)
{ /* Initialize with the first n elements */
for(j=i;j>=0;j--)
{
if((j>0) && (selcrit[select[j-1]]<selcrit[i]))
{
select[j]=select[j-1];
}
else
{
select[j]=i;
j=-1;
}
}
}
for(i=n;i<range;i++)
{
if(selcrit[i]>selcrit[select[n-1]])
{
for(j=n-1;j>=0;j--)
{
if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
select[j]=select[j-1];
}
else
{
select[j]=i;
j=-1;
}
}
}
}
}
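/* Minimal usage sketch for select_top_n() (illustration only; crit and
picked are hypothetical names, not part of this file). The routine fills
select[] with the indices of the n largest selcrit entries, in
descending order:

double crit[5]={0.2,1.5,0.7,2.1,0.3};
long picked[2];
select_top_n(crit,5,picked,2);
// now picked[0]==3 (value 2.1) and picked[1]==1 (value 1.5)
*/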
/******************************** Shrinking *********************************/
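/* Shrinking temporarily removes variables that have been stuck at a
bound for many iterations, on the heuristic that they will not change
again before convergence. SHRINK_STATE tracks which variables are
active, when each inactive one was deactivated, and a history of alpha
snapshots (a_history) from which the gradients of shrunk variables can
be reconstructed if they have to be reactivated. */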
void init_shrink_state(SHRINK_STATE *shrink_state,
long totdoc,
long maxhistory)
{
long i;
shrink_state->deactnum=0;
shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
shrink_state->a_history = (double **)my_malloc(sizeof(double *)*10000); /* fixed 10000 snapshot slots; the maxhistory argument is not used here */
for(i=0;i<totdoc;i++)
{
shrink_state->active[i]=1;
shrink_state->inactive_since[i]=0;
}
}
void shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
free(shrink_state->active);
free(shrink_state->inactive_since);
if(shrink_state->deactnum > 0)
free(shrink_state->a_history[shrink_state->deactnum-1]); /* free the most recent alpha snapshot */
free(shrink_state->a_history);
}
long shrink_problem(
/* shrink some variables away */
/* do the shrinking only if at least minshrink variables can be removed */
LEARN_PARM *learn_parm,
SHRINK_STATE *shrink_state,
long *active2dnum,long iteration,long *last_suboptimal_a