svm_learn.cpp: support vector machine classifier (text classification), C++ source (page 1 of 5)
        }
        else 
        {
            learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
        }
        
        return((long)3);
    }
    else if(((transductcycle % check_every) < check_every)) 
    { 
        model_length=0;
        sumalpha=0;
        loss=0;
        for(i=0;i<totdoc;i++)
        {
            model_length+=a[i]*label[i]*lin[i];
            sumalpha+=a[i];
            dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/
            if((label[i]*dist)<(1.0-learn_parm->epsilon_crit))
            {
                loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; /* cost-weighted hinge loss */
            }
        }
        model_length=sqrt(model_length);  /* sum of a[i]*label[i]*lin[i] is w*w, so this is ||w|| */
        
        sprintf(temstr,"Model-length = %f (%f), loss = %f, objective = %f\n",
            model_length,sumalpha,loss,loss+0.5*model_length*model_length);
        printm(temstr);
        j1=0;
        j2=0;
        j3=0;
        j4=0;
        unsupaddnum1=0; 
        unsupaddnum2=0; 
        umin=99999;
        umax=-99999;
        j4=1;
        while(j4)
        {
            umin=99999;
            umax=-99999;
            for(i=0;(i<totdoc);i++)
            { 
                dist=(lin[i]-model->b);  
                if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) 
                    && (dist<umin)) 
                {
                    umin=dist;
                    imin=i;
                }
                if((label[i]<0) && (unlabeled[i])  && (!inconsistent[i]) 
                    && (dist>umax))
                {
                    umax=dist;
                    imax=i;
                }
            }
            if((umin < (umax+switchsens-1E-4))) 
            {
                j1++;
                j2++;
                unsupaddnum1++; 
                unlabeled[imin]=3;
                inconsistent[imin]=1;
                unsupaddnum2++; 
                unlabeled[imax]=2;
                inconsistent[imax]=1;
            }
            else
                j4=0;
            j4=0; /* unconditional, so the surrounding while(j4) loop makes only one pass */
        }
        for(j=0;(j<totdoc);j++) 
        {
            if(unlabeled[j] && (!inconsistent[j]))
            {
                if(label[j]>0) 
                {
                    unlabeled[j]=2;
                }
                else if(label[j]<0) 
                {
                    unlabeled[j]=3;
                }
                /* inconsistent[j]=1; */
                j3++;
            }
        }
        switchnum+=unsupaddnum1+unsupaddnum2;
        
        /* stop and print out current margin
        sprintf(temstr,"switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
        if(switchnum == 2*kernel_parm->poly_degree) {
        learn_parm->svm_unlabbound=1;
        }
        */
        
        if((!unsupaddnum1) && (!unsupaddnum2)) 
        {
            if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) 
            {
                for(j=0;(j<totdoc);j++) 
                {
                    inconsistent[j]=0;
                    if(unlabeled[j]) unlabeled[j]=1;
                }
                write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
                    totdoc,learn_parm);  
                
                sprintf(temstr,"Number of switches: %ld\n",switchnum);
                return((long)0);
                switchsens=switchsensorg;
                learn_parm->svm_unlabbound*=1.5;
                if(learn_parm->svm_unlabbound>1) 
                {
                    learn_parm->svm_unlabbound=1;
                }
                model->at_upper_bound=0; /* since upper bound increased */
                
                sprintf(temstr,"Increasing influence of unlabeled examples to %f%% .",
                    learn_parm->svm_unlabbound*100.0);
                printm(temstr);
                
                learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
                
                for(i=0;i<totdoc;i++) 
                {  /* set upper bounds on vars */
                    if(unlabeled[i]) 
                    {
                        if(label[i] == 1) 
                        {
                            learn_parm->svm_cost[i]=learn_parm->svm_c*
                                learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
                        }
                        else if(label[i] == -1) 
                        {
                            learn_parm->svm_cost[i]=learn_parm->svm_c*
                                learn_parm->svm_unlabbound;
                        }
                    }
                    return((long)2);
                }
        }     }
    }
    return((long)0); 
}
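
/* Illustrative sketch (not part of the original file): the while(j4) block above
   pairs the unlabeled example currently labeled positive that has the smallest
   distance lin[i]-model->b with the unlabeled example labeled negative that has
   the largest such distance, and marks both for a label switch when doing so is
   expected to lower the objective (umin < umax + switchsens).  The hypothetical
   helper below shows only that pairing step, flipping the labels directly instead
   of marking them with unlabeled[.]=2/3 as the real code does. */
static void demo_switch_worst_pair(double *lin, double b, long *label,
                                   long *unlabeled, long totdoc)
{
    long i, imin = -1, imax = -1;
    double dist, umin = 99999, umax = -99999;

    for(i = 0; i < totdoc; i++) {
        if(!unlabeled[i]) continue;
        dist = lin[i] - b;                     /* signed distance from hyperplane */
        if((label[i] > 0) && (dist < umin)) { umin = dist; imin = i; }
        if((label[i] < 0) && (dist > umax)) { umax = dist; imax = i; }
    }
    if((imin >= 0) && (imax >= 0) && (umin < umax)) {
        label[imin] = -1;                      /* tentatively relabel the pair */
        label[imax] = 1;
    }
}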

/*************************** Working set selection ***************************/

long select_next_qp_subproblem_grad(
                                    long *label,long *unlabeled,
                                    double *a,double *lin,      /* Use the feasible direction approach to select the */
                                    long totdoc,long qp_size, /* next qp-subproblem  (see section 'Selecting a good */
                                    LEARN_PARM *learn_parm, /* working set') */
                                    long *inconsistent,long *active2dnum,long *working2dnum,
                                    double *selcrit,
                                    long *select,
                                    KERNEL_CACHE *kernel_cache,
                                    long *key,long *chosen)
{
    long choosenum,i,j,k,activedoc,inum;
    double s;
    
    for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
    choosenum=0;
    activedoc=0;
    for(i=0;(j=active2dnum[i])>=0;i++)
    {
        s=-label[j];
        if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
            && (s>0)))
            && (!inconsistent[j]) 
            && (label[j])
            && (!chosen[j]))
        {
            selcrit[activedoc]=lin[j]-(double)label[j];
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
    for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++)
    {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
        /* out of cache */
    }
    
    activedoc=0;
    for(i=0;(j=active2dnum[i])>=0;i++)
    {
        s=label[j];
        if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
            && (s>0))) 
            && (!inconsistent[j]) 
            && (label[j])
            && (!chosen[j]))
        {
            selcrit[activedoc]=(double)(label[j])-lin[j];
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
    for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++)
    {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
        /* out of cache */
    } 
    working2dnum[inum+choosenum]=-1; /* complete index */
    return(choosenum);
}
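
/* Illustrative sketch (not part of the original file): writing lin[j] for
   sum_k a[k]*label[k]*K(j,k), the derivative of the dual objective with respect
   to a[j] is g_j = 1 - label[j]*lin[j] (ignoring the equality-constraint term).
   The first pass above ranks the free variables by -label[j]*g_j = lin[j]-label[j],
   i.e. candidates for a step in direction -label[j]; the second pass ranks them by
   +label[j]*g_j = label[j]-lin[j], i.e. candidates for a step in direction
   +label[j].  The hypothetical helper below just evaluates both criteria for one
   example. */
static void demo_selection_criteria(double lin_j, long label_j,
                                    double *crit_first_pass, double *crit_second_pass)
{
    double g_j = 1.0 - (double)label_j * lin_j;   /* dual gradient component */
    *crit_first_pass  = -(double)label_j * g_j;   /* equals lin_j - label_j  */
    *crit_second_pass =  (double)label_j * g_j;   /* equals label_j - lin_j  */
}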

long select_next_qp_subproblem_grad_cache(
                                          long *label,long *unlabeled,
                                          double *a,double *lin,         /* Use the feasible direction approach to select the */
                                          long totdoc,long qp_size,    /* next qp-subproblem  (see chapter 'Selecting a  */
                                          LEARN_PARM *learn_parm, /* good working set') among the variable with */
                                          long *inconsistent,long *active2dnum,long *working2dnum, /* cached kernel */
                                          double *selcrit,
                                          long *select,
                                          KERNEL_CACHE *kernel_cache,
                                          long *key,long *chosen)
{
    long choosenum,i,j,k,activedoc,inum;
    double s;
    
    for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
    choosenum=0;
    activedoc=0;
    for(i=0;(j=active2dnum[i])>=0;i++) 
    {
        s=-label[j];
        if((kernel_cache->index[j]>=0)
            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
            && (s>0)))
            && (!chosen[j]) 
            && (label[j])
            && (!inconsistent[j]))
        {
            selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]);
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
    for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++)
    {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
        /* out of cache */
    }
    
    activedoc=0;
    for(i=0;(j=active2dnum[i])>=0;i++) 
    {
        s=label[j];
        if((kernel_cache->index[j]>=0)
            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
            && (s>0))) 
            && (!chosen[j]) 
            && (label[j])
            && (!inconsistent[j])) 
        {
            selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j]));
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
    for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) 
    {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
        /* out of cache */
    } 
    working2dnum[inum+choosenum]=-1; /* complete index */
    return(choosenum);
}

void select_top_n(
                  double *selcrit,
                  long range,long *select,long n)
{
    register long i,j;
    
    for(i=0;(i<n) && (i<range);i++)
    { /* Initialize with the first n elements */
        for(j=i;j>=0;j--) 
        {
            if((j>0) && (selcrit[select[j-1]]<selcrit[i]))
            {
                select[j]=select[j-1];
            }
            else 
            {
                select[j]=i;
                j=-1;
            }
        }
    }
    for(i=n;i<range;i++)
    {   
        if(selcrit[i]>selcrit[select[n-1]]) 
        {
            for(j=n-1;j>=0;j--)
            {
                if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
                    select[j]=select[j-1];
                }
                else
                {
                    select[j]=i;
                    j=-1;
                }
            }
        }
    }
}      
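
/* Illustrative usage sketch (not part of the original file): select_top_n fills
   select[0..n-1] with the indices of the n largest entries of selcrit, ordered by
   decreasing criterion value.  With the toy values below, select_demo ends up as
   {4, 1}, i.e. the positions of 2.1 and 1.5. */
static long demo_select_top_n(void)
{
    double selcrit_demo[5] = {0.2, 1.5, -0.3, 0.9, 2.1};
    long   select_demo[2];

    select_top_n(selcrit_demo, (long)5, select_demo, (long)2);
    return select_demo[0];   /* index of the largest criterion value, here 4 */
}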


/******************************** Shrinking  *********************************/

void init_shrink_state(SHRINK_STATE *shrink_state,
                       long totdoc,
                       long maxhistory)
{
    long i;
    
    shrink_state->deactnum=0;
    shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
    shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
    shrink_state->a_history = (double **)my_malloc(sizeof(double *)*10000); /* fixed-size table of saved alpha vectors; the maxhistory argument is not used here */
    
    for(i=0;i<totdoc;i++)
    { 
        shrink_state->active[i]=1;
        shrink_state->inactive_since[i]=0;
    }
}

void shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
    free(shrink_state->active);
    free(shrink_state->inactive_since);
    if(shrink_state->deactnum > 0) 
        free(shrink_state->a_history[shrink_state->deactnum-1]);
    free(shrink_state->a_history);
}
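
/* Illustrative usage sketch (not part of the original file): a SHRINK_STATE is set
   up once before optimization, with one active flag and one inactive_since counter
   per document, and released afterwards.  The maxhistory value passed here is a
   hypothetical choice; a_history holds one saved alpha vector per shrinking round. */
static void demo_shrink_state_lifecycle(long totdoc)
{
    SHRINK_STATE shrink_state;

    init_shrink_state(&shrink_state, totdoc, (long)100);  /* hypothetical maxhistory */
    /* ... run the optimizer, shrinking and reactivating variables as needed ... */
    shrink_state_cleanup(&shrink_state);
}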

long shrink_problem(
                    /* shrink some variables away */
                    /* do the shrinking only if at least minshrink variables can be removed */
                    LEARN_PARM *learn_parm,
                    SHRINK_STATE *shrink_state,
                    long *active2dnum,long iteration,long* last_suboptimal_a
