📄 svm_learn.c
  /* excerpt resumes mid-function: refresh lin[] after the alphas changed */
    for(jj=0;(j=active2dnum[jj])>=0;jj++) {
      lin[j]+=sprod_ns(weights,docs[j].words);
    }
  }
  else {                            /* general case */
    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      if(a[i] != a_old[i]) {
        get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
                       kernel_parm);
        for(ii=0;(j=active2dnum[ii])>=0;ii++) {
          tec=aicache[j];
          lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
        }
      }
    }
  }
}

long incorporate_unlabeled_examples(MODEL *model, long int *label,
                                    long int *inconsistent,
                                    long int *unlabeled,
                                    double *a, double *lin,
                                    long int totdoc, double *selcrit,
                                    long int *select, long int *key,
                                    long int transductcycle,
                                    KERNEL_PARM *kernel_parm,
                                    LEARN_PARM *learn_parm)
{
  long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
  long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
  double dist,model_length,posratio,negratio;
  long check_every=2;
  double loss;
  static double switchsens=0.0,switchsensorg=0.0;
  double umin,umax,sumalpha;
  long imin=0,imax=0;
  static long switchnum=0;

  switchsens/=1.2;

  /* assumes that lin[] is up to date -> no inactive vars */

  orgpos=0;
  orgneg=0;
  newpos=0;
  newneg=0;
  nolabel=0;
  allunlab=0;
  for(i=0;i<totdoc;i++) {
    if(!unlabeled[i]) {
      if(label[i] > 0) {
        orgpos++;
      }
      else {
        orgneg++;
      }
    }
    else {
      allunlab++;
      if(unlabeled[i]) {
        if(label[i] > 0) {
          newpos++;
        }
        else if(label[i] < 0) {
          newneg++;
        }
      }
    }
    if(label[i]==0) {
      nolabel++;
    }
  }

  if(learn_parm->transduction_posratio >= 0) {
    posratio=learn_parm->transduction_posratio;
  }
  else {
    posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
  }                                                  /* in training data     */
  negratio=1.0-posratio;

  learn_parm->svm_costratio=1.0;                     /* global */
  if(posratio>0) {
    learn_parm->svm_costratio_unlab=negratio/posratio;
  }
  else {
    learn_parm->svm_costratio_unlab=1.0;
  }

  pos=0;
  neg=0;
  upos=0;
  uneg=0;
  for(i=0;i<totdoc;i++) {
    dist=(lin[i]-model->b);  /* 'distance' from hyperplane */
    if(dist>0) {
      pos++;
    }
    else {
      neg++;
    }
    if(unlabeled[i]) {
      if(dist>0) {
        upos++;
      }
      else {
        uneg++;
      }
    }
    if((!unlabeled[i])
       && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
      /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",
                i,label[i],unlabeled[i]); */
    }
  }
  if(verbosity>=2) {
    printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
    printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
    printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
    fflush(stdout);
  }

  /* First transduction cycle: give the unlabeled examples tentative labels
     based on their distance from the current hyperplane, keeping the target
     fraction of positives. */
  if(transductcycle == 0) {
    j1=0;
    j2=0;
    j4=0;
    for(i=0;i<totdoc;i++) {
      dist=(lin[i]-model->b);  /* 'distance' from hyperplane */
      if((label[i]==0) && (unlabeled[i])) {
        selcrit[j4]=dist;
        key[j4]=i;
        j4++;
      }
    }
    unsupaddnum1=0;
    unsupaddnum2=0;
    select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
    for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
      i=key[select[k]];
      label[i]=1;
      unsupaddnum1++;
      j1++;
    }
    for(i=0;i<totdoc;i++) {
      if((label[i]==0) && (unlabeled[i])) {
        label[i]=-1;
        j2++;
        unsupaddnum2++;
      }
    }
    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }
    if(verbosity>=1) {
      /* printf("costratio %lf, costratio_unlab %lf, unlabbound %lf\n",
         learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
         learn_parm->svm_unlabbound); */
      printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
             unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }
    if(verbosity >= 1)
      printf("Retraining.");
    if(verbosity >= 2) printf("\n");
    return((long)3);
  }

  /* Every check_every-th cycle: commit the label switches that were marked
     in the previous cycle (unlabeled[i]==2 -> positive, ==3 -> negative). */
  if((transductcycle % check_every) == 0) {
    if(verbosity >= 1)
      printf("Retraining.");
    if(verbosity >= 2) printf("\n");
    j1=0;
    j2=0;
    unsupaddnum1=0;
    unsupaddnum2=0;
    for(i=0;i<totdoc;i++) {
      if((unlabeled[i] == 2)) {
        unlabeled[i]=1;
        label[i]=1;
        j1++;
        unsupaddnum1++;
      }
      else if((unlabeled[i] == 3)) {
        unlabeled[i]=1;
        label[i]=-1;
        j2++;
        unsupaddnum2++;
      }
    }
    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }
    if(verbosity>=2) {
      /* printf("costratio %lf, costratio_unlab %lf, unlabbound %lf\n",
         learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
         learn_parm->svm_unlabbound); */
      printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
             upos,unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }
    if(learn_parm->svm_unlabbound == 1) {
      learn_parm->epsilon_crit=0.001; /* do the last run right */
    }
    else {
      learn_parm->epsilon_crit=0.01;  /* otherwise, no need to be so picky */
    }
    return((long)3);
  }
  /* Intermediate cycle: look for a positive/negative pair of unlabeled
     examples whose labels appear inconsistent and mark them for switching. */
  else if(((transductcycle % check_every) < check_every)) {
    model_length=0;
    sumalpha=0;
    loss=0;
    for(i=0;i<totdoc;i++) {
      model_length+=a[i]*label[i]*lin[i];
      sumalpha+=a[i];
      dist=(lin[i]-model->b);  /* 'distance' from hyperplane */
      if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
        loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
      }
    }
    model_length=sqrt(model_length);
    if(verbosity>=2) {
      printf("Model-length = %f (%f), loss = %f, objective = %f\n",
             model_length,sumalpha,loss,loss+0.5*model_length*model_length);
      fflush(stdout);
    }
    j1=0;
    j2=0;
    j3=0;
    j4=0;
    unsupaddnum1=0;
    unsupaddnum2=0;
    umin=99999;
    umax=-99999;
    j4=1;
    while(j4) {
      umin=99999;
      umax=-99999;
      for(i=0;(i<totdoc);i++) {
        dist=(lin[i]-model->b);
        if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
           && (dist<umin)) {
          umin=dist;
          imin=i;
        }
        if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
           && (dist>umax)) {
          umax=dist;
          imax=i;
        }
      }
      if((umin < (umax+switchsens-1E-4))) {
        j1++;
        j2++;
        unsupaddnum1++;
        unlabeled[imin]=3;
        inconsistent[imin]=1;
        unsupaddnum2++;
        unlabeled[imax]=2;
        inconsistent[imax]=1;
      }
      else
        j4=0;
      j4=0;  /* note: cleared unconditionally, so the loop runs only once */
    }
    for(j=0;(j<totdoc);j++) {
      if(unlabeled[j] && (!inconsistent[j])) {
        if(label[j]>0) {
          unlabeled[j]=2;
        }
        else if(label[j]<0) {
          unlabeled[j]=3;
        }
        /* inconsistent[j]=1; */
        j3++;
      }
    }
    switchnum+=unsupaddnum1+unsupaddnum2;

    /* stop and print out current margin
       printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
       if(switchnum == 2*kernel_parm->poly_degree) {
         learn_parm->svm_unlabbound=1;
       }
    */

    if((!unsupaddnum1) && (!unsupaddnum2)) {
      if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
        for(j=0;(j<totdoc);j++) {
          inconsistent[j]=0;
          if(unlabeled[j]) unlabeled[j]=1;
        }
        write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
                         totdoc,learn_parm);
        if(verbosity>=1)
          printf("Number of switches: %ld\n",switchnum);
        return((long)0);
      }
      switchsens=switchsensorg;
      learn_parm->svm_unlabbound*=1.5;
      if(learn_parm->svm_unlabbound>1) {
        learn_parm->svm_unlabbound=1;
      }
      model->at_upper_bound=0; /* since upper bound increased */
      if(verbosity>=1)
        printf("Increasing influence of unlabeled examples to %f%% .",
               learn_parm->svm_unlabbound*100.0);
    }
    else if(verbosity>=1) {
      printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
             upos,unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }

    if(verbosity >= 2) printf("\n");

    learn_parm->epsilon_crit=0.5; /* don't need to be so picky */

    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }
    return((long)2);
  }
  return((long)0);
}

/*************************** Working set selection ***************************/

long select_next_qp_subproblem_grad(long int *label,
                                    long int *unlabeled,
                                    double *a, double *lin,
                                    double *c, long int totdoc,
                                    long int qp_size,
                                    LEARN_PARM *learn_parm,
                                    long int *inconsistent,
                                    long int *active2dnum,
                                    long int *working2dnum,
                                    double *selcrit,
                                    long int *select,
                                    KERNEL_CACHE *kernel_cache,
                                    long int *key, long int *chosen)
     /* Use the feasible direction approach to select the */
     /* next qp-subproblem (see section 'Selecting a good */
     /* working set')                                      */
{
  long choosenum,i,j,k,activedoc,inum;
  double s;

  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
  choosenum=0;
  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=-label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
             && (s>0)))
       && (!inconsistent[j])
       && (label[j])
       && (!chosen[j])) {
      selcrit[activedoc]=(double)label[j]*
        (learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
      /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
      /* selcrit[activedoc]=lin[j]-(double)label[j]; */
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
                                        /* out of cache                     */
  }

  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn
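/* ------------------------------------------------------------------------ */
/* Note: both incorporate_unlabeled_examples() and                           */
/* select_next_qp_subproblem_grad() above delegate to select_top_n(), which  */
/* is not part of this excerpt.  The sketch below is NOT the SVM-light       */
/* routine; it is a minimal illustration of the contract the call sites      */
/* appear to assume, namely that the indices of the n largest selcrit[]      */
/* values are written to select[].  The name select_top_n_sketch and its     */
/* implementation are hypothetical.  An insertion-style update like this     */
/* costs O(range*n), which is small next to the kernel evaluations done in   */
/* each optimization iteration.                                              */

#include <stdio.h>

static void select_top_n_sketch(double *selcrit, long range, long *select,
                                long n)
{
  long i,j,k;

  /* Build a sorted prefix from the first min(n, range) candidates. */
  for(i=0;(i<n) && (i<range);i++) {
    select[i]=i;
    for(j=i;(j>0) && (selcrit[select[j]]>selcrit[select[j-1]]);j--) {
      k=select[j]; select[j]=select[j-1]; select[j-1]=k;
    }
  }
  /* Scan the rest, replacing the current minimum whenever a larger value
     shows up, and re-sorting the short prefix by insertion. */
  for(i=n;i<range;i++) {
    if((n>0) && (selcrit[i]>selcrit[select[n-1]])) {
      select[n-1]=i;
      for(j=n-1;(j>0) && (selcrit[select[j]]>selcrit[select[j-1]]);j--) {
        k=select[j]; select[j]=select[j-1]; select[j-1]=k;
      }
    }
  }
}

/* Tiny demonstration: the three largest criteria sit at indices 4, 1 and 3. */
int main(void)
{
  double crit[]={0.2, 1.5, -0.3, 0.9, 2.1};
  long pick[3];

  select_top_n_sketch(crit,5,pick,3);
  printf("top indices: %ld %ld %ld\n",pick[0],pick[1],pick[2]); /* 4 1 3 */
  return 0;
}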