/* svm_struct_learn.c -- excerpt (web code-viewer page header removed).
   NOTE: the file begins mid-function; the lines below are the tail of a
   function whose header lies before this excerpt. */
/* NOTE(review): this is the TAIL of a function whose beginning lies before
   this excerpt (final statistics reporting and cleanup of a cutting-plane
   structural-SVM learner).  Code below is unchanged; comments only. */
alphasum+=alpha[i]*cset.rhs[i];   /* body of a loop started above: sum over dual objective terms */
modellength=model_length_s(svmModel,kparm);   /* |w| of current model */
/* Upper bound on the duality gap: primal objective estimate
   (0.5|w|^2 + C*(slack sum + n*eps)) minus dual objective (alphasum - 0.5|w|^2). */
dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps))
-(alphasum-0.5*modellength*modellength);
printf("Final epsilon on KKT-Conditions: %.5f\n",
MAX(svmModel->maxdiff,epsilon));
printf("Upper bound on duality gap: %.5f\n", dualitygap);
printf("Dual objective value: dval=%.5f\n",
alphasum-0.5*modellength*modellength);
printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
printf("Number of iterations: %d\n",numIt);
printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
if(sparm->slack_norm == 1) {
printf("Number of SV: %ld \n",svmModel->sv_num-1);
/* NOTE(review): n is printed with %ld -- assumes n is declared long in the
   (unseen) function header; confirm. */
printf("Number of non-zero slack variables: %ld (out of %ld)\n",
svmModel->at_upper_bound,n);
printf("Norm of weight vector: |w|=%.5f\n",
model_length_s(svmModel,kparm));
}
else if(sparm->slack_norm == 2){
printf("Number of SV: %ld (including %ld at upper bound)\n",
svmModel->sv_num-1,svmModel->at_upper_bound);
printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
model_length_s(svmModel,kparm));
}
printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n);
printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
length_of_longest_document_vector(cset.lhs,cset.m,kparm));
/* The /100.0 presumably converts get_runtime()'s 1/100-second units to
   seconds -- TODO confirm against get_runtime()'s definition. */
printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total,
(100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total);
}  /* closes a verbosity guard opened before this excerpt */
if(struct_verbosity>=4)
printW(sm->w,sizePsi,n,lparm->svm_c);
/* Hand the caller a deep copy of the learned model via sm; the local
   svmModel is freed below. */
if(svmModel) {
sm->svm_model=copy_model(svmModel);
sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
}
print_struct_learning_stats(sample,sm,cset,alpha,sparm);
/* ---- cleanup: cached Psi vectors, local model, dual state, working set ---- */
if(fycache) {
for(i=0;i<n;i++)
free_svector(fycache[i]);
free(fycache);
}
if(svmModel)
free_model(svmModel,0);   /* 0: keep support vectors owned elsewhere */
free(alpha);
free(alphahist);
free(opti);
free(cset.rhs);
for(i=0;i<cset.m;i++)
free_example(cset.lhs[i],1);  /* 1: also free the contained feature vector */
free(cset.lhs);
}
/*
 * svm_learn_struct_joint: cutting-plane training of a structural SVM using
 * ONE joint ("1-slack") constraint per iteration -- the average of the
 * per-example most-violated constraints -- instead of n separate ones.
 * Repeatedly finds the most violated joint constraint, adds it to the
 * working set cset, and re-solves the QP, tightening epsilon as it goes.
 *
 * Parameters:
 *   sample   - training examples (sample.n examples in sample.examples)
 *   sparm    - structure-learning parameters (C, epsilon, slack/loss type)
 *   lparm    - parameters handed to the underlying QP solver
 *   kparm    - kernel parameters (LINEAR kernel gets a fast dense path)
 *   sm       - out: learned structural model (sm->svm_model, sm->w)
 *   alg_type - variant selector; DUAL_ALG caches the Gram matrix,
 *              DUAL_CACHE_ALG additionally keeps a constraint cache
 *
 * NOTE(review): this excerpt is TRUNCATED -- the do{...} main loop opened
 * below never closes within the visible text.  Comments only; code is
 * unchanged.
 */
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
LEARN_PARM *lparm, KERNEL_PARM *kparm,
STRUCTMODEL *sm, int alg_type)
{
int i,j;
int numIt=0;                      /* iteration counter (reporting) */
long argmax_count=0;              /* # calls to find_most_violated_constraint_* */
long totconstraints=0;            /* total constraints ever added */
long kernel_type_org;             /* saved kernel type while QP runs on GRAM */
double epsilon,epsilon_cached;    /* current precision targets */
double lossval,factor,dist;
double margin=0;                  /* rhs of the joint constraint being built */
double slack, slacksum, ceps;
double dualitygap,modellength,alphasum;
long sizePsi;
double *alpha=NULL;               /* dual variables of the working-set QP */
long *alphahist=NULL,optcount=0;  /* last QP round each constraint was active */
CONSTSET cset;                    /* working set of joint constraints */
SVECTOR *diff=NULL;
double *diff_n=NULL;              /* dense accumulator (LINEAR kernel only) */
SVECTOR *fy, *fybar, *f, **fycache, *lhs;
MODEL *svmModel=NULL;
LABEL ybar;
DOC *doc;
long n=sample.n;
EXAMPLE *ex=sample.examples;
double rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
double rt1,rt2;
double progress,progress_old;
/*
SVECTOR ***fydelta_cache=NULL;
double **loss_cache=NULL;
int cache_size=0;
*/
CCACHE *ccache=NULL;              /* constraint cache (DUAL_CACHE_ALG only) */
int cached_constraint;            /* 1 if this round's constraint came from cache */
rt1=get_runtime();
init_struct_model(sample,sm,sparm,lparm,kparm);
sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */
/* The joint (1-slack) formulation only supports the L1 slack norm. */
if(sparm->slack_norm == 1) {
lparm->svm_c=sparm->C; /* set upper bound C */
lparm->sharedslack=1;
}
else if(sparm->slack_norm == 2) {
/* NOTE(review): exit(0) reports success to the shell on an error path;
   exit(1) would be conventional (same below). */
printf("ERROR: The joint algorithm does not apply to L2 slack norm!");
fflush(stdout);
exit(0);
}
else {
printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
exit(0);
}
lparm->biased_hyperplane=0; /* set threshold to zero */
epsilon=100.0; /* start with low precision and
increase later */
epsilon_cached=epsilon; /* epsilon to use for iterations
using constraints constructed
from the constraint cache */
/* Seed the working set with any application-supplied initial constraints;
   their alphahist entries are pinned so they are never pruned. */
cset=init_struct_constraints(sample, sm, sparm);
if(cset.m > 0) {
alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
for(i=0; i<cset.m; i++) {
alpha[i]=0;
alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
}
}
/* For the dual variants, precompute the Gram matrix of the working set. */
kparm->gram_matrix=NULL;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
kparm->gram_matrix=init_kernel_matrix(&cset,kparm);
/* set initial model and slack variables */
svmModel=(MODEL *)my_malloc(sizeof(MODEL));
lparm->epsilon_crit=epsilon;
svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
lparm,kparm,NULL,svmModel,alpha);
add_weight_vector_to_linear_model(svmModel);
sm->svm_model=svmModel;
sm->w=svmModel->lin_weights; /* short cut to weight vector */
/* create a cache of the feature vectors for the correct labels */
fycache=(SVECTOR **)malloc(n*sizeof(SVECTOR *));
for(i=0;i<n;i++) {
fy=psi(ex[i].x,ex[i].y,sm,sparm);
if(kparm->kernel_type == LINEAR) {
/* collapse the svector list into one vector up front for speed */
diff=add_list_ss(fy); /* store difference vector directly */
free_svector(fy);
fy=diff;
}
fycache[i]=fy;
}
/* initialize the constraint cache */
if(alg_type == DUAL_CACHE_ALG) {
ccache=create_constraint_cache(sample,sparm);
}
rt_init+=MAX(get_runtime()-rt1,0);
rt_total+=MAX(get_runtime()-rt1,0);
/*****************/
/*** main loop ***/
/*****************/
do { /* iteratively find and add constraints to working set */
if(struct_verbosity>=1) {
printf("Iter %i: ",++numIt);
fflush(stdout);
}
rt1=get_runtime();
/**** compute current slack ****/
slack=0;
for(j=0;j<cset.m;j++)
slack=MAX(slack,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
/**** find a violated joint constraint ****/
lhs=NULL;
dist=0;
if(alg_type == DUAL_CACHE_ALG) {
/* see if it is possible to construct violated constraint from cache */
update_constraint_cache_for_model(ccache, svmModel);
dist=find_most_violated_joint_constraint_in_cache(ccache,&lhs,&margin);
}
rt_total+=MAX(get_runtime()-rt1,0);
/* Is there a sufficiently violated constraint in cache? */
if(dist-slack > MAX(epsilon/10,sparm->epsilon)) {
/* use constraint from cache */
rt1=get_runtime();
cached_constraint=1;
if(kparm->kernel_type == LINEAR) {
diff=add_list_ns(lhs); /* Linear case: compute weighted sum */
free_svector_shallow(lhs);
}
else { /* Non-linear case: make sure we have deep copy for cset */
diff=copy_svector(lhs);
free_svector_shallow(lhs);
}
rt_total+=MAX(get_runtime()-rt1,0);
}
else {
/* do not use constraint from cache */
rt1=get_runtime();
cached_constraint=0;
if(lhs)
free_svector_shallow(lhs);
lhs=NULL;
if(kparm->kernel_type == LINEAR) {
/* dense accumulator for the averaged difference vector */
diff_n=create_nvector(sm->sizePsi);
clear_nvector(diff_n,sm->sizePsi);
}
margin=0;
progress=0;
progress_old=progress;
rt_total+=MAX(get_runtime()-rt1,0);
/**** find most violated joint constraint ***/
for(i=0; i<n; i++) {
rt1=get_runtime();
/* at verbosity 1, print one dot per 10% of examples processed */
progress+=10.0/n;
if((struct_verbosity==1) && (((int)progress_old) != ((int)progress)))
{printf(".");fflush(stdout); progress_old=progress;}
if(struct_verbosity>=2)
{printf("."); fflush(stdout);}
rt2=get_runtime();
argmax_count++;
if(sparm->loss_type == SLACK_RESCALING)
ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
ex[i].y,sm,
sparm);
else
ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
ex[i].y,sm,
sparm);
rt_viol+=MAX(get_runtime()-rt2,0);
if(empty_label(ybar)) {
/* skip this example rather than aborting the whole run */
printf("ERROR: empty label was returned for example (%i)\n",i);
/* exit(1); */
continue;
}
/**** get psi(x,y) and psi(x,ybar) ****/
rt2=get_runtime();
fy=copy_svector(fycache[i]); /*<= fy=psi(ex[i].x,ex[i].y,sm,sparm);*/
fybar=psi(ex[i].x,ybar,sm,sparm);
rt_psi+=MAX(get_runtime()-rt2,0);
lossval=loss(ex[i].y,ybar,sparm);
free_label(ybar);
/**** scale feature vector and margin by loss ****/
if(sparm->loss_type == SLACK_RESCALING)
factor=lossval/n;
else /* do not rescale vector for */
factor=1.0/n; /* margin rescaling loss type */
for(f=fy;f;f=f->next)
f->factor*=factor;
for(f=fybar;f;f=f->next)
f->factor*=-factor;   /* negate so the concatenated list encodes fy-fybar */
append_svector_list(fybar,fy); /* compute fy-fybar */
/**** add current fy-fybar and loss to cache ****/
if(alg_type == DUAL_CACHE_ALG) {
if(kparm->kernel_type == LINEAR)
add_constraint_to_constraint_cache(ccache,svmModel,i,
add_list_ss(fybar),
lossval/n,sparm->ccache_size);
else
add_constraint_to_constraint_cache(ccache,svmModel,i,
copy_svector(fybar),
lossval/n,sparm->ccache_size);
}
/**** add current fy-fybar to constraint and margin ****/
if(kparm->kernel_type == LINEAR) {
add_list_n_ns(diff_n,fybar,1.0); /* add fy-fybar to sum */
free_svector(fybar);
}
else {
append_svector_list(fybar,lhs); /* add fy-fybar to vector list */
lhs=fybar;
}
margin+=lossval/n; /* add loss to rhs */
rt_total+=MAX(get_runtime()-rt1,0);
} /* end of example loop */
rt1=get_runtime();
/* create sparse vector from dense sum */
if(kparm->kernel_type == LINEAR) {
diff=create_svector_n(diff_n,sm->sizePsi,"",1.0);
free_nvector(diff_n);
}
else {
diff=lhs;
}
rt_total+=MAX(get_runtime()-rt1,0);
} /* end of finding most violated joint constraint */
rt1=get_runtime();
/**** if `error', then add constraint and recompute QP ****/
/* NOTE(review): doc is never freed within this excerpt -- confirm a
   free_example(doc,0) follows in the truncated remainder. */
doc=create_example(cset.m,0,1,1,diff);
dist=classify_example(svmModel,doc);
ceps=MAX(0,margin-dist-slack);  /* violation of new constraint beyond current slack */
if(slack > (margin-dist+0.000001)) {
printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
printf("slack=%f, newslack=%f\n",slack,margin-dist);
/* exit(1); */
}
if(ceps > sparm->epsilon) {
/**** resize constraint matrix and add new constraint ****/
cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
if(sparm->slack_norm == 1)
cset.lhs[cset.m]=create_example(cset.m,0,1,1,diff);
else if(sparm->slack_norm == 2)
exit(1);   /* unreachable: L2 norm was rejected at function entry */
cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
cset.rhs[cset.m]=margin;
alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
alpha[cset.m]=0;
alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
alphahist[cset.m]=optcount;
cset.m++;
totconstraints++;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) {
if(struct_verbosity>=1) {
printf(":");fflush(stdout);
}
rt2=get_runtime();
/* extend the cached Gram matrix by the new constraint's row/column */
kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
&cset,kparm);
rt_kernel+=MAX(get_runtime()-rt2,0);
}
/**** get new QP solution ****/
if(struct_verbosity>=1) {
printf("*");fflush(stdout);
}
rt2=get_runtime();
/* set svm precision so that higher than eps of most violated constr */
if(cached_constraint) {
epsilon_cached=MIN(epsilon_cached,MAX(ceps,sparm->epsilon));
lparm->epsilon_crit=epsilon_cached/2;
}
else {
epsilon=MIN(epsilon,MAX(ceps,sparm->epsilon)); /* best eps so far */
lparm->epsilon_crit=epsilon/2;
epsilon_cached=epsilon;
}
free_model(svmModel,0);
svmModel=(MODEL *)my_malloc(sizeof(MODEL));
/* Run the QP solver on cset. */
kernel_type_org=kparm->kernel_type;
if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
kparm->kernel_type=GRAM; /* use kernel stored in kparm */
svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
lparm,kparm,NULL,svmModel,alpha);
kparm->kernel_type=kernel_type_org;   /* restore real kernel type */
svmModel->kernel_parm.kernel_type=kernel_type_org;
/* Always add weight vector, in case part of the kernel is
linear. If not, ignore the weight vector since its
content is bogus. */
add_weight_vector_to_linear_model(svmModel);
sm->svm_model=svmModel;
sm->w=svmModel->lin_weights; /* short cut to weight vector */
optcount++;
/* keep track of when each constraint was last
active. constraints marked with -1 are not updated */
for(j=0;j<cset.m;j++)
if((alphahist[j]>-1) && (alpha[j] != 0))
alphahist[j]=optcount;
rt_opt+=MAX(get_runtime()-rt2,0);
/* Check if some of the linear constraints have not been
active in a while. Those constraints are then removed to
avoid bloating the working set beyond necessity. */
/* NOTE(review): the unbraced 'if' below governs only the printf; the
   trailing fflush(stdout) runs unconditionally -- probably intended to be
   inside the conditional. */
if(struct_verbosity>=2)
printf("Reducing working set...");fflush(stdout);
remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
if(struct_verbosity>=2)
printf("done. (NumConst=%d) ",cset.m);
} /* NOTE(review): excerpt truncated here -- the enclosing do{...}while
     loop and the remainder of the function continue beyond this point. */
/* [end of excerpt -- web code-viewer keyboard-shortcut help (Chinese UI
   chrome) removed; the function above is truncated mid-body at this point] */