/***********************************************************************/
/*                                                                     */
/*   svm_struct_api.c  (instantiated for SVM-perform)                  */
/*                                                                     */
/*   Definition of the API for implementing SVM learning of            */
/*   structures (e.g. parsing, multi-label classification, HMM)        */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 31.10.05                                                    */
/*                                                                     */
/*   Copyright (c) 2005  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "svm_struct_api.h"
#include "svm_light/svm_common.h"
#include "svm_struct/svm_struct_common.h"
#include "svm_struct/svm_struct_learn.h"
#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define MIN(x,y) ((x) > (y) ? (y) : (x))
#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
int compareup(const void *a, const void *b)
{
  double va,vb;
  va=((STRUCT_ID_SCORE *)a)->score;
  vb=((STRUCT_ID_SCORE *)b)->score;
  if(va == vb) {
    va=((STRUCT_ID_SCORE *)a)->tiebreak;
    vb=((STRUCT_ID_SCORE *)b)->tiebreak;
  }
  return((va > vb) - (va < vb));
}

int comparedown(const void *a, const void *b)
{
  return(-compareup(a,b));
}
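
/* Illustrative sketch (added, not part of the original file): the two
   comparators above are qsort(3) callbacks over STRUCT_ID_SCORE
   records. Assuming an array of scores filled in elsewhere, a
   hypothetical helper for a descending sort by score (ties broken by
   the tiebreak field) would look like this: */
#if 0
static void sort_scores_descending(STRUCT_ID_SCORE *scores, long n)
{
  /* comparedown puts larger scores first; compareup would sort ascending */
  qsort(scores, (size_t)n, sizeof(STRUCT_ID_SCORE), comparedown);
}
#endif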
LABEL find_most_violated_constraint_errorrate(PATTERN x, LABEL y,
                                              STRUCTMODEL *sm,
                                              STRUCT_LEARN_PARM *sparm,
                                              int loss_type);
LABEL find_most_violated_constraint_thresholdmetric(PATTERN x, LABEL y,
                                                    STRUCTMODEL *sm,
                                                    STRUCT_LEARN_PARM *sparm,
                                                    int loss_type);
LABEL find_most_violated_constraint_rankmetric(PATTERN x, LABEL y,
                                               STRUCTMODEL *sm,
                                               STRUCT_LEARN_PARM *sparm,
                                               int loss_type);
LABEL find_most_violated_constraint_avgprec(PATTERN x, LABEL y,
                                            STRUCTMODEL *sm,
                                            STRUCT_LEARN_PARM *sparm,
                                            int loss_type);
double zeroone(int a, int b, int c, int d);
double fone(int a, int b, int c, int d);
double errorrate(int a, int b, int c, int d);
double prec(int a, int b, int c, int d);
double rec(int a, int b, int c, int d);
double swappedpairs(LABEL y, LABEL ybar);
double rocarea(LABEL y, LABEL ybar);
double prbep(LABEL y, LABEL ybar);
double avgprec(LABEL y, LABEL ybar);
double zeroone_loss(int a, int b, int c, int d);
double fone_loss(int a, int b, int c, int d);
double errorrate_loss(int a, int b, int c, int d);
double prbep_loss(int a, int b, int c, int d);
double prec_k_loss(int a, int b, int c, int d);
double rec_k_loss(int a, int b, int c, int d);
double swappedpairs_loss(LABEL y, LABEL ybar);
double avgprec_loss(LABEL y, LABEL ybar);
void svm_struct_learn_api_init(int argc, char* argv[])
{
  /* Called in learning part before anything else is done to allow
     any initializations that might be necessary. */
}

void svm_struct_learn_api_exit()
{
  /* Called in learning part at the very end to allow any clean-up
     that might be necessary. */
}

void svm_struct_classify_api_init(int argc, char* argv[])
{
  /* Called in prediction part before anything else is done to allow
     any initializations that might be necessary. */
}

void svm_struct_classify_api_exit()
{
  /* Called in prediction part at the very end to allow any clean-up
     that might be necessary. */
}
SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads struct examples and returns them in sample. The number of
     examples must be written into sample.n */
  SAMPLE   sample;  /* sample */
  EXAMPLE  *examples;
  long     n;       /* number of examples */
  long     totwords,maxlength=0,length,i,j,nump=0,numn=0;
  WORD     *words,*w;

  /* testing the matrix code
  MATRIX *G,*L,*LT,*GG,*LI,*I;
  G=create_matrix(3,3);
  G->element[0][0]=5;
  G->element[0][1]=2;
  G->element[0][2]=3;
  G->element[1][1]=5;
  G->element[1][2]=1;
  G->element[2][2]=5;
  print_matrix(G);
  L=cholesky_matrix(G);
  print_matrix(L);
  LT=transpose_matrix(L);
  print_matrix(LT);
  GG=prod_matrix_matrix(L,LT);
  print_matrix(GG);
  LI=invert_ltriangle_matrix(L);
  print_matrix(LI);
  I=prod_matrix_matrix(L,LI);
  print_matrix(I);
  */

  /* we have only one big example */
  examples=(EXAMPLE *)my_malloc(sizeof(EXAMPLE));
  /* Using the read_documents function from SVM-light */
  read_documents(file,&examples[0].x.doc,&examples[0].y.class,&totwords,&n);
  examples[0].x.totdoc=n;
  examples[0].y.totdoc=n;
  sample.n=1;
  sample.examples=examples;

  for(i=0;i<sample.examples[0].x.totdoc;i++) {
    length=1;
    if(sample.examples[0].y.class[i]>0)
      nump++;
    else
      numn++;
    for(w=sample.examples[0].x.doc[i]->fvec->words;w->wnum;w++) {
      length++;
      if(length > maxlength) /* find vector with max elements */
        maxlength=length;
    }
  }

  /* add feature for bias if necessary */
  /* WARNING: Currently this works correctly only for linear kernel! */
  sparm->bias_featurenum=0;
  if(sparm->bias != 0) {
    words=(WORD *)my_malloc(sizeof(WORD)*(maxlength+1));
    sparm->bias_featurenum=totwords+1;
    totwords++;
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      for(j=0;sample.examples[0].x.doc[i]->fvec->words[j].wnum;j++)
        words[j]=sample.examples[0].x.doc[i]->fvec->words[j];
      words[j].wnum=sparm->bias_featurenum; /* bias */
      words[j].weight=sparm->bias;
      j++;
      words[j].wnum=0;                      /* end marker */
      words[j].weight=0;
      free_svector(sample.examples[0].x.doc[i]->fvec);
      sample.examples[0].x.doc[i]->fvec=create_svector(words,"",1.0);
    }
    free(words);
  }
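
  /* Note (added): e.g. with --b 1, a document "1:0.5 3:2.0" is rewritten
     as "1:0.5 3:2.0 <totwords+1>:1". The learned weight on this extra,
     always-on feature plays the role of the threshold b in w*x+b, which
     the otherwise unbiased linear rule sign(w*x) would lack. */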
  /* Remove all features with numbers larger than num_features, if
     num_features is set to a positive value. This is important for
     svm_struct_classify. */
  if(sparm->num_features > 0)
    for(i=0;i<sample.examples[0].x.totdoc;i++)
      for(j=0;sample.examples[0].x.doc[i]->fvec->words[j].wnum;j++)
        if(sample.examples[0].x.doc[i]->fvec->words[j].wnum>sparm->num_features) {
          sample.examples[0].x.doc[i]->fvec->words[j].wnum=0;
          sample.examples[0].x.doc[i]->fvec->words[j].weight=0;
        }
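
  /* Note (added): e.g. a model trained with num_features=1000 must
     ignore a test-time feature 1005. Since SVM-light feature numbers
     are in ascending order and a wnum of 0 terminates the word list,
     zeroing the first out-of-range feature effectively truncates the
     sparse vector there. */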
  /* change label value for better scaling when using threshold metrics */
  if((sparm->loss_function == ZEROONE)
     || (sparm->loss_function == FONE)
     || (sparm->loss_function == ERRORRATE)
     || (sparm->loss_function == PRBEP)
     || (sparm->loss_function == PREC_K)
     || (sparm->loss_function == REC_K)) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=0.5*100.0/(numn+nump);
      else
        sample.examples[0].y.class[i]=-0.5*100.0/(numn+nump);
    }
  }
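
  /* Note (added): after this rescaling every label has magnitude
     0.5*100/(numn+nump), e.g. +-0.25 for a 200-example training set.
     The factor 100 puts the induced margins on the percentage scale
     on which these threshold metrics are reported. */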
  /* change label value for easy computation of rank metrics (e.g. ROC-area) */
  if(sparm->loss_function == SWAPPEDPAIRS) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      /* if(sample.examples[0].y.class[i]>0)
           sample.examples[0].y.class[i]=numn*0.5;
         else
           sample.examples[0].y.class[i]=-nump*0.5; */
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=0.5*100.0/nump;
      else
        sample.examples[0].y.class[i]=-0.5*100.0/numn;
    }
  }
  if(sparm->loss_function == AVGPREC) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=numn;
      else
        sample.examples[0].y.class[i]=-nump;
    }
  }
  return(sample);
}
void init_struct_model(SAMPLE sample, STRUCTMODEL *sm,
                       STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm,
                       KERNEL_PARM *kparm)
{
  /* Initialize structmodel sm. The weight vector w does not need to be
     initialized, but you need to provide the maximum size of the
     feature space in sizePsi. This is the maximum number of different
     weights that can be learned. Later, the weight vector w will
     contain the learned weights for the model. */
  long     i,j,k,totwords=0,totdoc=0,totexp=0,nump=0,numn=0,new_size;
  WORD     *words,*w;
  DOC      **orgdoc,**basis;
  double   *dummy;
  MATRIX   *G,*L;
  double   *indep,ii,weight;

  totdoc=sample.examples[0].x.totdoc;
  sm->sparse_kernel_type=sparm->sparse_kernel_type;
  sm->invL=NULL;
  sm->expansion=NULL;

  /* When using the sparse kernel approximation, this replaces each
     original feature vector with the vector of kernel values between
     the original feature vector and the expansion. */
  if(sm->sparse_kernel_type > 0) {
    kparm->kernel_type=sm->sparse_kernel_type;
    if(strcmp(sparm->sparse_kernel_file,"")) {
      if(struct_verbosity > 0) {
        printf("Reading basis functions for sparse kernel expansion from '%s'...",
               sparm->sparse_kernel_file);
        fflush(stdout);
      }
      read_documents(sparm->sparse_kernel_file,&basis,&dummy,&totwords,&totexp);
      free(dummy);
      if(struct_verbosity > 0)
        printf("done.\n");
    }
    else {
      basis=(DOC **)malloc(sizeof(DOC *)*totdoc);
      for(i=0;i<totdoc;i++) {
        basis[i]=(DOC *)malloc(sizeof(DOC));
        (*(basis[i]))=(*(sample.examples[0].x.doc[i]));
        basis[i]->fvec=copy_svector(sample.examples[0].x.doc[i]->fvec);
      }
      totexp=totdoc;
    }
    /* determine examples to use in expansion: B */
    if(struct_verbosity > 0) {
      printf("Selecting basis functions for sparse kernel expansion...");
      fflush(stdout);
    }
    if(sparm->sparse_kernel_size > totexp)
      sparm->sparse_kernel_size=totexp;
    sm->expansion=(DOC **)malloc(sizeof(DOC *)*totexp);
    sm->expansion_size=0;
    for(ii=0.5;ii<totexp;ii+=((double)totexp/sparm->sparse_kernel_size)) {
      sm->expansion[sm->expansion_size]=(DOC *)malloc(sizeof(DOC));
      (*(sm->expansion[sm->expansion_size]))=(*(basis[(long)ii]));
      sm->expansion[sm->expansion_size]->fvec=copy_svector(basis[(long)ii]->fvec);
      sm->expansion_size++;
    }
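
    /* Note (added): the loop above picks sparse_kernel_size basis
       vectors evenly spaced over the totexp candidates. E.g. with
       totexp=10 and sparse_kernel_size=4, ii takes the values
       0.5, 3.0, 5.5, 8.0, so documents 0, 3, 5, and 8 are copied
       into the expansion. */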
    for(i=0;i<totexp;i++)
      free_example(basis[i],1);
    free(basis);
    if(struct_verbosity > 0)
      printf("done.\n");

    /* Make sure they are all independent. If not, select an independent
       subset. */
    if(struct_verbosity > 0) {
      printf("Finding independent subset of vectors...");
      fflush(stdout);
    }
    G=create_matrix(sm->expansion_size,sm->expansion_size);
    for(i=0;i<sm->expansion_size;i++) {   /* need only upper triangle */
      for(j=i;j<sm->expansion_size;j++) {
        G->element[i][j]=kernel(kparm,sm->expansion[i],sm->expansion[j]);
      }
    }
    indep=find_indep_subset_of_matrix(G,0.000001);
    free_matrix(G);
    new_size=0;
    for(i=0;i<sm->expansion_size;i++) {
      if(indep[i] != 0) {
        sm->expansion[new_size]=sm->expansion[i];
        new_size++;
      }
      else {
        free_example(sm->expansion[i],1);
      }
    }
    free_nvector(indep);
    if(struct_verbosity>0)
      printf("found %ld of %ld...",new_size,sm->expansion_size);
    sm->expansion_size=new_size;
    if(struct_verbosity > 0)
      printf("done.\n");
    /* compute matrix B^T*B */
    if(struct_verbosity > 0) {
      printf("Computing Gram matrix for kernel expansion...");
      fflush(stdout);
    }
    G=create_matrix(sm->expansion_size,sm->expansion_size);
    for(i=0;i<sm->expansion_size;i++) {  /* need upper triangle for cholesky */
      for(j=i;j<sm->expansion_size;j++) {
        G->element[i][j]=kernel(kparm,sm->expansion[i],sm->expansion[j]);
      }
    }
    if(struct_verbosity > 0)
      printf("done.\n");
    if(struct_verbosity > 0) {
      printf("Computing Cholesky decomposition and inverting...");
      fflush(stdout);
    }
    L=cholesky_matrix(G);
    sm->invL=invert_ltriangle_matrix(L);
    free_matrix(L);
    free_matrix(G);
    if(struct_verbosity > 0)
      printf("done.\n");

    /* compute new features for each example x: B^T*x */
    if(struct_verbosity > 0) {
      printf("Replacing feature vectors...");
      fflush(stdout);
    }
    orgdoc=sample.examples[0].x.doc;
    sample.examples[0].x.doc=(DOC **)malloc(sizeof(DOC *)*totdoc);
    words=(WORD *)malloc(sizeof(WORD)*(sm->expansion_size+1));
    for(i=0;i<totdoc;i++) {
      k=0;
      for(j=0;j<sm->expansion_size;j++) {
        weight=kernel(kparm,orgdoc[i],sm->expansion[j]);
        if(weight != 0) {
          words[k].wnum=j+1;
          words[k].weight=weight;
          k++;
        }
      }
      words[k].wnum=0; /* end marker */
      sample.examples[0].x.doc[i]=create_example(orgdoc[i]->docnum,
                                                 orgdoc[i]->queryid,
                                                 orgdoc[i]->slackid,
                                                 orgdoc[i]->costfactor,
                                                 create_svector(words,
                                                   orgdoc[i]->fvec->userdefined,1.0));
    }
    free(words);
    for(i=0;i<totdoc;i++)
      free_example(orgdoc[i],1);
    free(orgdoc);
    kparm->kernel_type=LINEAR;
    if(struct_verbosity > 0)
      printf("done.\n");
  }
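
  /* Note (added): the block above implements a Nystrom-style kernel
     approximation. Each document x is replaced by its vector of kernel
     values against the basis B, k_B(x) = (k(x,b_1),...,k(x,b_m)). Since
     the basis Gram matrix factors as G = L*L^T, whitening with the
     stored invL = L^-1 yields
       <L^-1 k_B(x), L^-1 k_B(x')> = k_B(x)^T G^-1 k_B(x'),
     the standard Nystrom approximation of k(x,x'). This is why
     kernel_type is reset to LINEAR above: a linear model over the
     transformed vectors stands in for the original non-linear kernel. */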
  /* count number of positive and negative examples */
  for(i=0;i<sample.examples[0].x.totdoc;i++) {
    if(sample.examples[0].y.class[i]>0)
      nump++;
    else
      numn++;
  }

  totwords=0;
  for(i=0;i<totdoc;i++)  /* find highest feature number */
    for(w=sample.examples[0].x.doc[i]->fvec->words;w->wnum;w++)
      if(totwords < w->wnum)
        totwords=w->wnum;
  sparm->num_features=totwords;
  if(struct_verbosity>=0)
    printf("Training set properties: %ld features, %ld examples (%ld pos / %ld neg)\n",
           sparm->num_features,totdoc,nump,numn);
  sm->sizePsi=sparm->num_features;
  if(struct_verbosity>=2)
    printf("Size of Psi: %ld\n",sm->sizePsi);

  /* fill in default value for k when using Prec@k or Rec@k */
  if(sparm->loss_function == PREC_K) {
    if(sparm->prec_rec_k_frac == 0) {
      sparm->prec_rec_k_frac=0.5;
    }
    else if(sparm->prec_rec_k_frac > 1) {
      printf("\nERROR: The value of option --k for Prec@k must not be larger than 1.0!\n\n");
      exit(0);
    }
  }
  if(sparm->loss_function == REC_K) {
    if(sparm->prec_rec_k_frac == 0) {
      sparm->prec_rec_k_frac=2;
    }
    else if(sparm->prec_rec_k_frac < 1) {
      printf("\nERROR: The value of option --k for Rec@k must not be smaller than 1.0!\n\n");
      exit(0);
    }
  }

  /* make sure that the bias feature is not used with kernels */
  if((kparm->kernel_type != LINEAR) && (sparm->bias != 0)) {
    printf("\nERROR: The value of option --b must be zero when kernels are used.\n");
    printf("The option to use a bias for non-linear kernels is not implemented yet!\n\n");
    exit(0);
  }
}
CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm,
                                 STRUCT_LEARN_PARM *sparm)
{
  /* Initializes the optimization problem. Typically, you do not need
     to change this function, since you want to start with an empty
     set of constraints. However, if for example you have constraints
     that certain weights need to be positive, you might put that in
     here. The constraints are represented as lhs[i]*w >= rhs[i]. lhs
     is an array of feature vectors, rhs is an array of doubles. m is
     the number of constraints. The function returns the initial
     set of constraints. */
  CONSTSET c;
  long     sizePsi=sm->sizePsi;
  long     i;
  WORD     words[2];
  if(1) { /* normal case: start with empty set of constraints */
    c.lhs=NULL;
    c.rhs=NULL;
    c.m=0;
  }
  /* An else-branch that adds positivity constraints on individual
     weights (as sketched in the comment above) would go here; sizePsi,
     i, and words are declared for that purpose. */
  return(c);
}