📄 svm_common.c

📁 一款不错的支持向量机程序
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
DOC *create_example(long docnum, long queryid, long slackid, 		    double costfactor, SVECTOR *fvec){  DOC *example;  example = (DOC *)my_malloc(sizeof(DOC));  example->docnum=docnum;  example->queryid=queryid;  example->slackid=slackid;  example->costfactor=costfactor;  example->fvec=fvec;  return(example);}void free_example(DOC *example, long deep){  if(example) {    if(deep) {      if(example->fvec)	free_svector(example->fvec);    }    free(example);  }}void write_model(char *modelfile, MODEL *model){  FILE *modelfl;  long j,i,sv_num;  SVECTOR *v;  if(verbosity>=1) {    printf("Writing model file..."); fflush(stdout);  }  if ((modelfl = fopen (modelfile, "w")) == NULL)  { perror (modelfile); exit (1); }  fprintf(modelfl,"SVM-light Version %s\n",VERSION);  fprintf(modelfl,"%ld # kernel type\n",	  model->kernel_parm.kernel_type);  fprintf(modelfl,"%ld # kernel parameter -d \n",	  model->kernel_parm.poly_degree);  fprintf(modelfl,"%.8g # kernel parameter -g \n",	  model->kernel_parm.rbf_gamma);  fprintf(modelfl,"%.8g # kernel parameter -s \n",	  model->kernel_parm.coef_lin);  fprintf(modelfl,"%.8g # kernel parameter -r \n",	  model->kernel_parm.coef_const);  fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);  fprintf(modelfl,"%ld # highest feature index \n",model->totwords);  fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);   sv_num=1;  for(i=1;i<model->sv_num;i++) {    for(v=model->supvec[i]->fvec;v;v=v->next)       sv_num++;  }  fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);  fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);  for(i=1;i<model->sv_num;i++) {    for(v=model->supvec[i]->fvec;v;v=v->next) {      fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);      for (j=0; (v->words[j]).wnum; j++) {	fprintf(modelfl,"%ld:%.8g ",		(long)(v->words[j]).wnum,		(double)(v->words[j]).weight);      }      fprintf(modelfl,"#%s\n",v->userdefined);    /* NOTE: this could be made more efficient by summing the       alpha's of identical vectors before writing them to the       file. */    }  }  fclose(modelfl);  if(verbosity>=1) {    printf("done\n");  }}MODEL *read_model(char *modelfile){  FILE *modelfl;  long i,queryid,slackid;  double costfactor;  long max_sv,max_words,ll,wpos;  char *line,*comment;  WORD *words;  char version_buffer[100];  MODEL *model;  if(verbosity>=1) {    printf("Reading model..."); fflush(stdout);  }  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */  max_words+=2;  ll+=2;  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));  line = (char *)my_malloc(sizeof(char)*ll);  model = (MODEL *)my_malloc(sizeof(MODEL));  if ((modelfl = fopen (modelfile, "r")) == NULL)  { perror (modelfile); exit (1); }  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);  if(strcmp(version_buffer,VERSION)) {    perror ("Version of model-file does not match version of svm_classify!");     exit (1);   }  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);    fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);  model->index=NULL;  model->lin_weights=NULL;  for(i=1;i<model->sv_num;i++) {    fgets(line,(int)ll,modelfl);    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,		       &costfactor,&wpos,max_words,&comment)) {      printf("\nParsing error while reading model file in SV %ld!\n%s",	     i,line);      exit(1);    }    model->supvec[i] = create_example(-1,				      0,0,				      0.0,				      create_svector(words,comment,1.0));  }  fclose(modelfl);  free(line);  free(words);  if(verbosity>=1) {    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));  }  return(model);}MODEL *copy_model(MODEL *model){  MODEL *newmodel;  long  i;  newmodel=(MODEL *)my_malloc(sizeof(MODEL));  (*newmodel)=(*model);  newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);  newmodel->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);  newmodel->index = NULL; /* index is not copied */  newmodel->supvec[0] = NULL;  newmodel->alpha[0] = 0;  for(i=1;i<model->sv_num;i++) {    newmodel->alpha[i]=model->alpha[i];    newmodel->supvec[i]=create_example(model->supvec[i]->docnum,				       model->supvec[i]->queryid,0,				       model->supvec[i]->costfactor,				       copy_svector(model->supvec[i]->fvec));  }  if(model->lin_weights) {    newmodel->lin_weights = (double *)my_malloc(sizeof(double)*(model->totwords+1));    for(i=0;i<model->totwords+1;i++)       newmodel->lin_weights[i]=model->lin_weights[i];  }  return(newmodel);}void free_model(MODEL *model, int deep){  long i;  if(model->supvec) {    if(deep) {      for(i=1;i<model->sv_num;i++) {	free_example(model->supvec[i],1);      }    }    free(model->supvec);  }  if(model->alpha) free(model->alpha);  if(model->index) free(model->index);  if(model->lin_weights) free(model->lin_weights);  free(model);}void read_documents(char *docfile, DOC ***docs, double **label, 		    long int *totwords, long int *totdoc){  char *line,*comment;  WORD *words;  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;  long max_words_doc, ll;  double doc_label,costfactor;  FILE *docfl;  if(verbosity>=1) {    printf("Scanning examples..."); fflush(stdout);  }  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */  max_words_doc+=2;  ll+=2;  max_docs+=2;  if(verbosity>=1) {    printf("done\n"); fflush(stdout);  }  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */  line = (char *)my_malloc(sizeof(char)*ll);  if ((docfl = fopen (docfile, "r")) == NULL)  { perror (docfile); exit (1); }  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));  if(verbosity>=1) {    printf("Reading examples into memory..."); fflush(stdout);  }  dnum=0;  (*totwords)=0;  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {    if(line[0] == '#') continue;  /* line contains comments */    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,		       &wpos,max_words_doc,&comment)) {      printf("\nParsing error in line %ld!\n%s",dnum,line);      exit(1);    }    (*label)[dnum]=doc_label;    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */    if(doc_label > 0) dpos++;    if (doc_label < 0) dneg++;    if (doc_label == 0) dunlab++;    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))       (*totwords)=(words[wpos-2]).wnum;    if((*totwords) > MAXFEATNUM) {      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");      printf("LINE: %s\n",line);      exit(1);    }    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,				   create_svector(words,comment,1.0));    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq);  */    dnum++;      if(verbosity>=1) {      if((dnum % 100) == 0) {	printf("%ld..",dnum); fflush(stdout);      }    }  }   fclose(docfl);  free(line);  free(words);  if(verbosity>=1) {    fprintf(stdout, "OK. (%ld examples read)\n", dnum);  }  (*totdoc)=dnum;}int parse_document(char *line, WORD *words, double *label,		   long *queryid, long *slackid, double *costfactor,		   long int *numwords, long int max_words_doc,		   char **comment){  register long wpos,pos;  long wnum;  double weight;  int numread;  char featurepair[1000],junk[1000];  (*queryid)=0;  (*slackid)=0;  (*costfactor)=1;  pos=0;  (*comment)=NULL;  while(line[pos] ) {      /* cut off comments */    if((line[pos] == '#') && (!(*comment))) {      line[pos]=0;      (*comment)=&(line[pos+1]);    }    if(line[pos] == '\n') { /* strip the CR */      line[pos]=0;    }    pos++;  }  if(!(*comment)) (*comment)=&(line[pos]);  /* printf("Comment: '%s'\n",(*comment)); */  wpos=0;  /* check, that line starts with target value or zero, but not with     feature pair */  if(sscanf(line,"%s",featurepair) == EOF) return(0);  pos=0;  while((featurepair[pos] != ':') && featurepair[pos]) pos++;  if(featurepair[pos] == ':') {	perror ("Line must start with label or 0!!!\n"); 	printf("LINE: %s\n",line);	exit (1);   }  /* read the target value */  if(sscanf(line,"%lf",label) == EOF) return(0);  pos=0;  while(space_or_null((int)line[pos])) pos++;  while((!space_or_null((int)line[pos])) && line[pos]) pos++;  while(((numread=sscanf(line+pos,"%s",featurepair)) != EOF) && 	(numread > 0) && 	(wpos<max_words_doc)) {    /* printf("%s\n",featurepair); */    while(space_or_null((int)line[pos])) pos++;    while((!space_or_null((int)line[pos])) && line[pos]) pos++;    if(sscanf(featurepair,"qid:%ld%s",&wnum,junk)==1) {      /* it is the query id */      (*queryid)=(long)wnum;    }    else if(sscanf(featurepair,"sid:%ld%s",&wnum,junk)==1) {      /* it is the slack id */      if(wnum > 0) 	(*slackid)=(long)wnum;      else {	perror ("Slack-id must be greater or equal to 1!!!\n"); 	printf("LINE: %s\n",line);	exit (1);       }    }    else if(sscanf(featurepair,"cost:%lf%s",&weight,junk)==1) {      /* it is the example-dependent cost factor */      (*costfactor)=(double)weight;    }    else if(sscanf(featurepair,"%ld:%lf%s",&wnum,&weight,junk)==2) {      /* it is a regular feature */      if(wnum<=0) { 	perror ("Feature numbers must be larger or equal to 1!!!\n"); 	printf("LINE: %s\n",line);	exit (1);       }      if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) { 	perror ("Features must be in increasing order!!!\n"); 	printf("LINE: %s\n",line);	exit (1);       }      (words[wpos]).wnum=wnum;      (words[wpos]).weight=(FVAL)weight;       wpos++;    }    else {      perror ("Cannot parse feature/value pair!!!\n");       printf("'%s' in LINE: %s\n",featurepair,line);      exit (1);     }  }  (words[wpos]).wnum=0;  (*numwords)=wpos+1;  return(1);}double *read_alphas(char *alphafile,long totdoc)     /* reads the alpha vector from a file as written by the        write_alphas function */{  FILE *fl;  double *alpha;  long dnum;  if ((fl = fopen (alphafile, "r")) == NULL)  { perror (alphafile); exit (1); }  alpha = (double *)my_malloc(sizeof(double)*totdoc);  if(verbosity>=1) {    printf("Reading alphas..."); fflush(stdout);  }  dnum=0;  while((!feof(fl)) && fscanf(fl,"%lf\n",&alpha[dnum]) && (dnum<totdoc)) {    dnum++;  }  if(dnum != totdoc)  { perror ("\nNot enough values in alpha file!"); exit (1); }  fclose(fl);  if(verbosity>=1) {    printf("done\n"); fflush(stdout);  }  return(alpha);}void nol_ll(char *file, long int *nol, long int *wol, long int *ll)      /* Grep through file and count number of lines, maximum number of        spaces per line, and longest line. */{  FILE *fl;  int ic;  char c;  long current_length,current_wol;  if ((fl = fopen (file, "r")) == NULL)  { perror (file); exit (1); }  current_length=0;  current_wol=0;  (*ll)=0;  (*nol)=1;  (*wol)=0;  while((ic=getc(fl)) != EOF) {    c=(char)ic;    current_length++;    if(space_or_null((int)c)) {      current_wol++;    }    if(c == '\n') {      (*nol)++;      if(current_length>(*ll)) {	(*ll)=current_length;      }      if(current_wol>(*wol)) {	(*wol)=current_wol;      }      current_length=0;      current_wol=0;    }  }  fclose(fl);}long minl(long int a, long int b){  if(a<b)    return(a);  else    return(b);}long maxl(long int a, long int b){  if(a>b)    return(a);  else    return(b);}long get_runtime(void){  clock_t start;  start = clock();  return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));}# ifdef _MSC_VERint isnan(double a){  return(_isnan(a));}# endif int space_or_null(int c) {  if (c==0)    return 1;  return isspace(c);}void *my_malloc(size_t size){  void *ptr;  ptr=(void *)malloc(size);  if(!ptr) {     perror ("Out of memory!\n");     exit (1);   }  return(ptr);}void copyright_notice(void){  printf("\nCopyright: Thorsten Joachims, thorsten@joachims.org\n\n");  printf("This software is available for non-commercial use only. It must not\n");  printf("be modified and distributed without prior permission of the author.\n");  printf("The author is not responsible for implications from the use of this\n");  printf("software.\n\n");}
上一页 12
💿 文件大小 289 K
👤 上传用户 l550253832
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#支持向量机 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -