📄 svm_common.c
字号:
/************************************************************************//* *//* svm_common.c *//* *//* Definitions and functions used in both svm_learn and svm_classify. *//* *//* Author: Thorsten Joachims *//* Date: 16.11.99 *//* *//* Copyright (c) 1999 Universitaet Dortmund - All rights reserved *//* *//* This software is available for non-commercial use only. It must *//* not be modified and distributed without prior permission of the *//* author. The author is not responsible for implications from the *//* use of this software. *//* *//************************************************************************/# include "ctype.h"# include "svm_common.h"# include "kernel.h" /* this contains a user supplied kernel */long verbosity; /* verbosity level (0-4) */long kernel_cache_statistic;double classify_example(MODEL *model, DOC *ex) /* classifies one example */{ register long i; register double dist; dist=0; for(i=1;i<model->sv_num;i++) { dist+=kernel(&model->kernel_parm,model->supvec[i],ex)*model->alpha[i]; } return(dist-model->b);}double classify_example_linear(MODEL *model, DOC *ex) /* classifies example for linear kernel */ /* important: the model must have the linear weight vector computed */ /* important: the feature numbers in the example to classify must */ /* not be larger than the weight vector! */{ return((double)(sprod_ns(model->lin_weights,ex->words)-model->b));}CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b) /* calculate the kernel function */{ kernel_cache_statistic++; switch(kernel_parm->kernel_type) { case 0: /* linear */ return((CFLOAT)sprod_ss(a->words,b->words)); case 1: /* polynomial */ return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,(double)kernel_parm->poly_degree)); case 2: /* radial basis function */ return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq))); case 3: /* sigmoid neural net */ return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const)); case 4: /* custom-kernel supplied in file kernel.h*/ return((CFLOAT)custom_kernel(kernel_parm,a,b)); default: printf("Error: Unknown kernel function\n"); exit(1); }}double sprod_ss(WORD *a, WORD *b) /* compute the inner product of two sparse vectors */{ register FVAL sum=0; register WORD *ai,*bj; ai=a; bj=b; while (ai->wnum && bj->wnum) { if(ai->wnum > bj->wnum) { bj++; } else if (ai->wnum < bj->wnum) { ai++; } else { sum+=ai->weight * bj->weight; ai++; bj++; } } return((double)sum);}double model_length_s(MODEL *model, KERNEL_PARM *kernel_parm) /* compute length of weight vector */{ register long i,j; register double sum=0,alphai; register DOC *supveci; for(i=1;i<model->sv_num;i++) { alphai=model->alpha[i]; supveci=model->supvec[i]; for(j=1;j<model->sv_num;j++) { sum+=alphai*model->alpha[j] *kernel(kernel_parm,supveci,model->supvec[j]); } } return(sqrt(sum));}void clear_vector_n(double *vec, long int n){ register long i; for(i=0;i<=n;i++) vec[i]=0;}void add_vector_ns(double *vec_n, WORD *vec_s, double faktor){ register WORD *ai; ai=vec_s; while (ai->wnum) { vec_n[ai->wnum]+=(faktor*ai->weight); ai++; }}double sprod_ns(double *vec_n, WORD *vec_s){ register double sum=0; register WORD *ai; ai=vec_s; while (ai->wnum) { sum+=(vec_n[ai->wnum]*ai->weight); ai++; } return(sum);}void add_weight_vector_to_linear_model(MODEL *model) /* compute weight vector in linear case and add to model */{ long i; model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1)); clear_vector_n(model->lin_weights,model->totwords); for(i=1;i<model->sv_num;i++) { add_vector_ns(model->lin_weights,(model->supvec[i])->words, model->alpha[i]); }}void read_model(char *modelfile, MODEL *model, long int max_words, long int ll){ FILE *modelfl; long j,i; char *line; WORD *words; register long wpos; long wnum,pos; double weight; char version_buffer[100]; int numread; if(verbosity>=1) { printf("Reading model..."); fflush(stdout); } words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10)); line = (char *)my_malloc(sizeof(char)*ll); if ((modelfl = fopen (modelfile, "r")) == NULL) { perror (modelfile); exit (1); } fscanf(modelfl,"SVM-light Version %s\n",version_buffer); if(strcmp(version_buffer,VERSION)) { perror ("Version of model-file does not match version of svm_classify!"); exit (1); } fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type); fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree); fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma); fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin); fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const); fscanf(modelfl,"%s%*[^\n]\n", model->kernel_parm.custom); fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords); fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc); fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num); fscanf(modelfl,"%lf%*[^\n]\n", &model->b); for(i=1;i<model->sv_num;i++) { fgets(line,(int)ll,modelfl); pos=0; wpos=0; sscanf(line,"%lf",&model->alpha[i]); while(!isspace(line[++pos])); while(((numread=sscanf(line+pos,"%ld:%lf",&wnum,&weight)) != EOF) && (wpos<max_words)) { if(numread != 2) { perror("Parsing error while reading model!"); printf("LINE: %s\n",line); } while(!isspace(line[++pos])); words[wpos].wnum=wnum; words[wpos].weight=weight; wpos++; } model->supvec[i] = (DOC *)my_malloc(sizeof(DOC)); (model->supvec[i])->words = (WORD *)my_malloc(sizeof(WORD)*(wpos+1)); for(j=0;j<wpos;j++) { (model->supvec[i])->words[j]=words[j]; } ((model->supvec[i])->words[wpos]).wnum=0; (model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words, (model->supvec[i])->words); (model->supvec[i])->docnum = -1; } fclose(modelfl); free(line); free(words); if(verbosity>=1) { fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1)); }}void read_documents(char *docfile, DOC *docs, double *label, long int max_words_doc, long int ll, long int *totwords, long int *totdoc){ char *line; DOC doc; long dnum=0,wpos,i,dpos=0,dneg=0,dunlab=0; double doc_label; FILE *docfl; line = (char *)my_malloc(sizeof(char)*ll); if ((docfl = fopen (docfile, "r")) == NULL) { perror (docfile); exit (1); } doc.words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); if(verbosity>=1) { printf("Reading examples into memory..."); fflush(stdout); } dnum=0; (*totwords)=0; while((!feof(docfl)) && fgets(line,(int)ll,docfl)) { if(line[0] == '#') continue; /* line contains comments */ if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc)) { printf("\nParsing error in line %ld!\n",dnum); exit(1); } label[dnum]=doc_label; /* printf("Class=%ld ",doc_label); */ if(doc_label > 0) dpos++; if (doc_label < 0) dneg++; if (doc_label == 0) dunlab++; if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords))) (*totwords)=(doc.words[wpos-2]).wnum; docs[dnum].words = (WORD *)my_malloc(sizeof(WORD)*wpos); docs[dnum].docnum=dnum; for(i=0;i<wpos;i++) { docs[dnum].words[i]=doc.words[i]; /* printf("%ld::%f ",(docs[dnum].words[i]).wnum,(docs[dnum].words[i]).weight); */ } docs[dnum].twonorm_sq=doc.twonorm_sq; /* printf("\nNorm=%f\n",docs[dnum].twonorm_sq); */ dnum++; if(verbosity>=1) { if((dnum % 100) == 0) { printf("%ld..",dnum); fflush(stdout); } } } fclose(docfl); free(line); free(doc.words); if(verbosity>=1) { fprintf(stdout, "OK. (%ld examples read)\n", dnum); } if(verbosity>=2) { printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",dpos,dneg,dunlab); fflush(stdout); } (*totdoc)=dnum;}int parse_document(char *line, DOC *doc, double *label, long int *numwords, long int max_words_doc){ register long wpos,pos; long wnum; double weight; int numread; pos=0; while(line[pos]) { /* cut off comments */ if(line[pos] == '#') { line[pos]=0; } else { pos++; } } wpos=0; if(sscanf(line,"%lf",label) == EOF) return(0); pos=0; while(isspace(line[pos])) pos++; while(!isspace(line[pos])) pos++; while(((numread=sscanf(line+pos,"%ld:%lf",&wnum,&weight)) != EOF) && (wpos<max_words_doc)) { if(numread != 2) return(0); while(isspace(line[pos++])); while(!isspace(line[++pos])); if(wnum<=0) { perror ("Feature numbers must be larger or equal to 1!!!\n"); printf("LINE: %s\n",line); exit (1); } if((wpos>0) && ((doc->words[wpos-1]).wnum >= wnum)) { perror ("Features must be in increasing order!!!\n"); printf("LINE: %s\n",line); exit (1); } (doc->words[wpos]).wnum=wnum; (doc->words[wpos]).weight=weight; wpos++; } (doc->words[wpos]).wnum=0; (*numwords)=wpos+1; doc->docnum=-1; doc->twonorm_sq=sprod_ss(doc->words,doc->words); return(1);}void nol_ll(char *file, long int *nol, long int *wol, long int *ll) /* Grep through file and count number of lines, maximum number of spaces per line, and longest line. */{ FILE *fl; int ic; char c; long current_length,current_wol; if ((fl = fopen (file, "r")) == NULL) { perror (file); exit (1); } current_length=0; current_wol=0; (*ll)=0; (*nol)=1; (*wol)=0; while((ic=getc(fl)) != EOF) { c=(char)ic; current_length++; if(isspace(c)) { current_wol++; } if(c == '\n') { (*nol)++; if(current_length>(*ll)) { (*ll)=current_length; } if(current_wol>(*wol)) { (*wol)=current_wol; } current_length=0; current_wol=0; } } fclose(fl);}long minl(long int a, long int b){ if(a<b) return(a); else return(b);}long maxl(long int a, long int b){ if(a>b) return(a); else return(b);}long get_runtime(void){ clock_t start; start = clock(); return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));}//# ifdef MICROSOFTint isnan(double a){ return(_isnan(a));}//# endifvoid *my_malloc(long int size){ void *ptr; ptr=(void *)malloc(size); if(!ptr) { perror ("Out of memory!\n"); exit (1); } return(ptr);}void copyright_notice(void){ printf("\nCopyright: Thorsten Joachims, thorsten@ls8.cs.uni-dortmund.de\n\n"); printf("This software is available for non-commercial use only. It must not\n"); printf("be modified and distributed without prior permission of the author.\n"); printf("The author is not responsible for implications from the use of this\n"); printf("software.\n\n");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -