📄 svm.cpp
Font size:
if(model->probA) // regression has probA only { fprintf(fp, "probA"); for(int i=0;i<nr_class*(nr_class-1)/2;i++) fprintf(fp," %g",model->probA[i]); fprintf(fp, "\n"); } if(model->probB) { fprintf(fp, "probB"); for(int i=0;i<nr_class*(nr_class-1)/2;i++) fprintf(fp," %g",model->probB[i]); fprintf(fp, "\n"); } if(model->nSV) { fprintf(fp, "nr_sv"); for(int i=0;i<nr_class;i++) fprintf(fp," %d",model->nSV[i]); fprintf(fp, "\n"); } fprintf(fp, "SV\n"); const double * const *sv_coef = model->sv_coef; const svm_node * const *SV = model->SV; for(int i=0;i<l;i++) { for(int j=0;j<nr_class-1;j++) fprintf(fp, "%.16g ",sv_coef[j][i]); const svm_node *p = SV[i]; while(p->index != -1) { fprintf(fp,"%d:%.8g ",p->index,p->value); p++; } fprintf(fp, "\n"); } fclose(fp); return 0;}svm_model *svm_load_model(const char *model_file_name){ FILE *fp = fopen(model_file_name,"rb"); if(fp==NULL) return NULL; // read parameters svm_model *model = Malloc(svm_model,1); svm_parameter& param = model->param; model->rho = NULL; model->probA = NULL; model->probB = NULL; model->label = NULL; model->nSV = NULL; char cmd[81]; while(1) { fscanf(fp,"%80s",cmd); if(strcmp(cmd,"svm_type")==0) { fscanf(fp,"%80s",cmd); int i; for(i=0;svm_type_table[i];i++) { if(strcmp(svm_type_table[i],cmd)==0) { param.svm_type=i; break; } } if(svm_type_table[i] == NULL) { fprintf(stderr,"unknown svm type.\n"); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } else if(strcmp(cmd,"kernel_type")==0) { fscanf(fp,"%80s",cmd); int i; for(i=0;kernel_type_table[i];i++) { if(strcmp(kernel_type_table[i],cmd)==0) { param.kernel_type=i; break; } } if(kernel_type_table[i] == NULL) { fprintf(stderr,"unknown kernel function.\n"); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } else if(strcmp(cmd,"degree")==0) fscanf(fp,"%lf",¶m.degree); else if(strcmp(cmd,"gamma")==0) fscanf(fp,"%lf",¶m.gamma); else if(strcmp(cmd,"coef0")==0) fscanf(fp,"%lf",¶m.coef0); else 
if(strcmp(cmd,"nr_class")==0) fscanf(fp,"%d",&model->nr_class); else if(strcmp(cmd,"total_sv")==0) fscanf(fp,"%d",&model->l); else if(strcmp(cmd,"rho")==0) { int n = model->nr_class * (model->nr_class-1)/2; model->rho = Malloc(double,n); for(int i=0;i<n;i++) fscanf(fp,"%lf",&model->rho[i]); } else if(strcmp(cmd,"label")==0) { int n = model->nr_class; model->label = Malloc(int,n); for(int i=0;i<n;i++) fscanf(fp,"%d",&model->label[i]); } else if(strcmp(cmd,"probA")==0) { int n = model->nr_class * (model->nr_class-1)/2; model->probA = Malloc(double,n); for(int i=0;i<n;i++) fscanf(fp,"%lf",&model->probA[i]); } else if(strcmp(cmd,"probB")==0) { int n = model->nr_class * (model->nr_class-1)/2; model->probB = Malloc(double,n); for(int i=0;i<n;i++) fscanf(fp,"%lf",&model->probB[i]); } else if(strcmp(cmd,"nr_sv")==0) { int n = model->nr_class; model->nSV = Malloc(int,n); for(int i=0;i<n;i++) fscanf(fp,"%d",&model->nSV[i]); } else if(strcmp(cmd,"SV")==0) { while(1) { int c = getc(fp); if(c==EOF || c=='\n') break; } break; } else { fprintf(stderr,"unknown text in model file\n"); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } // read sv_coef and SV int elements = 0; long pos = ftell(fp); while(1) { int c = fgetc(fp); switch(c) { case '\n': // count the '-1' element case ':': ++elements; break; case EOF: goto out; default: ; } }out: fseek(fp,pos,SEEK_SET); int m = model->nr_class - 1; int l = model->l; model->sv_coef = Malloc(double *,m); int i; for(i=0;i<m;i++) model->sv_coef[i] = Malloc(double,l); model->SV = Malloc(svm_node*,l); svm_node *x_space=NULL; if(l>0) x_space = Malloc(svm_node,elements); int j=0; for(i=0;i<l;i++) { model->SV[i] = &x_space[j]; for(int k=0;k<m;k++) fscanf(fp,"%lf",&model->sv_coef[k][i]); while(1) { int c; do { c = getc(fp); if(c=='\n') goto out2; } while(isspace(c)); ungetc(c,fp); fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value)); ++j; } out2: x_space[j++].index = -1; } fclose(fp); model->free_sv = 1; 
// XXX return model;}void svm_destroy_model(svm_model* model){ if(model->free_sv && model->l > 0) free((void *)(model->SV[0])); for(int i=0;i<model->nr_class-1;i++) free(model->sv_coef[i]); free(model->SV); free(model->sv_coef); free(model->rho); free(model->label); free(model->probA); free(model->probB); free(model->nSV); free(model);}void svm_destroy_param(svm_parameter* param){ free(param->weight_label); free(param->weight);}const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param){ // svm_type int svm_type = param->svm_type; if(svm_type != C_SVC && svm_type != NU_SVC && svm_type != ONE_CLASS && svm_type != EPSILON_SVR && svm_type != NU_SVR) return "unknown svm type"; // kernel_type int kernel_type = param->kernel_type; if(kernel_type != LINEAR && kernel_type != POLY && kernel_type != RBF &&// Canasai's addition begin kernel_type != STRING &&// Canasai's addition end kernel_type != SIGMOID) return "unknown kernel type";// Canasai's addition begin if( param->gamma < 0 || param->gamma > 3 ) return "unknown string kernel type: gamma < 0 or gamma > 3"; if( param->mc_method != 0 && param->mc_method != 1 ) return "unknown multi-class classification method: mc_method != 0 and mc_method != 1";// Canasai's addition end // cache_size,eps,C,nu,p,shrinking if(param->cache_size <= 0) return "cache_size <= 0"; if(param->eps <= 0) return "eps <= 0"; if(svm_type == C_SVC || svm_type == EPSILON_SVR || svm_type == NU_SVR) if(param->C <= 0) return "C <= 0"; if(svm_type == NU_SVC || svm_type == ONE_CLASS || svm_type == NU_SVR) if(param->nu < 0 || param->nu > 1) return "nu < 0 or nu > 1"; if(svm_type == EPSILON_SVR) if(param->p < 0) return "p < 0"; if(param->shrinking != 0 && param->shrinking != 1) return "shrinking != 0 and shrinking != 1"; if(param->probability != 0 && param->probability != 1) return "probability != 0 and probability != 1"; if(param->probability == 1 && svm_type == ONE_CLASS) return "one-class SVM probability output not supported yet"; 
// check whether nu-svc is feasible if(svm_type == NU_SVC) { int l = prob->l; int max_nr_class = 16; int nr_class = 0; int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int i; for(i=0;i<l;i++) { int this_label = (int)prob->y[i]; int j; for(j=0;j<nr_class;j++) if(this_label == label[j]) { ++count[j]; break; } if(j == nr_class) { if(nr_class == max_nr_class) { max_nr_class *= 2; label = (int *)realloc(label,max_nr_class*sizeof(int)); count = (int *)realloc(count,max_nr_class*sizeof(int)); } label[nr_class] = this_label; count[nr_class] = 1; ++nr_class; } } for(i=0;i<nr_class;i++) { int n1 = count[i]; for(int j=i+1;j<nr_class;j++) { int n2 = count[j]; if(param->nu*(n1+n2)/2 > min(n1,n2)) { free(label); free(count); return "specified nu is infeasible"; } } } free(label); free(count); } return NULL;}int svm_check_probability_model(const svm_model *model){ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA!=NULL && model->probB!=NULL) || ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && model->probA!=NULL);}double dagsvm_predict(const struct svm_model *model, const struct svm_node *x){ if(model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) { double *sv_coef = model->sv_coef[0]; double sum = 0; for(int i=0;i<model->l;i++) sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param); sum -= model->rho[0]; if(model->param.svm_type == ONE_CLASS) return (sum>0)?1:-1; else return sum; } else { int i; int nr_class = model->nr_class; int l = model->l; double *kvalue = Malloc(double,l); for(i=0;i<l;i++) kvalue[i] = Kernel::k_function(x,model->SV[i],model->param); int *start = Malloc(int,nr_class); start[0] = 0; for(i=1;i<nr_class;i++) start[i] = start[i-1]+model->nSV[i-1]; i = 0; int j = nr_class - 1; while (i < j) { double sum = 0; int si = start[i]; int sj = start[j]; int ci = model->nSV[i]; int cj = model->nSV[j]; 
int k; double *coef1 = model->sv_coef[j-1]; double *coef2 = model->sv_coef[i]; for(k=0;k<ci;k++) sum += coef1[si+k] * kvalue[si+k]; for(k=0;k<cj;k++) sum += coef2[sj+k] * kvalue[sj+k]; sum -= model->rho[i*(nr_class - 1) - i*(i + 1)/2 + j - 1]; if(sum > 0) j--; else i++; } free(kvalue); free(start); return model->label[i]; }}// Canasai's addition beginchar **load_label( char *label_file, int *max_classname_length ){ int num_tokens = 0; int num_lines = 0; int longest_line = 0; text_scan( label_file, &num_tokens, &num_lines, &longest_line ); char *line = Malloc( char, longest_line ); char **label = Malloc( char *, num_lines ); FILE *file_ptr = fopen( label_file, "r" ); while( fgets( line, longest_line, file_ptr ) != NULL ) { if( text_not_blank( line ) ) { line[ strlen( line ) - 1 ] = '\0'; char *tmp_ptr = text_copy4( line ); char *tmp_ptr2 = tmp_ptr; tmp_ptr2 = strchr( tmp_ptr2, ' ' ); if( tmp_ptr2 == NULL ) { fprintf( stderr, "ERROR: read label file, %s\n", label_file ); exit( 0 ); } *tmp_ptr2 = '\0'; ++tmp_ptr2; int index = ( int )atof( tmp_ptr ); label[index] = text_copy( tmp_ptr2 ); int label_length = strlen( label[index] ); if( label_length > *max_classname_length ) *max_classname_length = label_length; free( tmp_ptr ); } } free( line ); fclose( file_ptr ); return( label );}// modified from stratified cross validationvoid sk_svm_cross_validation( const struct svm_problem *prob, const struct svm_parameter *param, double *target, char **label_name, int max_classname_length, int *nr_class_ ){ int i; int nr_fold = param->nr_fold; int *fold_start = Malloc( int, nr_fold + 1 ); int l = prob->l; int *perm = Malloc( int, l ); int nr_class; if( param->svm_type == C_SVC /*|| param->svm_type == NU_SVC*/ ) { int *start = NULL; int *label = NULL; int *count = NULL; svm_group_classes( prob, &nr_class, &label, &start, &count, perm ); (*nr_class_) = nr_class; // random shuffle and then data grouped by fold using the array perm int *fold_count = Malloc( int, nr_fold ); int c; int 
*index = Malloc( int, l ); for( i = 0; i < l; i++ ) index[i] = perm[i];/* for( c = 0; c < nr_class; c++ ) { for( i = 0; i < count[c]; i++ ) { int j = i + rand() % ( count[c] - i ); swap( index[start[c]+j], index[start[c]+i] ); } }*/ for( i = 0; i < nr_fold; i++ ) { fold_count[i] = 0; for( c = 0; c < nr_class; c++ ) fold_count[i] += ( i+1 ) * count[c] / nr_fold - i * count[c] / nr_fold; } fold_start[0] = 0; for( i = 1; i <= nr_fold; i++ ) fold_start[i] = fold_start[i-1] + fold_count[i-1]; for( c = 0; c < nr_class; c++ ) { for( i = 0; i < nr_fold; i++ ) { int begin = start[c] + i * count[c] / nr_fold; int end = start[c] + ( i+1 ) * count[c] / nr_fold; for( int j = begin; j < end; j++ ) { perm[fold_start[i]] = index[j]; fold_start[i]++; } } } fold_start[0] = 0; for( i = 1; i <= nr_fold; i++ ) fold_start[i] = fold_start[i-1] + fold_count[i-1]; free( start ); free( label ); free( count ); free( index ); free( fold_count ); } else { for( i = 0; i < l; i++ ) perm[i] = i; for( i = 0; i < l; i++ ) { int j = i + rand() % ( l-i ); swap( perm[i], perm[j] ); } for( i = 0; i <= nr_fold; i++ ) fold_start[i] = i * l / nr_fold; } for( i = 0; i < nr_fold; i++ ) { int begin = fold_start[i]; int end = fold_start[i+1]; int j, k; struct svm_problem subprob; subprob.l = l - ( end - begin ); subprob.x = Malloc( struct svm_node *, subprob.l ); subprob.y = Malloc( double, subprob.l ); k = 0; for( j = 0; j < begin; j++ ) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for( j = end; j < l; j++ ) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct svm_model *submodel = svm_train( &subprob, param ); int **conf_mat = ( int ** )malloc( nr_class * sizeof( int * ) ); for( j = 0; j < nr_class; j++ ) { conf_mat[j] = ( int * )malloc( nr_class * sizeof( int ) ); for( k = 0; k < nr_class; k++ ) conf_mat[j][k]=0; } for( j = begin; j < end; j++ ) { if( param->mc_method == ONE_AGAINST_ONE ) target[perm[j]]
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Fullscreen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -