📄 svm.cpp
字号:
// --- tail of svm_train(): copy the nonzero training points into model->SV and
// record, per binary problem, the indices/coefficients of its support vectors.
// NOTE: the head of this function lies before this chunk; code is verbatim.
for(i=0;i<l;i++) if (nonzero[i]) { model->SV[p] = x[i]; xtosv[i]=p; p++; }
model->sv_coef = Malloc(double *,nr_binary);
model->sv_ind = Malloc(int *,nr_binary);
for(i=0;i<nr_binary;i++)
{
	model->sv_ind[i] = Malloc(int,model->nSV_binary[i]);
	model->sv_coef[i] = Malloc(double,model->nSV_binary[i]);
	p=0;
	int r=0;
	// Walk only the classes that participate in binary problem i (I[i][j]!=0);
	// p indexes f[i].alpha over participating points, r packs the nonzero ones.
	for(int j=0; j<nr_class; j++)
		if (I[i][j]!=0)
			for (int k=0;k<count[j];k++)
			{
				if (fabs(f[i].alpha[p]) > 0)
				{
					// xtosv maps an original training index to its slot in model->SV
					model->sv_ind[i][r]=xtosv[start[j]+k];
					model->sv_coef[i][r]=f[i].alpha[p];
					r++;
				}
				p++;
			}
}
// Release all training-time scratch; the model keeps its own copies.
for(i=0;i<nr_binary;i++) free(I[i]);
free(I);
free(label);
free(probA);
free(probB);
free(count);
free(index);
free(start);
free(x);
free(weighted_C);
free(nonzero);
free(xtosv);
for(i=0;i<nr_binary;i++) free(f[i].alpha);
free(f);
}
return model;
}

// Stratified cross-validation: for classification, examples of each class are
// shuffled and distributed across folds so every fold keeps (approximately)
// the overall class proportions; other svm types use a plain random split.
// target[i] receives the CV prediction for example i.
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
	int i;
	int *perm = Malloc(int,prob->l);
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int **I = NULL;
	int nr_binary = 0;
	// random shuffle
	if(param->svm_type == C_SVC || param->svm_type == NU_SVC)
	{
		int *label = Malloc(int,max_nr_class);
		int *count = Malloc(int,max_nr_class);
		int *index = Malloc(int,l);
		int *fold_count = Malloc(int,nr_fold);
		int c;
		int min_count;
		// Group examples by class label; label[]/count[] grow by doubling.
		for(i=0;i<l;i++)
		{
			int this_label = (int)prob->y[i];
			int j;
			for(j=0;j<nr_class;j++)
			{
				if(this_label == label[j])
				{
					++count[j];
					break;
				}
			}
			index[i] = j; // class index of example i
			if(j == nr_class)
			{
				if(nr_class == max_nr_class)
				{
					max_nr_class *= 2;
					label = (int *)realloc(label,max_nr_class*sizeof(int));
					count = (int *)realloc(count,max_nr_class*sizeof(int));
				}
				label[nr_class] = this_label;
				count[nr_class] = 1;
				++nr_class;
			}
		}
		// Build the error-correcting code matrix once for all folds, but only
		// if every class has at least nr_fold examples (so each subproblem is
		// guaranteed to see all classes); otherwise each fold builds its own.
		// NOTE(review): I is passed by value yet used as allocated afterwards —
		// presumably error_correcting_code takes int**& (C++ reference); confirm.
		min_count = count[0];
		for(i=0; i<nr_class; i++)
			if(count[i] < min_count)
				min_count = count[i];
		if(min_count >= nr_fold)
			nr_binary = error_correcting_code(param->multiclass_type, nr_class, I);
		else
			nr_binary = 0;
		int *start = Malloc(int,nr_class);
		start[0] = 0;
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+count[i-1];
		// perm: examples reordered so each class occupies a contiguous range.
		for(i=0;i<l;i++)
		{
			perm[start[index[i]]] = i;
			++start[index[i]];
		}
		start[0] = 0; // start[] was advanced above; rebuild it
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+count[i-1];
		for(i=0;i<l;i++)
			index[i]=perm[i];
		// Fisher-Yates shuffle within each class (uses rand(); not reseeded here).
		for (c=0; c<nr_class; c++)
			for(i=0;i<count[c];i++)
			{
				int j = i+rand()%(count[c]-i);
				swap(index[start[c]+j],index[start[c]+i]);
			}
		// Fold sizes: each fold gets its proportional share of every class.
		for(i=0;i<nr_fold;i++)
		{
			fold_count[i] = 0;
			for (c=0; c<nr_class;c++)
				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
		}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		// Deal each class's shuffled slice into the folds; fold_start[] is used
		// as a write cursor here and rebuilt immediately afterwards.
		for (c=0; c<nr_class;c++)
			for(i=0;i<nr_fold;i++)
			{
				int begin = start[c]+i*count[c]/nr_fold;
				int end = start[c]+(i+1)*count[c]/nr_fold;
				for(int j=begin;j<end;j++)
				{
					perm[fold_start[i]] = index[j];
					fold_start[i]++;
				}
			}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		free(index);
		free(start);
		free(count);
		free(fold_count);
	}
	else
	{
		// Non-classification: plain random permutation, equal-size folds.
		for(i=0;i<prob->l;i++) perm[i]=i;
		for(i=0;i<prob->l;i++)
		{
			int j = i+rand()%(prob->l-i);
			swap(perm[i],perm[j]);
		}
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*prob->l/nr_fold;
	}
	// Train on all-but-fold-i, predict fold i.
	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct svm_problem subprob;
		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);
		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<prob->l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		if(param->svm_type == C_SVC || param->svm_type == NU_SVC)
		{
			if(nr_binary > 0)
			{
				// Deep-copy the shared code matrix into the subproblem.
				// NOTE(review): subprob.I rows allocated here (or by
				// error_correcting_code below) are never freed in this loop —
				// looks like a per-fold leak unless svm_train/svm_destroy_model
				// takes ownership; confirm against those functions.
				subprob.nr_binary = nr_binary;
				subprob.I = Malloc(int *, nr_binary);
				for(int i=0; i<nr_binary; i++)
				{
					subprob.I[i] = Malloc(int, nr_class);
					memcpy(subprob.I[i], I[i], nr_class*sizeof(int));
				}
			}
			else
				subprob.nr_binary = error_correcting_code(param->multiclass_type, svm_find_nr_class(&subprob), subprob.I);
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		if(param->probability
		   && (param->svm_type == C_SVC || param->svm_type == NU_SVC))
		{
			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
			for(j=begin;j<end;j++)
				// param->probability doubles as the decoding-method selector here
				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates,param->probability);
			free(prob_estimates);
		}
		else
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
		svm_destroy_model(submodel);
		free(subprob.x);
		free(subprob.y);
	}
	if((param->svm_type == C_SVC || param->svm_type == NU_SVC) && nr_binary > 0)
	{
		for(int i=0; i<nr_binary; i++)
			free(I[i]);
		free(I);
	}
	free(perm);
}

// Non-stratified cross validation (previous implementation, kept for reference).
/*
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
	int i;
	int *perm = Malloc(int,prob->l);
	// random shuffle
	for(i=0;i<prob->l;i++) perm[i]=i;
	for(i=0;i<prob->l;i++)
	{
		int j = i+rand()%(prob->l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<nr_fold;i++)
	{
		int begin = i*prob->l/nr_fold;
		int end = (i+1)*prob->l/nr_fold;
		int j,k;
		struct svm_problem subprob;
		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);
		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<prob->l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		if(param->probability && (param->svm_type == C_SVC || param->svm_type == NU_SVC))
		{
			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
			free(prob_estimates);
		}
		else
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
		svm_destroy_model(submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(perm);
}
*/

// Accessor: the svm_type stored in the model's training parameters.
int svm_get_svm_type(const svm_model *model)
{
	return model->param.svm_type;
}

// Accessor: number of classes (2 for regression/one-class per LIBSVM convention —
// not shown here; stated value is whatever training stored).
int svm_get_nr_class(const svm_model *model)
{
	return model->nr_class;
}

// (return type of svm_find_nr_class; its definition continues on the next line)
int
// Count the number of distinct integer labels in prob->y.
// (Return type `int` appears at the end of the preceding line.)
svm_find_nr_class(const svm_problem *prob)
{
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int i;
	for(i=0;i<prob->l;i++)
	{
		int this_label = (int)prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
			if(this_label == label[j])
				break;
		if(j == nr_class)
		{
			// New label: grow the table by doubling when full.
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			++nr_class;
		}
	}
	free(label);
	return nr_class;
}

// Copy the model's class labels into caller-provided label[] (no-op if absent).
void svm_get_labels(const svm_model *model, int* label)
{
	if (model->label != NULL)
		for(int i=0;i<model->nr_class;i++)
			label[i] = model->label[i];
}

// For SVR models with probability information, return the Laplace parameter
// stored in probA[0]; otherwise warn and return 0.
double svm_get_svr_probability(const svm_model *model)
{
	if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && model->probA!=NULL)
		return model->probA[0];
	else
	{
		info("Model doesn't contain information for SVR probability inference\n");
		return 0;
	}
}

// Compute raw decision values for x.
// One-class/regression: dec_values[0] = sum_i coef_i*K(x,SV_i) - rho.
// Classification: one decision value per binary (ECOC) problem, using each
// problem's own support-vector index list so every kernel value is computed once.
void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
{
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		double *sv_coef = model->sv_coef[0];
		double sum = 0;
		for(int i=0;i<model->l;i++)
			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
		sum -= model->rho[0];
		*dec_values = sum;
	}
	else
	{
		int i;
		int nr_binary = model->nr_binary;
		int l = model->l;
		// Kernel values against all SVs, shared by all binary problems.
		double *kvalue = Malloc(double,l);
		for(i=0;i<l;i++)
			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
		for(i=0;i<nr_binary;i++)
		{
			double sum = 0;
			for (int j=0; j< model->nSV_binary[i];j++)
				sum+= model->sv_coef[i][j]*kvalue[model->sv_ind[i][j]];
			sum -= model->rho[i];
			dec_values[i] = sum;
		}
		free(kvalue);
	}
}

// Predict the target for x: sign for one-class, raw value for regression,
// ECOC loss-based decoding for classification (continues past this chunk).
double svm_predict(const svm_model *model, const svm_node *x)
{
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		double res;
		svm_predict_values(model, x, &res);
		if(model->param.svm_type == ONE_CLASS)
			return (res>0)?1:-1;
		else
			return res;
	}
	else
	{
		int i,j;
		int nr_class =
		// (completes `int nr_class = ` begun on the previous line)
		model->nr_class;
		int nr_binary = model->nr_binary;
		double *dec_values = Malloc(double, nr_binary);
		svm_predict_values(model, x, dec_values);
		double *vote = Malloc(double,nr_class);
		for(i=0;i<nr_class;i++)
			vote[i] = 0;
		//int pos=0;
		// ECOC decoding with exponential loss: vote[i] accumulates
		// exp(-I[j][i]*f_j(x)) over binary problems; the class with the
		// SMALLEST accumulated loss wins (hence the min search below).
		for(i=0;i<nr_class;i++)
			for(j=0;j<nr_binary;j++)
				vote[i] += exp(-model->I[j][i] * dec_values[j]);
		/*for(int j=i+1;j<nr_class;j++)
		{
			if(dec_values[pos++] > 0)
				++vote[i];
			else
				++vote[j];
		}*/
		int vote_min_idx = 0;
		for(i=1;i<nr_class;i++)
			if(vote[i] < vote[vote_min_idx])
				vote_min_idx = i;
		free(vote);
		free(dec_values);
		return model->label[vote_min_idx];
	}
}

// Probability prediction for classification models with sigmoid parameters
// (probA/probB): per-binary decision values -> clipped sigmoid probabilities
// -> multiclass probabilities via GBT_multiclass_probability; returns the
// label with highest probability. Falls back to svm_predict otherwise.
// `method` selects the decoding method (forwarded to the solver; semantics
// defined elsewhere — TODO confirm).
double svm_predict_probability(
	const svm_model *model, const svm_node *x, double *prob_estimates, int method)
{
	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
	    model->probA!=NULL && model->probB!=NULL)
	{
		int i;
		int nr_class = model->nr_class;
		int nr_binary = model->nr_binary;
		double *dec_values = Malloc(double, nr_binary);
		svm_predict_values(model, x, dec_values);
		double min_prob=1e-7; // clamp sigmoid outputs away from 0 and 1
		/*double **pairwise_prob=Malloc(double *,nr_class);
		for(i=0;i<nr_class;i++)
			pairwise_prob[i]=Malloc(double,nr_class);
		int k=0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
				pairwise_prob[j][i]=1-pairwise_prob[i][j];
				k++;
			}
		*/
		// rp[i]: Platt-scaled probability for binary problem i, clipped to
		// [min_prob, 1-min_prob].
		double *rp=Malloc(double, nr_binary);
		for(int i=0; i<nr_binary; i++)
			rp[i]= min(max(sigmoid_predict(dec_values[i],model->probA[i],model->probB[i]),min_prob),1-min_prob);
		//multiclass_probability(nr_class,pairwise_prob,prob_estimates);
		clock_t t = clock();
		GBT_multiclass_probability(model->param.multiclass_type, nr_binary, nr_class, rp, prob_estimates, model->I, method);
		// NOTE(review): debug timing left in a library routine — prints on
		// every call; also %ld for clock_t is not portable. Consider removing.
		printf("clocks: %ld\n", clock() - t);
		int prob_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(prob_estimates[i] > prob_estimates[prob_max_idx])
				prob_max_idx = i;
		//for(i=0;i<nr_class;i++)
		//	free(pairwise_prob[i]);
		free(dec_values);
		//free(pairwise_prob);
		free(rp);
		return
		// (completes the `return ` begun on the previous line)
		model->label[prob_max_idx];
	}
	else
		return svm_predict(model, x);
}

// Text names for the svm_type / kernel_type enums, indexed by enum value;
// NULL-terminated (used when writing and parsing model files).
const char *svm_type_table[] =
{
	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
};

const char *kernel_type_table[]=
{
	"linear","polynomial","rbf","sigmoid",NULL
};

// Serialize a model to a text file. Layout: parameter header, optional
// per-class / per-binary arrays (rho, label, probA/B, nr_sv, nr_sv_binary),
// the ECOC matrix I as positive/negative class-index lists, the per-binary
// alpha (sv index + coefficient) rows, then the SVs in index:value form.
// Returns 0 on success, -1 if the file cannot be opened.
int svm_save_model(const char *model_file_name, const svm_model *model)
{
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;
	const svm_parameter& param = model->param;
	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
	if(param.kernel_type == POLY)
		fprintf(fp,"degree %g\n", param.degree);
	if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
		fprintf(fp,"gamma %g\n", param.gamma);
	if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
		fprintf(fp,"coef0 %g\n", param.coef0);
	int nr_class = model->nr_class;
	int nr_binary=model->nr_binary;
	int l = model->l;
	fprintf(fp, "nr_class %d\n", nr_class);
	fprintf(fp, "nr_binary %d\n", nr_binary);
	fprintf(fp, "total_sv %d\n",l);
	{
		fprintf(fp, "rho");
		for(int i=0;i<nr_binary;i++)
			fprintf(fp," %g",model->rho[i]);
		fprintf(fp, "\n");
	}
	if(model->label)
	{
		fprintf(fp, "label");
		for(int i=0;i<nr_class;i++)
			fprintf(fp," %d",model->label[i]);
		fprintf(fp, "\n");
	}
	if(model->probA) // regression has probA only
	{
		fprintf(fp, "probA");
		for(int i=0;i<nr_binary;i++)
			fprintf(fp," %g",model->probA[i]);
		fprintf(fp, "\n");
	}
	if(model->probB)
	{
		fprintf(fp, "probB");
		for(int i=0;i<nr_binary;i++)
			fprintf(fp," %g",model->probB[i]);
		fprintf(fp, "\n");
	}
	if(model->nSV)
	{
		fprintf(fp, "nr_sv");
		for(int i=0;i<nr_class;i++)
			fprintf(fp," %d",model->nSV[i]);
		fprintf(fp, "\n");
	}
	if(model->nSV_binary)
	{
		fprintf(fp, "nr_sv_binary");
		for(int i=0;i<nr_binary;i++)
			fprintf(fp," %d",model->nSV_binary[i]);
		fprintf(fp, "\n");
	}
	if(model->I)
	{
		// ECOC matrix: for each binary problem, a line of positive-class
		// indices then a line of negative-class indices (each preceded by
		// its count). Continues on the next source line.
		fprintf(fp, "I %d\n", param.multiclass_type);
		int **I = model->I;
		for(int i=0; i<nr_binary; ++i)
		{
			int nr_Ip = 0, nr_In = 0;
			for(int j=0; j<nr_class; j++)
				if(I[i][j]>0)
					// (body of the `if(I[i][j]>0)` begun on the previous line)
					nr_Ip++;
				else if(I[i][j]<0)
					nr_In++;
			fprintf(fp,"%d", nr_Ip);
			for(int j=0; j<nr_class; j++)
				if(I[i][j]>0)
					fprintf(fp," %d",j);
			fprintf(fp, "\n");
			fprintf(fp,"%d", nr_In);
			for(int j=0; j<nr_class; j++)
				if(I[i][j]<0)
					fprintf(fp," %d",j);
			fprintf(fp, "\n");
		}
	}
	// Per-binary support-vector indices and coefficients, one pair of lines each.
	fprintf(fp, "alpha\n");
	const int * const *sv_ind = model->sv_ind;
	const double * const *sv_coef = model->sv_coef;
	for(int i=0;i<nr_binary;i++)
	{
		for (int j=0;j<model->nSV_binary[i];j++)
			fprintf(fp, "%d ",sv_ind[i][j]);
		fprintf(fp, "\n");
		for (int j=0;j<model->nSV_binary[i];j++)
			fprintf(fp, "%.16g ",sv_coef[i][j]);
		fprintf(fp, "\n");
	}
	// Support vectors in sparse index:value form, -1-terminated per LIBSVM.
	fprintf(fp, "SV\n");
	const svm_node * const *SV = model->SV;
	for(int i=0;i<l;i++)
	{
		const svm_node *p = SV[i];
		while(p->index != -1)
		{
			fprintf(fp,"%d:%.8g ",p->index,p->value);
			p++;
		}
		fprintf(fp, "\n");
	}
	// NOTE(review): fclose return value ignored — a full disk would go
	// unnoticed; confirm whether callers rely on the 0 return as "written".
	fclose(fp);
	return 0;
}

// Deserialize a model written by svm_save_model. Returns NULL if the file
// cannot be opened. (Definition is truncated in this chunk: the source ends
// mid fscanf format string below; the remainder lies outside this view.)
svm_model *svm_load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"rb");
	if(fp==NULL) return NULL;
	int nr_binary=0;
	// read parameters
	svm_model *model = Malloc(svm_model,1);
	svm_parameter& param = model->param;
	model->rho = NULL;
	model->probA = NULL;
	model->probB = NULL;
	model->label = NULL;
	model->nSV = NULL;
	model->nSV_binary = NULL;
	char cmd[81];
	while(1)
	{
		fscanf(fp,"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -