loo.c
/* Fragment (opening lines not shown): the loop body below fills the symmetric
   sub-problem matrix qp.Q from the cached kernel rows QB and adjusts the
   linear term qp.p for the current working set. */
    {
      Bj = workingset[j];
      qp.Q[i*qp.n+j] = qp.Q[j*qp.n+i] = QB[i][Bj];
      qp.p[i] -= qp.Q[i*qp.n+j]*x[Bj];
      qp.p[j] -= qp.Q[j*qp.n+i]*x[Bi];
    }
  }
}

/* Copy the sub-problem solution back into x and update the cached products Qx,
   keeping the SV and free-SV counts in sync. */
void update_all(int q)
{
  int i, j;
  double *dx;

  dx = (double *) xmalloc(sizeof(double)*q);
  for (i=0;i<q;i++) {
    if (x[workingset[i]] > ZERO) {
      Nsv--;
      if (x[workingset[i]] < COST) Nfree--;
    }
    dx[i] = qp.x[i] - x[workingset[i]];
    x[workingset[i]] = qp.x[i];
    if (x[workingset[i]] > ZERO) {
      Nsv++;
      if (x[workingset[i]] < COST) Nfree++;
    }
  }
  for (i=0;i<N;i++) {
    double Qx_i = Qx[i];
    for (j=0;j<q;j++)
      Qx_i += dx[j]*QB[j][i];
    Qx[i] = Qx_i;
  }
  free(dx);
}

/* = x'Qx/2 - e'x, using the cached Qx. */
double objective_value()
{
  int i;
  double ans = 0;

  for (i=0;i<N;i++)
    ans += x[i]*(Qx[i] - 2);
  return ans/2;
}

/* Count training samples on the wrong side of the decision boundary. */
int test_training_sample()
{
  int i, mis = 0;

  for (i=0;i<N;i++)
    if (Qx[i] < 0) mis++;
  return mis;
}

/* Decomposition loop: pick a working set, solve the sub-QP, and update,
   until the maximum violation drops below svm_param.epsilon. */
void mainloop()
{
  int q = 0;
  double maxvio, objval;

  while ((maxvio = infnorm_of_grad()) > svm_param.epsilon) {
    if (svm_param.verbosity >= 3) {
      /* information before optimization */
      objval = objective_value();
      printf("Iteration %d: %d SV %d BSV |B| = %d \n"
             "objective value = %f max violation = %f\n",
             iter, Nsv, Nsv - Nfree, q, objval, maxvio);
    }
    q = select_workingset(iter + 1);
    get_QB(q);
    construct_subQP(q);
    solvebqp(qp);
    update_all(q);
    iter++;
  }
  if (svm_param.verbosity >= 1) {
    printf("WORK COMPLETE. %d iterations ", iter-old_iter);
    printf("%d misclassified ", test_training_sample());
    printf("%d SV %d BSV\nobjective value = %f max violation = %f\n",
           Nsv, Nsv - Nfree, objective_value(), maxvio);
  }
  old_iter = iter;
}

/* Write the kernel parameters and the support vectors to a model file. */
void output_model(FILE *fmodel, int Nsv, double *x, kernel_param_t *kernel_param)
{
  long i, j;

  fprintf(fmodel, "%d\n", kernel_param->ktype);
  fprintf(fmodel, "%g\n", kernel_param->degree);
  fprintf(fmodel, "%.8g\n", kernel_param->gamma);
  fprintf(fmodel, "%.8g\n", kernel_param->coef0);
  fprintf(fmodel, "%d\n", Nsv);
  for (i=0;i<N;i++)
    if (x[i] > ZERO) {
      fprintf(fmodel, "%.16g ", sample[i].a*x[i]);
      for (j=0;sample[i].v[j].num != -1;j++)
        fprintf(fmodel, "%d:%.8g ", sample[i].v[j].num, sample[i].v[j].value);
      fprintf(fmodel, "\n");
    }
}
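/* For reference, the model file written by output_model() above has this
   layout: one value per line for the kernel parameters, then the number of
   support vectors, then one line per support vector in sparse format:

       <ktype>
       <degree>
       <gamma>
       <coef0>
       <number of support vectors>
       <label*alpha> <index>:<value> <index>:<value> ...   (one line per SV)
*/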
/* Read the training set and allocate the arrays used by the solver and by
   the leave-one-out procedure. */
int initialize_loo(FILE *fset, int cachesize, int qpsize)
{
  read_dataset(fset);
  svm_param.cachesize = cachesize;
  svm_param.qpsize = qpsize;
  kernel_param.ktype = 0;  /* placeholder; the actual kernel type is not known at this point */
  initialize_bsvm();
  SV = (int *) xmalloc(sizeof(int)*N);
  loo_init_x = (double *) xmalloc(sizeof(double)*N);
  loo_init_Qx = (double *) xmalloc(sizeof(double)*N);
  return N;
}

void initialize_bsvm()
{
  int qpsize;

  /* set gamma = 1/k if it has not been initialized */
  if (kernel_param.gamma == 0)
    kernel_param.gamma = 1.0/(svm_param.veclen);
  switch (kernel_param.ktype) {
    case LINEAR_KERNEL: kernel_function = kernel_linear; break;
    case POLY_KERNEL:   kernel_function = kernel_poly;   break;
    case RBF_KERNEL:    kernel_function = kernel_rbf;    break;
    case TANH_KERNEL:   kernel_function = kernel_tanh;   break;
    default: myerror("Unknown kernel function");
  }
  if (svm_param.qpsize > N)
    svm_param.qpsize = N;
  qpsize = svm_param.qpsize;
  qp.x = (double *) xmalloc(sizeof(double)*qpsize);
  qp.p = (double *) xmalloc(sizeof(double)*qpsize);
  qp.Q = (double *) xmalloc(sizeof(double)*qpsize*qpsize);
  x = (double *) xmalloc(sizeof(double)*N);
  Qx = (double *) xmalloc(sizeof(double)*N);
  workingset = (int *) xmalloc(sizeof(int)*qpsize);
  chosenflag = (int *) xmalloc(sizeof(int)*N);
  memset(chosenflag, 0, sizeof(int)*N);
  initialize_cache();
  sortbase = (double *) xmalloc(sizeof(double)*N);
  positive_max = (double *) xmalloc(sizeof(double)*svm_param.qpsize);
  negative_max = (double *) xmalloc(sizeof(double)*svm_param.qpsize/2);
  positive_set = (int *) xmalloc(sizeof(int)*svm_param.qpsize);
  negative_set = (int *) xmalloc(sizeof(int)*svm_param.qpsize/2);
}

void free_bsvm(void)
{
  int i;

  for (i=0;i<N;i++)
    free(sample[i].v);
  free(sample);
  free(qp.x); free(qp.p); free(qp.Q);
  free(Qx);
  free(workingset); free(chosenflag);
  free(positive_max); free(positive_set);
  free(negative_max); free(negative_set);
  free(sortbase);
  free_cache();
}

void free_loo(void)
{
  free_bsvm();
  free(SV);
  free(loo_init_x);
  free(loo_init_Qx);
}

/* Leave-one-out estimation: train on all data once, then retrain with each
   uncertain support vector removed and test it.  Returns the number of
   leave-one-out errors, or N+1 if the optional error-rate bound is exceeded. */
int loo(loo_param_t *loo_param)
{
  int cv_miss = 0;
  int j;
  double dec_fun;
  int nsv = 0, nfree, n_retrain = 0;
  double training_time, loo_time;
  double time_base = get_utime();
  int testv = -1;

  svm_param.C = loo_param->C;
  svm_param.verbosity = loo_param->verbosity;
  memcpy(&kernel_param, &(loo_param->kp), sizeof(kernel_param_t));
  switch (kernel_param.ktype) {
    case LINEAR_KERNEL: kernel_function = kernel_linear; break;
    case POLY_KERNEL:   kernel_function = kernel_poly;   break;
    case RBF_KERNEL:    kernel_function = kernel_rbf;    break;
    case TANH_KERNEL:   kernel_function = kernel_tanh;   break;
    default: myerror("Unknown kernel function");
  }

  /* first phase: use all data for training */
  svm_param.epsilon = loo_param->epsilon_train;
  mainloop();
  training_time = get_utime();

  /* begin cross validation */
  store_Qx_and_x();
  /* initialization */
  for (j=0; j<N; j++) {
    if (loo_init_x[j] <= ZERO) continue;
    nsv++;
    if (Qx[j] < 0)                   /* training error */
      cv_miss++;
    else if (loo_init_x[j] > 0.25)   /* uncertain: retrain without this sample */
      SV[n_retrain++] = j;
    /* else not a loo error */
  }
  assert(nsv == Nsv);  /* Nsv results from the training phase */
  if (loo_param->has_rate_bound)
    sort_SV_by_Qx();
  nfree = Nfree;
  svm_param.epsilon = loo_param->epsilon_loo;

  for (j=0; j<n_retrain; j++) {
    int i;
    testv = SV[j];
    if (svm_param.verbosity >= 2)
      printf("Cross validation phase %d ; remove sample[%d]\n", j, testv);
    Nfree = nfree;
    if (loo_init_x[testv] < COST) Nfree--;
    for (i=0; i<N; i++)
      x[i] = loo_init_x[i];
    x[testv] = 0;
    update_Qx_by_remove_sv(testv);
    Qx[testv] = DBL_MAX;  /* Qx[testv] = 1.0/0; */
    Nsv = nsv-1;
    svm_param.verbosity--;
    mainloop();
    svm_param.verbosity++;
    if ((dec_fun = decision_function(&sample[testv])*sample[testv].a) < 0) {
      cv_miss++;
      if (svm_param.verbosity >= 2)
        printf("Test error found: %f\n", dec_fun);
      if (loo_param->has_rate_bound && cv_miss > loo_param->max_test_error) {
        cv_miss = N+1;
        break;
      }
    }
  }
  loo_time = get_utime();

  if (svm_param.verbosity >= 1) {
    printf("Number of kernel evaluations = %.0lf\n"
           "training time = %lf seconds, loo time = %lf seconds\n",
           kernel_eval, training_time-time_base, loo_time-training_time);
  }
  return cv_miss;
}
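/* The dataset parsed by read_dataset() below is in sparse format, one example
   per line: an integer label followed by 1-based <index>:<value> pairs.  For
   instance (a made-up line), given

       +1 3:0.5 7:1.25 12:-0.3

   the parser stores sample[i].a = 1, the feature list {3:0.5, 7:1.25, 12:-0.3}
   terminated by an entry with num == -1, and
   sample[i].twonorm_sq = 0.5*0.5 + 1.25*1.25 + 0.3*0.3 = 1.9025. */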
int read_dataset(FILE *dataset)
{
  int probsize, *nnzl, i, j, s_label, s_index, max_index = 0;
  double s_sumofsq, s_value;
  char line[MAXLEN], *p;

  /* allocate memory of samples */
  probsize = get_N(dataset);
  nnzl = (int *) xmalloc(sizeof(int)*probsize);
  get_nnz(dataset, nnzl);
  sample = (sample_t *) xmalloc(sizeof(sample_t)*probsize);
  for (i=0;i<probsize;i++)
    sample[i].v = (feature_t *) xmalloc(sizeof(feature_t)*(nnzl[i]+1));

  for (i=0;i<probsize;i++) {
    fgets(line, MAXLEN, dataset);
    p = line;
    sscanf(p, "%d", &s_label);
    while (isspace(*p)) ++p;
    while (!isspace(*p)) ++p;
    for (j=0, s_sumofsq=0; sscanf(p, "%d:%lf", &s_index, &s_value) == 2; j++) {
      if (s_index > max_index) max_index = s_index;
      s_sumofsq += s_value*s_value;
      sample[i].v[j].num = s_index;
      sample[i].v[j].value = s_value;
      while (*p != ':') ++p;
      ++p;
      while (isspace(*p)) ++p;
      while (*p && !isspace(*p)) ++p;
    }
    sample[i].v[j].num = -1;  /* terminator */
    sample[i].a = s_label;
    sample[i].twonorm_sq = s_sumofsq;
  }

  /* assume index of feature is in [1, max_index] */
  svm_param.veclen = max_index;
  if (svm_param.verbosity >= 0)
    printf("%d examples read, %d features per example at most.\n", probsize, max_index);
  free(nnzl);

  /* return the number of examples read */
  N = probsize;
  return probsize;
}

void reset_x()
{
  int i;

  for (i=0;i<N;i++) {
    x[i] = 0;
    Qx[i] = 0;
  }
  Nsv = 0;
  Nfree = 0;
}

/* Scale the stored solution when the upper bound C changes, so retraining can
   start from a warm point instead of from zero. */
void alpha_seeding(double old_bound, double new_bound)
{
  int i;
  int Nbounded = 0;

  if (old_bound < 0 || new_bound < 0) return;
  Nsv = 0;
  Nfree = 0;
  for (i=0; i<N; i++) {
    x[i] = loo_init_x[i] / old_bound * new_bound;
    Qx[i] = loo_init_Qx[i] / old_bound * new_bound;
    if (x[i] > ZERO) Nsv++;
    if (x[i] >= new_bound-EPSILON_A) Nbounded++;
  }
  Nfree = Nsv - Nbounded;
}

void clear_cache()
{
  int i;

  for (i=0;i<N;i++)
    Q2cache[i] = -1;
  usingcacherows = 0;
}

double *get_x()
{
  return loo_init_x;
}
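/* Usage sketch: a minimal standalone driver for the routines above.  The
   header name "loo.h" is hypothetical, and the cache size, working-set size,
   C, epsilon and kernel settings are illustrative values only; the
   loo_param_t and kernel_param_t fields match the ones referenced in loo(),
   initialize_bsvm() and output_model(). */
#include <stdio.h>
#include "loo.h"   /* hypothetical header declaring the routines and types above */

int main(int argc, char **argv)
{
  FILE *fset;
  loo_param_t lp;
  int n, miss;

  if (argc < 2) {
    fprintf(stderr, "usage: %s dataset\n", argv[0]);
    return 1;
  }
  fset = fopen(argv[1], "r");
  if (fset == NULL) {
    perror("fopen");
    return 1;
  }

  /* read the data and allocate the solver (illustrative cache/qpsize values) */
  n = initialize_loo(fset, 40, 10);
  fclose(fset);

  lp.C = 1.0;                /* upper bound on the multipliers */
  lp.verbosity = 1;
  lp.epsilon_train = 1e-3;   /* stopping tolerance for full training */
  lp.epsilon_loo = 1e-3;     /* stopping tolerance for each retraining */
  lp.has_rate_bound = 0;     /* do not stop early on an error bound */
  lp.max_test_error = 0;
  lp.kp.ktype = RBF_KERNEL;
  lp.kp.degree = 3;
  lp.kp.gamma = 0.5;         /* illustrative kernel width */
  lp.kp.coef0 = 0;

  miss = loo(&lp);
  printf("leave-one-out errors: %d of %d examples\n", miss, n);

  free_loo();
  return 0;
}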