📄 bsvm.cpp
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <float.h>
#include <string.h>
#include <stdarg.h>
#include "svm.h"
typedef float Qfloat;
typedef signed char schar;
#ifndef min
template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
template <class S, class T> inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define INF HUGE_VAL
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#if 1
void info(const char *fmt,...)
{
	va_list ap;
	va_start(ap,fmt);
	vprintf(fmt,ap);
	va_end(ap);
}
void info_flush()
{
	fflush(stdout);
}
#else
void info(const char *fmt,...) {}
void info_flush() {}
#endif

//
// Kernel Cache
//
// l is the number of total data items
// size is the cache size limit in bytes
//
class Cache
{
public:
	Cache(int l,int size,int qpsize);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int get_data(const int index, Qfloat **data, int len);
	void swap_index(int i, int j);	// future_option
private:
	int l;
	int size;
	struct head_t
	{
		head_t *prev, *next;	// a circular list
		Qfloat *data;
		int len;	// data[0,len) is cached in this entry
	};

	head_t* head;
	head_t lru_head;
	void lru_delete(head_t *h);
	void lru_insert(head_t *h);
};

Cache::Cache(int l_,int size_,int qpsize):l(l_),size(size_)
{
	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
	size /= sizeof(Qfloat);
	size -= l * sizeof(head_t) / sizeof(Qfloat);
	size = max(size, qpsize*l);	// cache must be large enough for 'qpsize' columns
	lru_head.next = lru_head.prev = &lru_head;
}

Cache::~Cache()
{
	for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
		free(h->data);
	free(head);
}

void Cache::lru_delete(head_t *h)
{
	// delete from current location
	h->prev->next = h->next;
	h->next->prev = h->prev;
}

void Cache::lru_insert(head_t *h)
{
	// insert to last position
	h->next = &lru_head;
	h->prev = lru_head.prev;
	h->prev->next = h;
	h->next->prev = h;
}

int Cache::get_data(const int index, Qfloat **data, int len)
{
	head_t *h = &head[index];
	if(h->len) lru_delete(h);
	int more = len - h->len;

	if(more > 0)
	{
		// free old space
		while(size < more)
		{
			head_t *old = lru_head.next;
			lru_delete(old);
			free(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space
		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
		size -= more;
		swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
}

void Cache::swap_index(int i, int j)
{
	if(i==j) return;

	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	swap(head[i].data,head[j].data);
	swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	if(i>j) swap(i,j);
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
		if(h->len > i)
		{
			if(h->len > j)
				swap(h->data[i],h->data[j]);
			else
			{
				// give up
				lru_delete(h);
				free(h->data);
				size += h->len;
				h->data = 0;
				h->len = 0;
			}
		}
	}
}
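//
// Illustrative sketch, not part of the original bsvm.cpp: how a Q-matrix
// getter would typically use Cache::get_data().  get_data() returns the first
// position that still needs to be filled (>= len if the whole column is
// cached), so only the uncached tail of a column has to be recomputed.
// The names `example_get_column` and `kernel_eval` are hypothetical and
// exist only for this example.
//
//	Qfloat *example_get_column(Cache &cache, int i, int len)
//	{
//		Qfloat *data;
//		int start = cache.get_data(i, &data, len);
//		for (int j = start; j < len; j++)	// fill only the missing part
//			data[j] = (Qfloat)kernel_eval(i, j);
//		return data;
//	}
//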
//
// Kernel evaluation
//
// the static method k_function is for doing single kernel evaluation
// the constructor of Kernel prepares to calculate the l*l kernel matrix
// the member function get_Q is for getting one column from the Q Matrix
//
class Kernel {
public:
	Kernel(int l, svm_node * const * x, const svm_parameter& param);
	virtual ~Kernel();

	static double k_function(const svm_node *x, const svm_node *y,
				 const svm_parameter& param);
	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual void swap_index(int i, int j) const	// not so const...
	{
		swap(x[i],x[j]);
		if(x_square) swap(x_square[i],x_square[j]);
	}
protected:
	double (Kernel::*kernel_function)(int i, int j) const;
private:
	const svm_node **x;
	double *x_square;

	// svm_parameter
	const int kernel_type;
	const double degree;
	const double gamma;
	const double coef0;

	static double dot(const svm_node *px, const svm_node *py);
	double kernel_linear(int i, int j) const
	{
		return dot(x[i],x[j]);
	}
	double kernel_poly(int i, int j) const
	{
		return pow(gamma*dot(x[i],x[j])+coef0,degree);
	}
	double kernel_rbf(int i, int j) const
	{
		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
	}
	double kernel_sigmoid(int i, int j) const
	{
		return tanh(gamma*dot(x[i],x[j])+coef0);
	}
};

Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
:kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
	switch(kernel_type)
	{
		case LINEAR:
			kernel_function = &Kernel::kernel_linear;
			break;
		case POLY:
			kernel_function = &Kernel::kernel_poly;
			break;
		case RBF:
			kernel_function = &Kernel::kernel_rbf;
			break;
		case SIGMOID:
			kernel_function = &Kernel::kernel_sigmoid;
			break;
	}

	clone(x,x_,l);

	if(kernel_type == RBF)
	{
		x_square = new double[l];
		for(int i=0;i<l;i++)
			x_square[i] = dot(x[i],x[i]);
	}
	else
		x_square = 0;
}

Kernel::~Kernel()
{
	delete[] x;
	delete[] x_square;
}

double Kernel::dot(const svm_node *px, const svm_node *py)
{
	double sum = 0;
	while(px->index != -1 && py->index != -1)
	{
		if(px->index == py->index)
		{
			sum += px->value * py->value;
			++px;
			++py;
		}
		else
		{
			if(px->index > py->index)
				++py;
			else
				++px;
		}
	}
	return sum;
}

double Kernel::k_function(const svm_node *x, const svm_node *y,
			  const svm_parameter& param)
{
	switch(param.kernel_type)
	{
		case LINEAR:
			return dot(x,y);
		case POLY:
			return pow(param.gamma*dot(x,y)+param.coef0,param.degree);
		case RBF:
		{
			double sum = 0;
			while(x->index != -1 && y->index !=-1)
			{
				if(x->index == y->index)
				{
					double d = x->value - y->value;
					sum += d*d;
					++x;
					++y;
				}
				else
				{
					if(x->index > y->index)
					{
						sum += y->value * y->value;
						++y;
					}
					else
					{
						sum += x->value * x->value;
						++x;
					}
				}
			}

			while(x->index != -1)
			{
				sum += x->value * x->value;
				++x;
			}

			while(y->index != -1)
			{
				sum += y->value * y->value;
				++y;
			}

			return exp(-param.gamma*sum);
		}
		case SIGMOID:
			return tanh(param.gamma*dot(x,y)+param.coef0);
		default:
			return 0;	/* Unreachable */
	}
}
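//
// Illustrative sketch, not part of the original bsvm.cpp: evaluating a single
// kernel value with the static Kernel::k_function().  svm_node and
// svm_parameter come from svm.h; an index of -1 terminates a sparse vector.
// The feature values and parameter settings below are made up.
//
//	svm_node x[] = { {1, 0.5}, {3, -1.2}, {-1, 0.0} };
//	svm_node y[] = { {1, 1.0}, {2,  0.7}, {-1, 0.0} };
//	svm_parameter param;
//	param.kernel_type = RBF;
//	param.gamma = 0.5;
//	double k = Kernel::k_function(x, y, param);	// exp(-gamma*||x-y||^2)
//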
class Solver_SPOC {
public:
	Solver_SPOC() {};
	~Solver_SPOC() {};
	void Solve(int l, const Kernel& Q, double *alpha_, short *y_,
		   double *C_, double eps, int shrinking, int nr_class);
private:
	int active_size;
	double *G;	// gradient of objective function
	short *y;
	bool *alpha_status;	// free:true, bound:false
	double *alpha;
	const Kernel *Q;
	double eps;
	double *C;
	int *active_set;
	int l, nr_class;
	bool unshrinked;

	double get_C(int i, int m)
	{
		if (y[i] == m)
			return C[m];
		return 0;
	}
	void update_alpha_status(int i, int m)
	{
		if(alpha[i*nr_class+m] >= get_C(i, m))
			alpha_status[i*nr_class+m] = false;
		else alpha_status[i*nr_class+m] = true;
	}
	void swap_index(int i, int j);
	double select_working_set(int &q);
	void solve_sub_problem(double A, double *B, double C, double *nu);
	void reconstruct_gradient();
	void do_shrinking();
};

void Solver_SPOC::swap_index(int i, int j)
{
	Q->swap_index(i, j);
	swap(y[i], y[j]);
	swap(active_set[i], active_set[j]);

	for (int m=0;m<nr_class;m++)
	{
		swap(G[i*nr_class+m], G[j*nr_class+m]);
		swap(alpha[i*nr_class+m], alpha[j*nr_class+m]);
		swap(alpha_status[i*nr_class+m], alpha_status[j*nr_class+m]);
	}
}

void Solver_SPOC::reconstruct_gradient()
{
	if (active_size == l) return;

	int i, m;
	for (i=active_size*nr_class;i<l*nr_class;i++)
		G[i] = 1;
	for (i=active_size;i<l;i++)
		G[i*nr_class+y[i]] = 0;

	for (i=0;i<active_size;i++)
		for (m=0;m<nr_class;m++)
			if (fabs(alpha[i*nr_class+m]) != 0)
			{
				Qfloat *Q_i = Q->get_Q(i,l);
				double alpha_i_m = alpha[i*nr_class+m];
				for (int j=active_size;j<l;j++)
					G[j*nr_class+m] += alpha_i_m*Q_i[j];
			}
}

void Solver_SPOC::Solve(int l, const Kernel& Q, double *alpha_, short *y_,
	double *C_, double eps, int shrinking, int nr_class)
{
	this->l = l;
	this->nr_class = nr_class;
	this->Q = &Q;
	clone(y,y_,l);
	clone(alpha,alpha_,l*nr_class);
	C = C_;
	this->eps = eps;
	unshrinked = false;

	int i, m, q, old_q = -1;

	// initialize alpha_status
	{
		alpha_status = new bool[l*nr_class];
		for(i=0;i<l;i++)
			for (m=0;m<nr_class;m++)
				update_alpha_status(i, m);
	}

	// initialize active set (for shrinking)
	{
		active_set = new int[l];
		for(int i=0;i<l;i++)
			active_set[i] = i;
		active_size = l;
	}

	// initialize gradient
	{
		G = new double[l*nr_class];
		for (i=0;i<l*nr_class;i++)
			G[i] = 1;
		for (i=0;i<l;i++)
			G[i*nr_class+y[i]] = 0;

		for (i=0;i<l;i++)
			for (m=0;m<nr_class;m++)
				if (fabs(alpha[i*nr_class+m]) != 0)
				{
					Qfloat *Q_i = Q.get_Q(i,l);
					double alpha_i_m = alpha[i*nr_class+m];
					for (int j=0;j<l;j++)
						G[j*nr_class+m] += alpha_i_m*Q_i[j];
				}
	}

	// optimization step
	int iter = 0, counter = min(l*2, 2000) + 1;
	double *B = new double[nr_class];
	double *nu = new double[nr_class];

	while (1)
	{
		// show progress and do shrinking
		if (--counter == 0)
		{
			if (shrinking)
				do_shrinking();
			info(".");
			counter = min(l*2, 2000);
		}

		if (select_working_set(q) < eps)
		{
			// reconstruct the whole gradient
			reconstruct_gradient();
			// reset active set size and check
			active_size = l;
			info("*");info_flush();
			if (select_working_set(q) < eps)
				break;
			else
				counter = 1;	// do shrinking next iteration
		}

		if (counter == min(l*2, 2000))
			if (old_q == q)
				break;
		old_q = q;

		++iter;

		const Qfloat *Q_q = Q.get_Q(q, active_size);
		double A = Q_q[q];
		for (m=0;m<nr_class;m++)
			B[m] = G[q*nr_class+m] - A*alpha[q*nr_class+m];
		B[y[q]] += A*C[y[q]];

		if (fabs(A) > 0)
			solve_sub_problem(A, B, C[y[q]], nu);
		else
		{
			i = 0;
			for (m=1;m<nr_class;m++)
				if (B[m] > B[i])
					i = m;
			nu[i] = -C[y[q]];
		}
		nu[y[q]] += C[y[q]];

		for (m=0;m<nr_class;m++)
		{
			double d = nu[m] - alpha[q*nr_class+m];
#if 0
			if (fabs(d) > 1e-12)
#endif
			{
				alpha[q*nr_class+m] = nu[m];
				update_alpha_status(q, m);
				for (i=0;i<active_size;i++)
					G[i*nr_class+m] += d*Q_q[i];
			}
		}
	}

	delete[] B;
	delete[] nu;

	// calculate objective value
	double obj = 0;
	for (i=0;i<l*nr_class;i++)
		obj += alpha[i]*(G[i] + 1);
	for (i=0;i<l;i++)
		obj -= alpha[i*nr_class+y[i]];
	obj /= 2;

	int nSV = 0, nFREE = 0;
	for (i=0;i<nr_class*l;i++)
	{
		if (alpha_status[i])
			nFREE++;
		if (fabs(alpha[i]) > 0)
			nSV++;
	}
	info("\noptimization finished, #iter = %d, obj = %lf\n",iter, obj);
	info("nSV = %d, nFREE = %d\n",nSV,nFREE);

	// put back the solution
	{
		for(int i=0;i<l;i++)
		{
			double *alpha_i = &alpha[i*nr_class];
			double *alpha__i = &alpha_[active_set[i]*nr_class];
			for (int m=0;m<nr_class;m++)
				alpha__i[m] = alpha_i[m];
		}
	}

	delete[] active_set;
	delete[] alpha_status;
	delete[] G;
	delete[] y;
	delete[] alpha;
}

double Solver_SPOC::select_working_set(int &q)
{
	double vio_q = -INF;

	int j = 0;
	for (int i=0;i<active_size;i++)
	{
		double lb = -INF, ub = INF;
		for (int m=0;m<nr_class;m++,j++)
		{
			lb = max(G[j], lb);
			if (alpha_status[j])
				ub = min(G[j], ub);
		}
		if (lb - ub > vio_q)
		{
			q = i;
			vio_q = lb - ub;
		}
	}

	return vio_q;
}
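//
// Illustrative note, not part of the original bsvm.cpp: for every active
// example i, select_working_set() computes the violation
//	max over all m of G[i*nr_class+m]  -  min over free m of G[i*nr_class+m]
// stores the most violating example in q, and returns its score; Solve()
// stops once this score, rechecked on the full gradient, falls below eps.
// Made-up example: with nr_class = 3, G values {0.2, -0.1, 0.5} for example i
// and only class 1 free, the score is 0.5 - (-0.1) = 0.6.
//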
void Solver_SPOC::do_shrinking()
{
	int i, m;
	double Gm = select_working_set(i);
	if (Gm < eps)
		return;

	// shrink
	for (i=0;i<active_size;i++)
	{
		bool *alpha_status_i = &alpha_status[i*nr_class];
		double *G_i = &G[i*nr_class];
		double th = G_i[y[i]] - Gm/2;
		for (m=0;m<y[i];m++)
			if (alpha_status_i[m] || G_i[m] >= th)
				goto out;
		for (m++;m<nr_class;m++)
			if (alpha_status_i[m] || G_i[m] >= th)
				goto out;

		--active_size;
		swap_index(i, active_size);
		--i;
		out:	;
	}

	// unshrink, check all variables again before final iterations
	if (unshrinked || Gm > 10*eps)
		return;

	unshrinked = true;
	reconstruct_gradient();
	for (i=l-1;i>=active_size;i--)
	{
		double *G_i = &G[i*nr_class];
		double th = G_i[y[i]] - Gm/2;
		for (m=0;m<y[i];m++)