📄 svm.cpp
字号:
#include "stdafx.h"#include <math.h>#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <float.h>#include <string.h>#include <stdarg.h>#include "svm.h"typedef float Qfloat;typedef signed char schar;
/*以下是定义的几个主要的模板,主要是为了比较大小,交换数据和完全复制数据。
Min()和Max()在<math.h>中提供了相应的函数,这里的处理,估计是为了使函数内联,执行速度
会相对快一些,而且不同的数据类型,存储方式不同,使用模板会更有针对性,也从另外一方面
提高程序性能。*/#ifndef mintemplate <class T> inline T min(T x,T y) { return (x<y)?x:y; }#endif#ifndef maxtemplate <class T> inline T max(T x,T y) { return (x>y)?x:y; }#endiftemplate <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
/*这里的克隆函数是完全克隆,不同于一般的复制。操作结束后,内部的所有数据和指针完全一
样。*/template <class S, class T> inline void clone(T*& dst, S* src, int n){ dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n);}inline double powi(double base, int times){ double tmp = base, ret = 1.0; for(int t=times; t>0; t/=2) { if(t%2==1) ret*=tmp; tmp = tmp * tmp; } return ret;}#define INF HUGE_VAL#define TAU 1e-12#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
//以下的函数用作调试#if 1void info(char *fmt,...){ va_list ap; va_start(ap,fmt); vprintf(fmt,ap); va_end(ap);}void info_flush(){ fflush(stdout);}#elsevoid info(char *fmt,...) {}void info_flush() {}#endif//// Kernel Cache//// l is the number of total data items// size is the cache size limit in bytes//类定义:
class Cache{public: Cache(int l,int size); ~Cache(); // request data [0,len) // return some position p where [p,len) need to be filled // (p >= len if nothing needs to be filled) int get_data(const int index, Qfloat **data, int len); void swap_index(int i, int j); // future_optionprivate: int l; int size; struct head_t { head_t *prev, *next; // a cicular list Qfloat *data; int len; // data[0,len) is cached in this entry };//成员变量: head_t *head;/*变量指针,该指针用来记录程序所申请的内存,单块申请到的内存用struct
head_t来记录所申请内存的指针,并记录长度。而且通过双向的指针,形成链表,增加寻址的速
度。记录所有申请到的内存,一方面便于释放内存,另外方便在内存不够时适当释放一部分已经
申请到的内存。*/ head_t lru_head;//双向链表的头 void lru_delete(head_t *h);/*从双向链表中删除某个元素的链接,不删除、不释放该元素所
涉及的内存。*/ void lru_insert(head_t *h);/*在链表后面插入一个新的链接*/};
/*构造函数。该函数根据样本数L,申请L 个head_t 的空间。根据说明,该区域会初始化为0,
。Lru_head 因为尚没有head_t 中申请到内存,故双向链表指向自己。至于size 的
处理,先将原来的byte 数目转化为float 的数目,然后扣除L 个head_t 的内存数目。*/Cache::Cache(int l_,int size_):l(l_),size(size_){ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 size /= sizeof(Qfloat); size -= l * sizeof(head_t) / sizeof(Qfloat); size = max(size, 2*l); // cache must be large enough for two columns lru_head.next = lru_head.prev = &lru_head;}Cache::~Cache(){ for(head_t *h = lru_head.next; h != &lru_head; h=h->next) free(h->data); free(head);}void Cache::lru_delete(head_t *h){ // delete from current location h->prev->next = h->next; h->next->prev = h->prev;}void Cache::lru_insert(head_t *h){ // insert to last position h->next = &lru_head; h->prev = lru_head.prev; h->prev->next = h; h->next->prev = h;}int Cache::get_data(const int index, Qfloat **data, int len){ head_t *h = &head[index]; if(h->len) lru_delete(h); int more = len - h->len; if(more > 0) { // free old space while(size < more) { head_t *old = lru_head.next; lru_delete(old); free(old->data); size += old->len; old->data = 0; old->len = 0; } // allocate new space h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); size -= more; swap(h->len,len); } lru_insert(h); *data = h->data; return len;}void Cache::swap_index(int i, int j)
/*交换head_t[i] 和head_t[j]的内容,先从双向链表中断开,交换后重新进入双向链表中。对后
面的处理是防止中head_t[i] 和head_t[j]可能有一方并未申请内存。*/{ if(i==j) return; if(head[i].len) lru_delete(&head[i]); if(head[j].len) lru_delete(&head[j]); swap(head[i].data,head[j].data); swap(head[i].len,head[j].len); if(head[i].len) lru_insert(&head[i]); if(head[j].len) lru_insert(&head[j]); if(i>j) swap(i,j); for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) { if(h->len > i) { if(h->len > j) swap(h->data[i],h->data[j]); else { // give up lru_delete(h); free(h->data); size += h->len; h->data = 0; h->len = 0; } } }}//// Kernel evaluation //// the static method k_function is for doing single kernel evaluation// the constructor of Kernel prepares to calculate the l*l kernel matrix// the member function get_Q is for getting one column from the Q Matrix//class QMatrix {public: virtual Qfloat *get_Q(int column, int len) const = 0; virtual Qfloat *get_QD() const = 0; virtual void swap_index(int i, int j) const = 0; virtual ~QMatrix() {}};class Kernel: public QMatrix {//类Kernelpublic: Kernel(int l, svm_node * const * x, const svm_parameter& param); virtual ~Kernel(); static double k_function(const svm_node *x, const svm_node *y, const svm_parameter& param); virtual Qfloat *get_Q(int column, int len) const = 0; virtual Qfloat *get_QD() const = 0; virtual void swap_index(int i, int j) const // no so const... { swap(x[i],x[j]); if(x_square) swap(x_square[i],x_square[j]); }protected: double (Kernel::*kernel_function)(int i, int j) const;private: const svm_node **x; double *x_square; // svm_parameter const int kernel_type; const int degree; const double gamma; const double coef0; static double dot(const svm_node *px, const svm_node *py); //double kernel_linear(int i, int j) const //{ //return dot(x[i],x[j]); //} //double kernel_poly(int i, int j) const //{ //return powi(gamma*dot(x[i],x[j])+coef0,degree); //} double kernel_rbf(int i, int j) const { return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); } //double kernel_sigmoid(int i, int j) const //{ //return tanh(gamma*dot(x[i],x[j])+coef0); //} //double kernel_precomputed(int i, int j) const //{ // return x[i][(int)(x[j][0].value)].value; //}};Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param):kernel_type(param.kernel_type), degree(param.degree), gamma(param.gamma), coef0(param.coef0){ switch(kernel_type) { //case LINEAR: //kernel_function = &Kernel::kernel_linear; //break; //case POLY: //kernel_function = &Kernel::kernel_poly; //break; case RBF: kernel_function = &Kernel::kernel_rbf; break; //case SIGMOID: //kernel_function = &Kernel::kernel_sigmoid; //case PRECOMPUTED: //kernel_function = &Kernel::kernel_precomputed; //break; } clone(x,x_,l); if(kernel_type == RBF) { x_square = new double[l]; for(int i=0;i<l;i++) x_square[i] = dot(x[i],x[i]); } else x_square = 0;}Kernel::~Kernel(){ delete[] x; delete[] x_square;}double Kernel::dot(const svm_node *px, const svm_node *py){ double sum = 0; while(px->index != -1 && py->index != -1) { if(px->index == py->index) { sum += px->value * py->value; ++px; ++py; } else { if(px->index > py->index) ++py; else ++px; } } return sum;}double Kernel::k_function(const svm_node *x, const svm_node *y, const svm_parameter& param){ switch(param.kernel_type) { //case LINEAR: //return dot(x,y); //case POLY: //return powi(param.gamma*dot(x,y)+param.coef0,param.degree); case RBF: { double sum = 0; while(x->index != -1 && y->index !=-1) { if(x->index == y->index) { double d = x->value - y->value; sum += d*d; ++x; ++y; } else { if(x->index > y->index) { sum += y->value * y->value; ++y; } else { sum += x->value * x->value; ++x; } } } while(x->index != -1) { sum += x->value * x->value; ++x; } while(y->index != -1) { sum += y->value * y->value; ++y; } return exp(-param.gamma*sum); } //case SIGMOID: //return tanh(param.gamma*dot(x,y)+param.coef0); //case PRECOMPUTED: //x: test (validation), y: SV //return x[(int)(y->value)].value; default: return 0; /* Unreachable */ }}// Generalized SMO+SVMlight algorithm// Solves://// min 0.5(\alpha^T Q \alpha) + b^T \alpha//// y^T \alpha = \delta// y_i = +1 or -1// 0 <= alpha_i <= Cp for y_i = 1// 0 <= alpha_i <= Cn for y_i = -1//// Given://// Q, b, y, Cp, Cn, and an initial feasible point \alpha// l is the size of vectors and matrices// eps is the stopping criterion//// solution will be put in \alpha, objective value will be put in obj//class Solver {public: Solver() {}; virtual ~Solver() {}; struct SolutionInfo { double obj; double rho; double upper_bound_p; double upper_bound_n; double r; // for Solver_NU }; void Solve(int l, const QMatrix& Q, const double *b_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking);protected: int active_size; schar *y; double *G; // gradient of objective function enum { LOWER_BOUND, UPPER_BOUND, FREE }; char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE double *alpha; const QMatrix *Q; const Qfloat *QD; double eps; double Cp,Cn; double *b; int *active_set; double *G_bar; // gradient, if we treat free variables as 0 int l; bool unshrinked; // XXX double get_C(int i) { return (y[i] > 0)? Cp : Cn; } void update_alpha_status(int i) { if(alpha[i] >= get_C(i)) alpha_status[i] = UPPER_BOUND; else if(alpha[i] <= 0) alpha_status[i] = LOWER_BOUND; else alpha_status[i] = FREE; } bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } bool is_free(int i) { return alpha_status[i] == FREE; } void swap_index(int i, int j); void reconstruct_gradient(); virtual int select_working_set(int &i, int &j); virtual int max_violating_pair(int &i, int &j); virtual double calculate_rho(); virtual void do_shrinking();};void Solver::swap_index(int i, int j){ Q->swap_index(i,j); swap(y[i],y[j]); swap(G[i],G[j]); swap(alpha_status[i],alpha_status[j]); swap(alpha[i],alpha[j]); swap(b[i],b[j]); swap(active_set[i],active_set[j]); swap(G_bar[i],G_bar[j]);}void Solver::reconstruct_gradient(){ // reconstruct inactive elements of G from G_bar and free variables if(active_size == l) return; int i; for(i=active_size;i<l;i++) G[i] = G_bar[i] + b[i]; for(i=0;i<active_size;i++) if(is_free(i)) { const Qfloat *Q_i = Q->get_Q(i,l); double alpha_i = alpha[i]; for(int j=active_size;j<l;j++) G[j] += alpha_i * Q_i[j]; }}void Solver::Solve(int l, const QMatrix& Q, const double *b_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking){ this->l = l; this->Q = &Q; QD=Q.get_QD(); clone(b, b_,l); clone(y, y_,l); clone(alpha,alpha_,l); this->Cp = Cp; this->Cn = Cn; this->eps = eps; unshrinked = false; // initialize alpha_status { alpha_status = new char[l]; for(int i=0;i<l;i++) update_alpha_status(i); } // initialize active set (for shrinking) { active_set = new int[l]; for(int i=0;i<l;i++) active_set[i] = i; active_size = l; } // initialize gradient { G = new double[l]; G_bar = new double[l]; int i; for(i=0;i<l;i++) { G[i] = b[i]; G_bar[i] = 0; } for(i=0;i<l;i++) if(!is_lower_bound(i)) { const Qfloat *Q_i = Q.get_Q(i,l); double alpha_i = alpha[i]; int j; for(j=0;j<l;j++) G[j] += alpha_i*Q_i[j]; if(is_upper_bound(i)) for(j=0;j<l;j++) G_bar[j] += get_C(i) * Q_i[j]; } } // optimization step int iter = 0; int counter = min(l,1000)+1; while(1) { // show progress and do shrinking if(--counter == 0) { counter = min(l,1000); if(shrinking) do_shrinking(); info("."); info_flush(); } int i,j; if(select_working_set(i,j)!=0) { // reconstruct the whole gradient reconstruct_gradient(); // reset active set size and check active_size = l; info("*"); info_flush(); if(select_working_set(i,j)!=0) break; else counter = 1; // do shrinking next iteration } ++iter; // update alpha[i] and alpha[j], handle bounds carefully const Qfloat *Q_i = Q.get_Q(i,active_size); const Qfloat *Q_j = Q.get_Q(j,active_size); double C_i = get_C(i); double C_j = get_C(j); double old_alpha_i = alpha[i]; double old_alpha_j = alpha[j]; if(y[i]!=y[j]) { double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; if (quad_coef <= 0) quad_coef = TAU; double delta = (-G[i]-G[j])/quad_coef; double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; if(diff > 0) { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = diff; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = -diff; } } if(diff > C_i - C_j) { if(alpha[i] > C_i) { alpha[i] = C_i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -