📄 linear.cpp
字号:
#include <math.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include "linear.h"#include "tron.h"typedef signed char schar;template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }#ifndef mintemplate <class T> inline T min(T x,T y) { return (x<y)?x:y; }#endif#ifndef maxtemplate <class T> inline T max(T x,T y) { return (x>y)?x:y; }#endif#define Malloc(type,n) (type *)malloc((n)*sizeof(type))#define INF HUGE_VAL#if 1void info(const char *fmt,...){ va_list ap; va_start(ap,fmt); vprintf(fmt,ap); va_end(ap);}void info_flush(){ fflush(stdout);}#elsevoid info(char *fmt,...) {}void info_flush() {}#endifclass l2_lr_fun : public function{public: l2_lr_fun(const problem *prob, double Cp, double Cn); ~l2_lr_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void);private: void Xv(double *v, double *Xv); void XTv(double *v, double *XTv); double *C; double *z; double *D; const problem *prob;};l2_lr_fun::l2_lr_fun(const problem *prob, double Cp, double Cn){ int i; int l=prob->l; int *y=prob->y; this->prob = prob; z = new double[l]; D = new double[l]; C = new double[l]; for (i=0; i<l; i++) { if (y[i] == 1) C[i] = Cp; else C[i] = Cn; }}l2_lr_fun::~l2_lr_fun(){ delete[] z; delete[] D; delete[] C;}double l2_lr_fun::fun(double *w){ int i; double f=0; int *y=prob->y; int l=prob->l; int n=prob->n; Xv(w, z); for(i=0;i<l;i++) { double yz = y[i]*z[i]; if (yz >= 0) f += C[i]*log(1 + exp(-yz)); else f += C[i]*(-yz+log(1 + exp(yz))); } f = 2*f; for(i=0;i<n;i++) f += w[i]*w[i]; f /= 2.0; return(f);}void l2_lr_fun::grad(double *w, double *g){ int i; int *y=prob->y; int l=prob->l; int n=prob->n; for(i=0;i<l;i++) { z[i] = 1/(1 + exp(-y[i]*z[i])); D[i] = z[i]*(1-z[i]); z[i] = C[i]*(z[i]-1)*y[i]; } XTv(z, g); for(i=0;i<n;i++) g[i] = w[i] + g[i];}int l2_lr_fun::get_nr_variable(void){ return prob->n;}void l2_lr_fun::Hv(double *s, double *Hs){ int i; int l=prob->l; int n=prob->n; double *wa = new 
// (continuation of l2_lr_fun::Hv -- completes `double *wa = new double[l];`)
double[l];
	Xv(s, wa);                        // wa = X*s
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];      // scale by per-instance curvature C.*D
	XTv(wa, Hs);                      // Hs = X^T * (C.*D.*(X*s))
	for(i=0;i<n;i++)
		Hs[i] = s[i] + Hs[i];         // + s from the L2 regularizer
	delete[] wa;
}

// Xv = X*v over all l instances.  Each sparse row is a feature_node array,
// 1-indexed and terminated by index == -1.
void l2_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

// XTv = X^T * v, accumulated column-wise from the sparse rows.
void l2_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int n=prob->n;
	feature_node **x=prob->x;

	for(i=0;i<n;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

// TRON oracle for L2-regularized L2-loss (squared hinge) SVM.  Same shape as
// l2_lr_fun, plus an active-set (I, sizeI) of instances with z_i < 1, which
// restricts the Hessian-vector products to rows with nonzero loss.
class l2loss_svm_fun : public function
{
public:
	l2loss_svm_fun(const problem *prob, double Cp, double Cn);
	~l2loss_svm_fun();

	double fun(double *w);            // objective at w; caches z = y.*(X*w)
	void grad(double *w, double *g);  // gradient; rebuilds the active set I
	void Hv(double *s, double *Hs);   // Hessian-vector product on active set
	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);      // full product X*v
	void subXv(double *v, double *Xv);   // X_I * v  (active rows only)
	void subXTv(double *v, double *XTv); // X_I^T * v

	double *C;   // per-instance penalty: Cp for y==1, Cn otherwise
	double *z;   // scratch: y.*(X*w), then reused for grad coefficients
	double *D;   // allocated alongside z/C (not referenced in this excerpt)
	int *I;      // indices of active instances (z_i < 1)
	int sizeI;   // number of active instances
	const problem *prob;
};

// Allocates work buffers and the active-set index array; records the
// asymmetric penalties exactly like l2_lr_fun's constructor.
l2loss_svm_fun::l2loss_svm_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2loss_svm_fun::~l2loss_svm_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

// f(w) = 0.5*w^T w + sum_i C_i * max(0, 1 - y_i w^T x_i)^2.
// Side effect: leaves z = y.*(X*w), which grad() relies on.
double l2loss_svm_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int n=prob->n;

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = z[i]-1;
		if (d < 0)              // only margin violations contribute
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<n;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

// g = w + 2 * X_I^T * (C_I .* y_I .* (z_I - 1)); rebuilds the active set I
// (instances with z_i < 1) and compacts their coefficients into z in place.
// NOTE(review): like the LR variant, assumes fun(w) was called first.
void l2loss_svm_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int n=prob->n;

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1); // safe: sizeI <= i, so no clobber
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<n;i++)
		g[i] = w[i] + 2*g[i];   // factor 2 from the squared hinge loss
}

int l2loss_svm_fun::get_nr_variable(void)
{
	return prob->n;
}

// Hs = s + 2 * X_I^T * diag(C_I) * X_I * s, restricted to the active set.
// (Body continues on the next source line of this scraped copy.)
void l2loss_svm_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
// (continuation of l2loss_svm_fun::Hv)
int n=prob->n;
	double *wa = new double[l];      // wa over-allocated to l; only sizeI used

	subXv(s, wa);                    // wa = X_I * s (active rows only)
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];       // scale by the active rows' penalties
	subXTv(wa, Hs);                  // Hs = X_I^T * (C_I .* (X_I * s))
	for(i=0;i<n;i++)
		Hs[i] = s[i] + 2*Hs[i];      // + s regularizer; 2 from squared hinge
	delete[] wa;
}

// Xv = X*v over all l instances (sparse rows, 1-indexed, -1 terminated).
void l2loss_svm_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

// Xv = X_I * v: the matrix-vector product restricted to the sizeI active
// rows listed in I; Xv[i] corresponds to instance I[i].
void l2loss_svm_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

// XTv = X_I^T * v, accumulating only over the active rows in I
// (v is indexed by active-set position, matching subXv's output).
void l2loss_svm_fun::subXTv(double *v, double *XTv)
{
	int i;
	int n=prob->n;
	feature_node **x=prob->x;

	for(i=0;i<n;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//		upper_bound_i = Cp if y_i = 1
//		upper_bound_i = Cn if y_i = -1
//		D_ii = 0
// In L2-SVM case:
//		upper_bound_i = INF
//		D_ii = 1/(2*Cp)	if y_i = 1
//		D_ii = 1/(2*Cn)	if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
static void solve_linear_c_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int n = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];     // cached diagonal Q_ii + D_ii per instance
	int max_iter = 20000;
	int *index = new int[l];        // permutation of the active instances
	double *alpha = new double[l];  // dual variables
	schar *y = new schar[l];        // labels normalized to +1/-1
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2LOSS_SVM_DUAL
	double diag_p = 0.5/Cp, diag_n = 0.5/Cn;
	double upper_bound_p = INF, upper_bound_n = INF;
	// L1-loss dual: no diagonal shift, box-constrained alphas instead.
	// (The assignment below continues on the next source line of this copy.)
	if(solver_type == L1LOSS_SVM_DUAL)
	{
		diag_p = 0;
// (continuation of solve_linear_c_svc: L1-loss branch of the setup)
diag_n = 0;
		upper_bound_p = Cp;
		upper_bound_n = Cn;
	}

	for(i=0; i<n; i++)
		w[i] = 0;
	// Initialize duals to 0 and cache QD[i] = D_ii + x_i . x_i.
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
			QD[i] = diag_p;
		}
		else
		{
			y[i] = -1;
			QD[i] = diag_n;
		}

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = 0;
		PGmin_new = 0;

		// Randomly permute the active instances (Fisher-Yates style, rand()).
		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s < active_size; s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			// G = y_i * w.x_i - 1 (+ alpha_i * D_ii): dual gradient coord i.
			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			if(yi == 1)
			{
				C = upper_bound_p;
				G += alpha[i]*diag_p;
			}
			else
			{
				C = upper_bound_n;
				G += alpha[i]*diag_n;
			}

			// Projected gradient; shrink instances clearly optimal at a bound.
			PG = 0;
			if (alpha[i] ==0)
			{
				if (G > PGmax_old)
				{
					// Shrink: move to the inactive tail and revisit slot s.
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
				{
					PG = G;
					PGmin_new = min(PGmin_new, PG);
				}
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
				{
					PG = G;
					PGmax_new = max(PGmax_new, PG);
				}
			}
			else
			{
				PG = G;
				PGmax_new = max(PGmax_new, PG);
				PGmin_new = min(PGmin_new, PG);
			}

			if(fabs(PG) > 1.0e-12)
			{
				// One-variable Newton step clipped to [0, C], then update
				// w incrementally: w += (alpha_new - alpha_old) * y_i * x_i.
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
			info_flush();
		}

		// Converged on the active set: stop if nothing was shrunk,
		// otherwise restore the full set and re-check optimality.
		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				info_flush();
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old == 0)
			PGmax_old = INF;
		if (PGmin_old == 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("Warning: reaching max number of iterations\n");

	// calculate objective value
	// NOTE(review): the function is truncated at this point in this scraped
	// copy -- the objective computation, nSV report, delete[]s and closing
	// brace lie beyond this excerpt.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -