📄 svm.java
字号:
package libsvm;import java.io.*;import java.util.*;//// Kernel Cache//// l is the number of total data items// size is the cache size limit in bytes//class Cache { private final int l; private int size; private final class head_t { head_t prev, next; // a cicular list float[] data; int len; // data[0,len) is cached in this entry } private final head_t[] head; private head_t lru_head; Cache(int l_, int size_) { l = l_; size = size_; head = new head_t[l]; for(int i=0;i<l;i++) head[i] = new head_t(); size /= 4; size -= l * (16/4); // sizeof(head_t) == 16 size = Math.max(size, 2*l); // cache must be large enough for two columns lru_head = new head_t(); lru_head.next = lru_head.prev = lru_head; } private void lru_delete(head_t h) { // delete from current location h.prev.next = h.next; h.next.prev = h.prev; } private void lru_insert(head_t h) { // insert to last position h.next = lru_head; h.prev = lru_head.prev; h.prev.next = h; h.next.prev = h; } // request data [0,len) // return some position p where [p,len) need to be filled // (p >= len if nothing needs to be filled) // java: simulate pointer using single-element array int get_data(int index, float[][] data, int len) { head_t h = head[index]; if(h.len > 0) lru_delete(h); int more = len - h.len; if(more > 0) { // free old space while(size < more) { head_t old = lru_head.next; lru_delete(old); size += old.len; old.data = null; old.len = 0; } // allocate new space float[] new_data = new float[len]; if(h.data != null) System.arraycopy(h.data,0,new_data,0,h.len); h.data = new_data; size -= more; do {int _=h.len; h.len=len; len=_;} while(false); } lru_insert(h); data[0] = h.data; return len; } void swap_index(int i, int j) { if(i==j) return; if(head[i].len > 0) lru_delete(head[i]); if(head[j].len > 0) lru_delete(head[j]); do {float[] _=head[i].data; head[i].data=head[j].data; head[j].data=_;} while(false); do {int _=head[i].len; head[i].len=head[j].len; head[j].len=_;} while(false); if(head[i].len > 0) lru_insert(head[i]); if(head[j].len > 0) lru_insert(head[j]); if(i>j) do {int _=i; i=j; j=_;} while(false); for(head_t h = lru_head.next; h!=lru_head; h=h.next) { if(h.len > i) { if(h.len > j) do {float _=h.data[i]; h.data[i]=h.data[j]; h.data[j]=_;} while(false); else { // give up lru_delete(h); size += h.len; h.data = null; h.len = 0; } } } }}//// Kernel evaluation//// the static method k_function is for doing single kernel evaluation// the constructor of Kernel prepares to calculate the l*l kernel matrix// the member function get_Q is for getting one column from the Q Matrix//abstract class QMatrix { abstract float[] get_Q(int column, int len); abstract float[] get_QD(); abstract void swap_index(int i, int j);};abstract class Kernel extends QMatrix { private svm_node[][] x; private final double[] x_square; // svm_parameter private final int kernel_type; private final int degree; private final double gamma; private final double coef0; abstract float[] get_Q(int column, int len); abstract float[] get_QD(); void swap_index(int i, int j) { do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false); if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false); } private static double powi(double base, int times) { double tmp = base, ret = 1.0; for(int t=times; t>0; t/=2) { if(t%2==1) ret*=tmp; tmp = tmp * tmp; } return ret; } private static double tanh(double x) { double e = Math.exp(x); return 1.0-2.0/(e*e+1); } double kernel_function(int i, int j) { switch(kernel_type) { case svm_parameter.LINEAR: return dot(x[i],x[j]); case svm_parameter.POLY: return powi(gamma*dot(x[i],x[j])+coef0,degree); case svm_parameter.RBF: return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); case svm_parameter.SIGMOID: return tanh(gamma*dot(x[i],x[j])+coef0); case svm_parameter.PRECOMPUTED: return x[i][(int)(x[j][0].value)].value; default: return 0; // java } } Kernel(int l, svm_node[][] x_, svm_parameter param) { this.kernel_type = param.kernel_type; this.degree = param.degree; this.gamma = param.gamma; this.coef0 = param.coef0; x = (svm_node[][])x_.clone(); if(kernel_type == svm_parameter.RBF) { x_square = new double[l]; for(int i=0;i<l;i++) x_square[i] = dot(x[i],x[i]); } else x_square = null; } static double dot(svm_node[] x, svm_node[] y) { double sum = 0; int xlen = x.length; int ylen = y.length; int i = 0; int j = 0; while(i < xlen && j < ylen) { if(x[i].index == y[j].index) sum += x[i++].value * y[j++].value; else { if(x[i].index > y[j].index) ++j; else ++i; } } return sum; } static double k_function(svm_node[] x, svm_node[] y, svm_parameter param) { switch(param.kernel_type) { case svm_parameter.LINEAR: return dot(x,y); case svm_parameter.POLY: return powi(param.gamma*dot(x,y)+param.coef0,param.degree); case svm_parameter.RBF: { double sum = 0; int xlen = x.length; int ylen = y.length; int i = 0; int j = 0; while(i < xlen && j < ylen) { if(x[i].index == y[j].index) { double d = x[i++].value - y[j++].value; sum += d*d; } else if(x[i].index > y[j].index) { sum += y[j].value * y[j].value; ++j; } else { sum += x[i].value * x[i].value; ++i; } } while(i < xlen) { sum += x[i].value * x[i].value; ++i; } while(j < ylen) { sum += y[j].value * y[j].value; ++j; } return Math.exp(-param.gamma*sum); } case svm_parameter.SIGMOID: return tanh(param.gamma*dot(x,y)+param.coef0); case svm_parameter.PRECOMPUTED: return x[(int)(y[0].value)].value; default: return 0; // java } }}// Generalized SMO+SVMlight algorithm// Solves://// min 0.5(\alpha^T Q \alpha) + b^T \alpha//// y^T \alpha = \delta// y_i = +1 or -1// 0 <= alpha_i <= Cp for y_i = 1// 0 <= alpha_i <= Cn for y_i = -1//// Given://// Q, b, y, Cp, Cn, and an initial feasible point \alpha// l is the size of vectors and matrices// eps is the stopping criterion//// solution will be put in \alpha, objective value will be put in obj//class Solver { int active_size; byte[] y; double[] G; // gradient of objective function static final byte LOWER_BOUND = 0; static final byte UPPER_BOUND = 1; static final byte FREE = 2; byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE double[] alpha; QMatrix Q; float[] QD; double eps; double Cp,Cn; double[] b; int[] active_set; double[] G_bar; // gradient, if we treat free variables as 0 int l; boolean unshrinked; // XXX static final double INF = java.lang.Double.POSITIVE_INFINITY; double get_C(int i) { return (y[i] > 0)? Cp : Cn; } void update_alpha_status(int i) { if(alpha[i] >= get_C(i)) alpha_status[i] = UPPER_BOUND; else if(alpha[i] <= 0) alpha_status[i] = LOWER_BOUND; else alpha_status[i] = FREE; } boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } boolean is_free(int i) { return alpha_status[i] == FREE; } // java: information about solution except alpha, // because we cannot return multiple values otherwise... static class SolutionInfo { double obj; double rho; double upper_bound_p; double upper_bound_n; double r; // for Solver_NU } void swap_index(int i, int j) { Q.swap_index(i,j); do {byte _=y[i]; y[i]=y[j]; y[j]=_;} while(false); do {double _=G[i]; G[i]=G[j]; G[j]=_;} while(false); do {byte _=alpha_status[i]; alpha_status[i]=alpha_status[j]; alpha_status[j]=_;} while(false); do {double _=alpha[i]; alpha[i]=alpha[j]; alpha[j]=_;} while(false); do {double _=b[i]; b[i]=b[j]; b[j]=_;} while(false); do {int _=active_set[i]; active_set[i]=active_set[j]; active_set[j]=_;} while(false); do {double _=G_bar[i]; G_bar[i]=G_bar[j]; G_bar[j]=_;} while(false); } void reconstruct_gradient() { // reconstruct inactive elements of G from G_bar and free variables if(active_size == l) return; int i; for(i=active_size;i<l;i++) G[i] = G_bar[i] + b[i]; for(i=0;i<active_size;i++) if(is_free(i)) { float[] Q_i = Q.get_Q(i,l); double alpha_i = alpha[i]; for(int j=active_size;j<l;j++) G[j] += alpha_i * Q_i[j]; } } void Solve(int l, QMatrix Q, double[] b_, byte[] y_, double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, int shrinking) { this.l = l; this.Q = Q; QD = Q.get_QD(); b = (double[])b_.clone(); y = (byte[])y_.clone(); alpha = (double[])alpha_.clone(); this.Cp = Cp; this.Cn = Cn; this.eps = eps; this.unshrinked = false; // initialize alpha_status { alpha_status = new byte[l]; for(int i=0;i<l;i++) update_alpha_status(i); } // initialize active set (for shrinking) { active_set = new int[l]; for(int i=0;i<l;i++) active_set[i] = i; active_size = l; } // initialize gradient { G = new double[l]; G_bar = new double[l]; int i; for(i=0;i<l;i++) { G[i] = b[i]; G_bar[i] = 0; } for(i=0;i<l;i++) if(!is_lower_bound(i)) { float[] Q_i = Q.get_Q(i,l); double alpha_i = alpha[i]; int j; for(j=0;j<l;j++) G[j] += alpha_i*Q_i[j]; if(is_upper_bound(i)) for(j=0;j<l;j++) G_bar[j] += get_C(i) * Q_i[j]; } } // optimization step int iter = 0; int counter = Math.min(l,1000)+1; int[] working_set = new int[2]; while(true) { // show progress and do shrinking if(--counter == 0) { counter = Math.min(l,1000); if(shrinking!=0) do_shrinking(); System.err.print("."); } if(select_working_set(working_set)!=0) { // reconstruct the whole gradient reconstruct_gradient(); // reset active set size and check active_size = l; System.err.print("*"); if(select_working_set(working_set)!=0) break; else counter = 1; // do shrinking next iteration } int i = working_set[0]; int j = working_set[1]; ++iter; // update alpha[i] and alpha[j], handle bounds carefully float[] Q_i = Q.get_Q(i,active_size); float[] Q_j = Q.get_Q(j,active_size); double C_i = get_C(i); double C_j = get_C(j); double old_alpha_i = alpha[i]; double old_alpha_j = alpha[j]; if(y[i]!=y[j]) { double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; if (quad_coef <= 0) quad_coef = 1e-12; double delta = (-G[i]-G[j])/quad_coef; double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; if(diff > 0) { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = diff; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = -diff; } } if(diff > C_i - C_j) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = C_i - diff; } } else { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = C_j + diff; } } } else { double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; if (quad_coef <= 0) quad_coef = 1e-12; double delta = (G[i]-G[j])/quad_coef; double sum = alpha[i] + alpha[j]; alpha[i] -= delta; alpha[j] += delta; if(sum > C_i) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = sum - C_i; } } else { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = sum; } } if(sum > C_j) { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = sum - C_j; } } else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -