// svm.java
package com.prudsys.pdm.Models.Regression.SVM.Algorithms.libsvm;
import java.io.*;
import java.util.*;
//
// Kernel Cache
//
// l is the number of total data items
// size is the cache size limit in bytes
//
class Cache {
private final int l;
private int size;
private final class head_t
{
head_t prev, next; // a circular list
float[] data;
int len; // data[0,len) is cached in this entry
}
private final head_t[] head;
private head_t lru_head;
Cache(int l_, int size_)
{
l = l_;
size = size_;
head = new head_t[l];
for(int i=0;i<l;i++) head[i] = new head_t();
size /= 4; // convert the byte budget into a number of cached floats (4 bytes each)
size -= l * (16/4); // subtract head_t overhead, sizeof(head_t) == 16
lru_head = new head_t();
lru_head.next = lru_head.prev = lru_head;
}
private void lru_delete(head_t h)
{
// delete from current location
h.prev.next = h.next;
h.next.prev = h.prev;
}
private void lru_insert(head_t h)
{
// insert to last position
h.next = lru_head;
h.prev = lru_head.prev;
h.prev.next = h;
h.next.prev = h;
}
// request data [0,len)
// return some position p where [p,len) needs to be filled
// (p >= len if nothing needs to be filled)
// java: simulate pointer using single-element array
int get_data(int index, float[][] data, int len)
{
head_t h = head[index];
if(h.len > 0) lru_delete(h);
int more = len - h.len;
if(more > 0)
{
// free old space
while(size < more)
{
head_t old = lru_head.next;
lru_delete(old);
size += old.len;
old.data = null;
old.len = 0;
}
// allocate new space
float[] new_data = new float[len];
if(h.data != null) System.arraycopy(h.data,0,new_data,0,h.len);
h.data = new_data;
size -= more;
do {int _=h.len; h.len=len; len=_;} while(false);
}
lru_insert(h);
data[0] = h.data;
return len;
}
void swap_index(int i, int j)
{
if(i==j) return;
if(head[i].len > 0) lru_delete(head[i]);
if(head[j].len > 0) lru_delete(head[j]);
do {float[] _=head[i].data; head[i].data=head[j].data; head[j].data=_;} while(false);
do {int _=head[i].len; head[i].len=head[j].len; head[j].len=_;} while(false);
if(head[i].len > 0) lru_insert(head[i]);
if(head[j].len > 0) lru_insert(head[j]);
if(i>j) do {int _=i; i=j; j=_;} while(false);
for(head_t h = lru_head.next; h!=lru_head; h=h.next)
{
if(h.len > i)
{
if(h.len > j)
do {float _=h.data[i]; h.data[i]=h.data[j]; h.data[j]=_;} while(false);
else
{
// give up
lru_delete(h);
size += h.len;
h.data = null;
h.len = 0;
}
}
}
}
}
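//
// Illustrative usage sketch (assumed caller pattern, not code from this file):
// a Kernel subclass's get_Q typically asks the cache for one column and fills
// only the tail [start,len) that was not cached yet, e.g.
//
// float[][] data = new float[1][];          // java: simulated pointer
// int start = cache.get_data(i, data, len); // data[0] now holds the column buffer
// for(int k = start; k < len; k++)
//   data[0][k] = (float)kernel_function(i, k);
// return data[0];
//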
//
// Kernel evaluation
//
// the static method k_function is for doing single kernel evaluation
// the constructor of Kernel prepares to calculate the l*l kernel matrix
// the member function get_Q is for getting one column from the Q Matrix
//
abstract class Kernel {
private svm_node[][] x;
private final double[] x_square;
// svm_parameter
private final int kernel_type;
private final double degree;
private final double gamma;
private final double coef0;
abstract float[] get_Q(int column, int len);
void swap_index(int i, int j)
{
do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false);
if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false);
}
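// tanh via the identity tanh(x) = 1 - 2/(exp(2x)+1)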
private static double tanh(double x)
{
double e = Math.exp(x);
return 1.0-2.0/(e*e+1);
}
double kernel_function(int i, int j)
{
switch(kernel_type)
{
case svm_parameter.LINEAR:
return dot(x[i],x[j]);
case svm_parameter.POLY:
return Math.pow(gamma*dot(x[i],x[j])+coef0,degree);
case svm_parameter.RBF:
return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
case svm_parameter.SIGMOID:
return tanh(gamma*dot(x[i],x[j])+coef0);
default:
System.err.print("unknown kernel function.\n");
System.exit(1);
return 0; // java
}
}
Kernel(int l, svm_node[][] x_, svm_parameter param)
{
this.kernel_type = param.kernel_type;
this.degree = param.degree;
this.gamma = param.gamma;
this.coef0 = param.coef0;
x = (svm_node[][])x_.clone();
if(kernel_type == svm_parameter.RBF)
{
x_square = new double[l];
for(int i=0;i<l;i++)
x_square[i] = dot(x[i],x[i]);
}
else x_square = null;
}
static double dot(svm_node[] x, svm_node[] y)
{
double sum = 0;
int xlen = x.length;
int ylen = y.length;
int i = 0;
int j = 0;
while(i < xlen && j < ylen)
{
if(x[i].index == y[j].index)
sum += x[i++].value * y[j++].value;
else
{
if(x[i].index > y[j].index)
++j;
else
++i;
}
}
return sum;
}
static double k_function(svm_node[] x, svm_node[] y,
svm_parameter param)
{
switch(param.kernel_type)
{
case svm_parameter.LINEAR:
return dot(x,y);
case svm_parameter.POLY:
return Math.pow(param.gamma*dot(x,y)+param.coef0,param.degree);
case svm_parameter.RBF:
{
double sum = 0;
int xlen = x.length;
int ylen = y.length;
int i = 0;
int j = 0;
while(i < xlen && j < ylen)
{
if(x[i].index == y[j].index)
{
double d = x[i++].value - y[j++].value;
sum += d*d;
}
else if(x[i].index > y[j].index)
{
sum += y[j].value * y[j].value;
++j;
}
else
{
sum += x[i].value * x[i].value;
++i;
}
}
while(i < xlen)
{
sum += x[i].value * x[i].value;
++i;
}
while(j < ylen)
{
sum += y[j].value * y[j].value;
++j;
}
return Math.exp(-param.gamma*sum);
}
case svm_parameter.SIGMOID:
return tanh(param.gamma*dot(x,y)+param.coef0);
default:
System.err.print("unknown kernel function.\n");
System.exit(1);
return 0; // java
}
}
}
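//
// Illustrative sketch (assumed setup, not code from this file): a single RBF
// kernel evaluation with k_function. Each svm_node holds one (index,value)
// pair of a sparse vector, with indices in ascending order; node(...) below is
// a hypothetical helper that builds such a pair.
//
// svm_node[] a = { node(1, 0.5), node(3, -1.0) };
// svm_node[] b = { node(1, 0.4), node(2,  2.0) };
// svm_parameter param = new svm_parameter();
// param.kernel_type = svm_parameter.RBF;
// param.gamma = 0.5;
// double k = Kernel.k_function(a, b, param); // exp(-gamma*||a-b||^2)
//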
// Generalized SMO+SVMlight algorithm
// Solves:
//
// min 0.5(\alpha^T Q \alpha) + b^T \alpha
//
// y^T \alpha = \delta
// y_i = +1 or -1
// 0 <= alpha_i <= Cp for y_i = 1
// 0 <= alpha_i <= Cn for y_i = -1
//
// Given:
//
// Q, b, y, Cp, Cn, and an initial feasible point \alpha
// l is the size of vectors and matrices
// eps is the stopping criterion
//
// solution will be put in \alpha, objective value will be put in obj
//
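// For example, the C-SVC dual takes exactly this form with b_i = -1 for all i,
// \delta = 0 and Cp = Cn = C.
//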
class Solver {
int active_size;
byte[] y;
double[] G; // gradient of objective function
static final byte LOWER_BOUND = 0;
static final byte UPPER_BOUND = 1;
static final byte FREE = 2;
byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
double[] alpha;
Kernel Q;
double eps;
double Cp,Cn;
double[] b;
int[] active_set;
double[] G_bar; // gradient, if we treat free variables as 0
int l;
boolean unshrinked; // XXX
static final double INF = java.lang.Double.POSITIVE_INFINITY;
double get_C(int i)
{
return (y[i] > 0)? Cp : Cn;
}
void update_alpha_status(int i)
{
if(alpha[i] >= get_C(i))
alpha_status[i] = UPPER_BOUND;
else if(alpha[i] <= 0)
alpha_status[i] = LOWER_BOUND;
else alpha_status[i] = FREE;
}
boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
boolean is_free(int i) { return alpha_status[i] == FREE; }
// java: information about solution except alpha,
// because we cannot return multiple values otherwise...
static class SolutionInfo {
double obj;
double rho;
double upper_bound_p;
double upper_bound_n;
double r; // for Solver_NU
}
void swap_index(int i, int j)
{
Q.swap_index(i,j);
do {byte _=y[i]; y[i]=y[j]; y[j]=_;} while(false);
do {double _=G[i]; G[i]=G[j]; G[j]=_;} while(false);
do {byte _=alpha_status[i]; alpha_status[i]=alpha_status[j]; alpha_status[j]=_;} while(false);
do {double _=alpha[i]; alpha[i]=alpha[j]; alpha[j]=_;} while(false);
do {double _=b[i]; b[i]=b[j]; b[j]=_;} while(false);
do {int _=active_set[i]; active_set[i]=active_set[j]; active_set[j]=_;} while(false);
do {double _=G_bar[i]; G_bar[i]=G_bar[j]; G_bar[j]=_;} while(false);
}
void reconstruct_gradient()
{
// reconstruct inactive elements of G from G_bar and free variables
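// (variables at the upper bound contribute through G_bar, variables at the
// lower bound contribute nothing, so only the free variables are re-added)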
if(active_size == l) return;
int i;
for(i=active_size;i<l;i++)
G[i] = G_bar[i] + b[i];
for(i=0;i<active_size;i++)
if(is_free(i))
{
float[] Q_i = Q.get_Q(i,l);
double alpha_i = alpha[i];
for(int j=active_size;j<l;j++)
G[j] += alpha_i * Q_i[j];
}
}
void Solve(int l, Kernel Q, double[] b_, byte[] y_,
double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, int shrinking)
{
this.l = l;
this.Q = Q;
b = (double[])b_.clone();
y = (byte[])y_.clone();
alpha = (double[])alpha_.clone();
this.Cp = Cp;
this.Cn = Cn;
this.eps = eps;
this.unshrinked = false;
// initialize alpha_status
{
alpha_status = new byte[l];
for(int i=0;i<l;i++)
update_alpha_status(i);
}
// initialize active set (for shrinking)
{
active_set = new int[l];
for(int i=0;i<l;i++)
active_set[i] = i;
active_size = l;
}
// initialize gradient
{
G = new double[l];
G_bar = new double[l];
int i;
for(i=0;i<l;i++)
{
G[i] = b[i];
G_bar[i] = 0;
}
for(i=0;i<l;i++)
if(!is_lower_bound(i))
{
float[] Q_i = Q.get_Q(i,l);
double alpha_i = alpha[i];
int j;
for(j=0;j<l;j++)
G[j] += alpha_i*Q_i[j];
if(is_upper_bound(i))
for(j=0;j<l;j++)
G_bar[j] += get_C(i) * Q_i[j];
}
}
// optimization step
int iter = 0;
int counter = Math.min(l,1000)+1;
int[] working_set = new int[2];
while(true)
{
// show progress and do shrinking
if(--counter == 0)
{
counter = Math.min(l,1000);
if(shrinking!=0) do_shrinking();
System.err.print(".");
}
if(select_working_set(working_set)!=0)
{
// reconstruct the whole gradient
reconstruct_gradient();
// reset active set size and check
active_size = l;
System.err.print("*");
if(select_working_set(working_set)!=0)
break;
else
counter = 1; // do shrinking next iteration
}
int i = working_set[0];
int j = working_set[1];
++iter;
// update alpha[i] and alpha[j], handle bounds carefully
float[] Q_i = Q.get_Q(i,active_size);
float[] Q_j = Q.get_Q(j,active_size);
double C_i = get_C(i);
double C_j = get_C(j);
double old_alpha_i = alpha[i];
double old_alpha_j = alpha[j];
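// The equality constraint y^T alpha = delta couples the two updates: for
// y[i] != y[j] both alphas move by the same amount, for y[i] == y[j] they
// move by opposite amounts. delta below is the unconstrained minimizer of the
// objective along that feasible direction; the subsequent if/else blocks clip
// the pair back onto the box [0,C_i] x [0,C_j].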
if(y[i]!=y[j])
{
double delta = (-G[i]-G[j])/(Q_i[i]+Q_j[j]+2*Q_i[j]);
double diff = alpha[i] - alpha[j];
alpha[i] += delta;
alpha[j] += delta;
if(diff > 0)
{
if(alpha[j] < 0)
{
alpha[j] = 0;
alpha[i] = diff;
}
}
else
{
if(alpha[i] < 0)
{
alpha[i] = 0;
alpha[j] = -diff;
}
}
if(diff > C_i - C_j)
{
if(alpha[i] > C_i)
{
alpha[i] = C_i;
alpha[j] = C_i - diff;
}
}
else
{
if(alpha[j] > C_j)
{
alpha[j] = C_j;
alpha[i] = C_j + diff;
}
}
}
else
{
double delta = (G[i]-G[j])/(Q_i[i]+Q_j[j]-2*Q_i[j]);
double sum = alpha[i] + alpha[j];
alpha[i] -= delta;
alpha[j] += delta;
if(sum > C_i)
{
if(alpha[i] > C_i)
{
alpha[i] = C_i;
alpha[j] = sum - C_i;
}
}
else
{
if(alpha[j] < 0)
{
alpha[j] = 0;
alpha[i] = sum;
}
}
if(sum > C_j)
{
if(alpha[j] > C_j)
{
alpha[j] = C_j;
alpha[i] = sum - C_j;
}
}
else
{
if(alpha[i] < 0)
{
alpha[i] = 0;
alpha[j] = sum;
}
}
}
// update G
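// G = Q*alpha + b, so changing alpha[i] and alpha[j] shifts every G[k] by
// Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j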
double delta_alpha_i = alpha[i] - old_alpha_i;
double delta_alpha_j = alpha[j] - old_alpha_j;
for(int k=0;k<active_size;k++)
{
G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
}
// update alpha_status and G_bar
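// G_bar[k] = sum over upper-bounded i of C_i*Q_i[k], so it only needs
// adjusting when i or j crosses the upper bound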
{
boolean ui = is_upper_bound(i);
boolean uj = is_upper_bound(j);
update_alpha_status(i);
update_alpha_status(j);
int k;
if(ui != is_upper_bound(i))
{
Q_i = Q.get_Q(i,l);
if(ui)
for(k=0;k<l;k++)
G_bar[k] -= C_i * Q_i[k];
else
for(k=0;k<l;k++)
G_bar[k] += C_i * Q_i[k];
}
if(uj != is_upper_bound(j))
{
Q_j = Q.get_Q(j,l);
if(uj)
for(k=0;k<l;k++)
G_bar[k] -= C_j * Q_j[k];
else
for(k=0;k<l;k++)
G_bar[k] += C_j * Q_j[k];
}
}
}
// calculate rho
si.rho = calculate_rho();
// calculate objective value
{
double v = 0;
int i;
for(i=0;i<l;i++)
v += alpha[i] * (G[i] + b[i]);
si.obj = v/2;
}
// put back the solution
{
for(int i=0;i<l;i++)
alpha_[active_set[i]] = alpha[i];
}
si.upper_bound_p = Cp;
si.upper_bound_n = Cn;
System.out.print("\noptimization finished, #iter = "+iter+"\n");
}
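// Illustrative sketch (assumed caller, unweighted C-SVC, following the
// solve_c_svc routine found later in LIBSVM's svm.java; SVC_Q is a Kernel
// subclass defined elsewhere in that file, prob and param are the training
// problem and parameters):
//
// double[] minus_ones = new double[l];
// byte[] y = new byte[l];
// double[] alpha = new double[l];
// for(int i = 0; i < l; i++)
// { alpha[i] = 0; minus_ones[i] = -1; y[i] = (byte)((prob.y[i] > 0) ? +1 : -1); }
// Solver.SolutionInfo si = new Solver.SolutionInfo();
// new Solver().Solve(l, new SVC_Q(prob, param, y), minus_ones, y, alpha,
//                    param.C, param.C, param.eps, si, param.shrinking);
//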
// return 1 if already optimal, return 0 otherwise
int select_working_set(int[] working_set)
{
// return i,j which maximize -grad(f)^T d, under constraint
// if alpha_i == C, d != +1
// if alpha_i == 0, d != -1
double Gmax1 = -INF; // max { -grad(f)_i * d | y_i*d = +1 }
int Gmax1_idx = -1;
double Gmax2 = -INF; // max { -grad(f)_i * d | y_i*d = -1 }
int Gmax2_idx = -1;
for(int i=0;i<active_size;i++)
{
if(y[i]==+1) // y = +1
{
if(!is_upper_bound(i)) // d = +1
{
if(-G[i] > Gmax1)
{
Gmax1 = -G[i];
Gmax1_idx = i;
}
}
if(!is_lower_bound(i)) // d = -1