% SVMTRAIN2 - Trains a support vector machine in batch mode
%             using the L1 soft margin approach developed by
%             Diehl and Cauwenberghs for two-class problems.
%
% Syntax: [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X,y,C,type,scale)
%         [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X,y,C,type,scale,uind)
%         (trains a new SVM on the given examples)
%
%         [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X,y,C)
%         [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X,y,C,uind)
%         (trains the current SVM in memory on the given examples)
%
% a: alpha coefficients
% b: bias
% g: partial derivatives of cost function w.r.t. alpha coefficients
% ind: cell array containing indices of margin, error and reserve vectors
%      ind{1}: indices of margin vectors
%      ind{2}: indices of error vectors
%      ind{3}: indices of reserve vectors
% uind: column vector of user-defined example indices
%       (used for unlearning specified examples)
% X_mer: matrix of margin, error and reserve vectors stored columnwise
% y_mer: column vector of class labels (-1/+1) for margin, error and reserve vectors
% Rs: inverse of extended kernel matrix for margin vectors
% Q: extended kernel matrix for all vectors
% X: matrix of training vectors stored columnwise
% y: column vector of class labels (-1/+1) for training vectors
% C: soft-margin regularization parameter(s)
%    dimensionality of C     assumption
%    1-dimensional vector    universal regularization parameter
%    2-dimensional vector    class-conditional regularization parameters (-1/+1)
%    n-dimensional vector    regularization parameter per example
%                            (where n = # of examples)
% type: kernel type
%       1: linear kernel         K(x,y) = x'*y
%       2-4: polynomial kernel   K(x,y) = (scale*x'*y + 1)^type
%       5: Gaussian kernel with variance 1/(2*scale)
% scale: kernel scale
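%
% Example (a minimal usage sketch; the variable names and kernel settings
% below are illustrative, not prescribed by this function):
%
%   % X is d-by-n with examples stored columnwise; y is n-by-1 in {-1,+1}
%   C = 10;                  % universal regularization parameter
%   % C = [1 ; 10];          % or class-conditional: C(1) for y = -1, C(2) for y = +1
%   type = 5; scale = 0.5;   % Gaussian kernel with variance 1/(2*scale)
%   [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X,y,C,type,scale);
%
%   % later, train the SVM currently in memory on additional examples
%   [a,b,g,ind,uind,X_mer,y_mer,Rs,Q] = svmtrain2(X_new,y_new,C);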
%
% Version 3.22e -- Comments to diehl@alumni.cmu.edu
%
function [a,b,g,ind,uind,X,y,Rs,Q] = svmtrain2(X_new,y_new,C_new,varargin)
% flags for example state
MARGIN = 1;
ERROR = 2;
RESERVE = 3;
UNLEARNED = 4;
% create a vector containing the regularization parameter
% for each example if necessary
if (length(C_new) == 1)     % same regularization parameter for all examples
   C_new = C_new*ones(size(y_new));
elseif (length(C_new) == 2) % class-conditional regularization parameters
   flags = (y_new == -1);
   C_new = C_new(1)*flags + C_new(2)*(~flags);
end;
if (nargin >= 5)
   % define arguments for a new model
   type_new = varargin{1};
   scale_new = varargin{2};
   if (nargin == 6)
      uind_new = varargin{3};
   else
      uind_new = zeros(size(y_new));
   end;
   new_model = 1;
else
   % define arguments for training the current model in memory
   if (nargin == 4)
      uind_new = varargin{1};
   else
      uind_new = zeros(size(y_new));
   end;
   new_model = 0;
end;
% define global variables
global a; % alpha coefficients
global b; % bias
global C; % regularization parameters
global deps; % jitter factor in kernel matrix
global g; % partial derivatives of cost function w.r.t. alpha coefficients
global ind; % cell array containing indices of margin, error, reserve and unlearned vectors
global kernel_evals; % kernel evaluations
global max_reserve_vectors; % maximum number of reserve vectors stored
global perturbations; % number of perturbations
global Q; % extended kernel matrix for all vectors
global Rs; % inverse of extended kernel matrix for margin vectors
global scale; % kernel scale
global type; % kernel type
global uind; % user-defined example indices
global X; % matrix of margin, error, reserve and unlearned vectors stored columnwise
global y; % column vector of class labels (-1/+1) for margin, error, reserve and unlearned vectors
% initialize variables
deps = 1e-5;
max_reserve_vectors = 3000;
if (new_model)
   num_examples = size(X_new,2);
   a = zeros(num_examples,1);
   b = 0;
   C = C_new;
   g = -ones(num_examples,1);
   ind = cell(4,1);
   ind{UNLEARNED} = 1:num_examples;
   kernel_evals = 0;
   perturbations = 0;
   Q = y_new';
   Rs = Inf;
   scale = scale_new;
   type = type_new;
   uind = uind_new;
   X = X_new;
   y = y_new;
else
   num_examples = size(X,2);
   num_new_examples = size(X_new,2);
   a = [a ; zeros(num_new_examples,1)];
   C = [C ; C_new];
   g_new = y_new.*svmeval(X_new) - 1;
   g = [g ; g_new];
   flag = (g_new > 0);
   indr = find(flag)';
   indu = find(~flag)';
   move_indr([],indr+num_examples);
   ind{UNLEARNED} = indu+num_examples;
   % currently assumes there are no duplicate examples in the data - may not necessarily be true!
   if (length(ind{MARGIN}) > 0)
      Q_new = [y_new' ; (y(ind{MARGIN})*y_new').*kernel(X(:,ind{MARGIN}),X_new,type,scale)];
   else
      Q_new = y_new';
   end;
   Q = [Q Q_new];
   uind = [uind ; uind_new];
   X = [X X_new];
   y = [y ; y_new];
   num_examples = num_examples + num_new_examples;
end;
% perturbation coefficients
lambda = zeros(num_examples,1);
lambda(ind{UNLEARNED}) = C(ind{UNLEARNED});
% cached results
disp('Precomputing results for unlearned vectors.');
SQl = ((y*y(ind{UNLEARNED})').*kernel(X,X(:,ind{UNLEARNED}),type,scale))*lambda(ind{UNLEARNED}) + deps*lambda;
Syl = y(ind{UNLEARNED})'*lambda(ind{UNLEARNED});
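% SQl(i) caches sum_j y(i)*y(j)*K(x_i,x_j)*lambda(j) over the unlearned
% examples j (plus the deps jitter term), and Syl caches sum_j y(j)*lambda(j);
% both sums shrink as examples are learned (see the updates inside the loop).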
p = 0;
num_MVs = length(ind{MARGIN});
num_learned = 0;
disp('Beginning training.');
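% Main loop: the perturbation parameter p rises from 0 to 1, scaling each
% unlearned coefficient toward its target lambda = C, while the learned
% examples are kept consistent with the KKT conditions via beta (sensitivity
% of b and the margin vector coefficients) and gamma (sensitivity of g).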
while ((p < 1) | (length(ind{UNLEARNED}) > 0))
   if (p < 1)
      perturbations = perturbations + 1;
      % compute beta and gamma
      if (num_MVs > 0)
         v = zeros(num_MVs+1,1);
         if (p < 1-eps)
            v(1) = -Syl - sum(y.*a)/(1-p);
         else
            v(1) = -Syl;
         end;
         v(2:num_MVs+1) = -SQl(ind{MARGIN});
         beta = Rs*v;
         gamma = zeros(size(Q,2),1);
         ind_temp = [ind{ERROR} ind{RESERVE} ind{UNLEARNED}];
         if (length(ind_temp) > 0)
            gamma(ind_temp) = Q(:,ind_temp)'*beta + SQl(ind_temp);
         end;
      else
         beta = 0;
         gamma = SQl;
      end;
      % minimum acceptable parameter change
      [min_delta_p,indss,cstatus,nstatus] = min_delta_p_s(p,gamma,beta,lambda);
      % update a, b, g and p
      if (length(ind{UNLEARNED}) > 0)
         a(ind{UNLEARNED}) = a(ind{UNLEARNED}) + lambda(ind{UNLEARNED})*min_delta_p;
      end;
      if (num_MVs > 0)
         a(ind{MARGIN}) = a(ind{MARGIN}) + beta(2:num_MVs+1)*min_delta_p;
      end;
      b = b + beta(1)*min_delta_p;
      g = g + gamma*min_delta_p;
      p = p + min_delta_p;
      % perform bookkeeping
      indco = bookkeeping(indss,cstatus,nstatus);
      % update Rs and Q if necessary
      if (nstatus == MARGIN)
         num_MVs = num_MVs + 1;
         if (num_MVs > 1)
            % compute beta and gamma for indss
            beta = -Rs*Q(:,indss);
            gamma = kernel(X(:,indss),X(:,indss),type,scale) + deps + Q(:,indss)'*beta;
         end;
         % expand Rs and Q
         updateRQ(beta,gamma,indss);
      elseif (cstatus == MARGIN)
         % compress Rs and Q
         num_MVs = num_MVs - 1;
         updateRQ(indco);
      end;
      % update SQl and Syl when the current status of indss is UNLEARNED
      if (cstatus == UNLEARNED)
         num_learned = num_learned + 1;
         if (nstatus == MARGIN)
            SQl = SQl - Q(num_MVs+1,:)'*lambda(indss);
         else
            SQl = SQl - ((y*y(indss)).*kernel(X,X(:,indss),type,scale))*lambda(indss);
            SQl(indss) = SQl(indss) - deps*lambda(indss);
         end;
         Syl = Syl - y(indss)*lambda(indss);
         if (mod(num_learned,50) == 0)
            s = sprintf('Learned %d examples.',num_learned);
            disp(s);
         end;
      end;
   else
      % label the remaining unlearned vectors as error vectors
      num_learned = num_learned + length(ind{UNLEARNED});
      a(ind{UNLEARNED}) = C(ind{UNLEARNED});
      [ind{UNLEARNED},ind{ERROR}] = move_ind(ind{UNLEARNED},ind{ERROR},ind{UNLEARNED});
      if (mod(num_learned,50) == 0)
         s = sprintf('Learned %d examples.',num_learned);
         disp(s);
      end;
   end;
end;
if (mod(num_learned,50) ~= 0)
   s = sprintf('Learned %d examples.',num_learned);
   disp(s);
end;
disp('Training complete!');
% margin vectors satisfy g = 0 exactly; zero them to remove accumulated numerical error
g(ind{MARGIN}) = 0;
% remove all but the closest reserve vectors from the dataset if necessary
if (length(ind{RESERVE}) == max_reserve_vectors)
   ind_keep = [ind{MARGIN} ind{ERROR} ind{RESERVE}];
   X = X(:,ind_keep);
   y = y(ind_keep);
   a = a(ind_keep);
   g = g(ind_keep);
   Q = Q(:,ind_keep);
   uind = uind(ind_keep);
   ind{MARGIN} = 1:length(ind{MARGIN});
   ind{ERROR} = length(ind{MARGIN}) + (1:length(ind{ERROR}));
   ind{RESERVE} = length(ind{MARGIN}) + length(ind{ERROR}) + (1:length(ind{RESERVE}));
end;
% summary statistics
s = sprintf('\nMargin vectors:\t\t%d',length(ind{MARGIN}));
disp(s);
s = sprintf('Error vectors:\t\t%d',length(ind{ERROR}));
disp(s);
s = sprintf('Reserve vectors:\t%d',length(ind{RESERVE}));
disp(s);
s = sprintf('Kernel evaluations:\t%d\n',kernel_evals);
disp(s);