📄 ssvr_crstrain.m
字号:
function [alpha,sv,beta,time,centers,obj] = ssvr_crstrain(samples,labels,param)
%
% [alpha,sv,beta] = svr_crstrain(samples,targets,param)
% This function builds sparse insensitive Huber kernel regression by a mathcing pursuit method.
% The subroutine requires the following input:
% samples: n x d sample matrix
% targets: n x 1 target vector
% param: param.kernel,param.kernelparam,param.lamda,param.epsilon, param.delta
% The routine returns
% alpha: vector of expansion coefficients
% sv: indices of support vectors
% beta: matrix of expansion coefficients
%
num = 100;
if (nargin ~= 3) % check correct number of arguments
help ssvr_crstrain
else
fprintf('Sparse Support Vector Regression by Constructing a Reduced Set............\n')
tic;
% set the initial parameters
[n,d] = size(samples);
time = zeros(param.nsv,1);
centers = []; Kcenters = [];
K = []; R = [];
sv = [];
alpha = []; beta = zeros(param.nsv,param.nsv);
obj = [];
r = -labels;
I0 = find(abs(r) <= param.epsilon);
I1 = find(abs(r) > param.epsilon & abs(r) <= param.delta);
I2 = find(abs(r) > param.delta);
if param.trace
fprintf('Step\t\t\tObj\n');
end
% loops until a specified number of basis functions is reached
for i = 1:param.nsv
% select the basis function to be included from a random subset of
% candidates (exclude those which has already been selected)
perm = randperm(n);
tsamples = samples(perm(1:num),:);
TK = evalkernel([samples; centers],tsamples,param.kernel,param.kernelparam);
normcoff = sum(TK.^2)';
term = zeros(n,1);
term(I1) = 2*(abs(r(I1)) - param.epsilon).*sign(r(I1));
term(I2) = 2*(param.delta - param.epsilon)*sign(r(I2));
term = [term; 2*param.lamda*alpha];
score = -(TK'*term).^2./normcoff/sum(term.^2);
[temp,tindex] = min(score);
% construct the basis function
options = optimset('GradObj','on');
options = optimset('GradConstr','on');
options = optimset(options,'LargeScale','off');
options = optimset(options,'DerivativeCheck','off');
options = optimset(options,'Display','off');
options = optimset(options,'MaxIter',500);
options = optimset(options,'MaxFunEvals',5000);
options = optimset(options,'TolFun',1e-1);
options = optimset(options,'TolX',1e-3);
options = optimset(options,'TolCon',1e-0);
constant = normcoff(tindex);
samplescenters = [samples; centers];
aaa = sum(samplescenters.^2,2);
x0 = tsamples(tindex,:)';
f0 = correlation(x0,term,samplescenters,aaa,param,constant);
[x,f] = fmincon(@correlation,x0,[],[],[],[],[],[],@corrconstraintceq,...
options,term,samplescenters,aaa,param,constant);
add = evalkernel([samples;centers],x',param.kernel,param.kernelparam);
if abs(sum(add.^2) - constant) > 1e-0 | f > f0
x = x0;
end
addK = evalkernel(samples,x',param.kernel,param.kernelparam);
centers = [centers; x'];
addKcenters = evalkernel(centers,x',param.kernel,param.kernelparam);
Kcenters = [Kcenters addKcenters(1:end-1); addKcenters(1:end-1)' addKcenters(end)];
% solve the subprolbem including the new basis function
K = [K addK];
x = K(I1,i);
X = K(I1,1:i-1);
diag_k = x'*x + param.lamda*addKcenters(end); % diagonal element k in K'K + param.lamda*K matrix
col_k = x'*X + param.lamda*addKcenters(1:end-1,1)'; % elements of column k in K'K + param.lamda*K matrix
R = cholinsert(R,diag_k,col_k); % Cholesky insert of the Hessian
alpha = [alpha; 0];
[alpha,R,r] = finiteNewton(R,r,K,Kcenters,labels,param,alpha,I0,I1,I2);
% update I0, I1 and I2, and output the current objective value
beta(1:i,i) = alpha;
I0 = find(abs(r) <= param.epsilon);
I1 = find(abs(r) > param.epsilon & abs(r) <= param.delta);
I2 = find(abs(r) > param.delta);
obj = [obj; sum((abs(r(I1)) - param.epsilon).^2)...
+ (param.delta - param.epsilon)*sum(2*abs(r(I2)) - param.delta - param.epsilon)...
+ param.lamda*alpha'*Kcenters*alpha];
if param.trace
fprintf('%d\t\t\t\t%d\n', i, obj(i));
end
time(i) = toc;
end
fprintf('Execution time : %4.1f seconds\n',time(end));
sv = 1:param.nsv;
end
function [alpha,R,r] = finiteNewton(R,r,K,Kcenters,labels,param,oldalpha,oldI0,oldI1,oldI2)
while 1
% compute the Newton direction
c = K([oldI1;oldI2],:)'*[labels(oldI1) + param.epsilon*sign(r(oldI1));...
- (param.delta - param.epsilon)*sign(r(oldI2))];
h = R\(R'\c) - oldalpha;
% exaxt line search
u = K*h;
ucenters = Kcenters*h;
grad = 2*K(oldI1,:)'*(r(oldI1) + labels(oldI1)) + 2*param.lamda*Kcenters*oldalpha - 2*c;
coff(1,1) = h'*grad;
coff(1,2) = 2*u(oldI1)'*u(oldI1) + 2*param.lamda*h'*ucenters;
t = svrexactlinesearch(coff,r,u,param.epsilon,param.delta,oldI0,oldI1,oldI2);
% compute the new alpha and r
alpha = oldalpha + t*h;
r = r + t*u;
I0 = find(abs(r) <= param.epsilon);
I1 = find(abs(r) > param.epsilon & abs(r) <= param.delta);
I2 = find(abs(r) > param.delta);
% Cholesky update
index = setdiff(I1,oldI1);
for i = 1:length(index)
R = cholupdate(R,K(index(i),:)','+');
end
index = setdiff(oldI1,I1);
for i = 1:length(index)
R = cholupdate(R,K(index(i),:)','-');
end
% return if the stop condition is reached
if isempty(setxor(I1,oldI1)) & isempty(setxor(I2,oldI2))
break;
end
% update oldI0, oldI1, oldI2 and oldalpha
oldI0 = I0;
oldI1 = I1;
oldI2 = I2;
oldalpha = alpha;
end
function R = cholinsert(R, diag_k, col_k)
% Cholesky insert
if isempty(R)
R = sqrt(diag_k);
else
R_k = R'\col_k'; % R'R_k = (K'K)_k + param.lamda*K, solve for R_k
R_kk = sqrt(diag_k - R_k'*R_k); % norm(x'x) = norm(R'*R), find last element by exclusion
R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
end
function t = svrexactlinesearch(coff,r,u,epsilon,delta,nonsv,sv1,sv2)
%
% This function performs the exact line search for insensitive Huber loss function.
% The subroutine requires the following input:
% coff: initial coefficients
% r: residual vector
% u: K*h
% epsilon: parameter of loss function
% delta: parameter of loss function
% nonsv: the index of non-support vectors
% sv1: the index of the support vectors lying in the quadratic loss
% sv2: the index of the support vectors lying in the linear loss
% The routine returns
% t: step size
%
if norm(u) < eps
t = 0;
return;
end
% from zero loss to the quadratic loss, the linear loss
index = find(abs(u(nonsv)) > 0);
index = nonsv(index);
kv = [(sign(u(index))*epsilon - r(index))./u(index) index]; % enter the quadratic loss
kv = [kv; [(sign(u(index))*delta - r(index))./u(index) -index]]; % leave the quadratic loss
% from quadratic loss to the other loss
index1 = find(r(sv1).* u(sv1) > 0);
index1 = sv1(index1);
kv = [kv; [(sign(u(index1))*delta - r(index1))./u(index1) -index1]]; % enter the linear loss
index2 = find(r(sv1).* u(sv1) < 0);
index2 = sv1(index2);
kv = [kv; [(-sign(u(index2))*epsilon - r(index2))./u(index2) -index2]]; % enter the zero loss
kv = [kv; [(sign(u(index2))*epsilon - r(index2))./u(index2) index2]]; % enter the quadratic loss
kv = [kv; [(sign(u(index2))*delta - r(index2))./u(index2) -index2]]; % enter the linear loss
% from linear loss to the other loss
index = find(r(sv2).* u(sv2) < 0);
index = sv2(index);
kv = [kv; [(-sign(u(index))*delta - r(index))./u(index) index]]; % enters the quadratic loss
kv = [kv; [(-sign(u(index))*epsilon - r(index))./u(index) -index]]; % enters the zero loss
kv = [kv; [(sign(u(index))*epsilon - r(index))./u(index) index]]; % again enters the quadratic loss
kv = [kv; [(sign(u(index))*delta - r(index))./u(index) -index]]; % enters the linear loss
% sort the kink values
[temp ind] = sort(kv(:,1));
kv = kv(ind,:);
% compute the step length
kv = [kv; [Inf,0]];
leng = size(kv,1);
for i = 1:leng
if coff(1,1) + kv(i,1)*coff(1,2) > 0
t = -coff(1,1)/coff(1,2);
break;
else % update coefficients
if kv(i,2) > 0
ind = abs(kv(i,2));
coff(1,1) = coff(1,1) - 2*kv(i,1)*u(ind)^2;
coff(1,2) = coff(1,2) + 2*u(ind)^2;
else
ind = abs(kv(i,2));
coff(1,1) = coff(1,1) + 2*kv(i,1)*u(ind)^2;;
coff(1,2) = coff(1,2) - 2*u(ind)^2;
end
end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -