📄 example43_run_b3.m
字号:
% the SVM output only by the difference of old and new alpha
changedSV = find(net.alpha~=alphaOld);
changedAlpha = net.alpha(changedSV)-alphaOld(changedSV);
end
if strcmp(net.kernel, 'linear'),
chunks = ceil(length(changedSV)/chsize);
% Linear kernel: Build the normal vector of the separating
% hyperplane by computing the weighted sum of all Support Vectors
for ch = 1:chunks,
ind = (1+(ch-1)*chsize):min(length(changedSV), ch*chsize);
temp = changedAlpha(ind).*Y(changedSV(ind));
net.normalw = net.normalw+temp'*X(changedSV(ind), :);
end
% Find the output of the SVM by multiplying the examples with the
% normal vector
SVMout = zeros(N, 1);
chunks = ceil(N/chsize);
for ch = 1:chunks,
ind = (1+(ch-1)*chsize):min(N, ch*chsize);
SVMout(ind) = X(ind,:)*(net.normalw');
end
else
% A normal kernel function: Split both the examples and the Support
% Vectors into small chunks
chunks1 = ceil(N/chsize);
chunks2 = ceil(length(changedSV)/chsize);
for ch1 = 1:chunks1,
ind1 = (1+(ch1-1)*chsize):min(N, ch1*chsize);
for ch2 = 1:chunks2,
% Compute the kernel function for a chunk of Support Vectors and
% a chunk of examples
ind2 = (1+(ch2-1)*chsize):min(length(changedSV), ch2*chsize);
K12 = svmkernel(net, X(ind1, :), X(changedSV(ind2), :));
% Add the weighted kernel matrix to the SVM output. In update
% cycles, the kernel matrix is weighted by the difference of
% alphas, in other cycles it is weighted by the value alpha alone.
coeff = changedAlpha(ind2).*Y(changedSV(ind2));
SVMout(ind1) = SVMout(ind1)+K12*coeff;
end
if dodisplay>2,
K1all = svmkernel(net, X(ind1,:), X(net.svind,:));
coeff2 = net.alpha(net.svind).*Y(net.svind);
fprintf('Maximum error due to matrix partitioning: %g\n', ...
max((SVMout(ind1)-K1all*coeff2)'));
end
end
end
% Step 3: Compute the bias of the SVM decision function.
if net.use2norm,
% The bias can be found from the SVM output for Support Vectors. For
% those vectors, the output should be 1-alpha/C resp -1+alpha/C.
workSV = find(SV & workset);
if ~isempty(workSV),
net.bias = mean((1-net.alpha(workSV)./C(workSV)).*Y(workSV)- ...
SVMout(workSV));
end
else
% normal 1norm SVM:
% The bias can be found from Support Vector whose value alpha is not at
% the upper bound. For those vectors, the SVM output should be +1
% resp. -1.
workSV = find(SVnonbound & workset);
if ~isempty(workSV),
net.bias = mean(Y(workSV)-SVMout(workSV));
end
end
% The nasty case that no SVs to determine the bias have been found.
% The only sensible thing do to is to leave the bias unchanged.
if isempty(workSV) & (dodisplay>0),
disp('No Support Vectors in the current working set.');
disp('Leaving the bias unchanged.');
end
% Step 4: Compute the values of the Karush-Kuhn-Tucker conditions
% of the quadratic program. If no violations of these conditions are
% found, the optimal solution has been found, and we are finished.
% KKT describes how correct each example is classified. KKT must be
% positive for all examples that are on the correct side and that are
% not Support Vectors
% 0 for all Support Vectors
% negative for all examples on the wrong side of the hyperplane
if net.use2norm,
KKT = (SVMout+net.bias).*Y-1+net.alpha./C;
KKTviolations = logical(uint8((SV & (abs(KKT)>net.kkttol)) | ...
(~SV & (KKT<-net.kkttol))));
else
KKT = (SVMout+net.bias).*Y-1;
KKTviolations = logical(uint8((SVnonbound & (abs(KKT)>net.kkttol)) | ...
(SVbound & (KKT>net.kkttol)) | ...
(~SV & (KKT<-net.kkttol))));
end
ind = find(KKTviolations & workset);
if ~isempty(ind),
% The coefficients alpha for the current working set have just been
% optimised, non of those should violate the KKT conditions.
if dodisplay>0,
fprintf('KKT conditions not met in working set (value %g)', ...
max(abs(KKT(ind))));
end
end
if dodisplay>0,
fprintf('%i violations of the KKT conditions.\n', ...
length(find(KKTviolations)));
fprintf(['(%i violations from positive examples, %i from negative' ...
' examples)\n'], length(find(KKTviolations & class1)), ...
length(find(KKTviolations & class0)));
if (dodisplay>1) & ~isempty(find(KKTviolations)),
disp('The following examples violate the KKT conditions:');
fprintf(' %i', find(KKTviolations));
fprintf('\n');
end
end
% Check how many violations of the KKT-conditions have been found. If
% none, we are finished.
if length(find(KKTviolations)) == 0,
break;
end
% Step 5: Determine a new working set. To this aim, a linear
% approximation of the objective function is made. The new working set
% constitutes of the QPSIZE largest elements of the gradient of the
% linear approximation. The gradient of the linear approximation can be
% expressed using the ouput of the SVM on all training examples.
if net.use2norm,
searchDir = SVMout+Y.*(net.alpha./C-1);
set1 = logical(uint8(SV | class0));
set2 = logical(uint8(SV | class1));
else
searchDir = SVMout-Y;
set1 = logical(uint8((SV | class0) & (~SVbound | class1)));
set2 = logical(uint8((SV | class1) & (~SVbound | class0)));
end
% During the very first iteration: If no initial values for net.alpha
% are given, perform a random working set selection
if randomWS,
searchDir = rand([N 1]);
set1 = class1;
set2 = class0;
randomWS = 0;
end
% Step 6: Select the working set.
% Goal is to select an equal number of examples from set1 and set2
% (QPsize/2 examples from set1, QPsize/2 from set2). The examples from
% set1 are the QPsize/2 highest elements of searchDir for set1,
% the examples from set2 are the QPsize/2 smallest elements of searchDir
% for set2.
worksetOld = workset;
workset = logical(uint8(zeros(N, 1)));
if length(find(set1 | set2)) <= QPsize,
workset(set1 | set2) = 1;
% Less than QPsize examples to select from: Use them all
elseif length(find(set1)) <= floor(QPsize/2),
workset(set1) = 1;
% set1 has less than half QPsize examples: Use all of set1, fill the
% rest with examples from set2 starting with the ones that have low
% values for searchDir
set2 = find(set2 & ~workset);
[dummy, ind] = sort(searchDir(set2));
from2 = min(length(set2), QPsize-length(find(workset)));
workset(set2(ind(1:from2))) = 1;
elseif length(find(set2)) <= floor(QPsize/2),
% set2 has less than half QPsize examples: Use all of set2, fill the
% rest with examples from set1 starting with the ones that have high
% values for searchDir
workset(set2) = 1;
set1 = find(set1 & ~workset);
[dummy, ind] = sort(-searchDir(set1));
from1 = min(length(set1), QPsize-length(find(workset)));
workset(set1(ind(1:from1))) = 1;
else
set1 = find(set1);
[dummy, ind] = sort(-searchDir(set1));
from1 = min(length(set1), floor(QPsize/2));
workset(set1(ind(1:from1))) = 1;
% Use the QPsize/2 highest values for searchDir from set1
set2 = find(set2 & ~workset);
% Make sure that no examples are added twice
[dummy, ind] = sort(searchDir(set2));
from2 = min(length(set2), QPsize-length(find(workset)));
workset(set2(ind(1:from2))) = 1;
% Use the QPsize/2 lowest values for searchDir from set2
end
worksetind = find(workset);
% Workaround for Matlab bug when indexing sparse arrays with logicals:
% use index set instead
% Emergency exit: If we end up with the same work set in 2 subsequent
% iterations, something strange must have happened (for example, the
% accuracy of the QP solver is insufficient as compared to the required
% precision given by NET.alphatol and NET.kkttol)
% Exit immediately if 'loqo' is used, since loqo ignores the start
% values, so another iteration will not improve the results.
if all(workset==worksetOld),
sameWS = sameWS+1;
if ((sameWS==3) | strcmp(qpsolver, 'loqo')),
warnstr = 'Working set not changed - check accuracy. Exiting.';
if dodisplay>0,
disp(warnstr);
end
warning(warnstr);
break;
end
else
sameWS = 0;
end
worksize = length(find(workset));
nonworkset = ~workset;
if dodisplay>1,
disp('Working set consists of examples ');
fprintf(' %i', find(workset));
fprintf('\n');
end
% Step 7: Determine the linear part of the quadratic program. We have
% determined the working set already. The linear term of the quadratic
% program is made up of all the kernel evaluations K(Support Vectors
% outside of the working set, Support Vectors in the working set)
nonworkSV = find(nonworkset & SV);
% All Support Vectors outside of the working set
qBN = 0;
if length(nonworkSV)>0,
% The nonworkSV may be a very large matrix. Split up into smaller
% chunks.
chunks = ceil(length(nonworkSV)/chsize);
for ch = 1:chunks,
% Indices of the current chunk in NONWORKSV
ind = (1+(ch-1)*chsize):min(length(nonworkSV), ch*chsize);
% Evaluate kernel function for working set and the current chunk of
% non-working set
Ki = svmkernel(net, X(worksetind, :), X(nonworkSV(ind), :));
% The linear term qBN for the quadratic program is a column vector
% given by summing up the kernel evaluations multiplied by the
% corresponding alpha's and the class labels.
qBN = qBN+Ki*(net.alpha(nonworkSV(ind)).*Y(nonworkSV(ind)));
end
qBN = qBN.*Y(workset);
end
% Second linear term is a vector of one's
f = qBN-ones(worksize, 1);
% Step 8: Solve the quadratic program. The quadratic term of the
% objective function is made of the examples in the working set, the
% linear term comes from examples outside of the working set. The so
% found values WORKALPHA replace the old values NET.alpha for the
% examples in the working set.
% Quadratic term H is given by the kernel evaluations for the working
% set
H = svmkernel(net, X(worksetind,:), X(worksetind,:));
if net.use2norm,
% with 2norm of the slack variables: the quadratic program has values
% 1/C in the diagonal. Additionally, this makes H better conditioned.
H = H+diag(1./C(workset));
else
% Suggested by Mathworks support for improving the condition
% number. Condition number should should not be much larger than
% 1/sqrt(eps) to avoid numerical problems. Condition number of H will
% now be < eps^(-2/3)
H = H+diag(ones(worksize, 1)*eps^(2/3));
end
H = H.*(Y(workset)*Y(workset)');
% Matrix A for the equality constraint
A = Y(workset)';
% If there are Support Vectors outside of the working set, the equality
% constraint must give the weighted class labels of all these
% vectors. Otherwise the equality constraint gives zero.
if length(nonworkSV)>0,
eqconstr = -net.alpha(nonworkSV)'*Y(nonworkSV);
else
eqconstr = 0;
end
% Lower and upper bound for the coefficients alpha in the
% current working set
VLB = zeros(worksize, 1);
if net.use2norm,
% no upper bound in the 2norm case
VUB = [];
else
% normal 1norm SVM: error weights C are the upper bounds
VUB = C(workset);
end
tic;
% Solve the quadratic program with 1 equality constraint.
% Initial guess for the solution of the QP problem.
startVal = net.alpha(workset);
switch qpsolver
case 'quadprog'
workalpha = amyquadprog(H, f, [], [], A, eqconstr, VLB, VUB, startVal, ...
quadprogopt);
case 'qp'
workalpha = qp(H, f, A, eqconstr, VLB, VUB, startVal, 1);
case 'loqo'
if isempty(VUB),
% LOQO crashes if upper bound is missing
% Use a relatively low value (instead of Inf) for faster
% convergence
VUB = repmat(1e7, size(VLB));
end
workalpha = loqo(H, f, A, eqconstr, VLB, VUB, startVal, 1);
end
t = toc;
if dodisplay>1,
fprintf('QP subproblem solved after %i minutes, %2.1f seconds.\n', ...
floor(t/60), mod(t, 60));
end
% Sometime QUADPROG returns a solution with small imaginary part
% (usually with ill-posed problems, badly conditioned H)
if any(imag(workalpha)>0),
warning(['The QP solver returned a complex solution. '...
'Check condition number cond(H).']);
workalpha = real(workalpha);
end
% Update the newly found coefficients in NET.alpha
alphaOld = net.alpha;
net.alpha(workset) = workalpha;
iteration = iteration+1;
end
% Finished! Store the Support Vectors and the coefficient given by
% NET.alpha and the corresponding label.
net.svcoeff = net.alpha(net.svind).*Y(net.svind);
net.sv = X(net.svind, :);
if dodisplay>0,
fprintf('\n\n\nTraining finished.\n');
disp('Information about the trained Support Vector Machine:');
svmstat(net,1);
% output statistics over SVs and separating hyperplane
end
function [net, SVthresh, SV, SVbound, SVnonbound] = findSV(net, C)
% FINDSV - Select the Support Vectors from the current coefficients NET.alpha
% Threshold for selecting Support Vectors
maxalpha = max(net.alpha);
if maxalpha > net.alphatol,
% For most cases, net.alphatol is a reasonable choice (approx 1e-2)
SVthresh = net.alphatol;
else
% For complex kernel on small data sets: all alphas will be very small.
% Use the mean between the minimum and maximum logarithm of values
% NET.alpha as a threshold.
SVthresh = exp((log(max(eps,maxalpha))+log(eps))/2);
end
% All examples that have a value of NET.alpha above this threshold are
% assumed to be Support Vectors.
SV = logical(uint8(net.alpha>=SVthresh));
% All Support Vectors that have a value at their upper bound C
if net.use2norm,
% There is no such thing in the 2norm case!
SVbound = logical(repmat(uint8(0), size(net.alpha)));
else
SVbound = logical(uint8(net.alpha>(C-net.alphatol)));
end
% The Support Vectors not at the upper bound
SVnonbound = SV & (~SVbound);
% The actual indices of the Support Vectors in the training set
net.svind = find(SV);
function [X,fval,exitflag,output,lambda]=amyquadprog(H,f,A,B,Aeq,Beq,lb,ub,X0,options,varargin)
% Handle missing arguments
defaultopt = struct('Display','final','Diagnostics','off',...
'HessMult',[],... % must be [] by default
'TolX',100*eps,'TolFun',100*eps,...
'LargeScale','on','MaxIter',200,...
'PrecondBandWidth',0,'TypicalX','ones(numberOfVariables,1)',...
'TolPCG',0.1,'MaxPCGIter','max(1,floor(numberOfVariables/2))');
% If just 'defaults' passed in, return the default options in X
if nargin==1 & nargout <= 1 & isequal(H,'defaults')
X = defaultopt;
return
end
if nargin < 2,
error('QUADPROG requires at least two input arguments')
end
if nargin < 10, options =[];
if nargin < 9, X0 = [];
if nargin < 8, ub = [];
if nargin < 7, lb = [];
if nargin < 6, Beq = [];
if nargin < 5, Aeq = [];
if nargin < 4, B = [];
if nargin < 3, A = [];
end, end, end, end, end, end, end, end
% Set up constant strings
medium = 'medium-scale: active-set';
large = 'large-scale';
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -