📄 svmtrain.m
字号:
% The only sensible thing do to is to leave the bias unchanged. if isempty(workSV) & (dodisplay>0), disp('No Support Vectors in the current working set.'); disp('Leaving the bias unchanged.'); end % Step 4: Compute the values of the Karush-Kuhn-Tucker conditions % of the quadratic program. If no violations of these conditions are % found, the optimal solution has been found, and we are finished. % KKT describes how correct each example is classified. KKT must be % positive for all examples that are on the correct side and that are % not Support Vectors % 0 for all Support Vectors % negative for all examples on the wrong side of the hyperplane if net.use2norm, KKT = (SVMout+net.bias).*Y-1+net.alpha./C; KKTviolations = logical(uint8((SV & (abs(KKT)>net.kkttol)) | ... (~SV & (KKT<-net.kkttol)))); else KKT = (SVMout+net.bias).*Y-1; KKTviolations = logical(uint8((SVnonbound & (abs(KKT)>net.kkttol)) | ... (SVbound & (KKT>net.kkttol)) | ... (~SV & (KKT<-net.kkttol)))); end ind = find(KKTviolations & workset); if ~isempty(ind), % The coefficients alpha for the current working set have just been % optimised, non of those should violate the KKT conditions. if dodisplay>0, fprintf('KKT conditions not met in working set (value %g)', ... max(abs(KKT(ind)))); end end if dodisplay>0, fprintf('%i violations of the KKT conditions.\n', ... length(find(KKTviolations))); fprintf(['(%i violations from positive examples, %i from negative' ... ' examples)\n'], length(find(KKTviolations & class1)), ... length(find(KKTviolations & class0))); if (dodisplay>1) & ~isempty(find(KKTviolations)), disp('The following examples violate the KKT conditions:'); fprintf(' %i', find(KKTviolations)); fprintf('\n'); end end % Check how many violations of the KKT-conditions have been found. If % none, we are finished. if length(find(KKTviolations)) == 0, break; end % Step 5: Determine a new working set. To this aim, a linear % approximation of the objective function is made. The new working set % constitutes of the QPSIZE largest elements of the gradient of the % linear approximation. The gradient of the linear approximation can be % expressed using the ouput of the SVM on all training examples. if net.use2norm, searchDir = SVMout+Y.*(net.alpha./C-1); set1 = logical(uint8(SV | class0)); set2 = logical(uint8(SV | class1)); else searchDir = SVMout-Y; set1 = logical(uint8((SV | class0) & (~SVbound | class1))); set2 = logical(uint8((SV | class1) & (~SVbound | class0))); end % During the very first iteration: If no initial values for net.alpha % are given, perform a random working set selection if randomWS, searchDir = rand([N 1]); set1 = class1; set2 = class0; randomWS = 0; end % Step 6: Select the working set. % Goal is to select an equal number of examples from set1 and set2 % (QPsize/2 examples from set1, QPsize/2 from set2). The examples from % set1 are the QPsize/2 highest elements of searchDir for set1, % the examples from set2 are the QPsize/2 smallest elements of searchDir % for set2. worksetOld = workset; workset = logical(uint8(zeros(N, 1))); if length(find(set1 | set2)) <= QPsize, workset(set1 | set2) = 1; % Less than QPsize examples to select from: Use them all elseif length(find(set1)) <= floor(QPsize/2), workset(set1) = 1; % set1 has less than half QPsize examples: Use all of set1, fill the % rest with examples from set2 starting with the ones that have low % values for searchDir set2 = find(set2 & ~workset); [dummy, ind] = sort(searchDir(set2)); from2 = min(length(set2), QPsize-length(find(workset))); workset(set2(ind(1:from2))) = 1; elseif length(find(set2)) <= floor(QPsize/2), % set2 has less than half QPsize examples: Use all of set2, fill the % rest with examples from set1 starting with the ones that have high % values for searchDir workset(set2) = 1; set1 = find(set1 & ~workset); [dummy, ind] = sort(-searchDir(set1)); from1 = min(length(set1), QPsize-length(find(workset))); workset(set1(ind(1:from1))) = 1; else set1 = find(set1); [dummy, ind] = sort(-searchDir(set1)); from1 = min(length(set1), floor(QPsize/2)); workset(set1(ind(1:from1))) = 1; % Use the QPsize/2 highest values for searchDir from set1 set2 = find(set2 & ~workset); % Make sure that no examples are added twice [dummy, ind] = sort(searchDir(set2)); from2 = min(length(set2), QPsize-length(find(workset))); workset(set2(ind(1:from2))) = 1; % Use the QPsize/2 lowest values for searchDir from set2 end worksetind = find(workset); % Workaround for Matlab bug when indexing sparse arrays with logicals: % use index set instead % Emergency exit: If we end up with the same work set in 2 subsequent % iterations, something strange must have happened (for example, the % accuracy of the QP solver is insufficient as compared to the required % precision given by NET.alphatol and NET.kkttol) % Exit immediately if 'loqo' is used, since loqo ignores the start % values, so another iteration will not improve the results. if all(workset==worksetOld), sameWS = sameWS+1; if ((sameWS==3) | strcmp(qpsolver, 'loqo')), warnstr = 'Working set not changed - check accuracy. Exiting.'; if dodisplay>0, disp(warnstr); end warning(warnstr); break; end else sameWS = 0; end worksize = length(find(workset)); nonworkset = ~workset; if dodisplay>1, disp('Working set consists of examples '); fprintf(' %i', find(workset)); fprintf('\n'); end % Step 7: Determine the linear part of the quadratic program. We have % determined the working set already. The linear term of the quadratic % program is made up of all the kernel evaluations K(Support Vectors % outside of the working set, Support Vectors in the working set) nonworkSV = find(nonworkset & SV); % All Support Vectors outside of the working set qBN = 0; if length(nonworkSV)>0, % The nonworkSV may be a very large matrix. Split up into smaller % chunks. chunks = ceil(length(nonworkSV)/chsize); for ch = 1:chunks, % Indices of the current chunk in NONWORKSV ind = (1+(ch-1)*chsize):min(length(nonworkSV), ch*chsize); % Evaluate kernel function for working set and the current chunk of % non-working set Ki = svmkernel(net, X(worksetind, :), X(nonworkSV(ind), :)); % The linear term qBN for the quadratic program is a column vector % given by summing up the kernel evaluations multiplied by the % corresponding alpha's and the class labels. qBN = qBN+Ki*(net.alpha(nonworkSV(ind)).*Y(nonworkSV(ind))); end qBN = qBN.*Y(workset); end % Second linear term is a vector of one's f = qBN-ones(worksize, 1); % Step 8: Solve the quadratic program. The quadratic term of the % objective function is made of the examples in the working set, the % linear term comes from examples outside of the working set. The so % found values WORKALPHA replace the old values NET.alpha for the % examples in the working set. % Quadratic term H is given by the kernel evaluations for the working % set H = svmkernel(net, X(worksetind,:), X(worksetind,:)); if net.use2norm, % with 2norm of the slack variables: the quadratic program has values % 1/C in the diagonal. Additionally, this makes H better conditioned. H = H+diag(1./C(workset)); else % Suggested by Mathworks support for improving the condition % number. Condition number should should not be much larger than % 1/sqrt(eps) to avoid numerical problems. Condition number of H will % now be < eps^(-2/3) H = H+diag(ones(worksize, 1)*eps^(2/3)); end H = H.*(Y(workset)*Y(workset)'); % Matrix A for the equality constraint A = Y(workset)'; % If there are Support Vectors outside of the working set, the equality % constraint must give the weighted class labels of all these % vectors. Otherwise the equality constraint gives zero. if length(nonworkSV)>0, eqconstr = -net.alpha(nonworkSV)'*Y(nonworkSV); else eqconstr = 0; end % Lower and upper bound for the coefficients alpha in the % current working set VLB = zeros(worksize, 1); if net.use2norm, % no upper bound in the 2norm case VUB = []; else % normal 1norm SVM: error weights C are the upper bounds VUB = C(workset); end tic; % Solve the quadratic program with 1 equality constraint. % Initial guess for the solution of the QP problem. startVal = net.alpha(workset); switch qpsolver case 'quadprog' workalpha = quadprog(H, f, [], [], A, eqconstr, VLB, VUB, startVal, ... quadprogopt); case 'qp' workalpha = qp(H, f, A, eqconstr, VLB, VUB, startVal, 1); case 'loqo' if isempty(VUB), % LOQO crashes if upper bound is missing % Use a relatively low value (instead of Inf) for faster % convergence VUB = repmat(1e7, size(VLB)); end workalpha = loqo(H, f, A, eqconstr, VLB, VUB, startVal, 1); end t = toc; if dodisplay>1, fprintf('QP subproblem solved after %i minutes, %2.1f seconds.\n', ... floor(t/60), mod(t, 60)); end % Sometime QUADPROG returns a solution with small imaginary part % (usually with ill-posed problems, badly conditioned H) if any(imag(workalpha)>0), warning(['The QP solver returned a complex solution. '... 'Check condition number cond(H).']); workalpha = real(workalpha); end % Update the newly found coefficients in NET.alpha alphaOld = net.alpha; net.alpha(workset) = workalpha; iteration = iteration+1;end% Finished! Store the Support Vectors and the coefficient given by% NET.alpha and the corresponding label.net.svcoeff = net.alpha(net.svind).*Y(net.svind);net.sv = X(net.svind, :);if dodisplay>0, fprintf('\n\n\nTraining finished.\n'); disp('Information about the trained Support Vector Machine:'); svmstat(net,1); % output statistics over SVs and separating hyperplaneendfunction [net, SVthresh, SV, SVbound, SVnonbound] = findSV(net, C)% FINDSV - Select the Support Vectors from the current coefficients NET.alpha% Threshold for selecting Support Vectorsmaxalpha = max(net.alpha);if maxalpha > net.alphatol, % For most cases, net.alphatol is a reasonable choice (approx 1e-2) SVthresh = net.alphatol;else % For complex kernel on small data sets: all alphas will be very small. % Use the mean between the minimum and maximum logarithm of values % NET.alpha as a threshold. SVthresh = exp((log(max(eps,maxalpha))+log(eps))/2);end% All examples that have a value of NET.alpha above this threshold are% assumed to be Support Vectors.SV = logical(uint8(net.alpha>=SVthresh));% All Support Vectors that have a value at their upper bound Cif net.use2norm, % There is no such thing in the 2norm case! SVbound = logical(repmat(uint8(0), size(net.alpha)));else SVbound = logical(uint8(net.alpha>(C-net.alphatol)));end% The Support Vectors not at the upper boundSVnonbound = SV & (~SVbound);% The actual indices of the Support Vectors in the training setnet.svind = find(SV);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -