📄 example43_run_b3.m

📁 是一个用MATLAB编的一个系统
💻 M
📖 第 1 页 / 共 5 页
字号:
    % the SVM output only by the difference of old and new alpha
    changedSV = find(net.alpha~=alphaOld);
    changedAlpha = net.alpha(changedSV)-alphaOld(changedSV);
  end
  
  if strcmp(net.kernel, 'linear'),
    chunks = ceil(length(changedSV)/chsize);
    % Linear kernel: Build the normal vector of the separating
    % hyperplane by computing the weighted sum of all Support Vectors
    for ch = 1:chunks,
      ind = (1+(ch-1)*chsize):min(length(changedSV), ch*chsize);
      temp = changedAlpha(ind).*Y(changedSV(ind));
      net.normalw = net.normalw+temp'*X(changedSV(ind), :);
    end
    % Find the output of the SVM by multiplying the examples with the
    % normal vector
    SVMout = zeros(N, 1);
    chunks = ceil(N/chsize);
    for ch = 1:chunks,
      ind = (1+(ch-1)*chsize):min(N, ch*chsize);
      SVMout(ind) = X(ind,:)*(net.normalw');
    end
  else
    % A normal kernel function: Split both the examples and the Support
    % Vectors into small chunks
    chunks1 = ceil(N/chsize);
    chunks2 = ceil(length(changedSV)/chsize);
    for ch1 = 1:chunks1,
      ind1 = (1+(ch1-1)*chsize):min(N, ch1*chsize);
      for ch2 = 1:chunks2,
	% Compute the kernel function for a chunk of Support Vectors and
        % a chunk of examples
	ind2 = (1+(ch2-1)*chsize):min(length(changedSV), ch2*chsize);
	K12 = svmkernel(net, X(ind1, :), X(changedSV(ind2), :));
	% Add the weighted kernel matrix to the SVM output. In update
        % cycles, the kernel matrix is weighted by the difference of
        % alphas, in other cycles it is weighted by the value alpha alone.
	coeff = changedAlpha(ind2).*Y(changedSV(ind2));
	SVMout(ind1) = SVMout(ind1)+K12*coeff;
      end
      if dodisplay>2,
	K1all = svmkernel(net, X(ind1,:), X(net.svind,:));
	coeff2 = net.alpha(net.svind).*Y(net.svind);
	fprintf('Maximum error due to matrix partitioning: %g\n', ...
		max((SVMout(ind1)-K1all*coeff2)'));
      end
    end
  end

  
  % Step 3: Compute the bias of the SVM decision function.
  if net.use2norm,
    % The bias can be found from the SVM output for Support Vectors. For
    % those vectors, the output should be 1-alpha/C resp -1+alpha/C.
    workSV = find(SV & workset);
    if ~isempty(workSV),
      net.bias = mean((1-net.alpha(workSV)./C(workSV)).*Y(workSV)- ...
                      SVMout(workSV));
    end
  else
    % normal 1norm SVM:
    % The bias can be found from Support Vector whose value alpha is not at
    % the upper bound. For those vectors, the SVM output should be +1
    % resp. -1.
    workSV = find(SVnonbound & workset);
    if ~isempty(workSV),
      net.bias = mean(Y(workSV)-SVMout(workSV));
    end
  end
  % The nasty case that no SVs to determine the bias have been found.
  % The only sensible thing do to is to leave the bias unchanged.
  if isempty(workSV) & (dodisplay>0),
    disp('No Support Vectors in the current working set.');
    disp('Leaving the bias unchanged.');
  end

  
  % Step 4: Compute the values of the Karush-Kuhn-Tucker conditions
  % of the quadratic program. If no violations of these conditions are
  % found, the optimal solution has been found, and we are finished.
  % KKT describes how correct each example is classified. KKT must be
  %   positive for all examples that are on the correct side and that are
  %     not Support Vectors
  %   0 for all Support Vectors
  %   negative for all examples on the wrong side of the hyperplane
  if net.use2norm,
    KKT = (SVMout+net.bias).*Y-1+net.alpha./C;
    KKTviolations = logical(uint8((SV & (abs(KKT)>net.kkttol)) | ...
                                  (~SV & (KKT<-net.kkttol))));
  else
    KKT = (SVMout+net.bias).*Y-1;
    KKTviolations = logical(uint8((SVnonbound & (abs(KKT)>net.kkttol)) | ...
                                  (SVbound & (KKT>net.kkttol)) | ...
                                  (~SV & (KKT<-net.kkttol))));
  end
  ind = find(KKTviolations & workset);
  if ~isempty(ind),
    % The coefficients alpha for the current working set have just been
    % optimised, non of those should violate the KKT conditions.
    if dodisplay>0,
      fprintf('KKT conditions not met in working set (value %g)', ...
              max(abs(KKT(ind))));
    end
  end
  if dodisplay>0,
    fprintf('%i violations of the KKT conditions.\n', ... 
	    length(find(KKTviolations)));
    fprintf(['(%i violations from positive examples, %i from negative' ...
	     ' examples)\n'], length(find(KKTviolations & class1)), ...
	    length(find(KKTviolations & class0)));
    if (dodisplay>1) & ~isempty(find(KKTviolations)),
      disp('The following examples violate the KKT conditions:');
      fprintf(' %i', find(KKTviolations));
      fprintf('\n');
    end
  end
  % Check how many violations of the KKT-conditions have been found. If
  % none, we are finished.
  if length(find(KKTviolations)) == 0,
    break;
  end
  
  % Step 5: Determine a new working set. To this aim, a linear
  % approximation of the objective function is made. The new working set
  % constitutes of the QPSIZE largest elements of the gradient of the
  % linear approximation. The gradient of the linear approximation can be
  % expressed using the ouput of the SVM on all training examples.
  if net.use2norm,
    searchDir = SVMout+Y.*(net.alpha./C-1);
    set1 = logical(uint8(SV | class0));
    set2 = logical(uint8(SV | class1));
  else
    searchDir = SVMout-Y;
    set1 = logical(uint8((SV | class0) & (~SVbound | class1)));
    set2 = logical(uint8((SV | class1) & (~SVbound | class0)));
  end
  % During the very first iteration: If no initial values for net.alpha
  % are given, perform a random working set selection
  if randomWS,
    searchDir = rand([N 1]);
    set1 = class1;
    set2 = class0;
    randomWS = 0;
  end

  % Step 6: Select the working set.
  % Goal is to select an equal number of examples from set1 and set2
  % (QPsize/2 examples from set1, QPsize/2 from set2). The examples from
  % set1 are the QPsize/2 highest elements of searchDir for set1,
  % the examples from set2 are the QPsize/2 smallest elements of searchDir
  % for set2.
  worksetOld = workset;
  workset = logical(uint8(zeros(N, 1)));
  if length(find(set1 | set2)) <= QPsize,
    workset(set1 | set2) = 1;
    % Less than QPsize examples to select from: Use them all
  elseif length(find(set1)) <= floor(QPsize/2),
    workset(set1) = 1;
    % set1 has less than half QPsize examples: Use all of set1, fill the
    % rest with examples from set2 starting with the ones that have low
    % values for searchDir
    set2 = find(set2 & ~workset);
    [dummy, ind] = sort(searchDir(set2));
    from2 = min(length(set2), QPsize-length(find(workset)));
    workset(set2(ind(1:from2))) = 1;
  elseif length(find(set2)) <= floor(QPsize/2),
    % set2 has less than half QPsize examples: Use all of set2, fill the
    % rest with examples from set1 starting with the ones that have high
    % values for searchDir
    workset(set2) = 1;
    set1 = find(set1 & ~workset);
    [dummy, ind] = sort(-searchDir(set1));
    from1 = min(length(set1), QPsize-length(find(workset)));
    workset(set1(ind(1:from1))) = 1;
  else
    set1 = find(set1);
    [dummy, ind] = sort(-searchDir(set1));
    from1 = min(length(set1), floor(QPsize/2));
    workset(set1(ind(1:from1))) = 1;
    % Use the QPsize/2 highest values for searchDir from set1
    set2 = find(set2 & ~workset);
    % Make sure that no examples are added twice
    [dummy, ind] = sort(searchDir(set2));
    from2 = min(length(set2), QPsize-length(find(workset)));
    workset(set2(ind(1:from2))) = 1;
    % Use the QPsize/2 lowest values for searchDir from set2
  end
  worksetind = find(workset);
  % Workaround for Matlab bug when indexing sparse arrays with logicals:
  % use index set instead
  
  % Emergency exit: If we end up with the same work set in 2 subsequent
  % iterations, something strange must have happened (for example, the
  % accuracy of the QP solver is insufficient as compared to the required
  % precision given by NET.alphatol and NET.kkttol)
  % Exit immediately if 'loqo' is used, since loqo ignores the start
  % values, so another iteration will not improve the results.
  if all(workset==worksetOld),
    sameWS = sameWS+1;
    if ((sameWS==3) | strcmp(qpsolver, 'loqo')),
      warnstr = 'Working set not changed - check accuracy. Exiting.';
      if dodisplay>0,
        disp(warnstr);
      end
      warning(warnstr);
      break;
    end
  else
    sameWS = 0;
  end
  worksize = length(find(workset));
  nonworkset = ~workset;
  if dodisplay>1,
    disp('Working set consists of examples ');
    fprintf(' %i', find(workset));
    fprintf('\n');
  end

  
  % Step 7: Determine the linear part of the quadratic program. We have
  % determined the working set already. The linear term of the quadratic
  % program is made up of all the kernel evaluations  K(Support Vectors
  % outside of the working set, Support Vectors in the working set)
  nonworkSV = find(nonworkset & SV);
  % All Support Vectors outside of the working set
  qBN = 0;
  if length(nonworkSV)>0,
    % The nonworkSV may be a very large matrix. Split up into smaller
    % chunks.
    chunks = ceil(length(nonworkSV)/chsize);
    for ch = 1:chunks,
      % Indices of the current chunk in NONWORKSV
      ind = (1+(ch-1)*chsize):min(length(nonworkSV), ch*chsize);
      % Evaluate kernel function for working set and the current chunk of
      % non-working set
      Ki = svmkernel(net, X(worksetind, :), X(nonworkSV(ind), :));
      % The linear term qBN for the quadratic program is a column vector
      % given by summing up the kernel evaluations multiplied by the
      % corresponding alpha's and the class labels.
      qBN = qBN+Ki*(net.alpha(nonworkSV(ind)).*Y(nonworkSV(ind)));
    end
    qBN = qBN.*Y(workset);
  end
  % Second linear term is a vector of one's
  f = qBN-ones(worksize, 1);

  
  % Step 8: Solve the quadratic program. The quadratic term of the
  % objective function is made of the examples in the working set, the
  % linear term comes from examples outside of the working set. The so
  % found values WORKALPHA replace the old values NET.alpha for the
  % examples in the working set.
  % Quadratic term H is given by the kernel evaluations for the working
  % set
  H = svmkernel(net, X(worksetind,:), X(worksetind,:));
  if net.use2norm,
    % with 2norm of the slack variables: the quadratic program has values
    % 1/C in the diagonal. Additionally, this makes H better conditioned.
    H = H+diag(1./C(workset));
  else
    % Suggested by Mathworks support for improving the condition
    % number. Condition number should should not be much larger than
    % 1/sqrt(eps) to avoid numerical problems. Condition number of H will
    % now be < eps^(-2/3)
    H = H+diag(ones(worksize, 1)*eps^(2/3));
  end
  H = H.*(Y(workset)*Y(workset)');
  % Matrix A for the equality constraint
  A = Y(workset)';
  % If there are Support Vectors outside of the working set, the equality
  % constraint must give the weighted class labels of all these
  % vectors. Otherwise the equality constraint gives zero.
  if length(nonworkSV)>0,
    eqconstr = -net.alpha(nonworkSV)'*Y(nonworkSV);
  else
    eqconstr = 0;
  end
  % Lower and upper bound for the coefficients alpha in the
  % current working set
  VLB = zeros(worksize, 1);
  if net.use2norm,
    % no upper bound in the 2norm case
    VUB = [];
  else
    % normal 1norm SVM: error weights C are the upper bounds
    VUB = C(workset);
  end
  tic;
  % Solve the quadratic program with 1 equality constraint.
  % Initial guess for the solution of the QP problem.
  startVal = net.alpha(workset);
  switch qpsolver
    case 'quadprog'
      workalpha = amyquadprog(H, f, [], [], A, eqconstr, VLB, VUB, startVal, ...
                           quadprogopt);
    case 'qp'
      workalpha = qp(H, f, A, eqconstr, VLB, VUB, startVal, 1);
    case 'loqo'
      if isempty(VUB),
        % LOQO crashes if upper bound is missing
        % Use a relatively low value (instead of Inf) for faster
        % convergence
        VUB = repmat(1e7, size(VLB));
      end
      workalpha = loqo(H, f, A, eqconstr, VLB, VUB, startVal, 1);
  end
  t = toc;
  if dodisplay>1,
    fprintf('QP subproblem solved after %i minutes, %2.1f seconds.\n', ...
	  floor(t/60), mod(t, 60));
  end
  % Sometime QUADPROG returns a solution with small imaginary part
  % (usually with ill-posed problems, badly conditioned H)
  if any(imag(workalpha)>0),
    warning(['The QP solver returned a complex solution. '...
             'Check condition number cond(H).']);
    workalpha = real(workalpha);
  end
  % Update the newly found coefficients in NET.alpha
  alphaOld = net.alpha;
  net.alpha(workset) = workalpha;
  
  iteration = iteration+1;
end

% Finished! Store the Support Vectors and the coefficient given by
% NET.alpha and the corresponding label.
net.svcoeff = net.alpha(net.svind).*Y(net.svind);
net.sv = X(net.svind, :);

if dodisplay>0,
  fprintf('\n\n\nTraining finished.\n');
  disp('Information about the trained Support Vector Machine:');
  svmstat(net,1);
  % output statistics over SVs and separating hyperplane
end



function [net, SVthresh, SV, SVbound, SVnonbound] = findSV(net, C)
% FINDSV - Select the Support Vectors from the current coefficients NET.alpha

% Threshold for selecting Support Vectors
maxalpha = max(net.alpha);
if maxalpha > net.alphatol,
  % For most cases, net.alphatol is a reasonable choice (approx 1e-2)
  SVthresh = net.alphatol;
else
  % For complex kernel on small data sets: all alphas will be very small.
  % Use the mean between the minimum and maximum logarithm of values
  % NET.alpha as a threshold.
  SVthresh = exp((log(max(eps,maxalpha))+log(eps))/2);
end
% All examples that have a value of NET.alpha above this threshold are
% assumed to be Support Vectors.
SV = logical(uint8(net.alpha>=SVthresh));
% All Support Vectors that have a value at their upper bound C
if net.use2norm,
  % There is no such thing in the 2norm case!
  SVbound = logical(repmat(uint8(0), size(net.alpha)));
else
  SVbound = logical(uint8(net.alpha>(C-net.alphatol)));
end
% The Support Vectors not at the upper bound
SVnonbound = SV & (~SVbound);
% The actual indices of the Support Vectors in the training set
net.svind = find(SV);


function [X,fval,exitflag,output,lambda]=amyquadprog(H,f,A,B,Aeq,Beq,lb,ub,X0,options,varargin)

% Handle missing arguments

defaultopt = struct('Display','final','Diagnostics','off',...
   'HessMult',[],... % must be [] by default
   'TolX',100*eps,'TolFun',100*eps,...
   'LargeScale','on','MaxIter',200,...
   'PrecondBandWidth',0,'TypicalX','ones(numberOfVariables,1)',...
   'TolPCG',0.1,'MaxPCGIter','max(1,floor(numberOfVariables/2))');

% If just 'defaults' passed in, return the default options in X
if nargin==1 & nargout <= 1 & isequal(H,'defaults')
   X = defaultopt;
   return
end

if nargin < 2, 
   error('QUADPROG requires at least two input arguments')
end

if nargin < 10, options =[];
   if nargin < 9, X0 = []; 
      if nargin < 8, ub = []; 
         if nargin < 7, lb = []; 
            if nargin < 6, Beq = []; 
               if nargin < 5, Aeq = [];
                  if nargin < 4, B = [];
                     if nargin < 3, A = [];
                     end, end, end, end, end, end, end, end

% Set up constant strings
medium =  'medium-scale: active-set';
large = 'large-scale';
💿 文件大小 546 K
👤 上传用户 dedien
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#MATLAB
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -