% example43_run_b3.m
step = -1;
end
if step<0,
paramSeq = max(range):step:min(range);
else
paramSeq = min(range):step:max(range);
end
end
end
% Storing all validation set errors for each parameter choice
allErr = zeros(nfold, length(paramSeq));
% Storing the confusion matrices for each parameter choice
cm = cell(1, length(paramSeq));
for j = 1:length(paramSeq),
cm{j} = zeros(2);
end
% shuffle X and Y in the same way
perm = randperm(N);
X = X(perm,:);
Y = Y(perm,:);
% size of one test set
chsize = floor(N/nfold);
% the training set is not exactly the whole data minus the one test set,
% but it is the union of the other test sets. So only effsize examples
% of the data set will ever be used
effsize = nfold*chsize;
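% Illustrative numbers (not from the data): with N = 103 and nfold = 5,
% chsize = floor(103/5) = 20 and effsize = 100, so the last 3 shuffled
% examples are never used in any fold.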
% check if leave-one-out CV (or almost such) is required
usePrev = (nfold>=(N/2));
prevInd = [];
for i = 1:nfold,
% currentX/Y is the current training set
if (nfold == 1),
currentX = X;
currentY = Y;
testX = Xv;
testY = Yv;
else
% start and end index of current test set
ind1 = 1+(i-1)*chsize;
ind2 = i*chsize;
currentInd = [1:(ind1-1), (ind2+1):effsize];
currentX = X(currentInd, :);
currentY = Y(currentInd, :);
testX = X(ind1:ind2,:);
testY = Y(ind1:ind2,:);
end;
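% For instance, with N = 100 and nfold = 5 (chsize = 20), fold i = 2 uses
% rows 21:40 as the test set and rows [1:20, 41:100] as the training set.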
% We start out with the most powerful kernel (smallest sigma for RBF
% kernel, highest degree for polynomial). We assume that all training
% examples will be support vectors due to overfitting, thus we start
% the optimization with a value of C/2 for each example.
if length(net.c(:))==1,
alpha0 = repmat(net.c, [length(currentY) 1]);
% The same upper bound for all examples
elseif length(net.c(:))==2,
alpha0 = zeros([length(currentY) 1]);
alpha0(currentY>0) = net.c(1);
alpha0(currentY<=0) = net.c(2);
% Different upper bounds C for the positive and negative examples
else
cperm = net.c(perm);
alpha0 = cperm(currentInd);
% Use different C for each example: permute the original C's
end
alpha0 = alpha0/2;
% Start out with alpha = C/2 for the optimization routine.
% another little trick for leave-one-out CV: training sets will only
% slightly differ, thus use the alphas from the previous iteration,
% even if it resulted from a different parameter setting, as initial
% values alpha0
if usePrev & ~isempty(prevInd),
a = zeros(N, 1);
a(currentInd) = alpha0;
a(prevInd) = prevAlpha;
alpha0 = a(currentInd);
end
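% E.g. for leave-one-out CV (nfold close to N, chsize = 1), consecutive
% training sets share all but two examples, so the previous solution is a
% near-optimal starting point and only a few alphas need to change.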
% Now loop over all parameter settings and train the SVM on currentX/Y
net2 = net;
if (dodisplay>0),
fprintf('Split %i of the training data:\n', i);
end
for j = 1:length(paramSeq),
param = paramSeq(j);
net2.kernelpar = param;
% Plug the current parameter settings into the SVM and train on the
% current training set
net2 = svmtrain(net2, currentX, currentY, alpha0, max(0,dodisplay-2));
% Evaluate on the non-training data
testPred = svmfwd(net2, testX);
allErr(i, j) = mean(testY ~= testPred);
% Compute the confusion matrix
for k = [1 2],
for l = [1 2],
c(k,l) = sum(((testPred>=0)==(l-1)).*((testY>=0)==(k-1)));
end
end
cm{j} = cm{j}+c;
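% The layout is cm{j}(true, predicted) with class order [-1, +1]; e.g.
% cm{j}(1,2) counts test examples of true class -1 that were predicted as +1.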
% take out the computed coefficients alpha and use them as starting
% values for the next iteration (next parameter choice)
alpha0 = net2.alpha;
if (dodisplay>1),
fprintf('Split %i with parameter %g:\n', i, param);
fprintf(' Test set error = %2.3f%%\n', allErr(i, j)*100);
[fracSV, normW] = svmstat(net2, (dodisplay>2));
fprintf(' Norm of the separating hyperplane: %g\n', normW);
fprintf(' Fraction of support vectors: %2.3f%%\n', fracSV*100);
end
end
if usePrev,
prevAlpha = net2.alpha;
prevInd = currentInd;
end
end
% Compute mean and standard deviation over all nfold runs
meanErr = mean(allErr, 1);
stdErr = std(allErr, 0, 1);
CVErr = [meanErr; stdErr]';
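% CVErr is a length(paramSeq)-by-2 matrix: column 1 holds the mean CV error
% for each parameter value, column 2 the corresponding standard deviation.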
% Find the point of minimum mean error and plug that parameter into the
% output SVM structure. If there should be several points of minimal
% error, select the one with minimal standard deviation
[sortedMean, sortedInd] = sort(meanErr);
minima = find(sortedMean(1)==meanErr);
[dummy, sortedInd2] = sort(stdErr(minima));
net.kernelpar = paramSeq(minima(sortedInd2(1)));
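% Example (illustrative values): if meanErr = [0.10 0.08 0.08] and
% stdErr = [0.02 0.03 0.01], parameters 2 and 3 tie on mean error and the
% third is selected because of its smaller standard deviation.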
if (dodisplay>0),
for j = 1:length(paramSeq),
fprintf('kernelpar=%g: Avg CV error %2.3f%% with stddev %1.4f\n', ...
paramSeq(j), meanErr(j)*100, stdErr(j)*100);
if any(cm{j}~=0),
fprintf(' Confusion matrix, accumulated over all runs:\n');
fprintf(' Predicted class:\n');
fprintf(' %5i %5i\n', -1, +1);
c1 = cm{j}';
c2 = 100*c1./repmat(sum(c1), [2 1]);
c3 = [c1(:) c2(:)]';
fprintf([' True -1: %5i (%3.2f%%) %5i (%3.2f%%)\n True +1: %5i' ...
' (%3.2f%%) %5i (%3.2f%%)\n'], c3(:));
end
end
end
function [Y, Y1] = svmfwd(net, X)
% SVMFWD Forward propagation through the SVM NET: returns the predicted
% class labels Y in {-1,+1} and the raw decision values Y1 for inputs X.
% Check arguments for consistency
errstring = consist(net, 'svm', X);
if ~isempty(errstring),
error(errstring);
end
[N d] = size(X);
if strcmp(net.kernel, 'linear'),
if ~isfield(net, 'normalw') | ~all(size(net.normalw)==[1 d]),
error('Structure NET does not contain a valid field ''normalw''');
end
else
if ~isfield(net, 'sv') | ((size(net.sv, 2)~=d) & ~isempty(net.sv)),
error('Structure NET does not contain a valid field ''sv''');
end
nbSV = size(net.sv, 1);
if nbSV~=size(net.svcoeff, 1),
error('Structure NET does not contain a valid field ''svcoeff''');
end
if ~isfield(net, 'bias') | ~all(size(net.bias)==[1 1]),
error('Structure NET does not contain a valid field ''bias''');
end
end
if strcmp(net.kernel, 'linear'),
Y1 = X*(net.normalw');
else
chsize = net.chunksize;
Y1 = zeros(N, 1);
chunks1 = ceil(N/chsize);
chunks2 = ceil(nbSV/chsize);
for ch1 = 1:chunks1,
ind1 = (1+(ch1-1)*chsize):min(N, ch1*chsize);
for ch2 = 1:chunks2,
ind2 = (1+(ch2-1)*chsize):min(nbSV, ch2*chsize);
K12 = svmkernel(net, X(ind1, :), net.sv(ind2, :));
Y1(ind1) = Y1(ind1)+K12*net.svcoeff(ind2);
end
end
end
Y1 = Y1+net.bias;
Y = sign(Y1);
Y(Y==0) = 1;
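% Usage sketch (a hypothetical hand-built linear SVM, assuming a
% Netlab-style CONSIST that checks NET.type and NET.nin against the input
% dimension):
%   net.type = 'svm'; net.nin = 2;
%   net.kernel = 'linear';
%   net.normalw = [1 -1]; net.bias = 0.5;
%   [Y, Y1] = svmfwd(net, [2 1; 0 3]);
%   % Y1 = [1.5; -2.5] and Y = [1; -1]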
function K = svmkernel(net, X1, X2)
% SVMKERNEL Evaluate the kernel matrix K(i,j) = k(X1(i,:), X2(j,:)) for the
% kernel function selected in NET.kernel.
errstring = consist(net, 'svm', X1);
if ~isempty(errstring),
error(errstring);
end
errstring = consist(net, 'svm', X2);
if ~isempty(errstring),
error(errstring);
end
[N1, d] = size(X1);
[N2, d] = size(X2);
switch net.kernel
case 'linear'
K = X1*X2';
case 'poly'
K = (1+X1*X2').^net.kernelpar(1);
case 'rbf'
dist2 = repmat(sum((X1.^2)', 1), [N2 1])' + ...
repmat(sum((X2.^2)',1), [N1 1]) - ...
2*X1*(X2');
K = exp(-dist2/(net.nin*net.kernelpar(1)));
case 'rbffull'
bias = 0;
if any(all(repmat(size(net.kernelpar), [4 1]) == ...
[d 1; 1 d; d+1 1; 1 d+1], 2), 1),
weights = diag(net.kernelpar(1:d));
if length(net.kernelpar)>d,
bias = net.kernelpar(end);
end
elseif all(size(net.kernelpar)==[d d]),
weights = net.kernelpar;
else
error('Size of NET.kernelpar does not match the chosen kernel ''rbffull''');
end
dist2 = (X1.^2)*weights*ones([d N2]) + ...
ones([N1 d])*weights*(X2.^2)' - ...
2*X1*weights*(X2');
K = exp(bias-dist2/net.nin);
otherwise
error('Unknown kernel function');
end
K = full(K);
% Convert to a full matrix in case the inputs were sparse
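% Worked example (illustrative values, 'rbf' kernel): with NET.nin = 2,
% NET.kernelpar = 1, x1 = [0 0] and x2 = [1 1], dist2 = ||x1-x2||^2 = 2,
% so K = exp(-2/(2*1)) = exp(-1), approximately 0.3679.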
function [fracSV, normW, nbSV, nbBoundSV, posSV, negSV, posBound, negBound] = svmstat(net, doDisplay)
% SVMSTAT Compute statistics of the trained SVM NET: fraction and number of
% Support Vectors, norm of the separating hyperplane, and counts of SVs at
% the upper bound, split by class.
if nargin<2,
doDisplay = 0;
end
nbSV = 0;
fracSV = 0;
nbPosSV = 0;
nbNegSV = 0;
normW = 0;
if (net.nbexamples<=0) | (size(net.svcoeff,1)==0),
warning('The given SVM has not been trained yet');
return;
end
if isfield(net, 'use2norm'),
use2norm = net.use2norm;
else
use2norm = 0;
end
posSV = find(net.svcoeff>0);
negSV = find(net.svcoeff<0);
% Indices of the positive and negative examples that have become Support
% Vectors
nbPosSV = length(posSV);
nbNegSV = length(negSV);
nbSV = nbPosSV+nbNegSV;
fracSV = nbSV/net.nbexamples;
if (nargout<2) & ~doDisplay,
% shortcut to avoid the expensive computation of the norm
return;
end
% Extract the upper bound for the examples that are Support Vectors
if length(net.c(:))==1,
C = repmat(net.c, [length(net.svcoeff) 1]);
% The same upper bound for all examples
elseif length(net.c(:))==2,
C = zeros([length(net.svcoeff) 1]);
C(posSV) = net.c(1);
C(negSV) = net.c(2);
% Different upper bounds C for the positive and negative examples
else
C = net.c(net.svind);
end
if isfield(net, 'alphatol'),
tol = net.alphatol;
else
tol = net.accur;
% old versions of SVM used only one field NET.accur
end
if use2norm,
posBound = [];
negBound = [];
else
posBound = find(abs(net.svcoeff(posSV))>(C(posSV)-tol));
negBound = find(abs(net.svcoeff(negSV))>(C(negSV)-tol));
end
nbPosBound = length(posBound);
nbNegBound = length(negBound);
nbBoundSV = nbPosBound+nbNegBound;
if strcmp(net.kernel, 'linear') & isfield(net, 'normalw'),
normW = norm(net.normalw);
% linear kernel: norm of the separating hyperplane can be computed
% directly
else
if use2norm,
alpha = abs(net.svcoeff);
% For the 2-norm SVM, the norm of the hyperplane is easy to compute
normW = sqrt(sum(alpha)+sum((alpha.^2)./C));
else
% standard 1-norm SVM:
[dummy, svOutput] = svmfwd(net, net.sv);
svOutput = svOutput-net.bias;
% norm of the hyperplane is computed using the output
% of the SVM for all Support Vectors without the bias term
normW = sqrt(net.svcoeff'*svOutput);
% norm is basically the quadratic term of the SVM objective function
end
end
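% In detail (standard SVM algebra, not specific to this code): with
% c = NET.svcoeff, c_i = alpha_i*y_i, and K the kernel matrix between all
% Support Vectors, svOutput = K*c, so svcoeff'*svOutput = c'*K*c = ||w||^2.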
if doDisplay,
fprintf('Number of Support Vectors (SV): %i\n', nbSV);
fprintf(' (that is a fraction of %2.3f%% of the training examples)\n', ...
100*fracSV);
if ~use2norm,
fprintf(' %i of the SV have a coefficient ALPHA at the upper bound\n', ...
nbBoundSV);
end
fprintf(' %i Support Vectors from positive examples\n', nbPosSV);
if ~use2norm,
fprintf(' %i of them have a coefficient ALPHA at the upper bound\n', ...
nbPosBound);
end
fprintf(' %i Support Vectors from negative examples\n', nbNegSV);
if ~use2norm,
fprintf(' %i of them have a coefficient ALPHA at the upper bound\n', ...
nbNegBound);
end
fprintf('Norm of the separating hyperplane: %g\n', normW);
end
function [x,lb,ub,msg] = checkbounds(xin,lbin,ubin,nvars)
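% CHECKBOUNDS Turn the bounds LBIN/UBIN into column vectors of length NVARS,
% truncating extra entries and padding missing ones with -Inf/+Inf, and
% report infeasible bounds in MSG.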
msg = [];
% Turn into column vectors
lb = lbin(:);
ub = ubin(:);
xin = xin(:);
lenlb = length(lb);
lenub = length(ub);
lenx = length(xin);
% Check maximum length
if lenlb > nvars
warning('Length of lower bounds is > length(x); ignoring extra bounds');
lb = lb(1:nvars);
lenlb = nvars;
elseif lenlb < nvars
lb = [lb; -inf*ones(nvars-lenlb,1)];
lenlb = nvars;
end
if lenub > nvars
warning('Length of upper bounds is > length(x); ignoring extra bounds');
ub = ub(1:nvars);
lenub = nvars;
elseif lenub < nvars
ub = [ub; inf*ones(nvars-lenub,1)];
lenub = nvars;
end
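% Example (illustrative values): with nvars = 4, lbin = [0; 1] and ubin = [],
% the padded bounds are lb = [0; 1; -Inf; -Inf] and ub = [Inf; Inf; Inf; Inf].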
% Check feasibility of bounds
len = min(lenlb,lenub);
if any( lb( (1:len)' ) > ub( (1:len)' ) )
count = full(sum(lb>ub));
if count == 1
msg=sprintf(['\nExiting due to infeasibility: %i lower bound exceeds the' ...
' corresponding upper bound.\n'],count);
else
msg=sprintf(['\nExiting due to infeasibility: %i lower bounds exceed the' ...
' corresponding upper bounds.\n'],count);
end
end
% check if -inf in ub or inf in lb
if any(eq(ub, -inf))
error('-Inf detected in upper bound: upper bounds must be > -Inf.');
elseif any(eq(lb,inf))
error('+Inf detected in lower bound: lower bounds must be < Inf.');
end
x = xin;