% File: regularization.m
% (Label left over from the web code viewer: "Font size:")
% ==========================================================
%
% Neural Networks A Classroom Approach
% Satish Kumar
% Copyright Tata McGraw Hill, 2004
%
% MATLAB code for gradient descent using cross-entropy
% error function and regularization
% Reference: Table 8.17;Page 336
%
% ==========================================================
% ----------------------------------------------------------
% Stage 1: draw n two-dimensional points from a two-component
% Gaussian mixture. Component 1 supplies class 1, component 2
% supplies class 2.
%
% Left in the workspace for the rest of the script:
%   n                - number of samples
%   mix              - struct describing the mixture model
%   data             - n-by-2 matrix of samples
%   label            - n-by-1 vector holding the component index (1 or 2)
%   ClassSymbol1/2   - plot markers for the two classes
% ----------------------------------------------------------
clear all;

% Figure parameters
AxisShift = 0.05;
ClassSymbol1 = 'ko';
ClassSymbol2 = 'k^';

% Fix the seeds so that every run draws the same samples.
% (Legacy generator syntax kept on purpose: another seeding call would
% produce different data.)
rand('state', 423);
randn('state', 423);

%
% Generate the data
%
n = 200;

% Mixture model: 2-d data, two centres, full covariance matrices
dim = 2;
ncentres = 2;
covar_type = 'full';   % not called "type": that would shadow the built-in TYPE
mix.nin = dim;
mix.ncentres = ncentres;
mix.covar_type = covar_type;

% Generic initialisation: equal priors, random centres, unit covariances.
% All three are overwritten with fixed values below, but the randn call
% must stay: it consumes numbers from the seeded randn stream, so removing
% it would change the samples drawn in the loop further down.
mix.priors = ones(1, mix.ncentres) ./ mix.ncentres;
mix.centres = randn(mix.ncentres, mix.nin);
% Covariance matrices are stacked along the third dimension
mix.covars = repmat(eye(mix.nin), [1 1 mix.ncentres]);
mix.nwts = mix.ncentres + mix.ncentres*mix.nin + mix.ncentres*mix.nin*mix.nin;

% The actual model parameters used by this script
mix.priors = [0.5 0.5];
mix.centres = [1 1; 3 3];
mix.covars(:,:,1) = [0.625 -0.2165; -0.2165 0.875];
mix.covars(:,:,2) = [0.2375 0.1516; 0.1516 0.4125];

% One uniform draw per sample decides which component it comes from
draws = rand(1, n);

% Pre-allocate the outputs
data = zeros(n, mix.nin);
label = zeros(n, 1);

cum_prior = 0;       % cumulative sum of the priors handled so far
total_samples = 0;   % number of rows of data already filled
for i = 1:mix.ncentres
    % Draws falling into [cum_prior, cum_prior + prior_i) belong to component i
    num_samples = sum(draws >= cum_prior & draws < cum_prior + mix.priors(i));

    % Colour unit-variance noise with the eigen-decomposition of the
    % full covariance matrix
    covar = mix.covars(:,:,i);
    d = size(covar, 1);
    % "evals" rather than "eval": a variable called eval would shadow the
    % built-in EVAL for everything that runs after this script
    [evec, evals] = eig(covar);
    coeffs = randn(num_samples, d) * sqrt(evals);

    rows = total_samples+1 : total_samples+num_samples;
    data(rows, :) = repmat(mix.centres(i,:), num_samples, 1) + coeffs*evec';
    label(rows) = i;

    cum_prior = cum_prior + mix.priors(i);
    total_samples = total_samples + num_samples;
end
% ----------------------------------------------------------
% Stage 2: evaluate the mixture model on a regular grid that
% covers the samples, plot the samples together with the
% contour where the posterior of component 1 equals 0.5, and
% prepare data/label for training.
% ----------------------------------------------------------

% Bounding box of the samples
x0 = min(data(:,1));
x1 = max(data(:,1));
y0 = min(data(:,2));
y1 = max(data(:,2));
dx = x1 - x0;
dy = y1 - y0;

% Widen the box by 5 percent of its extent on every side
expand = 5/100;
x0 = x0 - dx*expand;
x1 = x1 + dx*expand;
y0 = y0 - dy*expand;
y1 = y1 + dy*expand;

% Grid spacing: the x extent (before widening) split into "resolution"
% steps; the same spacing is used along y
resolution = 30;
step = dx/resolution;
xrange = x0:step:x1;
yrange = y0:step:y1;

% Generate the grid
[X, Y] = meshgrid(xrange, yrange);

figure(1);

% Every grid node as one row [x y]
x = [X(:) Y(:)];
ndata = size(x, 1);

% a(:,k) is the Gaussian density of component k at every grid node
a = zeros(ndata, mix.ncentres);
normal = (2*pi)^(mix.nin/2);
for k = 1:mix.ncentres
    centred = x - repmat(mix.centres(k,:), ndata, 1);
    % Cholesky factor of the covariance: dividing by it turns the quadratic
    % form into a plain sum of squares, and the product of its diagonal
    % enters the normalising constant
    cfac = chol(mix.covars(:,:,k));
    scaled = centred / cfac;
    a(:,k) = exp(-0.5*sum(scaled.*scaled, 2)) ./ (normal*prod(diag(cfac)));
end

% Prior-weighted sum of the component densities, on the grid
px_j = a;
px = reshape(px_j*(mix.priors)', size(X));

% Posterior of each component: prior times density, normalised per node
post = repmat(mix.priors, ndata, 1) .* a;
s = sum(post, 2);
s(s == 0) = 1;   % a zero sum is replaced by one before dividing
post = post ./ repmat(s, 1, mix.ncentres);
p1_x = reshape(post(:, 1), size(X));
p2_x = reshape(post(:, 2), size(X));

% Samples of both classes plus the 0.5 contour of the class-1 posterior
colormap(hot);
hold on
inClass1 = (label == 1);
inClass2 = (label == 2);
plot(data(inClass1,1), data(inClass1,2), ClassSymbol1)
plot(data(inClass2,1), data(inClass2,2), ClassSymbol2)
contour(xrange, yrange, p1_x, [0.5 0.5], 'k');
axis([x0 x1 y0 y1])
set(gca, 'Box', 'On')
title('The Sampled Data');

% Prepare for training: prepend a constant-1 column to the inputs and
% shift the labels from {1,2} to {0,1}
data = [ones(n,1) data];
label = label - 1;
% ----------------------------------------------------------
% Stage 3: batch gradient descent on the cross-entropy error
% of a single sigmoid unit, repeated for several values of the
% regularization coefficient alpha.
%
% Reads from the workspace:
%   data  - inputs with a leading column of ones (one row per sample)
%   label - targets in {0,1}
%   X, Y  - grid on which the trained unit is evaluated
%
% Figure 2 gets one row of three plots per alpha: the sigmoid
% output as a mesh, the same output as a contour plot, and the
% trajectory of the two input weights during training.
%
% Leaves w1..w4 in the workspace: the (maxepoch+1)-by-3 weight
% history for alpha = 0, 0.01, 0.1 and 1 respectively.
% ----------------------------------------------------------
figure(2);

eta = .01;                  % learning rate
maxepoch = 10000;           % number of weight updates per run
alphas = [0 0.01 0.1 1];    % one regularization coefficient per plot row
w0 = [.1 .1 .1];            % initial weights [bias, x-weight, y-weight]

nruns = numel(alphas);
history = cell(1, nruns);   % weight history of every run
biasin = ones(size(X));

for r = 1:nruns
    alpha = alphas(r);

    % Preallocate the whole history instead of growing it one row per epoch
    w = zeros(maxepoch+1, numel(w0));
    w(1,:) = w0;
    for i = 1:maxepoch
        y = data * w(i,:)';             % net input of every sample
        s = 1./(1 + exp(-y));           % sigmoid output
        e = label - s;                  % target minus output
        g = -data' * e;                 % gradient summed over all samples
        % Gradient step with the extra alpha*w regularization term
        w(i+1,:) = w(i,:) - eta * (g' + alpha * w(i,:));
    end
    history{r} = w;

    % Evaluate the trained unit on the grid. The result of the last update
    % is stored in row maxepoch+1, i.e. the last row; row maxepoch would
    % miss the final step.
    wfinal = w(end,:);
    activation = biasin.*wfinal(1) + X.*wfinal(2) + Y.*wfinal(3);
    signal = 1./(1 + exp(-activation));

    firstcell = 3*(r-1) + 1;   % index of the first subplot in row r
    subplot(nruns, 3, firstcell);
    mesh(X, Y, signal);
    view(-22, 44);
    subplot(nruns, 3, firstcell + 1);
    contour(X, Y, signal);
    subplot(nruns, 3, firstcell + 2);
    plot(w(:,2), w(:,3));
end

% Keep the per-run histories under their original names
w1 = history{1};
w2 = history{2};
w3 = history{3};
w4 = history{4};
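% ----------------------------------------------------------
% Keyboard shortcuts of the web code viewer this listing was
% copied from (not part of the program):
%   Copy code            Ctrl + C
%   Search code          Ctrl + F
%   Full-screen mode     F11
%   Switch theme         Ctrl + Shift + D
%   Show shortcuts       ?
%   Increase font size   Ctrl + =
%   Decrease font size   Ctrl + -
% ----------------------------------------------------------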