% nnarmax2.m
function [W1,W2,PI_vector,iteration,lambda]=nnarmax2(NetDef,NN,W1,W2,trparms,skip,Y,U)
% NNARMAX2
% --------
% [W1,W2,critvec,iteration,lambda]=...
% nnarmax2(NetDef,NN,W1,W2,trparms,skip,Y,U)
% Determines a nonlinear ARMAX model
% y(t)=f(y(t-1),...,u(t-k),...,e(t-1),...)
% of a dynamic system by training a two layer neural network with
% the Marquardt method. The function can handle multi-input
% systems (MISO).
%
% INPUTS:
% U : Input signal (= control signal) (left out in the nnarma case)
% dim(U) = [(inputs) * (# of data)]
% Y : Output signal. dim(Y) = [1 * # of data]
% NN : NN=[na nb nc nk].
% na = # of past outputs used for determining the prediction
% nb = # of past inputs
% nc = # of past residuals (= order of C)
% nk = time delay (usually 1)
% For multi-input systems nb and nk contain as many columns as
% there are inputs.
% W1,W2 : Input-to-hidden layer and hidden-to-output layer weights.
% If they are passed as [] they are initialized automatically
% trparms : Contains parameters associated with the training (see MARQ)
% if trparms=[] it is reset to trparms = [500 0 1 0]
% skip : Don't use the first 'skip' samples for training in order to
% reduce the influence from the transient occuring because of the
% unknown initial prediction error and gradient. If skip=[]
% it is reset to skip=0.
%
% For time series (NNARMA models) NN=[na nc] only.
%
%
% NB NB NB!
% ---------
% See the function MARQ for an explanation of the remaining input arguments
% as well as of the returned variables.
%
% NOTE(review): this function depends on toolbox helpers not visible here
% (nnarx for weight initialization, pmntanh — presumably a fast tanh;
% confirm against the NNSYSID toolbox).
%
% Programmed by : Magnus Norgaard, IAU/IMM
% LastEditDate : July 16, 1996
%----------------------------------------------------------------------------------
%-------------- NETWORK INITIALIZATIONS -------------
%----------------------------------------------------------------------------------
if isempty(skip),
skip=0;
end
skip=skip+1;                          % Convert "samples to skip" to a 1-based start index
Ndat = length(Y); % # of data
na = NN(1);
if length(NN)==2 % nnarma model (time series: no input signal U)
nc = NN(2);
nb = 0;
nk = 0;
nu = 0;
else % nnarmax model
[nu,Ndat]= size(U);                   % nu = # of inputs; Ndat taken from U's columns
nb = NN(2:1+nu);                      % One nb per input
nc = NN(2+nu);
nk = NN(2+nu+1:2+2*nu);               % One time delay per input
end
nmax = max([na,nb+nk-1,nc]); % 'Oldest' signal used as input to the model
N = Ndat - nmax; % Size of training set
N2 = N-skip+1;                        % Effective # of samples after skipping transient
nab = na+sum(nb); % na+nb
nabc = nab+nc; % na+nb+nc
hidden = length(NetDef(1,:)); % Number of hidden neurons
inputs = nabc; % Number of inputs to the network
outputs = 1; % Only one output
L_hidden = find(NetDef(1,:)=='L')'; % Location of linear hidden neurons
H_hidden = find(NetDef(1,:)=='H')'; % Location of tanh hidden neurons
L_output = find(NetDef(2,:)=='L')'; % Location of linear output neurons
H_output = find(NetDef(2,:)=='H')'; % Location of tanh output neurons
y1 = zeros(hidden,N); % Hidden layer outputs
y1 = [y1;ones(1,N)]; % Augmented with a bias row of ones
y2 = zeros(outputs,N); % Network output
E = zeros(outputs,N); % Initialize prediction error vector
E_new = zeros(outputs,N); % Initialize prediction error vector
index = outputs*(hidden+1) + 1 + [0:hidden-1]*(inputs+1); % Start index of each hidden
                                      % neuron's weights within the stacked theta vector
index2 = (0:N-1)*outputs; % Column offsets into RHO for each time step
iteration= 1; % Counter variable
dw = 1; % Flag telling that the weights are new
parameters1= hidden*(inputs+1); % # of input-to-hidden weights
parameters2= outputs*(hidden+1); % # of hidden-to-output weights
parameters=parameters1 + parameters2; % Total # of weights
ones_h = ones(hidden+1,1); % A vector of ones
ones_i = ones(inputs+1,1); % Another vector of ones
if isempty(W1) | isempty(W2), % Initialize weights if nescessary
% Bootstrap: train a plain NNARX model (100 iterations) to get the
% deterministic part of the weights, then append small random weights
% for the nc residual inputs (the moving-average part).
if nb==0,
[W1,W2]=nnarx(NetDef,na,[],[],[100 trparms(2:length(trparms))],Y);
else
[W1,W2]=nnarx(NetDef,[na nb nk],[],[],[100 trparms(2:length(trparms))],Y,U);
end
W1=[W1(:,1:nab),0.05*rand(hidden,nc)-0.025, W1(:,nab+1)];
end
% Parameter vector containing all weights
% (hidden-to-output weights first, then input-to-hidden, both row-major)
theta = [reshape(W2',parameters2,1) ; reshape(W1',parameters1,1)];
theta_index = find(theta); % Index to weights<>0
theta_red = theta(theta_index); % Reduced parameter vector
reduced = length(theta_index); % The # of parameters in theta_red
index3 = 1:(reduced+1):(reduced^2); % Linear indices of the diagonal of a
                                      % reduced-by-reduced matrix (used on the Hessian)
dy2de = zeros(nc,N); % Der. of outputs wrt. the past residuals
index4 = nab+1:nabc; % Columns of PHI holding the past residuals
PSI_red = zeros(reduced,N); % Deriv. of output w.r.t. each weight
RHO = zeros(parameters,N); % Partial -"- -"-
if isempty(trparms), % Default training parameters
max_iter = 500;
stop_crit = 0;
lambda = 1;
D = 0;
else % User specified values
max_iter = trparms(1);
stop_crit = trparms(2);
lambda = trparms(3);
if length(trparms)==4, % Scalar weight decay parameter
D = trparms(4*ones(1,reduced))';      % Replicate trparms(4) for every free weight
elseif length(trparms)==5, % Two weight decay parameters
D = trparms([4*ones(1,parameters2+nc) 5*ones(1,parameters1)])';
D = D(theta_index);
elseif length(trparms)>5, % Individual weight decay
D = trparms(4:length(trparms))';
end
end
PI_vector = zeros(max_iter,1); % A vector containing the accumulated SSE
% >>>>>>>>>>>>>>>>>>>> CONSTRUCT THE REGRESSION MATRIX PHI <<<<<<<<<<<<<<<<<<<<<
% Row layout: [na past outputs; nb past inputs per channel; nc past residuals].
% The residual rows (index4) start at zero and are filled in during the
% forward passes below as prediction errors become available.
PHI = zeros(nabc,N);
jj = nmax+1:Ndat;
for k = 1:na, PHI(k,:) = Y(jj-k); end
index5 = na;
for kk = 1:nu,
for k = 1:nb(kk), PHI(k+index5,:) = U(kk,jj-k-nk(kk)+1); end
index5 = index5 + nb(kk);
end
PHI_aug = [PHI;ones(1,N)]; % Augment PHI with a row containg ones
Y = Y(nmax+1:Ndat); % Extract the 'target' part of Y
%----------------------------------------------------------------------------------
%-------------- TRAIN NETWORK -------------
%----------------------------------------------------------------------------------
clc;
c=fix(clock);
fprintf('Network training started at %2i.%2i.%2i\n\n',c(4),c(5),c(6));
% >>>>>>>>>>>>>>>>>>>>> COMPUTE NETWORK OUTPUT y2(theta) <<<<<<<<<<<<<<<<<<<<<<
% Sequential forward pass: each prediction error E(t) is immediately written
% into the residual regressors of the next nc time steps (the MA feedback),
% so the loop order over t is essential.
for t=1:N,
h1 = W1*PHI_aug(:,t);
y1(H_hidden,t) = pmntanh(h1(H_hidden));
y1(L_hidden,t) = h1(L_hidden);
h2 = W2*y1(:,t);
y2(H_output,t) = pmntanh(h2(H_output,:));
y2(L_output,t) = h2(L_output,:);
E(:,t) = Y(:,t) - y2(:,t); % Prediction error
for d=1:min(nc,N-t),
PHI_aug(nab+d,t+d) = E(:,t);          % Residual e(t) becomes regressor at time t+d
end
end
SSE = E(skip:N)*E(skip:N)'; % Sum of squared errors (SSE)
PI = (SSE+theta_red'*(D.*theta_red))/(2*N2); % Performance index
while iteration<=max_iter
if dw==1,                             % Recompute gradient/Hessian only after an
                                      % accepted step (weights actually changed)
% >>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE THE RHO MATRIX <<<<<<<<<<<<<<<<<<<<<<<<<<
% Partial derivative of output (y2) with respect to each weight and neglecting
% that the model inputs (the residuals) depends on the weights
% ========== Elements corresponding to the linear output units ============
for i = L_output'
index1 = (i-1) * (hidden + 1) + 1;
% -- The part of RHO corresponding to hidden-to-output layer weights --
RHO(index1:index1+hidden,index2+i) = y1;
% ---------------------------------------------------------------------
% -- The part of RHO corresponding to input-to-hidden layer weights ---
for j = L_hidden',
RHO(index(j):index(j)+inputs,index2+i) = W2(i,j)*PHI_aug;
end
for j = H_hidden',
tmp = W2(i,j)*(1-y1(j,:).*y1(j,:));   % Chain rule: tanh'(x) = 1 - tanh(x)^2
RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*PHI_aug;
end
% ---------------------------------------------------------------------
end
% ============ Elements corresponding to the tanh output units =============
for i = H_output',
index1 = (i-1) * (hidden + 1) + 1;
% -- The part of RHO corresponding to hidden-to-output layer weights --
tmp = 1 - y2(i,:).*y2(i,:);
RHO(index1:index1+hidden,index2+i) = y1.*tmp(ones_h,:);
% ---------------------------------------------------------------------
% -- The part of RHO corresponding to input-to-hidden layer weights ---
for j = L_hidden',
tmp = W2(i,j)*(1-y2(i,:).*y2(i,:));
RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).* PHI_aug;
end
for j = H_hidden',
tmp = W2(i,j)*(1-y1(j,:).*y1(j,:));
tmp2 = (1-y2(i,:).*y2(i,:));
RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:)...
.*tmp2(ones_i,:).* PHI_aug;
end
% ---------------------------------------------------------------------
end
RHO_red = RHO(theta_index(1:reduced),:); % Keep only rows of nonzero (free) weights
% >>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE THE PSI MATRIX <<<<<<<<<<<<<<<<<<<<<<<<<<
% ---------- Find derivative of output wrt. the past residuals ----------
for t=1:N,
dy2dy1 = W2(:,1:hidden);              % d(output)/d(hidden activations), before nonlinearity
for j = H_output',
dy2dy1(j,:) = W2(j,1:hidden)*(1-y2(j,t).*y2(j,t));
end
% Matrix with partial derivatives of the output from each hidden neurons with
% respect to each input:
dy1de = W1(:,index4);                 % Only w.r.t. the residual inputs
for j = H_hidden',
dy1de(j,:) = W1(j,index4)*(1-y1(j,t).*y1(j,t));
end
% Matrix with partial derivative of each output with respect to each input
dy2de(:,t)= (dy2dy1 * dy1de)';
end
% ---------- Determine PSI by "filtering" ----------
% The true gradient PSI must account for the residuals depending on earlier
% predictions; this recursion filters RHO through the (time-varying) inverse
% of the C-polynomial: PSI(t) = RHO(t) - sum_k dy2de(k,t)*PSI(t-k).
for t=1:N,
PSI_red(:,t)=RHO_red(:,t);
for t1=1:min(nc,t-1),
PSI_red(:,t) = PSI_red(:,t)-dy2de(t1,t)*PSI_red(:,t-t1);
end
end
% >>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE h_k <<<<<<<<<<<<<<<<<<<<<<<<<<<
% -- Gradient (of PI w.r.t. theta_red, including weight decay) --
G = PSI_red(:,skip:N)*E(skip:N)'-D.*theta_red;
% -- Mean square error part of Hessian (Gauss-Newton approximation) --
R = PSI_red(:,skip:N)*PSI_red(:,skip:N)';
dw = 0;
end
% -- Hessian --
H = R;
H(index3) = H(index3)'+lambda+D; % Add lambda+D to the diagonal (Marquardt damping)
% -- Search direction --
h = H\G; % Solve for search direction
% -- Compute 'apriori' iterate --
theta_red_new = theta_red + h; % Update parameter vector
theta(theta_index) = theta_red_new;
% -- Put the parameters back into the weight matrices --
W1_new = reshape(theta(parameters2+1:parameters),inputs+1,hidden)';
W2_new = reshape(theta(1:parameters2),hidden+1,outputs)';
% >>>>>>>>>>>>>>>>>>>> COMPUTE NETWORK OUTPUT y2(theta+h) <<<<<<<<<<<<<<<<<<<<
% Trial forward pass with the candidate weights (same residual feedback
% structure as the initial pass above).
for t=1:N,
h1 = W1_new*PHI_aug(:,t);
y1(H_hidden,t) = pmntanh(h1(H_hidden));
y1(L_hidden,t) = h1(L_hidden);
h2 = W2_new*y1(:,t);
y2(H_output,t) = pmntanh(h2(H_output,:));
y2(L_output,t) = h2(L_output,:);
E_new(:,t) = Y(:,t) - y2(:,t); % Prediction error
for d=1:min(nc,N-t),
PHI_aug(nab+d,t+d) = E_new(:,t);
end
end
SSE_new = E_new(skip:N)*E_new(skip:N)';% Sum of squared errors (SSE)
PI_new = (SSE_new + theta_red_new'*(D.*theta_red_new))/(2*N2); % PI
% >>>>>>>>>>>>>>>>>>>>>>>>>>> UPDATE lambda <<<<<<<<<<<<<<<<<<<<<<<<<<<<
% L is the reduction in PI predicted by the local quadratic model; the ratio
% of actual to predicted reduction drives the trust-region-style lambda update.
L = h'*G + h'*(h.*(D+lambda));
% Decrease lambda if SSE has fallen 'sufficiently'
if 2*N2*(PI - PI_new) > (0.75*L),
lambda = lambda/2;
% Increase lambda if SSE has grown 'sufficiently'
elseif 2*N2*(PI-PI_new) <= (0.25*L),
lambda = 2*lambda;
end
% >>>>>>>>>>>>>>>>>>>> UPDATES FOR NEXT ITERATION <<<<<<<<<<<<<<<<<<<<
% Update only if criterion has decreased (otherwise retry with larger lambda)
if PI_new < PI,
W1 = W1_new;
W2 = W2_new;
theta_red = theta_red_new;
E = E_new;
PI = PI_new;
dw = 1;
iteration = iteration + 1;
PI_vector(iteration-1) = PI; % Collect PI in vector
fprintf('iteration # %i PI = %4.3e\r',iteration-1,PI); % Print on-line inform
end
% Check if stop condition is fulfilled
if (PI < stop_crit) | (lambda>1e7), break, end
end
%----------------------------------------------------------------------------------
%-------------- END OF NETWORK TRAINING -------------
%----------------------------------------------------------------------------------
PI_vector = PI_vector(1:iteration-1); % Trim to the # of accepted iterations
c=fix(clock);
fprintf('\n\nNetwork training ended at %2i.%2i.%2i\n',c(4),c(5),c(6));
% [End of nnarmax2.m]