function [W1,W2,Chat,PI_vector,iteration,lambda]=nnarmax1(NetDef,NN,W1,W2,Chat,...
                                                                   trparms,skip,Y,U)
%  NNARMAX1
%  --------
%          [W1,W2,Chat,critvec,iteration,lambda]=...
%                              nnarmax1(NetDef,NN,W1,W2,Chat,trparms,skip,Y,U)
%          Determines a nonlinear ARMAX model of a dynamic system by training
%          a two-layer neural network with the Marquardt method. The function
%          can handle multi-input systems (MISO). It is assumed that the noise
%          is filtered by a linear MA-filter:  y(t) = f(Y(t-1),U(t-1)) + C(q)e(t)
%
%  INPUT:
%  U       : Input signal (= control signal) (omitted in the NNARMA case)
%            dim(U) = [(# of inputs) * (# of data)]
%  Y       : Output signal. dim(Y) = [1 * # of data]
%  NN      : NN=[na nb nc nk].
%            na = # of past outputs used for determining the prediction
%            nb = # of past inputs
%            nc = # of past residuals (= order of C)
%            nk = time delay (usually 1)
%            For multi-input systems, nb and nk contain as many columns as
%            there are inputs.
%  W1,W2   : Input-to-hidden layer and hidden-to-output layer weights.
%            If they are passed as [], they are initialized automatically
%  Chat    : Initial MA-filter estimate (initialized automatically if Chat=[])
%  trparms : Contains parameters associated with the training (see MARQ).
%            If trparms=[], it is set to trparms = [500 0 1 0]
%  skip    : Do not use the first 'skip' samples for training, in order to
%            reduce the influence of the transient occurring because of
%            unknown initial prediction errors and gradients. If skip=[]
%            it is reset to skip=0.
%
%           For time series (NNARMA models) NN=[na nc] only.
% 
%
%  NB NB NB! 
%  --------- 
%  See the function MARQ for an explanation of the remaining input arguments
%  as well as of the returned variables.
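%
%  EXAMPLE (a minimal sketch; Y and U are hypothetical data of matching length):
%          NetDef  = ['HHHHH';'L----'];  % 5 tanh hidden units, 1 linear output
%          NN      = [2 2 1 1];          % na=2, nb=2, nc=1, nk=1
%          trparms = [500 0 1 0];        % max. 500 iterations, no weight decay
%          [W1,W2,Chat,PI_vec] = nnarmax1(NetDef,NN,[],[],[],trparms,[],Y,U);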
                                                                                 
%  Programmed by : Magnus Norgaard, IAU/IMM
%  LastEditDate  : July 16, 1996

%----------------------------------------------------------------------------------
%--------------             NETWORK INITIALIZATIONS                   -------------
%----------------------------------------------------------------------------------
if isempty(skip),
  skip=0;
end
skip=skip+1;
Ndat     = length(Y);                   % # of data
na = NN(1);
if length(NN)==2                        % nnarma model
  nc     = NN(2);
  nb     = 0;
  nk     = 0;
  nu     = 0;
else                                    % nnarmax model
  [nu,Ndat]= size(U); 
  nb     = NN(2:1+nu);
  nc     = NN(2+nu);
  nk     = NN(2+nu+1:2+2*nu);
end
nmax     = max([na,nb+nk-1,nc]);        % 'Oldest' signal used as input to the model
N        = Ndat - nmax;                 % Size of training set
N2 = N-skip+1;
nab      = na+sum(nb);                  % na+nb
nabc     = nab+nc;                      % na+nb+nc
hidden   = length(NetDef(1,:));         % Number of hidden neurons
inputs   = nab;                         % Number of inputs to the network
outputs  = 1;                           % Only one output 
L_hidden = find(NetDef(1,:)=='L')';     % Location of linear hidden neurons
H_hidden = find(NetDef(1,:)=='H')';     % Location of tanh hidden neurons
L_output = find(NetDef(2,:)=='L')';     % Location of linear output neurons
H_output = find(NetDef(2,:)=='H')';     % Location of tanh output neurons
y1       = [zeros(hidden,N);ones(1,N)]; % Hidden layer outputs
y2       = zeros(outputs,N);            % Network output
index = outputs*(hidden+1) + 1 + [0:hidden-1]*(inputs+1); % A useful vector!
index2 = (0:N-1)*outputs;               % Yet another useful vector
iteration= 1;                           % Counter variable
dw       = 1;                           % Flag telling that the weights are new
parameters1= hidden*(inputs+1);         % # of input-to-hidden weights
parameters2= outputs*(hidden+1);        % # of hidden-to-output weights
parameters12=parameters1 + parameters2; % Total # of weights
parameters = parameters12+nc;           % Total # of parameters
ones_h   = ones(hidden+1,1);            % A vector of ones
ones_i   = ones(inputs+1,1);            % Another vector of ones
if isempty(W1) | isempty(W2),           % Initialize weights if necessary
  if nb==0,
    [W1,W2]=nnarx(NetDef,na,[],[],[200 trparms(2:length(trparms))],Y);
  else
    [W1,W2]=nnarx(NetDef,[na nb nk],[],[],[200 trparms(2:length(trparms))],Y,U);
  end                            
end
if isempty(Chat),                       % Initialize a stable Chat if necessary
  while sum(abs(roots(Chat))<1)<nc
    Chat = [1 rand(1,nc)-0.5];
  end
end
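% NOTE: a stable Chat (all roots of C inside the unit circle) is required so
% that the inverse filtering with 1/C performed below does not diverge.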
                                        % Parameter vector containing all weights
theta = [reshape(W2',parameters2,1) ; reshape(W1',parameters1,1) ; Chat(2:nc+1)'];
theta_index = find(theta);              % Index to nonzero weights
theta_red = theta(theta_index);         % Reduced parameter vector
reduced  = length(theta_index);         % The # of parameters in theta_red
index3   = 1:(reduced+1):(reduced^2);   % A third useful vector
PSI_red  = zeros(reduced,N);            % Deriv. of prediction w.r.t. each weight
RHO      = zeros(parameters12,N);       % Partial deriv. of output w.r.t. each weight
RHO2     = zeros(nc,N);                 % Partial deriv. of output w.r.t. each C-par.
if isempty(trparms),                    % Default training parameters
  max_iter  = 500;
  stop_crit = 0;
  lambda    = 1;
  D         = 0;
else                                    % User specified values
  max_iter  = trparms(1);
  stop_crit = trparms(2);
  lambda    = trparms(3);
  if length(trparms)==4,                % Scalar weight decay parameter
    D = trparms(4*ones(1,reduced))';      
  elseif length(trparms)==5,            % Two weight decay parameters
    D = trparms([4*ones(1,parameters2+nc) 5*ones(1,parameters1)])';
    D = D(theta_index);
  elseif length(trparms)>5,             % Individual weight decay
    D = trparms(4:length(trparms))';
  end
end
PI_vector = zeros(max_iter,1);          % A vector containing the accumulated SSE


% >>>>>>>>>>>>>>>>>>>>  CONSTRUCT THE REGRESSION MATRIX PHI   <<<<<<<<<<<<<<<<<<<<<
PHI = zeros(nab,N);
jj  = nmax+1:Ndat;
for k = 1:na, PHI(k,:)    = Y(jj-k); end
index4 = na;
for kk = 1:nu,
  for k = 1:nb(kk), PHI(k+index4,:) = U(kk,jj-k-nk(kk)+1); end
  index4 = index4 + nb(kk);
end
PHI_aug = [PHI;ones(1,N)];              % Augment PHI with a row containing ones
Y       = Y(nmax+1:Ndat);               % Extract the 'target' part of Y
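% Each column of PHI holds the regression vector for one time step. As a
% sketch for a single input with nk=1:
%   PHI(:,t) = [y(t-1); ... ; y(t-na); u(t-1); ... ; u(t-nb)]
% Past residuals (the MA part) enter through the 1/Chat filtering below.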


%----------------------------------------------------------------------------------
%--------------                   TRAIN NETWORK                       -------------
%----------------------------------------------------------------------------------
clc;
c=fix(clock);
fprintf('Network training started at %2i.%2i.%2i\n\n',c(4),c(5),c(6));


% >>>>>>>>>>>>>>>>>>>>>  COMPUTE NETWORK OUTPUT  y2(theta)   <<<<<<<<<<<<<<<<<<<<<<
h1 = W1*PHI_aug;  
y1(H_hidden,:) = pmntanh(h1(H_hidden,:));
y1(L_hidden,:) = h1(L_hidden,:);

h2 = W2*y1;
y2(H_output,:) = pmntanh(h2(H_output,:));
y2(L_output,:) = h2(L_output,:);

Ebar     = Y - y2;                      % Error between Y and deterministic part
E        = filter(1,Chat,Ebar);         % Prediction error
SSE      = E(skip:N)*E(skip:N)';        % Sum of squared errors (SSE)
PI       = (SSE+theta_red'*(D.*theta_red))/(2*N2); % Performance index
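% That is, PI = ( sum_{t=skip..N} e(t)^2 + theta_red'*diag(D)*theta_red )/(2*N2),
% the regularized sum-of-squares criterion the Marquardt iterations minimize.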


while iteration<=max_iter    
if dw==1,
% >>>>>>>>>>>>>>>>>>>>>>>>>>>   COMPUTE THE RHO MATRIX   <<<<<<<<<<<<<<<<<<<<<<<<<<
% Partial derivative of output (y2) with respect to each weight, neglecting
% that the model inputs (the residuals) depend on the weights

    % ==========   Elements corresponding to the linear output units   ============
    for i = L_output'
      index1 = (i-1) * (hidden + 1) + 1;

      % -- The part of RHO corresponding to hidden-to-output layer weights --
      RHO(index1:index1+hidden,index2+i) = y1;
      % ---------------------------------------------------------------------
 
      % -- The part of RHO corresponding to input-to-hidden layer weights ---
      for j = L_hidden',
        RHO(index(j):index(j)+inputs,index2+i) = W2(i,j)*PHI_aug;
      end
     
      for j = H_hidden',
        tmp = W2(i,j)*(1-y1(j,:).*y1(j,:)); 
        RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*PHI_aug;
      end 
      % ---------------------------------------------------------------------    
    end

    % ============  Elements corresponding to the tanh output units   =============
    for i = H_output',
      index1 = (i-1) * (hidden + 1) + 1;

      % -- The part of RHO corresponding to hidden-to-output layer weights --
      tmp = 1 - y2(i,:).*y2(i,:);
      RHO(index1:index1+hidden,index2+i) = y1.*tmp(ones_h,:);
      % ---------------------------------------------------------------------
         
      % -- The part of RHO corresponding to input-to-hidden layer weights ---
      for j = L_hidden',
        tmp = W2(i,j)*(1-y2(i,:).*y2(i,:));
        RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).* PHI_aug;
      end
      
      for j = H_hidden',
        tmp  = W2(i,j)*(1-y1(j,:).*y1(j,:));
        tmp2 = (1-y2(i,:).*y2(i,:));
        RHO(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:)...
                                                  .*tmp2(ones_i,:).* PHI_aug;
      end
      % ---------------------------------------------------------------------
    end
    RHO_red = RHO(theta_index(1:reduced-nc),:);


    % Partial deriv. of output wrt. each C-par.  
    for i=1:nc,
      RHO2(i,nmax+i-1:N) = E(1:N-i-nmax+2);
    end
  
  

% >>>>>>>>>>>>>>>>>>>>>>>>>>>   COMPUTE THE PSI MATRIX   <<<<<<<<<<<<<<<<<<<<<<<<<<
    for i=1:reduced-nc,
      PSI_red(i,:) = filter(1,Chat,RHO_red(i,:));
    end
    for i=1:nc,
      PSI_red(reduced-nc+i,:) = filter(1,Chat,RHO2(i,:));
    end
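    % Since e(t) = (y(t) - f(.))/C(q), differentiating through the inverse
    % filter turns each partial derivative into a 1/C(q)-filtered version of
    % itself, which is what the filter(1,Chat,...) calls above compute.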
  
   
% >>>>>>>>>>>>>>>>>>>>>>>>>>>        COMPUTE h_k        <<<<<<<<<<<<<<<<<<<<<<<<<<<
    % -- Gradient --
    G = PSI_red(:,skip:N)*E(skip:N)'-D.*theta_red;
    
    % -- Hessian  --
    R = PSI_red(:,skip:N)*PSI_red(:,skip:N)';
    dw = 0;
  end
  
  % -- Hessian  --
  H = R;
  H(index3) = H(index3)'+lambda+D;                  % Add diagonal matrix

  % -- Search direction --
  h = H\G;                                          % Solve for search direction
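  % Levenberg-Marquardt step: h solves (R + diag(lambda + D)) * h = G, i.e. a
  % Gauss-Newton step that is damped towards gradient descent as lambda grows.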

  % -- Compute 'apriori' iterate --
  theta_red_new = theta_red + h;                    % Update parameter vector
  theta(theta_index) = theta_red_new;

  % -- Put the parameters back into the weight matrices and Chat --
  W1_new = reshape(theta(parameters2+1:parameters12),inputs+1,hidden)';
  W2_new = reshape(theta(1:parameters2),hidden+1,outputs)';
  Chat   = [1 theta(parameters12+1:parameters)'];
  croots = roots(Chat);
  for i=1:length(croots),
    if abs(croots(i))>1, croots(i)=1/croots(i); end
  end
  Chat=real(poly(croots));
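  % Roots of C outside the unit circle are reflected inside (r -> 1/r). This
  % keeps the inverse filter 1/C stable for the next iteration while
  % preserving the shape of C's amplitude response up to a gain factor.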
  theta(parameters-nc+1:parameters)=Chat(2:nc+1)';
  theta_red_new = theta(theta_index);
  
    
% >>>>>>>>>>>>>>>>>>>>   COMPUTE NETWORK OUTPUT  y2(theta+h)   <<<<<<<<<<<<<<<<<<<<
  h1 = W1_new*PHI_aug;  
  y1(H_hidden,:) = pmntanh(h1(H_hidden,:));
  y1(L_hidden,:) = h1(L_hidden,:);
    
  h2 = W2_new*y1;
  y2(H_output,:) = pmntanh(h2(H_output,:));
  y2(L_output,:) = h2(L_output,:);
  Ebar           = Y - y2;                % Error between Y and deterministic part
  E_new    = filter(1,Chat,Ebar);         % Prediction error
  SSE_new  = E_new(skip:N)*E_new(skip:N)';% Sum of squared errors (SSE)
  PI_new   = (SSE_new + theta_red_new'*(D.*theta_red_new))/(2*N2); % PI
    

% >>>>>>>>>>>>>>>>>>>>>>>>>>>       UPDATE  lambda     <<<<<<<<<<<<<<<<<<<<<<<<<<<<
  L = h'*G + h'*(h.*(D+lambda));

  % Decrease lambda if SSE has fallen 'sufficiently'
  if 2*N2*(PI - PI_new) > (0.75*L),
    lambda = lambda/2;
  
  % Increase lambda if SSE has grown 'sufficiently'
  elseif 2*N2*(PI-PI_new) <= (0.25*L),
    lambda = 2*lambda;
  end
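  % Trust-region style update: the ratio between the actual reduction
  % 2*N2*(PI - PI_new) and the reduction L predicted by the local quadratic
  % model controls lambda; good agreement (> 0.75*L) halves lambda, poor
  % agreement (<= 0.25*L) doubles it.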


% >>>>>>>>>>>>>>>>>>>>       UPDATES FOR NEXT ITERATION        <<<<<<<<<<<<<<<<<<<<
  % Update only if criterion has decreased
  if PI_new < PI,                      
    W1 = W1_new;
    W2 = W2_new;
    theta_red = theta_red_new;
    E = E_new;
    PI = PI_new;
    dw = 1;
    iteration = iteration + 1;
    PI_vector(iteration-1) = PI;                             % Collect PI in vector
    fprintf('iteration # %i   PI = %4.3e\r',iteration-1,PI); % Print on-line info
  end

  % Check if stop condition is fulfilled
  if (PI < stop_crit) | (lambda>1e7), break, end             
end
%----------------------------------------------------------------------------------
%--------------              END OF NETWORK TRAINING                  -------------
%----------------------------------------------------------------------------------
PI_vector = PI_vector(1:iteration-1);
c=fix(clock);
fprintf('\n\nNetwork training ended at %2i.%2i.%2i\n',c(4),c(5),c(6));
