function [W1,W2,PI_vector,iter]=rpe(NetDef,W1,W2,PHI,Y,trparms,method)
% RPE
% ---
% Train a two-layer neural network with a recursive prediction error
% algorithm ("recursive Gauss-Newton"). Pruned (i.e., not fully
% connected) networks can also be trained.
%
% The activation functions can be either linear or tanh. The network
% architecture is defined by the matrix 'NetDef', which consists of two
% rows. The first row specifies the hidden layer and the second
% specifies the output layer.
%
% E.g.:    NetDef = ['LHHHH'
%                    'LL---']
%
% (L = linear, H = tanh)
%
% A weight is pruned by setting it to zero.
%
% The algorithm is described in:
% L. Ljung: "System Identification - Theory for the User"
% (Prentice-Hall, 1987)
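%
% In outline (a sketch of the recursion as implemented below; see the
% reference above for the derivation):
%
%   e(t)     = y(t) - yhat(t)                (prediction error)
%   theta(t) = theta(t-1) + K(t)*e(t)        (parameter update)
%
% where theta collects all weights, psi(t) = d yhat/d theta, and the
% gain K(t) and covariance matrix P(t) are computed from psi(t)
% according to the chosen method ('ff', 'ct', or 'efra').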
%
% Notice that the bias is included as the last column
% in the weight matrices.
%
% CALL:
% [W1,W2,PI_vector,iter] = rpe(NetDef,W1,W2,PHI,Y,trparms,method)
%
% INPUT:
% NetDef  : Network definition
% W1      : Input-to-hidden layer weights. The matrix dimension is
%           dim(W1) = [(# of hidden units) x (# of inputs + 1)]
%           (the +1 is due to the bias).
% W2      : Hidden-to-output layer weights.
%           dim(W2) = [(# of outputs) x (# of hidden units + 1)]
% PHI     : Input data. dim(PHI) = [(# of inputs) x (# of data)]
% Y       : Output data. dim(Y) = [(# of outputs) x (# of data)]
% trparms : Vector of parameters associated with the training
% method  : Training (=estimation) method ('ff', 'ct', or 'efra')
%
%           method = 'ff' (forgetting factor):
%             trparms = [max_iter stop_crit p0 lambda]
%           method = 'ct' (constant trace):
%             trparms = [max_iter stop_crit alpha_max alpha_min]
%           method = 'efra' (exponential forgetting and resetting algorithm):
%             trparms = [max_iter stop_crit alpha beta delta lambda]
%
%           max_iter  : Max. # of iterations
%           stop_crit : Stop training if the criterion falls below this value
%           p0        : The covariance matrix is initialized to p0*I
%           lambda    : Forgetting factor
%           alpha_max : Max. eigenvalue of the P matrix
%           alpha_min : Min. eigenvalue of the P matrix
%           alpha, beta, delta : EFRA parameters
%
%
% OUTPUT:
% W1, W2    : Weight matrices after training
% PI_vector : Vector containing the criterion of fit after each iteration
% iter      : # of iterations executed
%
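% EXAMPLE (a minimal usage sketch, not part of the original documentation;
% the network size and trparms values are illustrative only):
%
%   % Assume PHI is (2 x N) and Y is (1 x N)
%   NetDef  = ['HHHHH';'L----'];      % 5 tanh hidden units, 1 linear output
%   W1 = rand(5,3)-0.5;               % (# hidden units) x (# inputs + 1)
%   W2 = rand(1,6)-0.5;               % (# outputs) x (# hidden units + 1)
%   trparms = [50 0 100 0.995];       % [max_iter stop_crit p0 lambda]
%   [W1,W2,PI_vector,iter] = rpe(NetDef,W1,W2,PHI,Y,trparms,'ff');
%
% The function also requires pmntanh (the toolbox tanh routine) on the path.
%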
% Programmed by : Magnus Norgaard, IAU/IMM, Technical University of Denmark
% LastEditDate : July 17, 1996
%----------------------------------------------------------------------------------
%-------------- NETWORK INITIALIZATIONS -------------
%----------------------------------------------------------------------------------
max_iter = trparms(1);
stop_crit= trparms(2);
[outputs,N] = size(Y); % # of outputs and # of data
[hidden,inputs] = size(W1); % # of hidden units (rows) and inputs+1 (columns)
inputs = inputs-1; % # of inputs
L_hidden = find(NetDef(1,:)=='L')'; % Location of linear hidden neurons
H_hidden = find(NetDef(1,:)=='H')'; % Location of tanh hidden neurons
L_output = find(NetDef(2,:)=='L')'; % Location of linear output neurons
H_output = find(NetDef(2,:)=='H')'; % Location of tanh output neurons
PI_vector = []; % Vector collecting the criterion of fit after each iteration
y1 = zeros(hidden,1); % Hidden layer outputs
y2 = zeros(outputs,1); % Network output
index = outputs*(hidden+1) + 1 + [0:hidden-1]*(inputs+1); % Position in theta of each hidden unit's first weight
PHI_aug = [PHI;ones(1,N)]; % Augment PHI with a row containing ones
parameters1= hidden*(inputs+1); % # of input-to-hidden weights
parameters2= outputs*(hidden+1); % # of hidden-to-output weights
parameters = parameters1 + parameters2; % Total # of weights
PSI = zeros(parameters,outputs); % Deriv. of each output w.r.t. each weight
% Parametervector containing all weights
theta = [reshape(W2',parameters2,1) ; reshape(W1',parameters1,1)];
theta_index = find(theta); % Index to the nonzero (non-pruned) weights
theta_red = theta(theta_index); % Reduced parameter vector
reduced = length(theta_index); % The # of parameters in theta_red
index3 = 1:(reduced+1):(reduced^2); % Linear indices of the diagonal of P
if strcmp(method,'ff'), % Forgetting factor method
  mflag = 1; % Method flag
  lambda = trparms(4); % Forgetting factor
  p0 = trparms(3);
  P = p0 * eye(reduced); % Initialize covariance matrix
elseif strcmp(method,'ct'), % Constant trace method
  mflag = 2; % Method flag
  alpha_max = trparms(3); % Max. eigenvalue
  alpha_min = trparms(4); % Min. eigenvalue
  P = alpha_max * eye(reduced); % Initialize covariance matrix
elseif strcmp(method,'efra'), % EFRA method
  mflag = 3; % Method flag
  alpha = trparms(3); % EFRA parameters
  beta = trparms(4);
  delta = trparms(5);
  lambda = trparms(6);
  gamma = (1-lambda)/lambda;
  % Max. eigenvalue
  maxeig = gamma/(2*delta)*(1+sqrt(1+4*beta*delta/(gamma*gamma)));
  P = maxeig * eye(reduced); % Initialize covariance matrix
  betaI = beta*eye(reduced); % Useful diagonal matrix
end
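% For reference, the covariance updates applied in the training loop below:
%   'ff'  : P <- (P - P*psi*inv(lambda*I + psi'*P*psi)*psi'*P)/lambda
%   'ct'  : as 'ff' with lambda = 1 and no division, followed by a
%           rescaling of P that keeps its trace constant
%   'efra': P <- P/lambda - K*psi'*P + beta*I - delta*P*P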
I = eye(outputs); % (outputs x outputs) identity matrix
if mflag == 1, lambdaI = lambda*I; end % Only the 'ff' method uses lambdaI
%----------------------------------------------------------------------------------
%------------- TRAIN NETWORK -------------
%----------------------------------------------------------------------------------
clc;
c=fix(clock);
fprintf('Network training started at %2i.%2i.%2i\n\n',c(4),c(5),c(6));
for iteration=1:max_iter,
  SSE=0;
  for t=1:N,
% >>>>>>>>>>>>>>>>>>>>>>>>  COMPUTE NETWORK OUTPUT y2(theta)  <<<<<<<<<<<<<<<<<<<<<<
    h1 = W1(:,1:inputs)*PHI(:,t) + W1(:,inputs+1);
    y1(H_hidden) = pmntanh(h1(H_hidden));
    y1(L_hidden) = h1(L_hidden);
    h2 = W2(:,1:hidden)*y1 + W2(:,hidden+1);
    y2(H_output) = pmntanh(h2(H_output));
    y2(L_output) = h2(L_output);
    y1_aug = [y1;1];
    E = Y(:,t) - y2; % Training error
%>>>>>>>>>>>>>>>>>>>>>>>>>>>>>  COMPUTE THE PSI MATRIX  <<<<<<<<<<<<<<<<<<<<<<<<<<<
% (The derivative of each y2(t) with respect to each weight)
    % ==========  Elements corresponding to the linear output units  ============
    for i = L_output'
      % -- The part of PSI corresponding to hidden-to-output layer weights --
      index1 = (i-1)*(hidden+1) + 1;
      PSI(index1:index1+hidden,i) = y1_aug;
      % -- The part of PSI corresponding to input-to-hidden layer weights --
      for j = L_hidden',
        PSI(index(j):index(j)+inputs,i) = W2(i,j)*PHI_aug(:,t);
      end
      for j = H_hidden',
        PSI(index(j):index(j)+inputs,i) = W2(i,j)*(1-y1(j)*y1(j))*PHI_aug(:,t);
      end
    end
    % ==========  Elements corresponding to the tanh output units  ==============
    for i = H_output',
      % -- The part of PSI corresponding to hidden-to-output layer weights --
      index1 = (i-1)*(hidden+1) + 1;
      PSI(index1:index1+hidden,i) = y1_aug*(1-y2(i)*y2(i));
      % -- The part of PSI corresponding to input-to-hidden layer weights --
      for j = L_hidden',
        PSI(index(j):index(j)+inputs,i) = W2(i,j)*(1-y2(i)*y2(i))*PHI_aug(:,t);
      end
      for j = H_hidden',
        PSI(index(j):index(j)+inputs,i) = W2(i,j)*(1-y1(j)*y1(j))*(1-y2(i)*y2(i))*PHI_aug(:,t);
      end
    end
%>>>>>>>>>>>>>>>>>>>>>>>>>>>>>     UPDATE THE WEIGHTS     <<<<<<<<<<<<<<<<<<<<<<<<<<<
    PSI_red = PSI(theta_index,:); % Rows of PSI for the non-pruned weights
    % ----------  Forgetting factor method  ----------
    if mflag == 1,
      % -- Update P matrix --
      P = (P - P*PSI_red*inv(lambdaI + PSI_red'*P*PSI_red)*PSI_red'*P) / lambda;
      % -- Update parameters --
      theta_red = theta_red + P*PSI_red*E;
    % ----------  Constant trace method  ----------
    elseif mflag == 2,
      % -- Measurement update of P matrix --
      P = P - P*PSI_red*inv(I + PSI_red'*P*PSI_red)*PSI_red'*P;
      % -- Update parameters --
      theta_red = theta_red + P*PSI_red*E;
      % -- Time update of P matrix --
      P = ((alpha_max-alpha_min)/trace(P))*P;
      P(index3) = P(index3) + alpha_min;
    % ----------  EFRA method  ----------
    else
      % -- Correction factor --
      K = P*PSI_red*(alpha*inv(I + PSI_red'*P*PSI_red));
      % -- Update parameters --
      theta_red = theta_red + K*E;
      % -- Update P --
      P = P/lambda - K*PSI_red'*P + betaI - delta*P*P;
    end
    theta(theta_index) = theta_red; % Put the estimated weights back into theta
    % -- Put the parameters back into the weight matrices --
    W1 = reshape(theta(parameters2+1:parameters),inputs+1,hidden)';
    W2 = reshape(theta(1:parameters2),hidden+1,outputs)';
    % -- Accumulate SSE --
    SSE = SSE + E'*E;
  end
%>>>>>>>>>>>>>>>>>>>>>>  UPDATES FOR NEXT ITERATION  <<<<<<<<<<<<<<<<<<<<
  PI = SSE/(2*N); % Criterion of fit for this iteration
  PI_vector(iteration) = PI; % Collect PI
  fprintf('iteration # %i   PI = %4.3e\r',iteration,PI); % Print on-line information
  if PI < stop_crit, break, end % Check if stop condition is fulfilled
end
%----------------------------------------------------------------------------------
%------------- END OF NETWORK TRAINING --------------
%----------------------------------------------------------------------------------
iter = iteration; % Return the # of iterations actually executed
c = fix(clock);
fprintf('\n\nNetwork training ended at %2i.%2i.%2i\n',c(4),c(5),c(6));