
levmarq.m

Artificial neural network: MATLAB source code for training and testing
%#										
%# function [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,epochs,periodisp)
%#										
%# AIM:		Levenberg-Marquardt algorithm for training feed-forward backpropagation
%#		neural networks. Pruned (i.e. not fully connected) networks can also
%#		be trained.
%#										
%# PRINCIPLE:	Second-order optimization method based on an approximation of the 
%#		inverse Hessian matrix of second derivatives of the cost function with
%#		respect to the weights. The function is called by "nnmodel.m".		
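%#		Each epoch solves the regularized normal equations (the standard
%#		LM form, consistent with the code below):
%#		delta_theta = (J'J + lambda*I) \ (J'*e), where J is the Jacobian
%#		of the network outputs w.r.t. the weights (the PSI matrix), e the
%#		error vector, and lambda the regularization factor.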
%# 										
%# INPUT:	topo (2 x nh) : matrix containing structure of the NN (nh hidden nodes)
%#			      (H: hyperbolic tangent, L: linear, -: no function)
%#		w1 (nh x (p+1)) : matrix of random weights between input-hidden layer
%#		w2 (1 x (nh+1)) : matrix of random weights between hidden-output layer
%#		xtr (p x ntr) : matrix of training inputs (ntr objects, p variables)	
%#		ytr (1 x ntr) : matrix of training responses	
%#		xm (p x nm) : matrix of monitoring inputs		
%#		ym (1 x nm) : vector of monitoring responses		
%#		epochs : maximum number of epochs (iterations) for the training
%#		periodisp : periodicity of display											
%#										
%# OUTPUT:	w1f (nh x (p+1)) : matrix of weights between input and hidden layer
%#		w2f (1 x (nh+1)) : matrix of weights between hidden and output layer
%#		indisp : optimum number of epochs				
%# 										
%# SUBROUTINES: pmntanh.m : fast hyperbolic tangent function			
%#		lmeval.m : estimation of responses with the final neural network model
%#										
%# AUTHOR:	Programmed by : Magnus Norgaard, IAU/EI/IMM (1994)		
%#			        Neural Network-Based System Identification Toolbox
%#				Web site : http://www.iau.dk/Projects/proj/nnsysid.html
%#											
%#		Modified by Frederic Despagne				
%#		Copyright(c) 1997 for ChemoAC				
%#		Dienst FABI, Vrije Universiteit Brussel		
%#		Laarbeeklaan 103, 1090 Jette					
%#									
%# VERSION: 1.1 (28/02/1998)						
%#										
%# TEST:	Krzysztof Szczubialka 					
%#									
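% Example call (a minimal sketch; the data, sizes and weight initialization
% below are hypothetical -- in normal use levmarq is invoked by nnmodel.m):
%
%   p = 3; nh = 4; ntr = 50; nm = 20;         % inputs, hidden nodes, set sizes
%   topo = ['HHHH' ; 'L---'];                 % tanh hidden nodes, linear output
%   w1  = 0.1*randn(nh,p+1);                  % random input-to-hidden weights
%   w2  = 0.1*randn(1,nh+1);                  % random hidden-to-output weights
%   xtr = randn(p,ntr);  ytr = randn(1,ntr);  % training inputs and responses
%   xm  = randn(p,nm);   ym  = randn(1,nm);   % monitoring inputs and responses
%   [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,100,10);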

function [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,epochs,periodisp)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NETWORK INITIALIZATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

close					% Close any open figure before plotting learning curves
displ = 1;				% Index for training curve display. If not wanted, set it to 0.
N = length(ytr);                  	% Number of training output data
Nm = length(ym);			% Number of monitoring data
[hidden,inputs] = size(w1);          	% Number of hidden nodes 
inputs = inputs-1;                     	% Number of input variables
L_hidden = find(topo(1,:)=='L')';     	% Location of linear hidden neurons
H_hidden = find(topo(1,:)=='H')';     	% Location of tanh hidden neurons
L_output = find(topo(2,:)=='L')';     	% Location of linear output neurons
H_output = find(topo(2,:)=='H')';     	% Location of tanh output neurons
y1 = [zeros(hidden,N);ones(1,N)];  	% Hidden layer outputs
y2 = zeros(1,N);            		% Network output
index = 1*(hidden+1)+1+[0:hidden-1]*(inputs+1); % Start position in theta of each hidden node's input weights
index2 = (0:N-1)*1;               	% Column offsets into PSI (one output unit)
iteration = 1;                          % Counter variable
dw = 1;                           	% Flag telling that the weights are new
xtr = [xtr;ones(1,N)];             	% Adds bias to training input data
parameters1 = hidden*(inputs+1);        % Number of input-to-hidden weights
parameters2 = 1*(hidden+1);        	% Number of hidden-to-output weights
parameters = parameters1 + parameters2; % Total # of weights
PSI = zeros(parameters,1*N); 		% Derivative of each output w.r.t. each weight
ones_h = ones(hidden+1,1);            	% A vector of ones
ones_i = ones(inputs+1,1);            	% Another vector of ones
                                        % Parameter vector containing all weights
theta = [reshape(w2',parameters2,1) ; reshape(w1',parameters1,1)];
theta_index = find(theta);              % Index to nonzero weights
theta_red = theta(theta_index);         % Reduced parameter vector
p0 = 1e6;                         	% Diagonal element of H_inv
Ident = eye(1);                		% Identity matrix
reduced = length(theta_index);         	% The number of parameters in theta_red
index3 = 1:(reduced+1):(reduced^2);   	% Linear indices of the diagonal of the reduced Hessian
stop_crit = 0;				% Stop training if criterion gets below this value
lambda = 1;				% Initial regularization factor     
PI_vector = zeros(epochs,1);          	% Stores the performance index at each epoch
clc;


%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF NETWORK OUTPUT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
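
% Forward pass: y1 = f(W1*[x;1]) for the hidden layer and y2 = g(W2*[y1;1])
% for the output layer, where f and g are tanh or identity according to topo.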

h1 = w1*xtr; 					% Inputs to hidden layer 
y1(H_hidden,:) = pmntanh(h1(H_hidden,:));	% Nonlinear outputs of hidden layer
y1(L_hidden,:) = h1(L_hidden,:);    		% Linear outputs of hidden layer

h2 = w2*y1;					% Inputs to output layer
y2(H_output,:) = pmntanh(h2(H_output,:));	% Nonlinear outputs of output layer
y2(L_output,:) = h2(L_output,:);		% Linear outputs of output layer

E = ytr - y2;                     		% Training error
E_vector = E(:);                        	% Reshape E into a long vector
SSE = E_vector'*E_vector;          		% Sum of squared errors (SSE)
PI = SSE/(2*N); 				% Performance index, normalized by 2N as below

while iteration <= epochs 
   
	if dw == 1,


%%%%% COMPUTATION OF NETWORK OUTPUT DERIVATIVES WITH RESPECT TO EACH WEIGHT (PSI MATRIX) %%%%
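
		% PSI is the Jacobian of the network outputs: PSI(k,n) is the
		% derivative of output sample n w.r.t. weight k. It is filled
		% blockwise: rows index1..index1+hidden for hidden-to-output
		% weights, rows index(j)..index(j)+inputs for hidden node j.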

    		% Elements corresponding to linear output units
		for i = L_output'

      			index1 = (i-1) * (hidden + 1) + 1;
	
			% Part of PSI corresponding to hidden-to-output layer weights
      			PSI(index1:index1+hidden,index2+i) = y1;

			% Part of PSI corresponding to input-to-hidden layer weights
      			for j = L_hidden',
 
  				% Derivatives of linear hidden layer outputs
        			PSI(index(j):index(j)+inputs,index2+i) = w2(i,j)*xtr;
      			end

      			for j = H_hidden',

        			tmp = w2(i,j)*(1-y1(j,:).*y1(j,:));	% Hyp. tang. derivative

				% Derivatives of nonlinear hidden layer outputs
        			PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*xtr;
      			end   
    		end

		% Elements corresponding to nonlinear output units
    		for i = H_output',	
			
			index1 = (i-1) * (hidden + 1) + 1;
     			tmp = 1 - y2(i,:).*y2(i,:);

			% Part of PSI corresponding to hidden-to-output layer weights
      			PSI(index1:index1+hidden,index2+i) = y1.*tmp(ones_h,:);
     
      			% Part of PSI corresponding to input-to-hidden layer weights
      			for j = L_hidden',

        			tmp = w2(i,j)*(1-y2(i,:).*y2(i,:));	% Hyp. tang. derivative

				% Derivatives for linear hidden nodes (through the tanh output)
        			PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*xtr;
      			end
      
      			for j = H_hidden',

        			tmp  = w2(i,j)*(1-y1(j,:).*y1(j,:));
        			tmp2 = (1-y2(i,:).*y2(i,:));

				% Derivatives of nonlinear hidden layer outputs
        			PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:)...
                                                  			.*tmp2(ones_i,:).*xtr;
      			end
    		end

    		PSI_red = PSI(theta_index,:);	% Reduced PSI-matrix    
		G = PSI_red*E_vector;		% Gradient
		R = PSI_red*PSI_red';		% Mean square error part of the Hessian (Gauss-Newton)
    		dw = 0;			
  	end
  
   
%%%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF WEIGHTS UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
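
  	% Levenberg-Marquardt step: solve (R + lambda*I)*h = G for the search
  	% direction h. Small lambda approaches Gauss-Newton; large lambda
  	% approaches gradient descent. index3 addresses the diagonal of H.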
  
  	H = R;					% Hessian matrix
  	H(index3) = H(index3)+lambda;           % Adds lambda to the diagonal
	h = H\G;                                % Calculates new search direction
  	theta_red_new = theta_red + h;          % Updates parameter vector
  	theta(theta_index) = theta_red_new;

  	% Put the parameters back into the weight matrices 
  	w1_new = reshape(theta(parameters2+1:parameters),inputs+1,hidden)';
  	w2_new = reshape(theta(1:parameters2),hidden+1,1)';


%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF NEW NETWORK OUTPUT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  	h1 = w1_new*xtr;  					% Inputs to hidden layer
  	y1(H_hidden,:) = pmntanh(h1(H_hidden,:));		% Nonlinear outputs of hidden layer
  	y1(L_hidden,:) = h1(L_hidden,:);			% Linear outputs of hidden layer
    
  	h2 = w2_new*y1;						% Inputs to output layer
  	y2(H_output,:) = pmntanh(h2(H_output,:));		% Nonlinear outputs of output layer
  	y2(L_output,:) = h2(L_output,:);			% Linear outputs of output layer

  	E_new = ytr-y2;                 			% Training error
  	E_new_vector = E_new(:);               			% Reshape E into a long vector
  	SSE_new = (E_new_vector'*E_new_vector)/(2*N); 		% Normalized sum of squared training errors
  	PI_new = SSE_new; 					% Performance index


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% UPDATE LAMBDA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
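
  	% Trust-region style control of lambda: L is the reduction predicted
  	% by the local quadratic model for step h. Halve lambda if the actual
  	% reduction exceeds 75% of the prediction; double it below 25%.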

  	L = h'*G+h'*(h.*lambda);

  	if 2*N*(PI-PI_new) > (0.75*L),
    		lambda = lambda/2;	% Decrease lambda if SSE has fallen sufficiently
  
  	elseif 2*N*(PI-PI_new) <= (0.25*L),
    		lambda = 2*lambda;	% Increase lambda if SSE has not fallen enough
  	end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% UPDATE FOR NEXT ITERATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  	% Update only if criterion has decreased
  	if PI_new < PI,   
                   
    		w1 = w1_new;			% New input-hidden matrix of weights
    		w2 = w2_new;			% New hidden-output matrix of weights
    		theta_red = theta_red_new;	% New parameter vector
    		E_vector = E_new_vector;	% Training error
    		PI = PI_new;			% New performance index
    		dw = 1;				% Indicates new weights
    		iteration = iteration + 1;	% Increments counter
    		PI_vector(iteration-1) = PI;	% Collect PI in vector


%%%%%%%%%%%%%%%%%%%%%%% PERIODIC PREDICTIONS ON MONITORING SET %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
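
    		% Early stopping: predict the monitoring set every epoch and keep
    		% the weights giving the lowest monitoring SSE (returned as
    		% w1f/w2f, with the corresponding epoch count in indisp).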

    		[yhv,yvh] = lmeval(topo,w1,w2,xm);		% Predict monitoring response
    		dd = 0:displ-1;					% Display vector
    		sec(displ) = PI_new;				% Training SSE
    		E_mon = ym - yvh;       			% Monitoring error
		sep(displ) = sum(sum(E_mon.*E_mon))/(2*Nm);	% Monitoring SSE

    		if displ == 1
			oldsep = sep(displ);	% Reference value of monitoring SSE
    		end

		% Display learning curves at regular intervals
    		if periodisp & rem(iteration,periodisp) == 0
			plot(dd,sec(1:displ),'--',dd,sep(1:displ),'c')
			rens1=sprintf('   Architecture :(%1.0f-%1.0f-1)',[inputs hidden]);
			rens2=sprintf('Epochs : %5.0f',iteration);
			rens3=sprintf('NNMODEL   ');
			title([rens3 rens1]); xlabel(rens2); ylabel('-- SSEC   - SSEP')
			drawnow
    		end

		% Stores weights if monitoring SSE smaller than the reference value
    		if sep(displ) <= oldsep
			w1f = w1;
			w2f = w2;
			oldsep = sep(displ);	% Update the reference SSE
			indisp = iteration;	% Save the corresponding number of epochs
   		end

   		displ = displ+1;		% Increment display counter
  
 	end

  	% Interrupts training if the stop condition is fulfilled
  	if (PI < stop_crit) | (lambda>1e7),break, end          
end





