nnmisc.c
来自「基于MATLAB的神经网络非线性系统辨识软件包.」· C语言 代码 · 共 1,071 行 · 第 1/3 页
C
1,071 行
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix2.h"
#include "nnmisc.h"
#undef PI
/*********************************************************************************
* *
* MARQC *
* ----- *
* *
* This is a C-version of the Matlab function marq. *
* Type 'help marq' from Matlab for information on *
* how to call this function. *
* *
* *
* Programmed by: Magnus Norgaard *
* LastEditDate : Jan. 21, 2000 *
* *
*********************************************************************************/
void marqc(matrix **PI_vectorpp, int *iter, double *lam,\
matrix *NetDef, matrix *W1, matrix *W2, matrix *PHII,\
matrix *Y, trparmstruct *trparms)
{
/*
-----------------------------------------------------------------------------------
--------------- VARIABLE DECLARATIONS -------------
-----------------------------------------------------------------------------------
*/
register i, j, k;
int iteration, outputs, N, Nout, layers, hidden, inputs;
int parameters1, parameters2, parameters, reduced, index1, ii, jj;
int lhids, hhids, louts, houts, index11;
double critdif, gradmax, paramdif, lambda, SSE, SSE_new, PI, PI_new, L, tmp1;
double *ptm1, *ptm2, sum, lambda_old;
char dw;
matrix *L_hidden, *H_hidden, *L_output, *H_output, *h1, *h2, *y1, *y2;
matrix *E, *E_new, *E_vector, *E_new_vector, *W1_new, *W2_new, *PHI, *D, *Dtmp;
matrix *PI_vector, *tmp, *Htmp;
matrix *theta, *thtmp, *theta_index, *theta_red, *theta_red_new, *PSI, *G, *H, *h;
matrix *all, *index0, *index7, *onesvec, *tmp0, *tmp2, *tmp3, *index, *index2;
/*
-----------------------------------------------------------------------------------
--------------- NETWORK INITIALIZATIONS -------------
-----------------------------------------------------------------------------------
*/
outputs = getrows(Y); /* # of outputs */
N = getcols(Y); /* # of data */
hidden = getrows(W1); /* # of hidden units */
inputs = getcols(W1); /* Number of inputs */
inputs = inputs-1;
Nout = N*outputs; /* N*outputs */
h1 = mmake(hidden,N); /* Argument to hidden layer act. fcts */
h2 = mmake(outputs,N); /* Argument to hidden layer act. fcts */
iteration = 1; /* Initialize iteration counter */
dw = 1; /* Flag telling that the weights are new*/
onesvec = mmake(1,N); /* Vector of all ones */
minitx(onesvec,1.0);
y1 = mmake(hidden+1,N); minit(y1);/* Hidden layer outputs */
mat2mat(y1,hidden,0,onesvec); /* Add a row of ones (bias to outputs) */
y2 = mmake(outputs,N); minit(y2); /* Output layer output */
E = mmake(outputs,N); /* Prediction error matrix */
E_new = mmake(outputs,N); /* A priori E */
E_vector = mmake(N*outputs,1); /* Prediction error vector */
E_new_vector = mmake(N*outputs,1); /* A priori E_vector */
PHI = mmake(inputs+1,N); /* Matrix of input vectors (incl. bias) */
addrows(PHI,PHII,onesvec);
parameters1= hidden*(inputs+1); /* # of input-to-hidden weights */
parameters2= outputs*(hidden+1); /* # of hidden-to-output weights */
parameters = parameters1+parameters2; /* Total # of weights */
theta = mmake(parameters,1); /* Vector containing all weights */
m2vreshape(theta,0,W2);
m2vreshape(theta,parameters2,W1);
thtmp = mnofind(theta,0.0); /* Find non-zero entries in theta */
reduced = getrows(thtmp); /* # of non-zero elements */
theta_index = mmake(reduced,1); /* Indices to weights <> 0 */
submat(theta_index,thtmp,0,reduced-1,0,0);
theta_red = mmake(reduced,1); /* Reduced parameter vector */
for(i=0;i<reduced;i++) /* theta_red = theta(theta_index) */
cvput(theta_red,i,cvget(theta,(int)cvget(theta_index,i)));
theta_red_new = mmake(reduced,1); /* A priori update of parameters */
W1_new = mmake(hidden,inputs+1); /* A priori updated W1 */
W2_new = mmake(outputs,hidden+1); /* A priori updated W2 */
PSI = mmake(parameters,Nout); /* Der. of each output wrt. each weight */
minit(PSI);
G = mmake(reduced,1); /* Gradient vector */
H = mmake(reduced,reduced); /* Hessian matrix */
Htmp = mmake(reduced,reduced); /* Matrix used by the linear sys solver */
h = mmake(reduced,1); /* Update vector */
all = mmake(N,1); /* Index vector (0:N-1) */
for(k=0;k<N;k++) cvput(all,k,(double)k);
index0 = mmake(1,1); /* Index vector (0) */
put_val(index0,0,0,0);
index7 = mmake(parameters,1); /* Index vector (0:parameters-1) */
for(k=0;k<parameters;k++) cvput(index7,k,(double)k);
L_hidden = neuvector(NetDef,1,'L'); /* Location of linear hidden units */
H_hidden = neuvector(NetDef,1,'H'); /* Location of tanh hidden units */
L_output = neuvector(NetDef,2,'L'); /* Location of linear output units */
H_output = neuvector(NetDef,2,'H'); /* Location of tanh output units */
lhids = getrows(L_hidden); /* # of linear hidden units */
hhids = getrows(H_hidden); /* # of tanh hidden units */
louts = getrows(L_output); /* # of linear output units */
houts = getrows(H_output); /* # of tanh output units */
if(hhids>0) tmp0 = mmake(hhids,N); /* Used to construct PSI */
else tmp0 = mmake(1,1);
tmp2 = mmake(1,N); /* Used to construct PSI */
tmp3 = mmake(1,N); /* Used to construct PSI */
index2 = mmake(N,1); /* Index vector (0:N-1)*outputs */
for(k=0;k<N;k++) cvput(index2,k,(double)k*outputs);
index = mmake(hidden,1); /* Index vector outputs*(hidden+1)+... */
for(k=0;k<hidden;k++) cvput(index,k,(double)(outputs*(hidden+1)+k*(inputs+1)));
lambda_old = 0.0;
lambda = trparms->lambda; /* Levenberg-Marquardt parameter */
D = mmake(reduced,1); /* Initialize vector cont. weight decays*/
Dtmp = mmake(parameters,1);
if(length(trparms->D)==1) /* Scalar weight decay parameters */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms->D,0));
else if(length(trparms->D)==2) /* Two weight decay parameters */
{
for(i=0;i<parameters2;i++) cvput(Dtmp,i,rvget(trparms->D,0));
for(i=parameters2;i<parameters;i++) cvput(Dtmp,i,rvget(trparms->D,1));
for(i=0;i<reduced;i++) cvput(D,i,cvget(Dtmp,(int)cvget(theta_index,i)));
}
else if(length(trparms->D)==reduced){ /* Individual weight decays */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms->D,i));
}
else
mexErrMsgTxt("tparms.D has the wrong length.");
critdif = trparms->critterm+1.0; /* Initialize stopping variables */
gradmax = trparms->gradterm+1;
paramdif = trparms->paramterm+1;
PI_vector = mmake(trparms->maxiter,1); /* Vector containing the PI's */
/*
-----------------------------------------------------------------------------------
--------------- TRAIN NETWORK -------------
-----------------------------------------------------------------------------------
*/
/* Clear screen on HP systems.
Uncomment the following line and comment the subsequent one */
/*printf("\x1BH\x1BJNetwork training started at %.8s\n\n",asctime(c)+11);*/
printf("\nNetwork training started.\n\n");
/*
>>>>>>>>>>>>>> Compute network output y2(theta) <<<<<<<<<<<<<<<
*/
mmul(h1,W1,PHI);
mtanh(y1,H_hidden,all,h1,H_hidden,all);
mcopyi(y1,L_hidden,all,h1,L_hidden,all);
mmul(h2,W2,y1);
mtanh(y2,H_output,all,h2,H_output,all);
mcopyi(y2,L_output,all,h2,L_output,all);
msub(E,Y,y2); /* Training error */
m2vreshape2(E_vector,0,E); /* reshape E intor a long vector */
SSE=sprod3(E_vector,E_vector); /* Sum of squared errors */
tmp1 = 0;
for(i=0;i<reduced;i++)
tmp1+=cvget(theta_red,i)*cvget(theta_red,i)*cvget(D,i);
PI = (SSE+tmp1)/(2*N);
/* Iterate until stopping criterion is satisfied */
while (iteration<=trparms->maxiter && PI>trparms->critmin && lambda<1e7 &&
(critdif>trparms->critterm || gradmax>trparms->gradterm ||
paramdif>trparms->paramterm))
{
if(dw==1)
{
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE THE PSI MATRIX <<<<<<<<<<<<<<<<<<<<<<<<<<
(The derivative of each network output (y2) with respect to each weight)
*/
/* Some intermidiate computations */
for(j=0;j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
for(k=0;k<N;k++)
put_val(tmp0,j,k,1-get_val(y1,jj,k)*get_val(y1,jj,k));
}
/* ========== Elements corresponding to the linear output units ============*/
for(i=0; i<louts; i++)
{
ii = (int)cvget(L_output,i);
/*** The part of PSI corresponding to hidden-to-output layer weights ***/
index1 = ii * (hidden+1);
psi1(PSI, index1, index2, ii, y1);
/************************************************************************/
/**** The part of PSI corresponding to input-to-hidden layer weights ****/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
psi2(PSI, (int)cvget(index,jj), index2, ii, get_val(W2,ii,jj), PHI);
}
for(j=0; j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
/************************************************************************/
}
/* ============ Elements corresponding to the tanh output units =============*/
for(i=0; i<houts; i++)
{
ii = (int)cvget(H_output,i);
index1 = ii * (hidden + 1);
for(k=0; k<N; k++)
put_val(tmp2,0,k,1-get_val(y2,ii,k)*get_val(y2,ii,k));
/* -- The part of PSI corresponding to hidden-to-output layer weights --*/
psi4(PSI, index1, index2, ii, tmp2, y1);
/* ---------------------------------------------------------------------*/
/* -- The part of PSI corresponding to input-to-hidden layer weights ---*/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
smul(tmp3, tmp2, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
for(j=0; j<hhids; j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi5(PSI, (int)cvget(index,jj), index2, ii, tmp3, tmp2, PHI);
}
/* ---------------------------------------------------------------------*/
}
dw = 0;
/* -- Gradient (G = PSI_red*E_vector - D*theta_red) -- */
for(i=0; i<reduced; i++){
ii = (int)cvget(theta_index,i);
for(sum=0.0,k=0; k<Nout; k++) sum += get_val(PSI,ii,k)*cvget(E_vector,k);
cvput(G,i,sum - cvget(D,i)*cvget(theta_red,i));
}
/* -- Mean square error part of Hessian (PSI_red*PSI_red' --*/
for(i=0; i<reduced; i++){
ii = (int)cvget(theta_index,i);
for(j=i; j<reduced; j++){
jj = (int)cvget(theta_index,j);
for(sum=0.0,k=0; k<Nout; k++)
sum += get_val(PSI,ii,k)*get_val(PSI,jj,k);
put_val(H,i,j,sum);
put_val(H,j,i,sum);
}
}
for(i=0;i<reduced;i++) /* Add diagonal matrix */
put_val(H,i,i,get_val(H,i,i)+cvget(D,i));
}
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE h_k <<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
/* -- Hessian (H = R + lambda*I + D) --*/
tmp1 = lambda-lambda_old;
for(i=0;i<reduced;i++) /* Add diagonal matrix */
put_val(H,i,i,get_val(H,i,i)+tmp1);
/* -- Search direction -- */
choldc(H, Htmp);
cholsl(Htmp,h,G);
/* -- Compute 'apriori' iterate -- */
madd(theta_red_new,theta_red,h); /* Update parameter vector */
mcopyi(theta,theta_index,index0,theta_red_new,index7,index0);
/* -- Put the parameters back into the weight matrices -- */
v2mreshape(W1_new,theta,parameters2);
v2mreshape(W2_new,theta,0);
/*
>>>>>>>>>>>>>> Compute network output y2(theta+h) <<<<<<<<<<<<<<<
*/
mmul(h1,W1_new,PHI);
mtanh(y1,H_hidden,all,h1,H_hidden,all);
mcopyi(y1,L_hidden,all,h1,L_hidden,all);
mmul(h2,W2_new,y1);
mtanh(y2,H_output,all,h2,H_output,all);
mcopyi(y2,L_output,all,h2,L_output,all);
msub(E_new,Y,y2); /* Training error */
m2vreshape2(E_new_vector,0,E_new); /* reshape E intor a long vector */
SSE_new = sprod3(E_new_vector,E_new_vector); /* Sum of squared errors */
for(tmp1=0.0,i=0; i<reduced; i++)
{
tmp1+=cvget(theta_red_new,i)*cvget(theta_red_new,i)*cvget(D,i);
PI_new = (SSE_new+tmp1)/(2*N);
}
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> UPDATE lambda <<<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
tmp1 = 0;
lambda_old = lambda;
for(i=0;i<reduced;i++) tmp1+=cvget(h,i)*cvget(h,i)*(cvget(D,i)+lambda);
L = sprod3(h,G) + tmp1;
/* Decrease lambda if SSE has fallen 'sufficiently' */
if(2*N*(PI - PI_new) > (0.75*L)) lambda = lambda/2;
/* Increase lambda if SSE has grown 'sufficiently' */
else if(2*N*(PI-PI_new) <= (0.25*L)) lambda = 2*lambda;
/*
>>>>>>>>>>>>>>>>>>>> UPDATES FOR NEXT ITERATION <<<<<<<<<<<<<<<<<<<<<
*/
/* Update only if criterion has decreased */
if(PI_new<PI)
{
critdif = PI-PI_new; /* Criterion difference */
for(i=0,gradmax=0.0,ptm1=G->mat[0];i<reduced;i++){ /* Maximum gradient */
sum = fabs(*(ptm1++));
if(gradmax<sum)
gradmax = sum;
}
gradmax/=N;
ptm1=theta_red_new->mat[0];
ptm2=theta_red->mat[0];
for(i=0,paramdif=0.0;i<reduced;i++){ /* Maximum gradient */
sum = fabs(*(ptm1++) - *(ptm2++));
if(paramdif<sum)
paramdif = sum;
}
lambda_old = 0.0;
tmp = W1; W1=W1_new; W1_new=tmp;
tmp = W2; W2=W2_new; W2_new=tmp;
tmp = theta_red; theta_red=theta_red_new; theta_red_new = tmp;
tmp = E_vector; E_vector = E_new_vector; E_new_vector = tmp;
dw = 1;
PI = PI_new;
cvput(PI_vector,iteration-1,PI);
switch(trparms->infolevel){ /* Print on-line inform */
case 1:
printf("# %i W=%4.3e critdif=%3.2e maxgrad=%3.2e paramdif=%3.2e\n",
iteration,PI,critdif,gradmax,paramdif);
break;
default:
printf("iteration # %i W = %4.3e\r",iteration,PI);
}
++iteration;
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?