📄 nnoe.c
字号:
/*
* INCLUDE HEADERS
*/
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "mex.h"
#include "matrix2.h"
#include "nnmisc.h"
void nnoe(matrix**, int*, double*, matrix*, matrix*, matrix*, matrix*, trparmstruct*,\
matrix*, matrix*);
/*********************************************************************************
* *
* NNOE *
* ---- *
* *
* This is a CMEX-version of the Matlab function nnoe. *
* Type 'help nnoe' from Matlab for information on *
* how to call this function. *
* *
* *
* Programmed by: Magnus Norgaard *
* LastEditDate : Jan. 21, 2000 *
* *
*********************************************************************************/
void nnoe(matrix **NSSEvecpp, int *iter, double *lam,\
matrix *NetDef, matrix *NN, matrix *W1, matrix *W2, trparmstruct *trparms,\
matrix *Y, matrix *U)
{
/*
-----------------------------------------------------------------------------------
--------------- VARIABLE DECLARATIONS -------------
-----------------------------------------------------------------------------------
*/
register i, j, k, t;
int outputs, N, Nout, layers, dummy, hidden, inputs, iteration;
int parameters1, parameters2, parameters, reduced, index1, ii, jj;
int lhids, hhids, louts, houts, index11, skip;
int Ndat, N2, na, nu, nab, nmax, index5, dummy2;
double lambda, SSE, SSE_new, NSSE, NSSE_new, L, tmp1, sum, dummy3;
double critdif, gradmax, paramdif, *ptm1, *ptm2, lambda_old;
char dw;
matrix *L_hidden, *H_hidden, *L_output, *H_output, *h1, *h2, *y1, *y2;
matrix *E, *E_new, *W1_new, *W2_new, *PHI, *D, *Dtmp;
matrix *NSSEvec, *miter, *tmp, *Htmp;
matrix *theta, *thtmp, *theta_index, *theta_red, *theta_red_new, *PSI, *G, *H, *h;
matrix *all, *index0, *index7, *onesvec, *tmp0, *tmp2, *tmp3, *index, *index2;
matrix *nb, *nk, *dy2dy1, *dy2dy, *dy1dy, *dy2dy_vec, *Y2, *dummy1;
trparmstruct *trd;
/*
-----------------------------------------------------------------------------------
--------------- NETWORK INITIALIZATIONS -------------
-----------------------------------------------------------------------------------
*/
Ndat = getcols(Y); /* # of data */
na = vget(NN,0); /* Past predictions used as inputs */
nu = getrows(U); /* # of input signals */
nb = mmake(1,nu); /* Past controls used as inputs */
subvec(nb,NN,1,nu);
nk = mmake(1,nu); /* Time delays */
subvec(nk,NN,nu+1,2*nu);
nmax = na; /* Oldest signal used as input */
for(k=0;k<nu;k++){
i=rvget(nb,k)+rvget(nk,k)-1;
if(nmax<i) nmax=i;
}
skip = trparms->skip;
N = Ndat - nmax; /* Size of training set */
N2 = N-skip;
nab = na; /* na+nb */
for(k=0;k<nu;k++) nab=nab+rvget(nb,k);
Y2 = mmake(1,N); /* Observed outputs used for training */
hidden = getcols(NetDef); /* # of hidden units */
inputs = nab; /* Number of inputs to network */
outputs = 1; /* Always one outputs */
Nout = N*outputs; /* N*outputs */
L_hidden = neuvector(NetDef,1,'L'); /* Location of linear hidden units */
H_hidden = neuvector(NetDef,1,'H'); /* Location of tanh hidden units */
L_output = neuvector(NetDef,2,'L'); /* Location of linear output units */
H_output = neuvector(NetDef,2,'H'); /* Location of tanh output units */
lhids = getrows(L_hidden); /* # of linear hidden units */
hhids = getrows(H_hidden); /* # of tanh hidden units */
louts = getrows(L_output); /* # of linear output units */
houts = getrows(H_output); /* # of tanh output units */
miter = mmake(1,1); /* Temp element */
h1 = mmake(hidden,1); /* Argument to hidden layer act. fcts */
h2 = mmake(outputs,1); /* Argument to hidden layer act. fcts */
onesvec = mmake(1,N); /* Vector of all ones */
minitx(onesvec,1.0);
y1 = mmake(hidden+1,N); /* Hidden layer outputs */
minit(y1);
mat2mat(y1,hidden,0,onesvec); /* Add a row of ones (bias to outputs) */
y2 = mmake(outputs,N); /* Output layer output */
minit(y2);
E = mmake(outputs,N); /* Prediction error matrix */
E_new = mmake(outputs,N); /* A priori E */
index = mmake(hidden,1); /* Index vector outputs*(hidden+1)+... */
for(k=0;k<hidden;k++) cvput(index,k,(double)(outputs*(hidden+1)+k*(inputs+1)));
index2 = mmake(N,1); /* Index vector (0:N-1)*outputs */
for(k=0;k<N;k++) cvput(index2,k,(double)k*outputs);
iteration = 1; /* Initialize iteration counter */
dw = 1; /* Flag telling that the weights are new*/
parameters1= hidden*(inputs+1); /* # of input-to-hidden weights */
parameters2= outputs*(hidden+1); /* # of hidden-to-output weights */
parameters = parameters1+parameters2; /* Total # of weights */
/*
>>>>>>>>>>>>>>>>>>>> CONSTRUCT THE REGRESSION MATRIX PHI <<<<<<<<<<<<<<<<<<<<<
*/
PHI = mmake(nab+1,N); /* Matrix of input vectors (incl. bias) */
mat2mat(PHI,nab,0,onesvec);
for(k=0;k<na;k++){
for(i=0;i<Ndat-nmax;i++) mput(PHI,k,i,vget(Y,i+nmax-k-1));
}
index5 = na; /* Insert controls in PHI */
for(i=0;i<nu;i++){
for(k=0;k<vget(nb,i);k++){
for(j=0;j<Ndat-nmax;j++){
mput(PHI,index5+k,j,mget(U,i,nmax+j-k-vget(nk,i)));
}
}
index5=index5+vget(nb,i);
}
for(t=0;t<N;t++) rvput(Y2,t,rvget(Y,t+nmax));
/*
>>>>>>>>>>>>>>>>> INITIALIZE WEIGHTS WITH NNARX IF NECESSARY <<<<<<<<<<<<<<<<<<
*/
if(getrows(W2)==0){
W2->row=1;
mrand(W1); smul(W1,W1,0.5);
mrand(W2); smul(W2,W2,0.5);
PHI->row=PHI->row-1;
trd = (trparmstruct*)malloc(sizeof(trparmstruct));
trd->infolevel = TRDINFOLEVEL;
trd->maxiter = 100;
trd->critmin = TRDCRITMIN;
trd->critterm = TRDCRITTERM;
trd->gradterm = TRDGRADTERM;
trd->paramterm = TRDPARAMTERM;
trd->D = mmake(1,1);
put_val(trd->D,0,0,TRDD);
trd->lambda = TRDLAMBDA;
trd->skip = TRDSKIP;
marqc(&dummy1, &dummy2, &dummy3, NetDef, W1, W2, PHI, Y2, trd);
PHI->row=PHI->row+1;
mfree(trd->D);
free(trd);
mfree(dummy1);
}
W1_new = mmake(hidden,inputs+1); /* A priori updated W1 */
W2_new = mmake(outputs,hidden+1); /* A priori updated W2 */
theta = mmake(parameters,1); /* Vector containing all weights */
m2vreshape(theta,0,W2);
m2vreshape(theta,parameters2,W1);
thtmp = mnofind(theta,0.0); /* Find non-zero entries in theta */
reduced = getrows(thtmp); /* # of non-zero elements */
theta_index = mmake(reduced,1); /* Indices to weights <> 0 */
submat(theta_index,thtmp,0,reduced-1,0,0);
theta_red = mmake(reduced,1); /* Reduced parameter vector */
for(i=0;i<reduced;i++) /* theta_red = theta(theta_index) */
cvput(theta_red,i,cvget(theta,(int)cvget(theta_index,i)));
theta_red_new = mmake(reduced,1); /* A priori update of parameters */
dy2dy = mmake(na,N); /* Der. of output wrt. the past outputs */
dy1dy = mmake(hidden,na); /* Der.of hid. unit outp. wrt. past outp*/
dy2dy1 = mmake(1,hidden); /* Der. of outp. wrt. hidden outp. */
dy2dy_vec = mmake(1,na); /* For temp. computations */
PSI = mmake(parameters,Nout); /* Der. of each output wrt. each weight */
G = mmake(reduced,1); /* Gradient vector */
H = mmake(reduced,reduced); /* Hessian matrix */
Htmp = mmake(reduced,reduced); /* Matrix used by the linear sys solver */
h = mmake(reduced,1); /* Update vector */
all = mmake(N,1); /* Index vector (0:N-1) */
for(k=0;k<N;k++) cvput(all,k,(double)k);
index0 = mmake(1,1); /* Index vector (0) */
put_val(index0,0,0,0);
index7 = mmake(parameters,1); /* Index vector (0:parameters-1) */
for(k=0;k<parameters;k++) cvput(index7,k,(double)k);
if (hhids>0) tmp0 = mmake(hhids,N); /* Used to construct PSI */
else tmp0 = mmake(1,1);
tmp2 = mmake(1,N); /* Used to construct PSI */
tmp3 = mmake(1,N); /* Used to construct PSI */
lambda = trparms->lambda; /* Levenberg-Marquardt parameter */
lambda_old = 0.0;
D = mmake(reduced,1); /* Initialize vector cont. weight decays*/
Dtmp = mmake(parameters,1);
if(length(trparms->D)==1) /* Scalar weight decay parameters */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms->D,0));
else if(length(trparms->D)==2) /* Two weight decay parameters */
{
for(i=0;i<parameters2;i++) cvput(Dtmp,i,rvget(trparms->D,0));
for(i=parameters2;i<parameters;i++) cvput(Dtmp,i,rvget(trparms->D,1));
for(i=0;i<reduced;i++) cvput(D,i,cvget(Dtmp,(int)cvget(theta_index,i)));
}
else if(length(trparms->D)==reduced){ /* Individual weight decays */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms->D,i));
}
else
mexErrMsgTxt("tparms.D has the wrong length.");
critdif = trparms->critterm+1.0; /* Initialize stopping variables */
gradmax = trparms->gradterm+1;
paramdif = trparms->paramterm+1;
NSSEvec = mmake(trparms->maxiter,1); /* Vector containing the PI's */
/*
-----------------------------------------------------------------------------------
--------------- TRAIN NETWORK -------------
-----------------------------------------------------------------------------------
*/
/* Clear screen on HP systems.
Uncomment the following line and comment the subsequent one */
/*printf("\x1BH\x1BJNetwork training started at %.8s\n\n",asctime(c)+11);*/
printf("\nNetwork training started.\n\n");
/*
>>>>>>>>>>>>>> Compute network output y2(theta) <<<<<<<<<<<<<<<
*/
for(t=0;t<N;t++){
mvmul(h1,W1,PHI,t);
vtanh(y1,H_hidden,t,h1,H_hidden,0);
vcopyi(y1,L_hidden,t,h1,L_hidden,0);
mvmul(h2,W2,y1,t);
vtanh(y2,H_output,t,h2,H_output,0);
vcopyi(y2,L_output,t,h2,L_output,0);
j=na;
if(N-t-1<na) j=N-t-1;
for(i=1;i<=j;i++){
put_val(PHI,i-1,t+i,rvget(y2,t));
}
}
for(t=0;t<N;t++) rvput(E,t,rvget(Y2,t)-rvget(y2,t)); /* Prediction error */
for(SSE=0,t=skip;t<N;t++) SSE+=rvget(E,t)*rvget(E,t);/* Sum of squared errors */
for(tmp1=0,i=0;i<reduced;i++) tmp1+=cvget(theta_red,i)*cvget(theta_red,i)*cvget(D,i);
NSSE = (SSE+tmp1)/(2*N2); /* Value of cost function */
/* Iterate until stopping criterion is satisfied */
while (iteration<=trparms->maxiter && NSSE>trparms->critmin && lambda<1e7 &&
(critdif>trparms->critterm || gradmax>trparms->gradterm ||
paramdif>trparms->paramterm))
{
if(dw==1)
{
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE THE PSI MATRIX <<<<<<<<<<<<<<<<<<<<<<<<<<
(The derivative of each network output (y2) with respect to each weight)
*/
/* Some intermidiate computations */
for(j=0;j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
for(k=0;k<N;k++)
put_val(tmp0,j,k,1-get_val(y1,jj,k)*get_val(y1,jj,k));
}
/* ========== Elements corresponding to the linear output units ===========*/
for(i=0; i<louts; i++)
{
ii = (int)cvget(L_output,i);
/*** The part of PSI corresponding to hidden-to-output layer weights ***/
index1 = ii * (hidden+1);
psi1(PSI, index1, index2, ii, y1);
/************************************************************************/
/**** The part of PSI corresponding to input-to-hidden layer weights ****/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
psi2(PSI, (int)cvget(index,jj), index2, ii, get_val(W2,ii,jj), PHI);
}
for(j=0; j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
/************************************************************************/
}
/* =========== Elements corresponding to the tanh output units ===========*/
for(i=0; i<houts; i++)
{
ii = (int)cvget(H_output,i);
index1 = ii * (hidden + 1);
for(k=0; k<N; k++)
put_val(tmp2,0,k,1-get_val(y2,ii,k)*get_val(y2,ii,k));
/* -- The part of PSI corresponding to hidden-to-output layer weights --*/
psi4(PSI, index1, index2, ii, tmp2, y1);
/* ---------------------------------------------------------------------*/
/* -- The part of PSI corresponding to input-to-hidden layer weights ---*/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
smul(tmp3, tmp2, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
for(j=0; j<hhids; j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi5(PSI, (int)cvget(index,jj), index2, ii, tmp3, tmp2, PHI);
}
/* ---------------------------------------------------------------------*/
}
/*
>>>>>>>>>>>>>>>>>>>> Linearize network <<<<<<<<<<<<<<<<<<<<<
*/
for(t=0;t<N;t++){
/*-- Derivative of output wrt. hidden outputs --*/
if(louts==1) for(k=0;k<hidden;k++) rvput(dy2dy1,k,rvget(W2,k));
else for(k=0;k<hidden;k++) rvput(dy2dy1,k,rvget(W2,k)*(1-\
rvget(y2,t)*rvget(y2,t)));
/*-- Partial deriv. of output from each hidden unit wrt. past net outp. --*/
for(j=0;j<lhids;j++){
i=(int)cvget(L_hidden,j);
for(k=0;k<na;k++) put_val(dy1dy,i,k,\
get_val(W1,i,k));
}
for(j=0;j<hhids;j++){
i=(int)cvget(H_hidden,j);
for(k=0;k<na;k++) put_val(dy1dy,i,k,\
get_val(W1,i,k)*(1-get_val(y1,i,t)*get_val(y1,i,t)));
}
/*--Partial derivative of net output w.r.t. past net outputs --*/
mmul(dy2dy_vec,dy2dy1,dy1dy);
for(i=0;i<na;i++) put_val(dy2dy,i,t,rvget(dy2dy_vec,i));
}
/*
>>>>>>>>>>>>>>>>>>> Filter partial derivatives <<<<<<<<<<<<<<<<<<<<
*/
for(t=0;t<N;t++){
j=na;
if(t<na) j=t;
for(k=1;k<=j;k++){
for(i=0;i<reduced;i++) {
ii =(int)cvget(theta_index,i);
PSI->mat[ii][t] += get_val(dy2dy,k-1,t)*get_val(PSI,ii,t-k);
}
}
}
dw = 0;
/*
>>>>>>>>>>>> Gradient (G = PSI_red*E_vector - D*theta_red) <<<<<<<<<<<<<
*/
for(i=0; i<reduced; i++){
ii = (int)cvget(theta_index,i);
for(sum=0.0,k=skip; k<N; k++) sum+=get_val(PSI,ii,k)*rvget(E,k);
cvput(G,i,sum - cvget(D,i)*cvget(theta_red,i));
}
/*
>>>>>>>>>> Mean square error part of Hessian (PSI_red*PSI_red') <<<<<<<<<<
*/
for(i=0; i<reduced; i++){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -