📄 nnmisc.c
字号:
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "mex.h"
#include "matrix2.h"
#include "nnmisc.h"
/*********************************************************************************
* *
* MARQC *
* ----- *
* *
* This is a C-version of the Matlab function marq. *
* Type 'help marq' from Matlab for information on *
* how to call this function. *
* *
* *
* Programmed by: Magnus Norgaard *
* LastEditDate : okt. 05, 1994 *
* *
*********************************************************************************/
void marqc(matrix **PI_vectorpp, int *iter, double *lam,\
matrix *NetDef, matrix *W1, matrix *W2, matrix *PHII,\
matrix *Y, matrix *trparms)
{
/*
-----------------------------------------------------------------------------------
--------------- VARIABLE DECLARATIONS -------------
-----------------------------------------------------------------------------------
*/
register i, j, k;
int max_iter, iteration, outputs, N, Nout, layers, hidden, inputs;
int parameters1, parameters2, parameters, reduced, index1, ii, jj;
int lhids, hhids, louts, houts, index11;
double stop_crit, lambda, SSE, SSE_new, PI, PI_new, L, tmp1, sum;
char dw;
matrix *L_hidden, *H_hidden, *L_output, *H_output, *h1, *h2, *y1, *y2;
matrix *E, *E_new, *E_vector, *E_new_vector, *W1_new, *W2_new, *PHI, *D, *Dtmp;
matrix *PI_vector, *tmp, *Htmp, *R;
matrix *theta, *thtmp, *theta_index, *theta_red, *theta_red_new, *PSI, *G, *H, *h;
matrix *all, *index0, *index7, *onesvec, *tmp0, *tmp2, *tmp3, *index, *index2;
struct tm *c;
time_t lt;
/*
-----------------------------------------------------------------------------------
--------------- NETWORK INITIALIZATIONS -------------
-----------------------------------------------------------------------------------
*/
outputs = getrows(Y); /* # of outputs */
N = getcols(Y); /* # of data */
hidden = getrows(W1); /* # of hidden units */
inputs = getcols(W1); /* Number of inputs */
inputs = inputs-1;
Nout = N*outputs; /* N*outputs */
h1 = mmake(hidden,N); /* Argument to hidden layer act. fcts */
h2 = mmake(outputs,N); /* Argument to hidden layer act. fcts */
iteration = 1; /* Initialize iteration counter */
dw = 1; /* Flag telling that the weights are new*/
onesvec = mmake(1,N); /* Vector of all ones */
minitx(onesvec,1.0);
y1 = mmake(hidden+1,N); minit(y1);/* Hidden layer outputs */
mat2mat(y1,hidden,0,onesvec); /* Add a row of ones (bias to outputs) */
y2 = mmake(outputs,N); minit(y2); /* Output layer output */
E = mmake(outputs,N); /* Prediction error matrix */
E_new = mmake(outputs,N); /* A priori E */
E_vector = mmake(N*outputs,1); /* Prediction error vector */
E_new_vector = mmake(N*outputs,1); /* A priori E_vector */
PHI = mmake(inputs+1,N); /* Matrix of input vectors (incl. bias) */
addrows(PHI,PHII,onesvec);
parameters1= hidden*(inputs+1); /* # of input-to-hidden weights */
parameters2= outputs*(hidden+1); /* # of hidden-to-output weights */
parameters = parameters1+parameters2; /* Total # of weights */
theta = mmake(parameters,1); /* Vector containing all weights */
m2vreshape(theta,0,W2);
m2vreshape(theta,parameters2,W1);
thtmp = mnofind(theta,0.0); /* Find non-zero entries in theta */
reduced = getrows(thtmp); /* # of non-zero elements */
theta_index = mmake(reduced,1); /* Indices to weights <> 0 */
submat(theta_index,thtmp,0,reduced-1,0,0);
theta_red = mmake(reduced,1); /* Reduced parameter vector */
for(i=0;i<reduced;i++) /* theta_red = theta(theta_index) */
cvput(theta_red,i,cvget(theta,(int)cvget(theta_index,i)));
theta_red_new = mmake(reduced,1); /* A priori update of parameters */
W1_new = mmake(hidden,inputs+1); /* A priori updated W1 */
W2_new = mmake(outputs,hidden+1); /* A priori updated W2 */
PSI = mmake(parameters,Nout); /* Der. of each output wrt. each weight */
minit(PSI);
G = mmake(reduced,1); /* Gradient vector */
H = mmake(reduced,reduced); /* Hessian matrix */
R = mmake(reduced,reduced); /* Mean square error G-N Hessian */
Htmp = mmake(reduced,reduced); /* Matrix used by the linear sys solver */
h = mmake(reduced,1); /* Update vector */
all = mmake(N,1); /* Index vector (0:N-1) */
for(k=0;k<N;k++) cvput(all,k,(double)k);
index0 = mmake(1,1); /* Index vector (0) */
put_val(index0,0,0,0);
index7 = mmake(parameters,1); /* Index vector (0:parameters-1) */
for(k=0;k<parameters;k++) cvput(index7,k,(double)k);
L_hidden = neuvector(NetDef,1,'L'); /* Location of linear hidden units */
H_hidden = neuvector(NetDef,1,'H'); /* Location of tanh hidden units */
L_output = neuvector(NetDef,2,'L'); /* Location of linear output units */
H_output = neuvector(NetDef,2,'H'); /* Location of tanh output units */
lhids = getrows(L_hidden); /* # of linear hidden units */
hhids = getrows(H_hidden); /* # of tanh hidden units */
louts = getrows(L_output); /* # of linear output units */
houts = getrows(H_output); /* # of tanh output units */
if(hhids>0) tmp0 = mmake(hhids,N); /* Used to construct PSI */
else tmp0 = mmake(1,1);
tmp2 = mmake(1,N); /* Used to construct PSI */
tmp3 = mmake(1,N); /* Used to construct PSI */
index2 = mmake(N,1); /* Index vector (0:N-1)*outputs */
for(k=0;k<N;k++) cvput(index2,k,(double)k*outputs);
index = mmake(hidden,1); /* Index vector outputs*(hidden+1)+... */
for(k=0;k<hidden;k++) cvput(index,k,(double)(outputs*(hidden+1)+k*(inputs+1)));
max_iter = vget(trparms,0); /* Max. no. iterations */
stop_crit = vget(trparms,1); /* Error bound */
lambda = vget(trparms,2); /* Levenberg-Marquardt parameter */
D = mmake(reduced,1); /* Initialize vector cont. weight decays*/
Dtmp = mmake(parameters,1);
if(length(trparms)==4) /* Scalar weight decay parameters */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms,3));
else if(length(trparms)==5) /* Two weight decay parameters */
{
for(i=0;i<parameters2;i++) cvput(Dtmp,i,rvget(trparms,3));
for(i=parameters2;i<parameters;i++) cvput(Dtmp,i,rvget(trparms,4));
mcopyi(D,theta_index,index0,Dtmp,index7,index0);
}
else{ /* Individual weight decays */
for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms,3+i));
}
PI = stop_crit+1; /* Intialize cost function */
PI_vector = mmake(max_iter,1); /* Vector containing the PI's */
/*
-----------------------------------------------------------------------------------
--------------- TRAIN NETWORK -------------
-----------------------------------------------------------------------------------
*/
lt = time(NULL);
c = localtime(<);
/* Clear screen on HP systems.
Uncomment the following line and comment the subsequent one */
/*printf("\x1BH\x1BJNetwork training started at %.8s\n\n",asctime(c)+11);*/
printf("\nNetwork training started at %.8s\n\n",asctime(c)+11);
/*
>>>>>>>>>>>>>> Compute network output y2(theta) <<<<<<<<<<<<<<<
*/
mmul(h1,W1,PHI);
mtanh(y1,H_hidden,all,h1,H_hidden,all);
mcopyi(y1,L_hidden,all,h1,L_hidden,all);
mmul(h2,W2,y1);
mtanh(y2,H_output,all,h2,H_output,all);
mcopyi(y2,L_output,all,h2,L_output,all);
msub(E,Y,y2); /* Training error */
m2vreshape2(E_vector,0,E); /* reshape E intor a long vector */
SSE=sprod3(E_vector,E_vector); /* Sum of squared errors */
tmp1 = 0;
for(i=0;i<reduced;i++)
tmp1+=cvget(theta_red,i)*cvget(theta_red,i)*cvget(D,i);
PI = (SSE+tmp1)/(2*N);
while (iteration<=max_iter && PI>stop_crit && lambda<1e7)
{
if(dw==1)
{
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE THE PSI MATRIX <<<<<<<<<<<<<<<<<<<<<<<<<<
(The derivative of each network output (y2) with respect to each weight)
*/
/* Some intermidiate computations */
for(j=0;j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
for(k=0;k<N;k++)
put_val(tmp0,j,k,1-get_val(y1,jj,k)*get_val(y1,jj,k));
}
/* ========== Elements corresponding to the linear output units ============*/
for(i=0; i<louts; i++)
{
ii = (int)cvget(L_output,i);
/*** The part of PSI corresponding to hidden-to-output layer weights ***/
index1 = ii * (hidden+1);
psi1(PSI, index1, index2, ii, y1);
/************************************************************************/
/**** The part of PSI corresponding to input-to-hidden layer weights ****/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
psi2(PSI, (int)cvget(index,jj), index2, ii, get_val(W2,ii,jj), PHI);
}
for(j=0; j<hhids;j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
/************************************************************************/
}
/* ============ Elements corresponding to the tanh output units =============*/
for(i=0; i<houts; i++)
{
ii = (int)cvget(H_output,i);
index1 = ii * (hidden + 1);
for(k=0; k<N; k++)
put_val(tmp2,0,k,1-get_val(y2,ii,k)*get_val(y2,ii,k));
/* -- The part of PSI corresponding to hidden-to-output layer weights --*/
psi4(PSI, index1, index2, ii, tmp2, y1);
/* ---------------------------------------------------------------------*/
/* -- The part of PSI corresponding to input-to-hidden layer weights ---*/
for(j=0; j<lhids; j++)
{
jj = (int)cvget(L_hidden,j);
smul(tmp3, tmp2, get_val(W2,ii,jj));
psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
}
for(j=0; j<hhids; j++)
{
jj = (int)cvget(H_hidden,j);
psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
psi5(PSI, (int)cvget(index,jj), index2, ii, tmp3, tmp2, PHI);
}
/* ---------------------------------------------------------------------*/
}
dw = 0;
/* -- Gradient (G = PSI_red*E_vector - D*theta_red) -- */
for(i=0; i<reduced; i++){
ii = (int)cvget(theta_index,i);
for(sum=0.0,k=0; k<Nout; k++) sum += get_val(PSI,ii,k)*cvget(E_vector,k);
cvput(G,i,sum - cvget(D,i)*cvget(theta_red,i));
}
/* -- Mean square error part of Hessian (PSI_red*PSI_red' --*/
for(i=0; i<reduced; i++){
ii = (int)cvget(theta_index,i);
for(j=i; j<reduced; j++){
jj = (int)cvget(theta_index,j);
for(sum=0.0,k=0; k<Nout; k++)
sum += get_val(PSI,ii,k)*get_val(PSI,jj,k);
put_val(R,i,j,sum);
put_val(R,j,i,sum);
}
}
}
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> COMPUTE h_k <<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
/* -- Hessian (H = R + lambda*I + D) --*/
mset(H,R);
for(i=0;i<reduced;i++) /* Add diagonal matrix */
put_val(H,i,i,get_val(H,i,i)+lambda+cvget(D,i));
/* -- Search direction -- */
choldc(H, Htmp);
cholsl(Htmp,h,G);
/* -- Compute 'apriori' iterate -- */
madd(theta_red_new,theta_red,h); /* Update parameter vector */
mcopyi(theta,theta_index,index0,theta_red_new,index7,index0);
/* -- Put the parameters back into the weight matrices -- */
v2mreshape(W1_new,theta,parameters2);
v2mreshape(W2_new,theta,0);
/*
>>>>>>>>>>>>>> Compute network output y2(theta+h) <<<<<<<<<<<<<<<
*/
mmul(h1,W1_new,PHI);
mtanh(y1,H_hidden,all,h1,H_hidden,all);
mcopyi(y1,L_hidden,all,h1,L_hidden,all);
mmul(h2,W2_new,y1);
mtanh(y2,H_output,all,h2,H_output,all);
mcopyi(y2,L_output,all,h2,L_output,all);
msub(E_new,Y,y2); /* Training error */
m2vreshape2(E_new_vector,0,E_new); /* reshape E intor a long vector */
SSE_new = sprod3(E_new_vector,E_new_vector); /* Sum of squared errors */
for(tmp1=0.0,i=0; i<reduced; i++)
{
tmp1+=cvget(theta_red_new,i)*cvget(theta_red_new,i)*cvget(D,i);
PI_new = (SSE_new+tmp1)/(2*N);
}
/*
>>>>>>>>>>>>>>>>>>>>>>>>>>> UPDATE lambda <<<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
tmp1 = 0;
for(i=0;i<reduced;i++) tmp1+=cvget(h,i)*cvget(h,i)*(cvget(D,i)+lambda);
L = sprod3(h,G) + tmp1;
/* Decrease lambda if SSE has fallen 'sufficiently' */
if(2*N*(PI - PI_new) > (0.75*L)) lambda = lambda/2;
/* Increase lambda if SSE has grown 'sufficiently' */
else if(2*N*(PI-PI_new) <= (0.25*L)) lambda = 2*lambda;
/*
>>>>>>>>>>>>>>>>>>>> UPDATES FOR NEXT ITERATION <<<<<<<<<<<<<<<<<<<<<
*/
/* Update only if criterion has decreased */
if(PI_new<PI)
{
tmp = W1; W1=W1_new; W1_new=tmp;
tmp = W2; W2=W2_new; W2_new=tmp;
tmp = theta_red; theta_red=theta_red_new; theta_red_new = tmp;
tmp = E_vector; E_vector = E_new_vector; E_new_vector = tmp;
dw = 1;
PI = PI_new;
cvput(PI_vector,iteration-1,PI);
printf("iteration # %i PI = %4.3e\r",iteration,PI); /* On-line information */
++iteration;
}
}
/*
>>>>>>>>>> RETURN POINTERS TO RETURN ARGUMENTS & FREE MEMORY <<<<<<<<<<<<
*/
/* Swap pointers if they have been messed up */
if ((iteration&1) == 0) {
mset(W1_new,W1);
tmp = W1; W1=W1_new; W1_new=tmp;
mset(W2_new,W2);
tmp = W2; W2=W2_new; W2_new=tmp;
}
iteration=iteration-1;
if(iteration==0){
*PI_vectorpp = mmake(1,1);
(*PI_vectorpp)->row=0;
(*PI_vectorpp)->col=0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -