📄 nnmisc.c

📁 基于神经网络的辨识工具箱 (527KB)
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "mex.h"
#include "matrix2.h"
#include "nnmisc.h"

/*********************************************************************************
 *                                                                               *
 *    MARQC                                                                      *
 *    -----                                                                      *
 *                                                                               *
 *    This is a C-version of the Matlab function marq.                           *
 *    Type 'help marq' from Matlab for information on                            *
 *    how to call this function.                                                 *
 *                                                                               *
 *                                                                               *
 *    Programmed by: Magnus Norgaard                                             *
 *    LastEditDate : okt. 05, 1994                                               *
 *                                                                               *
 *********************************************************************************/
void marqc(matrix **PI_vectorpp, int *iter, double *lam,\
       matrix *NetDef, matrix *W1, matrix *W2, matrix *PHII,\
        matrix *Y, matrix *trparms)
{
/*
-----------------------------------------------------------------------------------
---------------              VARIABLE DECLARATIONS                    -------------
-----------------------------------------------------------------------------------
*/
register i, j, k;
int max_iter, iteration, outputs, N, Nout, layers, hidden, inputs;
int parameters1, parameters2, parameters, reduced, index1, ii, jj;
int lhids, hhids, louts, houts, index11;
double stop_crit, lambda, SSE, SSE_new, PI, PI_new, L, tmp1, sum;
char dw;
matrix *L_hidden, *H_hidden, *L_output, *H_output, *h1, *h2, *y1, *y2;
matrix *E, *E_new, *E_vector, *E_new_vector, *W1_new, *W2_new, *PHI, *D, *Dtmp;
matrix *PI_vector, *tmp, *Htmp, *R;
matrix *theta, *thtmp, *theta_index, *theta_red, *theta_red_new, *PSI, *G, *H, *h;
matrix *all, *index0, *index7, *onesvec, *tmp0, *tmp2, *tmp3, *index, *index2;
struct tm *c;
time_t lt;


/*
-----------------------------------------------------------------------------------
---------------             NETWORK INITIALIZATIONS                   -------------
-----------------------------------------------------------------------------------
 */
outputs   = getrows(Y);                  /* # of outputs                         */
N         = getcols(Y);                  /* # of data                            */
hidden    = getrows(W1);                 /* # of hidden units                    */
inputs    = getcols(W1);                 /* Number of inputs                     */
inputs    = inputs-1;                 
Nout      = N*outputs;                   /* N*outputs                            */ 
h1        = mmake(hidden,N);             /* Argument to hidden layer act. fcts   */
h2        = mmake(outputs,N);            /* Argument to hidden layer act. fcts   */
iteration = 1;                           /* Initialize iteration counter         */
dw        = 1;                           /* Flag telling that the weights are new*/
onesvec   = mmake(1,N);                  /* Vector of all ones                   */
minitx(onesvec,1.0);
y1        = mmake(hidden+1,N); minit(y1);/* Hidden layer outputs                 */
mat2mat(y1,hidden,0,onesvec);            /* Add a row of ones (bias to outputs)  */
y2        = mmake(outputs,N); minit(y2); /* Output layer output                  */
E         = mmake(outputs,N);            /* Prediction error matrix              */
E_new     = mmake(outputs,N);            /* A priori E                           */
E_vector  = mmake(N*outputs,1);          /* Prediction error vector              */
E_new_vector = mmake(N*outputs,1);       /* A priori E_vector                    */
PHI       = mmake(inputs+1,N);           /* Matrix of input vectors (incl. bias) */
addrows(PHI,PHII,onesvec);
parameters1= hidden*(inputs+1);          /* # of input-to-hidden weights         */
parameters2= outputs*(hidden+1);         /* # of hidden-to-output weights        */
parameters = parameters1+parameters2;    /* Total # of weights                   */
theta      = mmake(parameters,1);        /* Vector containing all weights        */
m2vreshape(theta,0,W2);
m2vreshape(theta,parameters2,W1);
thtmp      = mnofind(theta,0.0);         /* Find non-zero entries in theta       */
reduced    = getrows(thtmp);             /* # of non-zero elements               */
theta_index = mmake(reduced,1);          /* Indices to weights <> 0              */
submat(theta_index,thtmp,0,reduced-1,0,0);
theta_red = mmake(reduced,1);            /* Reduced parameter vector             */
for(i=0;i<reduced;i++)                   /* theta_red = theta(theta_index)       */
  cvput(theta_red,i,cvget(theta,(int)cvget(theta_index,i)));
theta_red_new = mmake(reduced,1);        /* A priori update of parameters        */
W1_new    = mmake(hidden,inputs+1);      /* A priori updated W1                  */
W2_new    = mmake(outputs,hidden+1);     /* A priori updated W2                  */
PSI       = mmake(parameters,Nout);      /* Der. of each output wrt. each weight */
minit(PSI);
G         = mmake(reduced,1);            /* Gradient vector                      */
H         = mmake(reduced,reduced);      /* Hessian matrix                       */
R         = mmake(reduced,reduced);      /* Mean square error G-N Hessian        */
Htmp      = mmake(reduced,reduced);      /* Matrix used by the linear sys solver */
h         = mmake(reduced,1);            /* Update vector                        */
all       = mmake(N,1);                  /* Index vector (0:N-1)                 */
for(k=0;k<N;k++) cvput(all,k,(double)k);
index0    = mmake(1,1);                  /* Index vector (0)                     */
put_val(index0,0,0,0);
index7    = mmake(parameters,1);         /* Index vector (0:parameters-1)        */
for(k=0;k<parameters;k++) cvput(index7,k,(double)k);
L_hidden = neuvector(NetDef,1,'L');      /* Location of linear hidden units      */
H_hidden = neuvector(NetDef,1,'H');      /* Location of tanh hidden units        */
L_output = neuvector(NetDef,2,'L');      /* Location of linear output units      */
H_output = neuvector(NetDef,2,'H');      /* Location of tanh output units        */
lhids     = getrows(L_hidden);           /* # of linear hidden units             */
hhids     = getrows(H_hidden);           /* # of tanh hidden units               */
louts     = getrows(L_output);           /* # of linear output units             */ 
houts     = getrows(H_output);           /* # of tanh output units               */
if(hhids>0) tmp0 = mmake(hhids,N);       /* Used to construct PSI                */
else tmp0 = mmake(1,1);
tmp2      = mmake(1,N);                  /* Used to construct PSI                */
tmp3      = mmake(1,N);                  /* Used to construct PSI                */
index2    = mmake(N,1);                  /* Index vector (0:N-1)*outputs         */
for(k=0;k<N;k++) cvput(index2,k,(double)k*outputs);
index     = mmake(hidden,1);             /* Index vector outputs*(hidden+1)+...  */
for(k=0;k<hidden;k++) cvput(index,k,(double)(outputs*(hidden+1)+k*(inputs+1)));
max_iter  = vget(trparms,0);             /* Max. no. iterations                  */
stop_crit = vget(trparms,1);             /* Error bound                          */
lambda    = vget(trparms,2);             /* Levenberg-Marquardt parameter        */
D         = mmake(reduced,1);            /* Initialize vector cont. weight decays*/
Dtmp      = mmake(parameters,1);
if(length(trparms)==4)                   /* Scalar weight decay parameters       */
  for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms,3));
else if(length(trparms)==5)              /* Two weight decay parameters          */
{
  for(i=0;i<parameters2;i++) cvput(Dtmp,i,rvget(trparms,3));
  for(i=parameters2;i<parameters;i++) cvput(Dtmp,i,rvget(trparms,4));
  mcopyi(D,theta_index,index0,Dtmp,index7,index0);
}
else{                                    /* Individual weight decays             */
  for(i=0;i<reduced;i++) cvput(D,i,rvget(trparms,3+i));   
}
PI        = stop_crit+1;                 /* Intialize cost function              */
PI_vector = mmake(max_iter,1);           /* Vector containing the PI's           */


/* 
-----------------------------------------------------------------------------------
---------------                    TRAIN NETWORK                      -------------
-----------------------------------------------------------------------------------
*/
lt = time(NULL);
c  = localtime(&lt);

/* Clear screen on HP systems.
Uncomment the following line and comment the subsequent one */
/*printf("\x1BH\x1BJNetwork training started at %.8s\n\n",asctime(c)+11);*/

printf("\nNetwork training started at %.8s\n\n",asctime(c)+11);

/*
 >>>>>>>>>>>>>>       Compute network output y2(theta)          <<<<<<<<<<<<<<< 
*/
    mmul(h1,W1,PHI);
    mtanh(y1,H_hidden,all,h1,H_hidden,all);
    mcopyi(y1,L_hidden,all,h1,L_hidden,all);
    mmul(h2,W2,y1);
    mtanh(y2,H_output,all,h2,H_output,all);
    mcopyi(y2,L_output,all,h2,L_output,all);
    msub(E,Y,y2);                         /* Training error                      */
    m2vreshape2(E_vector,0,E);            /* reshape E intor a long vector       */
    SSE=sprod3(E_vector,E_vector);        /* Sum of squared errors               */
    tmp1 = 0;
    for(i=0;i<reduced;i++)
      tmp1+=cvget(theta_red,i)*cvget(theta_red,i)*cvget(D,i);
    PI = (SSE+tmp1)/(2*N);

while (iteration<=max_iter && PI>stop_crit && lambda<1e7)
{
  if(dw==1)
  {
/*
 >>>>>>>>>>>>>>>>>>>>>>>>>>>   COMPUTE THE PSI MATRIX   <<<<<<<<<<<<<<<<<<<<<<<<<<
 (The derivative of each network output (y2) with respect to each weight)
*/
/* Some intermidiate computations */
    for(j=0;j<hhids;j++)
    {
      jj = (int)cvget(H_hidden,j);
      for(k=0;k<N;k++)
	put_val(tmp0,j,k,1-get_val(y1,jj,k)*get_val(y1,jj,k));
    }

/*   ==========   Elements corresponding to the linear output units   ============*/
    for(i=0; i<louts; i++)
    {
      ii = (int)cvget(L_output,i);

      /***  The part of PSI corresponding to hidden-to-output layer weights ***/
      index1 = ii * (hidden+1);
      psi1(PSI, index1, index2, ii, y1);
      /************************************************************************/

      /**** The part of PSI corresponding to input-to-hidden layer weights ****/
      for(j=0; j<lhids; j++)
      {
	jj = (int)cvget(L_hidden,j);
        psi2(PSI, (int)cvget(index,jj), index2, ii, get_val(W2,ii,jj), PHI);
      }

      for(j=0; j<hhids;j++)
      {
        jj = (int)cvget(H_hidden,j);
	psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
	psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
      }
      /************************************************************************/    
    }


    /* ============  Elements corresponding to the tanh output units   =============*/
    for(i=0; i<houts; i++)
    {
      ii = (int)cvget(H_output,i);
      index1 = ii * (hidden + 1);
      for(k=0; k<N; k++)
	put_val(tmp2,0,k,1-get_val(y2,ii,k)*get_val(y2,ii,k));

      /* -- The part of PSI corresponding to hidden-to-output layer weights --*/
      psi4(PSI, index1, index2, ii, tmp2, y1);
      /* ---------------------------------------------------------------------*/
    
      /* -- The part of PSI corresponding to input-to-hidden layer weights ---*/
      for(j=0; j<lhids; j++)
      {
        jj = (int)cvget(L_hidden,j);
	smul(tmp3, tmp2, get_val(W2,ii,jj));
	psi4(PSI, (int)cvget(index,jj), index2, ii, tmp3, PHI);
      }
      
      for(j=0; j<hhids; j++)
      {
      	jj = (int)cvget(H_hidden,j);
	psi3(tmp3, tmp0, j, get_val(W2,ii,jj));
        psi5(PSI, (int)cvget(index,jj), index2, ii, tmp3, tmp2, PHI);
      }
      /* ---------------------------------------------------------------------*/
    }
      dw = 0;
      /* -- Gradient (G = PSI_red*E_vector - D*theta_red) -- */
     	for(i=0; i<reduced; i++){
      		ii = (int)cvget(theta_index,i);
    		for(sum=0.0,k=0; k<Nout; k++) sum += get_val(PSI,ii,k)*cvget(E_vector,k);
    		cvput(G,i,sum - cvget(D,i)*cvget(theta_red,i));
      	}
    	
    	/* -- Mean square error part of Hessian (PSI_red*PSI_red'  --*/
    	for(i=0; i<reduced; i++){
    		ii = (int)cvget(theta_index,i);
    		for(j=i; j<reduced; j++){
      			jj = (int)cvget(theta_index,j);
      			for(sum=0.0,k=0; k<Nout; k++)
			sum += get_val(PSI,ii,k)*get_val(PSI,jj,k);
      			put_val(R,i,j,sum);
      			put_val(R,j,i,sum);
      			
    		}
  	}
  }

/*
 >>>>>>>>>>>>>>>>>>>>>>>>>>>        COMPUTE h_k        <<<<<<<<<<<<<<<<<<<<<<<<<<<
 */
 
  /* -- Hessian (H = R + lambda*I + D)  --*/
  mset(H,R);
  for(i=0;i<reduced;i++)                            /* Add diagonal matrix     */
    put_val(H,i,i,get_val(H,i,i)+lambda+cvget(D,i));               

  /* -- Search direction -- */
  choldc(H, Htmp);
  cholsl(Htmp,h,G);

  /* -- Compute 'apriori' iterate -- */
  madd(theta_red_new,theta_red,h);                  /* Update parameter vector */
  mcopyi(theta,theta_index,index0,theta_red_new,index7,index0);

  /* -- Put the parameters back into the weight matrices -- */
  v2mreshape(W1_new,theta,parameters2);
  v2mreshape(W2_new,theta,0);


/*
 >>>>>>>>>>>>>>       Compute network output y2(theta+h)          <<<<<<<<<<<<<<< 
*/
    mmul(h1,W1_new,PHI);
    mtanh(y1,H_hidden,all,h1,H_hidden,all);
    mcopyi(y1,L_hidden,all,h1,L_hidden,all);
    mmul(h2,W2_new,y1);
    mtanh(y2,H_output,all,h2,H_output,all);
    mcopyi(y2,L_output,all,h2,L_output,all);
    msub(E_new,Y,y2);                     /* Training error                      */
    m2vreshape2(E_new_vector,0,E_new);    /* reshape E intor a long vector       */
    SSE_new = sprod3(E_new_vector,E_new_vector); /* Sum of squared errors        */
    for(tmp1=0.0,i=0; i<reduced; i++)
    {
      tmp1+=cvget(theta_red_new,i)*cvget(theta_red_new,i)*cvget(D,i);
      PI_new = (SSE_new+tmp1)/(2*N);
    }

/*
 >>>>>>>>>>>>>>>>>>>>>>>>>>>       UPDATE  lambda     <<<<<<<<<<<<<<<<<<<<<<<<<<<<
 */
    tmp1 = 0;
    for(i=0;i<reduced;i++) tmp1+=cvget(h,i)*cvget(h,i)*(cvget(D,i)+lambda);
    L = sprod3(h,G) + tmp1;

    /* Decrease lambda if SSE has fallen 'sufficiently' */
    if(2*N*(PI - PI_new) > (0.75*L)) lambda = lambda/2;
  
    /* Increase lambda if SSE has grown 'sufficiently'  */
    else if(2*N*(PI-PI_new) <= (0.25*L)) lambda = 2*lambda;  


/*
 >>>>>>>>>>>>>>>>>>>>       UPDATES FOR NEXT ITERATION        <<<<<<<<<<<<<<<<<<<<<
 */
    /* Update only if criterion has decreased */
    if(PI_new<PI)
    {
     tmp = W1; W1=W1_new; W1_new=tmp;
     tmp = W2; W2=W2_new; W2_new=tmp;
     tmp = theta_red; theta_red=theta_red_new; theta_red_new = tmp;
     tmp = E_vector; E_vector = E_new_vector; E_new_vector = tmp;
     dw = 1;
     PI = PI_new;
     cvput(PI_vector,iteration-1,PI);
     printf("iteration # %i   PI = %4.3e\r",iteration,PI); /* On-line information  */
     ++iteration;
   }
}


/*
 >>>>>>>>>>    RETURN POINTERS TO RETURN ARGUMENTS & FREE MEMORY    <<<<<<<<<<<<
 */
 
/* Swap pointers if they have been messed up */
if ((iteration&1) == 0) {
	mset(W1_new,W1);
	tmp = W1; W1=W1_new; W1_new=tmp;
	mset(W2_new,W2);
     	tmp = W2; W2=W2_new; W2_new=tmp;
}
iteration=iteration-1;
if(iteration==0){
	*PI_vectorpp = mmake(1,1);
	(*PI_vectorpp)->row=0;
	(*PI_vectorpp)->col=0;
}
12 3 下一页
💿 文件大小 527 K
👤 上传用户 sujiwei
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#527 #KB #神经网络 #工具箱
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -