/*
--------------------------------------------------------------------------------
   conjgrad.c
--------------------------------------------------------------------------------
*/
      outgrad = grad + nhid1 * (nin+1) ;
      outprev = hid1 ;
      nprev = nhid1 ;
      nnext = nout ;
      nextcoefs = out_coefs ;
      nextdelta = outdelta ;
      }
   else {                                 // Two hidden layers
      n = nhid1 * (nin+1) + nhid2 * (nhid1+1) + nout * (nhid2+1) ;
      hid1grad = grad ;
      hid2grad = grad + nhid1 * (nin+1) ;
      outgrad = hid2grad + nhid2 * (nhid1+1) ;
      outprev = hid2 ;
      nprev = nhid2 ;
      nnext = nhid2 ;
      nextcoefs = hid2_coefs ;
      nextdelta = hid2delta ;
      }

   for (i=0 ; i<n ; i++)                  // Zero gradient for summing
      grad[i] = 0.0 ;

   error = 0.0 ;                          // Will cumulate total error here

   for (tset=0 ; tset<tptr->ntrain ; tset++) {  // Do all samples

      dptr = tptr->data + size * tset ;   // Point to this sample
      trial ( dptr ) ;                    // Evaluate network for it

      if (outmod == OUTMOD_AUTO) {        // If this is AUTOASSOCIATIVE
         for (i=0 ; i<nout ; i++) {       // then the expected outputs
            diff = *dptr++ - out[i] ;     // are just the inputs
            error += diff * diff ;
            outdelta[i] = diff * actderiv ( out[i] ) ;
            }
         }

      else if (outmod == OUTMOD_CLASSIFY) {  // If this is Classification
         tclass = (int) dptr[nin] - 1 ;   // class is stored after inputs
         for (i=0 ; i<nout ; i++) {       // Recall that train added a
            if (tclass == i)              // fraction so that the above
               diff = NEURON_ON - out[i] ;   // truncation to get tclass is
            else                          // always safe in any radix
               diff = NEURON_OFF - out[i] ;
            error += diff * diff ;
            outdelta[i] = diff * actderiv ( out[i] ) ;
            }
         }

      else if (outmod == OUTMOD_GENERAL) {   // If this is GENERAL output
         dptr += nin ;                    // outputs stored after inputs
         for (i=0 ; i<nout ; i++) {
            diff = *dptr++ - out[i] ;
            error += diff * diff ;
            outdelta[i] = diff * actderiv ( out[i] ) ;
            }
         }

/*
   Cumulate output gradient
*/

      if (nhid1 == 0)                     // No hidden layer
         prevact = tptr->data + size * tset ;
      else
         prevact = outprev ;              // Point to previous layer

      gradptr = outgrad ;
      for (i=0 ; i<nout ; i++) {
         delta = outdelta[i] ;
         for (j=0 ; j<nprev ; j++)
            *gradptr++ += delta * prevact[j] ;
         *gradptr++ += delta ;            // Bias activation is always 1
         }

/*
   Cumulate hid2 gradient (if it exists)
*/

      if (nhid2) {
         gradptr = hid2grad ;
         for (i=0 ; i<nhid2 ; i++) {
            delta = 0.0 ;
            for (j=0 ; j<nout ; j++)
               delta += outdelta[j] * out_coefs[j*(nhid2+1)+i] ;
            delta *= actderiv ( hid2[i] ) ;
            hid2delta[i] = delta ;
            for (j=0 ; j<nhid1 ; j++)
               *gradptr++ += delta * hid1[j] ;
            *gradptr++ += delta ;         // Bias activation is always 1
            }
         }

/*
   Cumulate hid1 gradient (if it exists)
*/

      if (nhid1) {
         prevact = tptr->data + size * tset ;
         gradptr = hid1grad ;
         for (i=0 ; i<nhid1 ; i++) {
            delta = 0.0 ;
            for (j=0 ; j<nnext ; j++)
               delta += nextdelta[j] * nextcoefs[j*(nhid1+1)+i] ;
            delta *= actderiv ( hid1[i] ) ;
            for (j=0 ; j<nin ; j++)
               *gradptr++ += delta * prevact[j] ;
            *gradptr++ += delta ;         // Bias activation is always 1
            }
         }

      } // for all tsets

   return error / ((double) tptr->ntrain * (double) nout) ;
}
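
/*
   Note on the gradient loops above: they accumulate the standard
   backpropagation gradient of the summed squared error.  For output neuron i
   the delta is (target_i - out_i) * actderiv(out_i); for a hidden neuron the
   delta is actderiv of its activation times the weighted sum of the deltas in
   the layer above it.  Each weight's gradient entry then sums delta times the
   activation feeding that weight over all training samples, with an implicit
   activation of 1.0 for the bias term.  Because the deltas are built from
   (target - output), 'grad' points along the NEGATIVE error gradient, which
   is what gamma() below expects.  The returned error is normalized by
   ntrain * nout, while the gradient is left un-normalized; that changes only
   its length, not its direction.
*/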
/*
--------------------------------------------------------------------------------
Local routine to find gamma
--------------------------------------------------------------------------------
*/
double LayerNet::gamma ( double *g , double *grad )
{
   int i, n ;
   double denom, numer ;

   if (nhid1 == 0)                        // No hidden layer
      n = nout * (nin+1) ;
   else if (nhid2 == 0)                   // One hidden layer
      n = nhid1 * (nin+1) + nout * (nhid1+1) ;
   else                                   // Two hidden layers
      n = nhid1 * (nin+1) + nhid2 * (nhid1+1) + nout * (nhid2+1) ;

   numer = denom = 0. ;

   for (i=0 ; i<n ; i++) {
      denom += g[i] * g[i] ;
      numer += (grad[i] - g[i]) * grad[i] ;  // Grad is neg gradient
      }

   if (denom == 0.)                       // Should never happen (means gradient is zero!)
      return 0. ;
   else
      return numer / denom ;
}
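
/*
   The expression above is the Polak-Ribiere coefficient

      gamma = sum[ (grad_i - g_i) * grad_i ]  /  sum[ g_i * g_i ]

   with g holding the previous negative gradient and grad the current one.
   Because both vectors carry the same sign flip, the value is identical to
   the one computed from the true gradients.
*/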
/*
--------------------------------------------------------------------------------
Local routine to find correction for next iteration
--------------------------------------------------------------------------------
*/
void LayerNet::find_new_dir ( double gam , double *g ,
                              double *h , double *grad )
{
   int i, n ;
   double *gptr, *hptr, *cptr ;

   if (nhid1 == 0)                        // No hidden layer
      n = nout * (nin+1) ;
   else if (nhid2 == 0)                   // One hidden layer
      n = nhid1 * (nin+1) + nout * (nhid1+1) ;
   else                                   // Two hidden layers
      n = nhid1 * (nin+1) + nhid2 * (nhid1+1) + nout * (nhid2+1) ;

   for (i=0 ; i<n ; i++) {
      g[i] = grad[i] ;
      grad[i] = h[i] = g[i] + gam * h[i] ;
      }
}
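
/*
   find_new_dir() performs the conjugate-direction update: it saves the
   current negative gradient into g, forms the new direction h = g + gam * h,
   and leaves a copy of that direction in grad for the caller.  The outer
   conjugate-gradient loop is not shown in this listing; a rough sketch of how
   these two helpers are typically driven is given below.  The routines
   compute_neg_gradient() and line_minimize() are hypothetical stand-ins for
   whatever the caller actually uses, so the block is excluded from
   compilation.
*/

#if 0
   compute_neg_gradient ( grad ) ;           // hypothetical: fill grad with -dE/dw
   for (i=0 ; i<n ; i++)
      g[i] = h[i] = grad[i] ;                // first direction: steepest descent

   for (iter=0 ; iter<maxits ; iter++) {
      line_minimize ( grad ) ;               // hypothetical: minimize error along grad
      compute_neg_gradient ( grad ) ;        // new negative gradient at that minimum
      gam = gamma ( g , grad ) ;             // Polak-Ribiere coefficient
      if (gam < 0.0)
         gam = 0.0 ;                         // optional restart with steepest descent
      find_new_dir ( gam , g , h , grad ) ;  // update g and h; grad = new direction
      }
#endif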
/*
--------------------------------------------------------------------------------
Local routine for debugging
--------------------------------------------------------------------------------
*/
void LayerNet::check_grad ( TrainingSet *tptr , double *grad )
{
   int i, j, n ;
   double f0, f1, deriv, dot, len1, len2 ;

   dot = len1 = len2 = 0.0 ;
   f0 = trial_error ( tptr ) ;

   for (i=0 ; i<nhid1 ; i++) {
      for (j=0 ; j<=nin ; j++) {
         hid1_coefs[i*(nin+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         hid1_coefs[i*(nin+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
         }
      }

   for (i=0 ; i<nhid2 ; i++) {
      for (j=0 ; j<=nhid1 ; j++) {
         hid2_coefs[i*(nhid1+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         hid2_coefs[i*(nhid1+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
         }
      }

   if (nhid1 == 0)                        // No hidden layer
      n = nin ;
   else if (nhid2 == 0)                   // One hidden layer
      n = nhid1 ;
   else                                   // Two hidden layers
      n = nhid2 ;

   for (i=0 ; i<nout ; i++) {
      for (j=0 ; j<=n ; j++) {
         out_coefs[i*(n+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         out_coefs[i*(n+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
         }
      }
}
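
/*
   check_grad() perturbs each weight by 0.001, re-evaluates the training
   error, and accumulates a finite-difference estimate of the (negative)
   gradient alongside the analytic gradient passed in 'grad'.  The three
   accumulators give the cosine of the angle between the two vectors:

      cosine = dot / sqrt ( len1 * len2 )    // should be very close to 1.0

   Only the direction matters for this check, so the constant scale factor
   applied to the numeric estimate has no effect on the cosine.  Nothing is
   returned or printed, so the values are presumably inspected in a debugger.
*/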