📄 real.c
字号:
notclose = last->unitcount;
pl = last->currentpat[TRAIN];
target = pl->pats;
u = (UNIT *) last->units;
while (u != NULL)
{
diff = (target++)->val - u->oj;
if (diff > 0) adiff = diff; else adiff = -diff;
if (adiff < toler) notclose = notclose - 1;
totaldiff = totaldiff + adiff;
if (deriv == 'd')
{
deltaj = diff;
}
else
{
switch (actfun) {
case 'l': deltaj = 1.0;
break;
case 's': deltaj = u->oj * (1.0 - u->oj);
break;}
if (deriv == 'f')
{
deltaj = diff * (deltaj + 0.1);
}
else deltaj = diff * deltaj;
}
etadeltaj = deltaj * reta;
w = (WTNODE *) u->wtlist;
#ifdef SYMMETRIC
while (w->next != NULL)
#else
while (w)
#endif
{
bunit = (UNIT *) w->backunit;
#ifdef SYMMETRIC
*(w->olddw) = etadeltaj * bunit->oj + ralpha * *(w->olddw);
*(w->weight) = *(w->weight) + *(w->olddw);
#else
w->olddw = etadeltaj * bunit->oj + ralpha * w->olddw;
w->weight = w->weight + w->olddw;
if (bunit->layernumber > 1)
bunit->error = bunit->error + deltaj * w->weight;
#endif
w = w->next;
};
u = u->next;
}
return(notclose);
}
#ifndef SYMMETRIC
void cbackinner() /* for the "wrong" continuous update method (c) */
{
LAYER *layer;
register REAL deltaj, etadeltaj, reta, ralpha;
register UNIT *bunit, *u;
register WTNODE *w;
char actfun;
reta = eta2;
ralpha = alpha;
layer = last->backlayer;
while (layer->backlayer != NULL)
{
actfun = layer->activation;
u = (UNIT *) layer->units;
while (u != NULL)
{
switch (actfun) {
case 'l': deltaj = 1.0;
break;
case 's': deltaj = u->oj * (1.0 - u->oj);
break;}
deltaj = u->error * deltaj;
etadeltaj = reta * deltaj;
w = (WTNODE *) u->wtlist;
while (w)
{
bunit = (UNIT *) w->backunit;
w->olddw = etadeltaj * bunit->oj + ralpha * w->olddw;
w->weight = w->weight + w->olddw;
if (bunit->layernumber > 1)
bunit->error = bunit->error + deltaj * w->weight;
w = w->next;
};
u = u->next;
};
layer = layer->backlayer;
};
}
#endif
#ifndef SYMMETRIC
/* A still experimental block of code for temporal difference learning */
void tdupdate(onthefly,t) /* for the original online td update method */
int onthefly; /* if 1 then use t otherwise take from pattern list */
REAL t; /* a target supplied on the fly by the user */
{
register REAL reta, ralpha;
register UNIT *u;
register WTNODE *w;
LAYER *layer;
REAL diff, poftplus1, target;
PATLIST *pl;
if (onthefly == 0)
{
pl = last->currentpat[TRAIN];
target = (pl->pats)->val;
}
else target = t;
if (target == (REAL) SKIPCODE)
{
poft = last->units->oj;
poftplus1 = poft;
}
else if (target == (REAL) DIFFCODE) poftplus1 = last->units->oj;
else poftplus1 = target;
diff = poftplus1 - poft;
poft = poftplus1;
ralpha = alpha;
layer = last;
while (layer->backlayer != NULL)
{
/* let eta be what the td(lambda) people normally call alpha */
/* let reta (register eta) be eta * the temporal difference */
if (layer == last) reta = eta * diff; else reta = eta2 * diff;
u = (UNIT *) layer->units;
while (u != NULL)
{
w = (WTNODE *) u->wtlist;
while (w)
{
/* using w->slope as the sum of the slopes */
/* using ralpha (register alpha) as lambda */
if (target == (REAL) SKIPCODE)
{
w->slope = w->total;
w->olddw = 0.0;
}
else /* DIFFCODE or the end of the sequence */
{
if (update == 't')
{
w->slope = w->total + ralpha * w->slope;
w->weight = w->weight + reta * w->slope;
}
else
{
w->slope = w->total + ralpha * w->slope;
w->olddw = w->olddw + reta * w->slope;
if (target != (REAL) DIFFCODE)
w->weight = w->weight + w->olddw;
};
};
w->total = 0;
w = w->next;
};
u = u->next;
};
layer = layer->backlayer;
};
}
/* a test case for temporal difference */
UNIT *setbinpat(door,u) /* this produces the 1 0 0 type sequence */
int door;
UNIT *u;
{
int i;
for (i=1;i<=3;i++)
{
if (i == door) u->oj = 1; else u->oj = 0;
u = u->next;
};
return(u);
}
/* Rates every door from 1 to maxdoors as the move for the given level
   and returns the door with the highest rating (the lowest-numbered
   door wins ties; 0 is returned if maxdoors < 1).
   For each candidate the input units are loaded three at a time:
   levels before the current one get the moves already stored in
   moves[], the current level gets the candidate door, and later levels
   get zeros.  Below the last level the rating is the network output
   after forward(); at the last level the rating is 1 for a win
   (moves[2] == 2 and the candidate is door 3) and 0 otherwise.
   NOTE(review): the win test reads moves[2], which has been recorded
   by the time the last level is reached only when maxlevels > 2. */
int getbestmove(level,maxlevels,maxdoors,moves)
int level, maxlevels, maxdoors, *moves;
{
int move, bestmove, levelnumber;
REAL rating, bestrating;
UNIT *u;

bestmove = 0;     /* defined results even when no door is examined */
bestrating = 0;
for (move=1;move<=maxdoors;move++)
   {
   u = start->units;
   levelnumber = 0;
   while (u != NULL) /* put values on the input units */
      {
      levelnumber = levelnumber + 1;
      if (levelnumber < level) /* copy old move into the net */
         u = setbinpat(moves[levelnumber],u);
      /* The candidate door goes into the slot of the current level,
         the same layout makemove uses.  level and move were swapped
         here before, so the net rated a position that the move would
         not actually produce. */
      else if (levelnumber == level) /* new move to explore */
         u = setbinpat(move,u);
      else /* not there yet, insert 0 into the net */
         u = setbinpat(0,u);
      }
   if (level < maxlevels)
      { /* evaluate the network */
      forward();
      rating = last->units->oj;
      }
   /* its a win or a loss */
   else if (moves[2] == 2 && move == 3) rating = 1;
   else rating = 0;
   if (move == 1 || rating > bestrating)
      {
      bestrating = rating;
      bestmove = move;
      }
   } /* end for move */
if (debugoutput == '+')
   {
   sprintf(outstr,"level = %3d best move = %3d best rating = %10.6f\n",level,bestmove,bestrating);
   pg(stdout,outstr);
   }
return(bestmove);
}
/* After finding the best move this routine puts it on the input units
   again and evaluates the net with forward().  Levels before the
   current one are loaded from moves[], the current level gets
   bestmove, and the remaining levels get zeros.
   The return type is spelled out as int because the definition used
   to rely on implicit int, which is no longer valid C; the value is
   always 0 and carries no meaning. */
int makemove(bestmove,level,moves)
int bestmove,level,*moves;
{
UNIT *u;
int levelnumber;

u = start->units;
levelnumber = 0;
while (u != NULL)
   {
   levelnumber = levelnumber + 1;
   if (levelnumber < level) /* copy old move into the net */
      u = setbinpat(moves[levelnumber],u);
   else if (levelnumber == level) /* new move */
      u = setbinpat(bestmove,u);
   else /* not there yet, insert 0 into the net */
      u = setbinpat(0,u);
   }
forward();
return(0);
}
/* Does the learning step that follows a move at the given level.
   All w->total values are cleared, the errors are sent back with
   backoutput() (and backinner() when nlayers > 2), and tdupdate() is
   called with a target chosen from the level:
     level 1               - SKIPCODE, no weight update;
     level below maxlevels - DIFFCODE, update from the temporal
                             difference;
     last level            - 1.0 for a win (moves[2] == 2 and the final
                             move is door 3), otherwise 0.0. */
void learn(level,maxlevels,moves)
int level,maxlevels,*moves;
{
LAYER *layer;
UNIT *u;
WTNODE *w;

layer = last; /* make all w->totals = 0, maybe unnecessary */
while (layer->backlayer != NULL)
   {
   u = (UNIT *) layer->units;
   while (u != NULL)
      {
      w = (WTNODE *) u->wtlist;
      while (w != NULL)
         {
         w->total = 0;
         w = w->next;
         }
      u = u->next;
      }
   layer = layer->backlayer;
   } /* end while layer */
/* send back errors from the output layer */
backoutput();
/* if necessary send errors farther back */
if (nlayers > 2) backinner();
/* pass in the right "target" value for the tdupdate routine, a 1 for
   a win, a 0 for a loss or a code to skip the weight update or a code
   that uses the temporal difference to update the weights */
if (level == 1) tdupdate(1, (REAL) SKIPCODE);
else if (level < maxlevels) tdupdate(1, (REAL) DIFFCODE);
/* The final move is moves[maxlevels].  The index used to be the
   literal 4, which is the same element for a four-level maze but lies
   outside the 1 + maxlevels entries of moves[] for a shorter one. */
else if (moves[2] == 2 && moves[maxlevels] == 3) tdupdate(1,1.0);
else tdupdate(1,0.0);
}
/* Plays one game.  For each level from 1 to maxlevels in turn: pick
   the best door with getbestmove(), load it into the net with
   makemove(), store it in moves[level] and then call learn(). */
void onegame(maxlevels,maxdoors,moves)
int maxlevels,maxdoors,*moves;
{
int depth, choice;

depth = 1;
while (depth <= maxlevels)
   {
   choice = getbestmove(depth,maxlevels,maxdoors,moves);
   makemove(choice,depth,moves);
   moves[depth] = choice;   /* recorded only after the net was loaded */
   learn(depth,maxlevels,moves);
   depth = depth + 1;
   }
}
/* This is the function called by the g command; it plays ngames games
   of maxlevels moves each.  The array moves stores the moves made in a
   decimal format, for instance the door numbers as 1 2 1 3.  These
   digits must be converted to the 1 0 0 type of format when loading
   the network.
   The array is allocated here, cleared so that no entry is read while
   indeterminate, and released before returning.  If the allocation
   fails a message is written to stderr and no game is played. */
void maze(ngames,maxlevels,maxdoors)
int maxlevels,maxdoors,ngames;
{
int *moves, i;

/* with no games or no levels nothing would be played, so there is
   nothing to allocate */
if (ngames < 1 || maxlevels < 1) return;
/* the index will go from 1 to maxlevels wasting the location moves[0] */
moves = (int *) malloc((1 + maxlevels) * sizeof(int));
if (moves == NULL)
   {
   fprintf(stderr,"maze: out of memory\n");
   return;
   }
for (i=0;i<=maxlevels;i++) moves[i] = 0;
for (i=1;i<=ngames;i++) onegame(maxlevels,maxdoors,moves);
free(moves);
}
#else /* if SYMMETRIC, take the easy out and define empty functions */
/* SYMMETRIC build: tdupdate is an empty function that does nothing */
void tdupdate() {}
/* SYMMETRIC build: maze is an empty function that does nothing */
void maze() {}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -