📄 real.c

📁 #ifdef INTEGER #include "ibp.h" #else #include "rbp.h" #endif /* built-in C functions */
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
notclose = last->unitcount;
pl = last->currentpat[TRAIN];
target = pl->pats;
u = (UNIT *) last->units;
while (u != NULL)
 {
  diff = (target++)->val - u->oj;
  if (diff > 0) adiff = diff; else adiff = -diff;
  if (adiff < toler) notclose = notclose - 1;
  totaldiff = totaldiff + adiff;
  if (deriv == 'd')
   {
    deltaj = diff;
   }
  else
   {
    switch (actfun) {
    case 'l': deltaj = 1.0;
              break;
    case 's': deltaj = u->oj * (1.0 - u->oj);
              break;}
    if (deriv == 'f')
     {
      deltaj = diff * (deltaj + 0.1);
     }
    else deltaj = diff * deltaj;
   }
  etadeltaj = deltaj * reta;
  w = (WTNODE *) u->wtlist;
#ifdef SYMMETRIC
   while (w->next != NULL)
#else
   while (w)
#endif
    {
     bunit = (UNIT *) w->backunit;
#ifdef SYMMETRIC
     *(w->olddw) = etadeltaj * bunit->oj + ralpha * *(w->olddw);
     *(w->weight) = *(w->weight) + *(w->olddw);
#else
     w->olddw = etadeltaj * bunit->oj + ralpha * w->olddw;
     w->weight = w->weight + w->olddw;
     if (bunit->layernumber > 1)
        bunit->error = bunit->error + deltaj * w->weight;
#endif
     w = w->next;
    };
  u = u->next;
 }
return(notclose);
}

#ifndef SYMMETRIC

void cbackinner()  /* for the "wrong" continuous update method (c) */
{
LAYER *layer;
register REAL deltaj, etadeltaj, reta, ralpha;
register UNIT *bunit, *u;
register WTNODE *w;
char actfun;

reta = eta2;
ralpha = alpha;
layer = last->backlayer;
while (layer->backlayer != NULL)
 {
  actfun = layer->activation;
  u = (UNIT *) layer->units;
  while (u != NULL)
   {
    switch (actfun) {
    case 'l': deltaj = 1.0;
              break;
    case 's': deltaj = u->oj * (1.0 - u->oj);
              break;}
     deltaj = u->error * deltaj;
    etadeltaj = reta * deltaj;
    w = (WTNODE *) u->wtlist;
     while (w)
      {
       bunit = (UNIT *) w->backunit;
       w->olddw = etadeltaj * bunit->oj + ralpha * w->olddw;
       w->weight = w->weight + w->olddw;
       if (bunit->layernumber > 1)
          bunit->error = bunit->error + deltaj * w->weight;
       w = w->next;
      };
    u = u->next;
   };
  layer = layer->backlayer;
 };
}
#endif

#ifndef SYMMETRIC
/* A still experimental block of code for temporal difference learning */

void tdupdate(onthefly,t) /* for the original online td update method */
int onthefly; /* if 1 then use t otherwise take from pattern list */
REAL t;       /* a target supplied on the fly by the user */
{
register REAL reta, ralpha;
register UNIT *u;
register WTNODE *w;
LAYER *layer;
REAL diff, poftplus1, target;
PATLIST *pl;

if (onthefly == 0)
   {
    pl = last->currentpat[TRAIN];
    target = (pl->pats)->val;
   }
else target = t;
if (target == (REAL) SKIPCODE)
 {
  poft = last->units->oj;
  poftplus1 = poft;
 }
else if (target == (REAL) DIFFCODE) poftplus1 = last->units->oj;
else poftplus1 = target;
diff = poftplus1 - poft;
poft = poftplus1;
ralpha = alpha;
layer = last;
while (layer->backlayer != NULL)
 {
  /* let eta be what the td(lambda) people normally call alpha */
  /* let reta (register eta) be eta * the temporal difference */
  if (layer == last) reta = eta * diff; else reta = eta2 * diff;
  u = (UNIT *) layer->units;
  while (u != NULL)
   {
    w = (WTNODE *) u->wtlist;
    while (w)
     {
      /* using w->slope as the sum of the slopes */
      /* using ralpha (register alpha) as lambda */
      if (target == (REAL) SKIPCODE)
       {
        w->slope = w->total;
        w->olddw = 0.0;
       }
      else /* DIFFCODE or the end of the sequence */
       {
        if (update == 't')
         {
          w->slope = w->total + ralpha * w->slope;
          w->weight = w->weight + reta * w->slope;
         }
        else
         {
          w->slope = w->total + ralpha * w->slope;
          w->olddw = w->olddw + reta * w->slope;
          if (target != (REAL) DIFFCODE)
             w->weight = w->weight + w->olddw; 
         };
       };
      w->total = 0;
      w = w->next;
     };
    u = u->next;
   };
  layer = layer->backlayer;
 };
}

/* a test case for temporal difference */

UNIT *setbinpat(door,u)  /* this produces the 1 0 0 type sequence */
int door;
UNIT *u;
{
 int i;
 for (i=1;i<=3;i++)
    {
     if (i == door) u->oj = 1; else u->oj = 0;
     u = u->next;
    };
return(u);
}

/* Rates every possible move at the given level and returns the door
   with the highest rating (the lowest numbered door wins a tie).

   level      level of the game the move is being chosen for
   maxlevels  number of levels in a game
   maxdoors   number of doors to try, numbered 1..maxdoors
   moves      doors already chosen; moves[1..level-1] are read

   For each candidate the input units are loaded one group per level:
   the earlier moves first, then the candidate in the group for the
   current level, then 0 patterns for the levels not reached yet.  This
   is the same layout makemove() loads, so the position that is rated is
   the position that will be played.  Below the last level the rating is
   the network's output; at the last level the network is not run and
   the rating is 1 for a win and 0 for a loss.

   The win test (door 2 at level 2 followed by door 3 now) is hard-wired
   for the sample maze and reads moves[2].

   Returns 0, which is not a door number, when maxdoors < 1. */
int getbestmove(level,maxlevels,maxdoors,moves)
int level, maxlevels, maxdoors, *moves;
{
 int move, bestmove, levelnumber;
 REAL rating, bestrating;
 UNIT *u;

 /* defined values for the report and the return when no door is tried */
 bestmove = 0;
 bestrating = 0;

 for (move=1;move<=maxdoors;move++)
    {
     u = start->units;
     levelnumber = 0;
     while (u != NULL)  /* put values on the input units */
        {
         levelnumber = levelnumber + 1;
         if (levelnumber < level) /* copy old move into the net */
            u = setbinpat(moves[levelnumber],u);
         else if (levelnumber == level) /* new move to explore */
            u = setbinpat(move,u);
         else /* not there yet, insert 0 into the net */
            u = setbinpat(0,u);
        }
     if (level < maxlevels)
        { /* evaluate the network */
         forward();
         rating = last->units->oj;
        }
     /* its a win or a loss */
     else if (moves[2] == 2 && move == 3) rating = 1;
     else rating = 0;

     /* the first door is always taken so that a later door has to be
        strictly better to replace it */
     if (move == 1 || rating > bestrating)
        {
         bestrating = rating;
         bestmove = move;
        }
    } /* end for move */
 if (debugoutput == '+')
    {
     sprintf(outstr,"level = %3d best move = %3d best rating = %10.6f\n",level,bestmove,bestrating);
     pg(stdout,outstr);
    }
 return(bestmove);
}

/* After the best move has been found this routine puts it on the input
   units again and evaluates the net.

   bestmove  door chosen for the current level
   level     level the move is made at
   moves     doors chosen so far; moves[1..level-1] are read

   The input units are loaded one group per level: the earlier moves,
   then bestmove in the group for the current level, then 0 patterns for
   the levels not reached yet.  forward() is then run on that input.

   The return type is int because the definition has always had the
   default int type; the value is always 0 and carries no information. */
int makemove(bestmove,level,moves)
int bestmove,level,*moves;
{
 UNIT *u;
 int levelnumber;
 u = start->units;
 levelnumber = 0;
 while (u != NULL)
    {
     levelnumber = levelnumber + 1;
     if (levelnumber < level) /* copy old move into the net */
        u = setbinpat(moves[levelnumber],u);
     else if (levelnumber == level) /* new move */
        u = setbinpat(bestmove,u);
     else  /* not there yet, insert 0 into the net */
        u = setbinpat(0,u);
    }
 forward();
 return(0);
}

/* Runs one learning step after a move has been made and the network
   evaluated.

   level      level of the move just made
   maxlevels  number of levels in a game
   moves      doors chosen so far, moves[1..level]

   Every w->total is cleared, the errors are sent back from the output
   layer with backoutput() and, when there are more than two layers,
   farther back with backinner().  tdupdate() is then given the "target"
   that fits the position in the game:
     level 1            SKIPCODE, skip the weight update
     before the end     DIFFCODE, update from the temporal difference
     at the last level  1 for a win, 0 for a loss

   The win test is hard-wired for the sample maze: door 2 at level 2 and
   door 3 at the last level.  The last move is read as moves[maxlevels],
   which is the test getbestmove() applies to the final move and which
   stays inside the 1 + maxlevels entries that maze() allocates. */
void learn(level,maxlevels,moves)
int level,maxlevels,*moves;
{
 LAYER *layer;
 UNIT *u;
 WTNODE *w;

 /* make all w->totals = 0, maybe unnecessary */
 for (layer = last; layer->backlayer != NULL; layer = layer->backlayer)
    {
     for (u = (UNIT *) layer->units; u != NULL; u = u->next)
        {
         for (w = (WTNODE *) u->wtlist; w != NULL; w = w->next)
            w->total = 0;
        }
    }
 /* send back errors from the output layer */
 backoutput();
 /* if necessary send errors farther back */
 if (nlayers > 2) backinner();
 /* pass in the right "target" value for the tdupdate routine */
 if (level == 1) tdupdate(1, (REAL) SKIPCODE);
 else if (level < maxlevels) tdupdate(1, (REAL) DIFFCODE);
 else if (moves[2] == 2 && moves[maxlevels] == 3) tdupdate(1,1.0);
 else tdupdate(1,0.0);
}

/* Plays one game: at each level from 1 to maxlevels the best door is
   chosen with getbestmove(), loaded and evaluated with makemove(),
   stored in moves[level], and then learn() is run for that level. */
void onegame(maxlevels,maxdoors,moves)
int maxlevels,maxdoors,*moves;
{
 int depth = 1;
 while (depth <= maxlevels)
    {
     int choice = getbestmove(depth,maxlevels,maxdoors,moves);
     makemove(choice,depth,moves);
     moves[depth] = choice;
     learn(depth,maxlevels,moves);
     depth = depth + 1;
    }
}

/* This is the function called by the g command.  It plays ngames games
   of maxlevels levels with maxdoors doors at each level.

   The array moves stores the moves made in a decimal format, for
   instance the door numbers as 1 2 1 3.  These digits must be converted
   to the 1 0 0 type of format when loading the network.

   Nothing is done when maxlevels < 1 or when the array cannot be
   allocated.  The array is cleared before the first game, so an entry
   that is read before a door has been stored in it holds 0, which is
   not a door number.  It is released before returning. */
void maze(ngames,maxlevels,maxdoors)
int maxlevels,maxdoors,ngames;
{
 int *moves, i;

 if (maxlevels < 1) return;  /* a game with no levels makes no moves */
 /* the index will go from 1 to maxlevels wasting the location moves[0] */
 moves = (int *) malloc((1 + maxlevels) * sizeof(int));
 if (moves == NULL) return;
 for (i=0;i<=maxlevels;i++) moves[i] = 0;
 for (i=1;i<=ngames;i++) onegame(maxlevels,maxdoors,moves);
 free(moves);
}

#else /* if SYMMETRIC, take the easy out and define empty functions */

/* Empty stand-in for the temporal difference weight update in the
   SYMMETRIC build; it ignores whatever it is called with. */
void tdupdate()
{
}
/* Empty stand-in for the g command's maze routine in the SYMMETRIC
   build; it ignores whatever it is called with. */
void maze()
{
}

#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -