rl.c
  int FullRewardTime = 50;
  if (Mem->CurrentTime - Time > FullRewardTime){
    /* not lowered beyond factor of 10 */
    float divisor = 1 + 9.0*(Mem->CurrentTime-Time-FullRewardTime)/(MAX_REWARD_TIME-FullRewardTime);
    reward /= divisor;   /* divisor between 1 and 10 */
  }
 }

#if GOAL_ONLY_REWARDS
  if (Mem->PlayMode != BEFORE_KICK_OFF) return;
  if      ( Mem->MyScore    > MyScore    ) reward =  100;
  else if ( Mem->TheirScore > TheirScore ) reward = -100;
  float divisor = Mem->CurrentTime - Time;
  if (divisor > 0) reward /= divisor;
#endif

#if POS_5_LEARN
  if (Mem->GetMyPosition()==5 && Mem->GetPositionOfMyLocation()==5 && QLastActionFrom==5){
    ; /*printf("%d - %d reward for %d (%.3f) = %.1f\n",
         Mem->MyNumber,Mem->CurrentTime,QLastActionTo,QLastActionVal,reward);*/
  }
  else
    return;
#endif

  RewardQTable->UpdateQTable(QLastActionTo,QLastActionVal,QLastActionConf,
                             QLastFutureVal,reward);

  FILE *dataFile = fopen(RewardQTable->dataFileName,"w");
  while ( dataFile == NULL ){   /* keep retrying until the file opens */
    dataFile = fopen(RewardQTable->dataFileName,"w");
  }
  RewardQTable->Write(dataFile);
  fclose(dataFile);

  QActionTaken = FALSE;
}

void RewardInfo::LookForRewards(){
  if ( !KeepLearning ) return;
  if ( !QActionTaken ) my_error("no action for which to look for rewards");

#if GOAL_ONLY_REWARDS
#else
  /* if it's been 30 seconds, close rewards */
  if ( Mem->CurrentTime - Time >= MAX_REWARD_TIME ) CloseRewards();
#endif

  /* Don't want to divide by 0 -- nothing new has happened */
  if ( Mem->CurrentTime == Time ) return;

  /* Here just tabulate ball positions, weighted by time in each place */
  float previousWeight = AvgBallX * (AvgBallUpdateTime - Time);
  float currentWeight  = Mem->GetBallGlobalX() * (Mem->CurrentTime - AvgBallUpdateTime);
  AvgBallX = (previousWeight + currentWeight)/(Mem->CurrentTime - Time);
  AvgBallUpdateTime = Mem->CurrentTime;
}

/****************************************************************************/

int GetActionNumber(int index, int *actions, int *action_types){
  int action_number;
  if ( action_types[index] == RL_PASS_ACTION )
    action_number = Mem->GetPlayerPosition(actions[index]);
  else  /* RL_KNOCK_ACTION -- knocks stored after passes */
    action_number = (actions[index]-MY_GOAL) + NUM_RL_PASS_ACTIONS;
  return action_number;
}

/****************************************************************************/

int ChooseReceiverRandom(int NumActions, int *actions, int *action_types,
                         float *FeatureValues, float *Confidences){
/* int passes = 0, knocks = 0;
   for (int i=0; i<NumActions; i++){
     if      (action_types[i] == RL_KNOCK_ACTION) knocks++;
     else if (action_types[i] == RL_PASS_ACTION)  passes++;
     else my_error("which type of action?");
   }*/

  int NumOptions=0;
  int Options[NumActions];
  float maxVal = -20000;

  for (int i=0; i<NumActions; i++){
#if ONLY_KNOCKS
    if (action_types[i] == RL_PASS_ACTION) continue;
#endif
#if ONLY_PASSES
    if (action_types[i] == RL_KNOCK_ACTION &&
        Mem->GetMyLocationsPositionType() != FORWARD) continue;
#endif
#if BREAK_TIES_MAXQ
    if (FeatureValues[i] < maxVal) continue;
    /*else if (FeatureValues[i] > maxVal){*/
    else if (FeatureValues[i] > maxVal &&
             Mem->GetMyLocationsPositionType() != FORWARD){
      /* Reset to only consider actions with the highest value */
      if (maxVal < DT_SUCCESS_CUTOFF) NumOptions=0;
      maxVal = MIN(DT_SUCCESS_CUTOFF,FeatureValues[i]);
    }
#endif
    Options[NumOptions++] = i;
  }

  if (!NumOptions) return NumActions-1;   /* the knock to goal */
  else             return Options[int_random(NumOptions)];

/* #if ONLY_KNOCKS
   passes=0;
#endif
#if ONLY_PASSES
   if (Mem->GetMyLocationsPositionType() != FORWARD){
     if (passes) return int_random(NumActions-knocks);
     else        return int_random(knocks)+NumActions-knocks;
   }
#endif */

  /* 50% chance of shooting or passing */
/* if (int_random(2) && passes) return int_random(NumActions-knocks);
   else                         return int_random(knocks)+NumActions-knocks;*/
}

int ChooseReceiverMaxQ(int NumActions, int *actions, int *action_types,
                       float *FeatureValues, float *Confidences){
  QTable *MyQTable = Mem->GetMyQTable();

#if 0
  int action_index = NumActions;   /* me */
  float maxQ = MyQTable->GetQValue(Mem->GetMyPosition(), KNOCK_CONF,DEFAULT_FUTURE);
  /*printf("%d to self = %.1f\n",Mem->MyNumber,maxQ);*/
#endif

  int   action_index = -1;
  float maxQ   = -20000;   /* Smaller than least possible */
  float maxVal = -20000;
  float Q;
  int   action;

  for (int i=0; i<NumActions; i++){
#if BREAK_TIES_MAXQ
    if (FeatureValues[i] < maxVal) continue;
    //else if (FeatureValues[i] > maxVal){
    else if (FeatureValues[i] > maxVal &&
             Mem->GetMyLocationsPositionType() != FORWARD){
      /* Reset to only consider actions with the highest value */
      if (maxVal < DT_SUCCESS_CUTOFF){
        maxQ = -20000;
        action_index = -1;
      }
      maxVal = MIN(DT_SUCCESS_CUTOFF,FeatureValues[i]);
    }
#endif
#if ONLY_KNOCKS
    if (action_types[i] != RL_KNOCK_ACTION) continue;
#endif
#if ONLY_PASSES
    if (Mem->GetMyLocationsPositionType() != FORWARD &&
        action_types[i] != RL_PASS_ACTION) continue;
#endif

    action = GetActionNumber(i,actions,action_types);
    Q = MyQTable->GetQValue(action,FeatureValues[i],DEFAULT_FUTURE);

#if TRUST_CONF
    //Q += QMAX;                      /* So not negative -- done to all of them (0-200) */
    //Q *= (Confidences[i]+1)/2;
    Q *= ((Confidences[i]+1)/2 + 1);  /* so ranges from 1-2 */
    //Q *= ((Confidences[i]+1)/4 + 1);
    /* Result ranges 0-4, so not actual reward, but should correlate */
#endif

    if ( Q >= maxQ ){
      maxQ = Q;
      action_index = i;
    }
    /* printf("%d to %d (conf %.1f, val %.1f) = %.1f (was %.1f) action_index = %d\n",
       MyQTable->Position,action,Confidences[i],FeatureValues[i],Q,
       MyQTable->GetQValue(action,FeatureValues[i],DEFAULT_FUTURE), action_index); */
  }

  if (action_index == -1){
    // my_error("Should have found SOME best action");  /* no passes */
    return ChooseReceiverRandom(NumActions,actions,action_types,
                                FeatureValues,Confidences);
  }

#if BREAK_TIES_MAXQ
  /*printf("%d (%d) at %d options: ",Mem->MyNumber, MyQTable->Position, Mem->CurrentTime);
    for (int i=0; i<NumActions; i++){
      if (FeatureValues[i] == maxVal) printf("%d ",GetActionNumber(i,actions,action_types));
    }
    printf("\n");*/
#endif

  return action_index;
}

int ChooseReceiverRandomized(int NumActions, int *actions, int *action_types,
                             float *FeatureValues, float *Confidences){
  /* p is probability of choosing a random action -- decreases over time:
     linearly to .5 at game 40, to .1 at game 80, to .01 at game 120 */
  float p;
  int game = Mem->CurrentTime/GAME_LENGTH;

  /* game += 120; */
  /* game = game/8; */
/* if (game>=200){
     game=game%200;
     game=game/2;
   }*/

#if POS_5_LEARN
  if (Mem->GetMyPosition()!=5 || Mem->GetPositionOfMyLocation()!=5) game+=160;
#endif
  //game+=160;

  if      ( game<=40  ) p = 1  - .5 *game/40;
  else if ( game<=80  ) p = .5 - .4 *(game-40)/40;
  else if ( game<=120 ) p = .1 - .09*(game-80)/40;
  else                  p = .01;

  if ( range_random(0,1) <= p )
    return ChooseReceiverRandom(NumActions,actions,action_types,FeatureValues,Confidences);
  else
    return ChooseReceiverMaxQ(NumActions,actions,action_types,FeatureValues,Confidences);
}

/****************************************************************************/

float GetHeuristicConfidence(int action, int action_type){
  float destR, destTheta;

  if (action_type == RL_PASS_ACTION){
    destR     = Mem->GetTeammateDistance(action);
    destTheta = Mem->GetTeammateAngle(action);
  }
  else{   /* action_type == RL_KNOCK_ACTION */
    destR     = MIN(40,Mem->GetMarkerDistance(action));
    destTheta = Mem->GetMarkerAngle(action);
  }

  /*printf("(%.1f,%.1f) ",Mem->GetGlobalX(),Mem->GetGlobalY());*/

  float result = 2;
  float angleDiff, distAlongLine, distFromLine;
  for (int i=1; i<=TEAM_SIZE; i++){
    if (!Mem->OpponentValid(i)) continue;

    /* Special case to ignore goalie for shots */
    if (i==1 && Mem->MarkerValid(THEIR_GOAL) &&
        destR     == Mem->GetMarkerDistance(THEIR_GOAL) &&
        destTheta == Mem->GetMarkerAngle(THEIR_GOAL) )
      continue;

    angleDiff = Mem->GetOpponentAngle(i) - destTheta;
    CleanAngle(&angleDiff);
    angleDiff = fabs(deg_to_rad(angleDiff));
    distAlongLine = Mem->GetOpponentDistance(i) * cos(angleDiff);
    distFromLine  = Mem->GetOpponentDistance(i) * sin(angleDiff);

    if (distAlongLine > destR)        continue;
    if (distAlongLine < 0)            continue;
    //if (distAlongLine > 40)         continue;
    if (distFromLine > 30)            continue;
    if (distFromLine > distAlongLine) continue;

    /* printf("%d: %d angleDiff = %.1f (%.1f - %.1f), distAlong = %.1f, distFrom = %.1f\n",
       action,i,rad_to_deg(angleDiff),Mem->GetOpponentAngle(i),destTheta,
       distAlongLine,distFromLine);*/
    /*printf("%d(%.1f,%.1f) ",i,Mem->GetOpponentGlobalX(i),Mem->GetOpponentGlobalY(i));*/

    result *= .8*(distFromLine/(MIN(30,distAlongLine)));
  }

  result-=1;

  /*printf("%d:%d action = %d, result = %.1f\n",Mem->MyNumber, Mem->CurrentTime, action,result);*/
  return result;
}

/****************************************************************************/

int RLtest(int NumActions, int *actions, int *action_types, float *Confidences){
  /* don't alter actions or confidences */
  /* Only alter Q's when you're training (players in fixed positions) */
  /* Start by loading 'em up for your position */
  /* If position switching, load for all */
  /* Take an action, return it, and then start looking for rewards */

  /* code for don't pass: me the receiver, pass success conf = 0 */

  float HConfidences[NumActions];
  for (int i=0; i<NumActions; i++)   /* keep a copy in HConfidences for the debug output below */
    HConfidences[i] = Confidences[i] = GetHeuristicConfidence(actions[i],action_types[i]);

  float FeatureValues[NumActions];
  for (int i=0; i<NumActions; i++)
    FeatureValues[i] = Confidences[i];

#if 0  /* Used for 10-27 values */
  for (int i=0; i<NumActions; i++)
    FeatureValues[i] = MIN_ACTION_VALUE;
#endif

#if 1  /* Only care if it's a success or failure */
  static int greater=0, less=0;
  for (int i=0; i<NumActions; i++){
    if ( FeatureValues[i] >= DT_SUCCESS_CUTOFF ){
      FeatureValues[i] = 0.45;
      greater++;
    }
    else{
      FeatureValues[i] = -0.45;
      less++;
    }
    if (!((greater+less)%30000))
      printf("%d : %d greater: %d, less: %d\n",Mem->MyNumber, Mem->CurrentTime, greater,less);
  }
#endif

#if 0  /* 2 states -- one for right-client, one for left, based on #opps on each side */
  /* Actually 3 -- and didn't correlate very well with opp client type */
  int opp_r=0, opp_l=0;
  for (int i=1; i<=TEAM_SIZE; i++){
    if ( !Mem->OpponentValid(i) ) continue;
    if      (Mem->GetOpponentGlobalY(i) > 0) opp_l++;
    else if (Mem->GetOpponentGlobalY(i) < 0) opp_r++;
  }
  float val = 0;
  if      (opp_l > opp_r) val =  .45;
  else if (opp_r > opp_l) val = -.45;
  for (int i=0; i<NumActions; i++){
    FeatureValues[i] = val;
    /* printf("%.1f ",FeatureValues[i]); */
  }
  /* printf("(%d)\n",Mem->CurrentTime); */
#endif

#if 0  /* 2 states -- one for right-client, one for left, based on avg opponent y position */
  /* Actually 3 -- and didn't correlate very well with opp client type */
  float opp_y=0,x,y;
  for (int i=1; i<=TEAM_SIZE; i++){
    if ( Mem->GetOpponentGlobalSetTime(i) == 0) continue;
    Mem->GetOpponentLastGlobalXY(i,&x,&y);
    opp_y+=y;
  }
  float val = 0;
  if      (opp_y > 0) val =  .45;
  else if (opp_y < 0) val = -.45;
  for (int i=0; i<NumActions; i++){
    FeatureValues[i] = val;
    /* printf("%.1f ",FeatureValues[i]); */
  }
  /* printf("(%d)\n",Mem->CurrentTime); */
#endif

#if 0  /* One DT value for right and left clients -- time cued */
  float val = (Mem->CurrentTime/3000)%2 ? .45 : -.45;
  for (int i=0; i<NumActions; i++){
    FeatureValues[i] = val;
  }
#endif

#if 0  /* 2 DT values randomly distributed */
  for (int i=0; i<NumActions; i++)
    FeatureValues[i] = int_random(2) ? .45 : -.45;
#endif

  int receiver_index = ChooseReceiverRandomized(NumActions,actions,action_types,FeatureValues,Confidences);
  //int receiver_index = ChooseReceiverMaxQ(NumActions,actions,action_types,FeatureValues,Confidences);
  //int receiver_index = ChooseReceiverRandom(NumActions,actions,action_types,FeatureValues,Confidences);

#if 0
  static int last_max_action = -1;
  if (Mem->GetMyPosition()==5 && Mem->GetPositionOfMyLocation()==5){
    int max_index  = ChooseReceiverMaxQ(NumActions,actions,action_types,FeatureValues,Confidences);
    int max_action = GetActionNumber(max_index,actions,action_types);
    printf("%d: %d taking action %d\n",Mem->MyNumber,Mem->CurrentTime,
           GetActionNumber(receiver_index,actions,action_types));
    if (max_action != last_max_action){
      printf("%d: %d max = %d (was %d)\n",Mem->MyNumber,Mem->CurrentTime,max_action,
             last_max_action);
      Mem->GetMyQTable()->Write(stdout);
    }
    last_max_action = max_action;
  }
#endif

#if 0  /* To always shoot */
  if ( int_random(100) ){
    receiver_index = NumActions-1;
    actions[receiver_index]       = THEIR_GOAL;
    action_types[receiver_index]  = RL_KNOCK_ACTION;
    FeatureValues[receiver_index] = MIN_ACTION_VALUE;
  }
  else receiver_index = ChooseReceiverRandom(NumActions,actions,action_types,FeatureValues,Confidences);
#endif

  int   receiver        = actions[receiver_index];
  float feature_val     = FeatureValues[receiver_index];
  float receiver_conf   = Confidences[receiver_index];
  float receiver_future = DEFAULT_FUTURE;

  int action_number;   /* Was receiver_position */
  action_number = GetActionNumber(receiver_index,actions,action_types);
  if (action_number == UNKNOWN_POSITION)
    my_error("Should know position of player I'm passing to");

  int game = Mem->CurrentTime/GAME_LENGTH;

#if 0
  static int printed_game = 0;
  if (game>printed_game && Mem->MyNumber==6){
    printf("After Game %d (time %d ):\n",game,Mem->CurrentTime);
    Mem->GetQTable(Mem->GetCurrentFormationType(),5)->Write(stdout);
    fflush(stdout);
    printed_game = game;
  }
#endif

#if POS_5_LEARN
  if (Mem->GetMyPosition()!=5 || Mem->GetPositionOfMyLocation()!=5) game=161;
#endif
  //game=161;

  if (game<=160 || game>=200) Mem->KeepLearning=TRUE;
  else                        Mem->KeepLearning=FALSE;

  if ( Mem->GetBallDistance() <= KICK_DISTANCE && Mem->KeepLearning){  /* else can't actually act */
    /* printf("%d:%d (action %d)\n",Mem->MyNumber,Mem->CurrentTime,receiver_position);*/
    Mem->SetActionState(action_number,feature_val,receiver_conf,receiver_future);
  }

  if (0 && Mem->MyNumber==11){
    printf("%d %d ( %d ) taking action %d (dt: %.2f heur: %.2f )\n",
           Mem->MyNumber, Mem->CurrentTime, Mem->GetPositionOfMyLocation(),
           action_number,receiver_conf,HConfidences[receiver_index]);
    printf("Action DT Heur\n");
    for (int i=0; i<NumActions; i++){
      printf("%d %f %f\n",GetActionNumber(i,actions,action_types),
             Confidences[i],HConfidences[i]);
    }
    printf("[ Opps seen: ");
    for (int i=1; i<=TEAM_SIZE; i++)
      if (Mem->OpponentValid(i))
        printf("%d (%.1f %.1f) : ",i,Mem->GetOpponentGlobalX(i), Mem->GetOpponentGlobalY(i));
    printf(" ]\n");
    printf("[ Teammates seen: ");
    for (int i=1; i<=TEAM_SIZE; i++)
      if (Mem->TeammateValid(i))
        printf("%d - %d (%.1f %.1f) : ",i, Mem->GetPlayerPosition(i),
               Mem->GetTeammateGlobalX(i), Mem->GetTeammateGlobalY(i));
    printf(" ]\n");
    printf("\n\n");
  }

  return receiver_index;
}

int RLforReceiver(int NumActions, int *actions, int *action_types, float *Confidences){
  return RLtest(NumActions,actions,action_types,Confidences);
}