rl.h

来自「足球机器人仿真组CMU97的源码」· C头文件代码 · 共 170 行

170 行

/* rl.h * CMUnited-97 (soccer client for Robocup-97) * Peter Stone <pstone@cs.cmu.edu> * Computer Science Department * Carnegie Mellon University * Copyright (C) 1997 Peter Stone * * CMUnited-97 was created by Peter Stone and Manuela Veloso * * You may copy and distribute this program freely as long as you retain this notice. * If you make any changes or have any comments we would appreciate a message. */#ifndef _RL_H_#define _RL_H_#include "position.h"#define USE_RL 1#define QMAX 100/* max time before closing rewards */#define MAX_REWARD_TIME 300/* confidence of knock/dribble/clear action from knockorpass.c */#define KNOCK_CONF 0/****************************************************************************//* FutureValueInterval is meant to store an interval of values from    the receiver and the associated QValue.  The 2nd dimension deep    in the 2-dim. q-value function.  ActionValueInterval is the top level    and points to a list of FutureValueIntervals (disjoint covering)*/class FutureValueInterval{public:   FutureValueInterval(float min, float max, float q);   ~FutureValueInterval();  /* Insert a new entry by min key returns a pointer to the new entry */  FutureValueInterval *Insert(float min);       void  UpdateQValue(float ActionConf, float FutureValue, float Reward);  float GetQValue(float FutureValue);  void  Print(FILE *oStream);  void  Load (FILE *oStream);  float FutureMin;  float FutureMax;  FutureValueInterval *Next;  int   num;  /* Number of times executed (with reward received) */  float weight;  /* Amount of weight in weighted average of Qvalues */private:  float QValue;};/****************************************************************************/#define MIN_ACTION_VALUE -1#define MAX_ACTION_VALUE 1/* ActionValueInterval should also be a disjoint covering.  Each entry    points to a FutureValueInterval list with the associated Q values*/class ActionValueInterval{public:   ActionValueInterval(float min, float max, float q);   ~ActionValueInterval();  /* Insert a new entry by min key returns a pointer to the new entry */  ActionValueInterval *Insert(float min, float q);       void  UpdateQValue(float ActionValue, float ActionConf, float FutureValue, float Reward);  float GetQValue(float ActionValue, float FutureValue);  int   GetNum();  /* Total number of examples in interval */  void  Print(FILE *oStream);  void  Load (FILE *oStream);  void  Load (FILE *oStream,float min,float max);  float ActionMin;  float ActionMax;  ActionValueInterval *Next;private:  FutureValueInterval *FutureValueList;};/****************************************************************************//* Head[i] points to an ordered disjoint covering of the interval [-1,1)   which represents the range of possible DT outputs.  For each range   (possibly treated as single values), there is another disjoint covering   of the possible future values that might have been communicated back    from the player playing position i.  In the simplest case, it will be a    single interval holding the Q value for the DT confidence.*//* Players can pass to any player or knock to any receiver */#define RL_PASS_ACTION  0#define NUM_RL_PASS_ACTIONS TEAM_SIZE#define RL_KNOCK_ACTION 1#define NUM_RL_KNOCK_ACTIONS NUM_EDGE_MARKERS#define NUM_RL_ACTIONS (NUM_RL_PASS_ACTIONS + NUM_RL_KNOCK_ACTIONS)class QTable{public:  QTable(int form, int pos);  ~QTable();  /* Update current entry, or add new entry */  void  UpdateQTable(int position, float FeatureVal, float DTConf, float FutureValue, float Reward);  float GetQValue(int position, float DTConf, float FutureValue);  int   GetNum(int action);    void  Write(FILE *oStream);  inline void  Print() { Write(stdout); }  int  Load (FILE *oStream);    inline int IsLoaded() { return Loaded; }  int                 Formation;  int                 Position;  char                dataFileName[30];  int                 MightExist;private:  int                 NumActions;  int                 Loaded;  ActionValueInterval *Head[NUM_RL_ACTIONS];};/****************************************************************************/class RewardInfo{public:  RewardInfo();  ~RewardInfo();    QTable *GetMyQTable();  QTable *GetQTable(int formation, int position);  void   SetActionState(int to, float val, float conf, float future);  void   CloseRewards();  void   LookForRewards();  int    QActionTaken;    /* TRUE/FALSE -- look for rewards? */  int    KeepLearning;private:  QTable           *QTables[NUM_FORMATIONS][TEAM_SIZE];  int    QLastFormation;  /* Formation we were in            */  int    QLastActionFrom; /* Position I was playing          */  int    QLastActionTo;   /* Action Taken                    */  float  QLastActionVal;  /* Feature value for that action   */  float  QLastActionConf; /* Confidence value for that action*/  float  QLastFutureVal;  /* Future returned from receiver   */  int   MyScore;     /* Values when the action is taken */  int   TheirScore;  float BallX,BallY;  int   Time;  float AvgBallX;  int   AvgBallUpdateTime;};/****************************************************************************/int RLforReceiver(int NumActions, int *actions, int *action_types, float *Confidences);#endif

rl.h - 源码说明

本页面展示了「足球机器人仿真组CMU97的源码」中的 rl.h 源码文件，采用 C头文件编程语言编写，共 170 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与CMU相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?