📄 rl.h
字号:
/* rl.h * CMUnited-97 (soccer client for Robocup-97) * Peter Stone <pstone@cs.cmu.edu> * Computer Science Department * Carnegie Mellon University * Copyright (C) 1997 Peter Stone * * CMUnited-97 was created by Peter Stone and Manuela Veloso * * You may copy and distribute this program freely as long as you retain this notice. * If you make any changes or have any comments we would appreciate a message. */#ifndef _RL_H_#define _RL_H_#include "position.h"#define USE_RL 1#define QMAX 100/* max time before closing rewards */#define MAX_REWARD_TIME 300/* confidence of knock/dribble/clear action from knockorpass.c */#define KNOCK_CONF 0/****************************************************************************//* FutureValueInterval is meant to store an interval of values from the receiver and the associated QValue. The 2nd dimension deep in the 2-dim. q-value function. ActionValueInterval is the top level and points to a list of FutureValueIntervals (disjoint covering)*/class FutureValueInterval{public: FutureValueInterval(float min, float max, float q); ~FutureValueInterval(); /* Insert a new entry by min key returns a pointer to the new entry */ FutureValueInterval *Insert(float min); void UpdateQValue(float ActionConf, float FutureValue, float Reward); float GetQValue(float FutureValue); void Print(FILE *oStream); void Load (FILE *oStream); float FutureMin; float FutureMax; FutureValueInterval *Next; int num; /* Number of times executed (with reward received) */ float weight; /* Amount of weight in weighted average of Qvalues */private: float QValue;};/****************************************************************************/#define MIN_ACTION_VALUE -1#define MAX_ACTION_VALUE 1/* ActionValueInterval should also be a disjoint covering. Each entry points to a FutureValueInterval list with the associated Q values*/class ActionValueInterval{public: ActionValueInterval(float min, float max, float q); ~ActionValueInterval(); /* Insert a new entry by min key returns a pointer to the new entry */ ActionValueInterval *Insert(float min, float q); void UpdateQValue(float ActionValue, float ActionConf, float FutureValue, float Reward); float GetQValue(float ActionValue, float FutureValue); int GetNum(); /* Total number of examples in interval */ void Print(FILE *oStream); void Load (FILE *oStream); void Load (FILE *oStream,float min,float max); float ActionMin; float ActionMax; ActionValueInterval *Next;private: FutureValueInterval *FutureValueList;};/****************************************************************************//* Head[i] points to an ordered disjoint covering of the interval [-1,1) which represents the range of possible DT outputs. For each range (possibly treated as single values), there is another disjoint covering of the possible future values that might have been communicated back from the player playing position i. In the simplest case, it will be a single interval holding the Q value for the DT confidence.*//* Players can pass to any player or knock to any receiver */#define RL_PASS_ACTION 0#define NUM_RL_PASS_ACTIONS TEAM_SIZE#define RL_KNOCK_ACTION 1#define NUM_RL_KNOCK_ACTIONS NUM_EDGE_MARKERS#define NUM_RL_ACTIONS (NUM_RL_PASS_ACTIONS + NUM_RL_KNOCK_ACTIONS)class QTable{public: QTable(int form, int pos); ~QTable(); /* Update current entry, or add new entry */ void UpdateQTable(int position, float FeatureVal, float DTConf, float FutureValue, float Reward); float GetQValue(int position, float DTConf, float FutureValue); int GetNum(int action); void Write(FILE *oStream); inline void Print() { Write(stdout); } int Load (FILE *oStream); inline int IsLoaded() { return Loaded; } int Formation; int Position; char dataFileName[30]; int MightExist;private: int NumActions; int Loaded; ActionValueInterval *Head[NUM_RL_ACTIONS];};/****************************************************************************/class RewardInfo{public: RewardInfo(); ~RewardInfo(); QTable *GetMyQTable(); QTable *GetQTable(int formation, int position); void SetActionState(int to, float val, float conf, float future); void CloseRewards(); void LookForRewards(); int QActionTaken; /* TRUE/FALSE -- look for rewards? */ int KeepLearning;private: QTable *QTables[NUM_FORMATIONS][TEAM_SIZE]; int QLastFormation; /* Formation we were in */ int QLastActionFrom; /* Position I was playing */ int QLastActionTo; /* Action Taken */ float QLastActionVal; /* Feature value for that action */ float QLastActionConf; /* Confidence value for that action*/ float QLastFutureVal; /* Future returned from receiver */ int MyScore; /* Values when the action is taken */ int TheirScore; float BallX,BallY; int Time; float AvgBallX; int AvgBallUpdateTime;};/****************************************************************************/int RLforReceiver(int NumActions, int *actions, int *action_types, float *Confidences);#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -