// interface_classes.h
/* Sets learning parameters of the architecture corresponding to a given action.
a : action
argc : number of supplied arguments
argv : array of arguments
*/
void StateActionFA::setAllLearningParameters(int argc, char* argv[]);
/* Sets (the same) learning parameters of architectures corresponding to each action.
argc : number of supplied arguments
argv : array of arguments
Which parameters to pass in argv depends on the implementation
of the class derived from the Approximator class: pass the parameters
exactly as they would be passed to that class's setLearningParameters() function.
*/
};
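/* Example (illustrative sketch, not part of the original interface):
   passing learning parameters through the argc/argv-style interface above.
   The actual strings depend on the Approximator-derived class behind the
   architecture; "arch" and the numeric values below are placeholders.

       StateActionFA arch;                    // assumes a suitable constructor
       char p0[] = "0.1";                     // e.g. a learning rate
       char p1[] = "0.9";                     // e.g. a trace-decay parameter
       char* params[] = { p0, p1 };
       arch.setAllLearningParameters(2, params);
*/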
//////////////////////////////////////////////////////////////////////////////
struct Attributes{//implementation in environment.cpp
double* Entropy; //array where i-th item is the i-step Entropy
int n; //up to which step entropy is computed
double Controllability;
double RiskFactor;
double RFconst;
double RewardVariance;
double TransitionDistance;
double TransitionVariability;
Attributes();
Attributes(int N, double c);
/* N : up to which step the State Transition Entropy should be computed.
c : (multiplicative) threshold for the risk factor (in [0,1)).
*/
void setParameters(int N, double c);
/* Sets parameters for the attributes' calculation:
N : up to which step the State Transition Entropy should be computed.
c : (multiplicative) threshold for the risk factor (in [0,1)).
*/
~Attributes();
};
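/* Example (illustrative sketch): constructing the attributes structure for a
   10-step state transition entropy horizon with a risk-factor threshold of
   0.5; the numbers are placeholders.

       Attributes att(10, 0.5);     // N = 10, c = 0.5
       att.setParameters(20, 0.3);  // parameters may be reset before reuse
*/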
//////////////////////////////////////////////////////////////////////////////
class Environment {//implementation of some non-virtual functions in environment.cpp
protected:
State CurrentState;//current state
Action CurrentAction;//last action performed by the agent
double reward;//reward after transition to state s under action a
static long idum; //used by a random number generator
static bool seeded;//indicates if the random number generator has been seeded during this program run
public:
Environment();
/* Seeds and initializes the random number generator.
This constructor is automatically called when objects of the derived
classes are created.
*/
virtual void startState(State& start, bool& terminal)=0;
/* Samples a start state. Sets the CurrentState data member to that state
and also returns it in the "start" parameter. Returns an indication of whether
the sampled state is terminal in the "terminal" parameter.
*/
virtual void setState(const State& s, bool& terminal)=0;
/* Sets the CurrentState data member to state "s". Returns an indication of
whether that state is terminal in the "terminal" parameter.
*/
virtual void transition(const Action& action, State& s_new, double& r_new, bool& terminal)=0;
/* Implements a transition from CurrentState in response to the "action"
performed by the agent. Updates its internal variables
(CurrentAction and reward) and returns values to the agent.
action: action performed by the agent
s_new : return value - new state
r_new : return value - new reward
terminal: indication of whether s_new is a terminal state
*/
virtual bool applicable(const State& s, const Action& a)=0;
/* Checks if action a is applicable in state s.
*/
virtual void bound(int i, bool& bounded, double& left, double& right)=0;
/* Gives bounds on state variables' values
i : index of state variable
bounded: indicates if i^th variable is bounded
left : left bound
right: right bound
*/
void getStateSpaceBounds(double* left, double* right);
/* Returns bounds on state variables.
left : array of left bounds
right : array of right bounds
*/
virtual void uniformStateSample(State& s)=0;
/* Implements uniform state space sampling.
*/
//The following functions empirically measure task attributes
void computeAttributes(Attributes& att, const State& startState, int Steps, int Transitions, const int* n, const ActionSet& as, StateActionFA* fa=NULL);
/* Computes global values of the attributes for the state distribution
encountered on a trajectory under some policy.
att : attributes structure in which to return computed values;
startState : state from which to start a random walk;
Steps : maximum number of steps on the trajectory;
Transitions: number of sample transitions from each state;
n : array indicating into how many intervals each state variable
should be discretized for the approximate calculation of attributes;
as : action set for the current RL system;
fa : pointer to the architecture that contains action value functions
for each action. According to these value functions,
a greedy policy will be executed. To implement a uniformly
random policy instead, make sure that the parameters of the
architectures for all action-value functions are the same (so that all values are equal).
*/
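/* Example (illustrative sketch): measuring the attributes along a trajectory
   under the greedy policy with respect to the value functions held in "arch".
   "env" (a pointer to a concrete Environment), "arch", "start", and "actions"
   are assumed to exist already; the numeric arguments and the two-variable
   discretization are placeholders.

       Attributes att(5, 0.5);
       int n[] = { 10, 10 };                 // bins per state variable
       env->computeAttributes(att, start, 200, 30, n, actions, &arch);
*/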
void computeAttributes(Attributes& att, int SampleSize, int Transitions, const int* n, const ActionSet& as);
/* Computes global values of the attributes for the uniform state
distribution.
att : attributes structure in which to return computed values;
SampleSize : number of uniformly distributed samples across the state
space in which attribute values are computed and then averaged;
Transitions: number of sample transitions from each state;
n : array indicating into how many intervals each state variable should be discretized;
as : action set for the current RL system.
*/
double multiStepEntropy(int N, int sampleSize, int Transitions, const int* n, const ActionSet& as);
/* Computes multi-step state transition entropy.
N : number of steps over which entropy should be computed;
sampleSize : number of uniformly distributed samples across the state
space in which attribute values are computed and then averaged;
Transitions: number of sample transitions from each state;
n : array indicating into how many intervals each state variable should be discretized;
as : action set for the current RL system.
*/
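/* Example (illustrative sketch): the uniform-sampling variants of the
   attribute measurements; "env", "actions", and all numbers are placeholders.

       Attributes att(5, 0.5);
       int n[] = { 10, 10 };
       env->computeAttributes(att, 1000, 30, n, actions);
       double h = env->multiStepEntropy(5, 1000, 30, n, actions);
*/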
protected:
void chooseAction(double epsilon, StateActionFA* fa, const ActionSet& actions, const State& s, Action& a);
/* Implements an epsilon-greedy strategy based on action value
functions in the architecture pointed to by fa.
epsilon : parameter for the epsilon-greedy strategy;
fa : pointer to the architecture containing action value functions;
actions : action set for the current RL system;
s : state in which to choose action
a : return value - chosen action
*/
void actionSequence(int num, int n, int as_size, int* seq);
/* Used by the multiStepEntropy() function.
*/
};
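/* Example (illustrative sketch): a concrete environment must override every
   pure virtual member of Environment. "MyEnvironment" is a hypothetical name.

       class MyEnvironment : public Environment {
       public:
           void startState(State& start, bool& terminal);
           void setState(const State& s, bool& terminal);
           void transition(const Action& action, State& s_new,
                           double& r_new, bool& terminal);
           bool applicable(const State& s, const Action& a);
           void bound(int i, bool& bounded, double& left, double& right);
           void uniformStateSample(State& s);
       };
*/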
///////////////////////////////////////////////////////////////////////////
class Agent { //implementation of some non-virtual functions in agent.cpp
public:
Agent(double g, const ActionSet& a_s, StateActionFA* const f, Environment* const e);
/* Constructor.
g : discount factor
a_s : action set available to the agent
f : pointer to the architecture containing either action-value functions
or a random policy
e : pointer to the environment
*/
int initTrial(int N, bool learning, bool SaveTrajectory, const State* s = NULL, char* fileName = NULL, bool ComputeBellmanError = false);
/* Gets the start state from the environment
and then calls the appropriate act function to perform
the trial for a maximum of N steps.
The "learning" argument indicates whether learning should take place:
if so, the actAndLearn() function is called, otherwise
the act() function is called.
Computes the return for this trial.
Returns the number of steps actually performed during the trial.
N : maximal number of steps in the trial
learning : indicates whether learning should take place
SaveTrajectory : indicates whether the trajectory should be saved
s : optional pointer to a start state; NULL by default
fileName : name of the file to which the trajectory should be saved
ComputeBellmanError : indicates if the estimated Bellman error should be computed. Has a default false value.
*/
double getReturn();
/* Gets return collected during the last trial
*/
double getBellmanError();
/* Returns BellmanError for the last trajectory traversed without learning */
virtual void setLearningParameters(int argc, char *argv[])=0;
/* Sets parameters of the RL learning algorithm
*/
void setArchitectureParameters(const Action& a, int argc, char *argv[]);
/* Sets parameters of the architecture (fa) representing
a value function or a policy distribution for action "a".
a : action whose architecture is addressed
argc : number of arguments in argv array
argv : array of arguments
The two above arguments should be as they would be sent
to setArchitectureParameters() function of the derived approximator class.
*/
void saveArchitectureParameters(const Action& a, int argc, char *argv[]);
/* Saves parameters of the architecture (fa) representing
a value function or a policy distribution for action "a".
a : action whose architecture is addressed
argc : number of arguments in argv array
argv : array of arguments
The two above arguments should be as they would be sent
to saveArchitectureParameters() function of the derived approximator class.
*/
virtual ~Agent();
protected:
//component structures
struct StageInfo; //fully defined at the end of this class' declaration
struct Trajectory;//fully defined at the end of this class' declaration
//Data members
State CurrentState;//current state of the environment
Action CurrentAction;//action chosen in the current state
bool terminal; //indicates if the state is terminal
double CurrentReward;
const ActionSet& actions; //action set of the RL system
StateActionFA* const fa; /* Pointer to an architecture representing
either a policy probability distribution
or action-value functions.
*/
double gamma; //discount factor
double Return;//return collected during a trial
Environment* const env;//pointer to the environment object
int* ApplicableActions;//array to be used by chooseAction() function
Trajectory* trajectory;
double BellmanError;
virtual int act(int N, bool SaveTrajectory, bool ComputeBellmanError)=0;
/* Implements a maximum of N successive steps of the trial,
or runs until a terminal state is entered by the environment.
Communicates with the environment to get the current state and reward.
Computes the return collected on this trial.
N : maximal number of steps in the trial
SaveTrajectory : indicates whether trajectory should be saved
ComputeBellmanError : indicates whether (estimated) Bellman error
should be computed for the state action pairs on the trajectory.
*/
virtual int actAndLearn(int N, bool SaveTrajectory)=0;
/* Implements a maximum of N successive steps of the trial,
or runs until a terminal state is entered by the environment.
Communicates with the environment to get
the current state and reward.
Calls the rlAlgorithm when appropriate.
Computes the return collected on this trial.
N : maximal number of steps in the trial;
SaveTrajectory : indicates whether trajectory should be saved
*/
virtual void chooseAction(const State& s, Action& a) =0 ;
/* Implements the behavior policy.
Uses fa, the representation of the random policy or the action-value functions.
s : state in which the action should be performed
a : chosen action
*/
// component structures
struct StageInfo{
State state;
Action action;
double reward;
double* Qvalue;
double TDerror;
StageInfo(){
Qvalue = new double[Action::count];
}
~StageInfo(){
delete [] Qvalue;
}
};
struct Trajectory{
StageInfo* stage;
int length; //actual length of the recorded trajectory
Trajectory(int n){
// n is the maximal number of stages in the trajectory
stage = new StageInfo[n];
length=0;
}
~Trajectory(){
delete [] stage;
}
};
};
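/* Example (illustrative sketch): running a learning trial followed by an
   evaluation trial. Agent is abstract, so "SarsaAgent" and "MyEnvironment"
   stand for hypothetical derived classes; all numbers are placeholders.

       ActionSet actions;                        // assumes a suitable constructor
       StateActionFA arch;
       MyEnvironment env;
       SarsaAgent agent(0.95, actions, &arch, &env);
       agent.initTrial(1000, true,  false);      // learn, at most 1000 steps
       agent.initTrial(1000, false, false);      // evaluate without learning
       double ret = agent.getReturn();           // return collected in last trial
*/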
int tokenize(char* sep, char* str, int** tokens);
/* Extracts tokens from string "str", which are separated by separators
specified in "sep". Allocates array (*tokens) of the appropriate size
and saves extracted tokens in that array. Returns the number of tokens.
*/
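/* Example (illustrative sketch): extracting integer tokens from a
   comma-separated string. The caller is assumed to release the allocated
   array; the separator and input are placeholders.

       char sep[] = ",";
       char str[] = "3,5,7";
       int* tokens;
       int count = tokenize(sep, str, &tokens);  // count == 3, tokens = {3, 5, 7}
       delete [] tokens;                         // assumes allocation with new[]
*/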