📄 interface_classes.h

📁 CMAC神经网络机械臂控制的设计matlab源码
💻 H
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*	This file contains declarations of the standard classes 
	(most of them abstract classes) for the use in RL systems.
	File:		interface_classes.h
	Author:		Bohdana Ratitch
	Last modified:	May, 2002
*/

#include <iostream.h>
#include <fstream.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

//typedef short bool; //you may have to uncomment this typedef, depending on the compiler version

// Directory separator string (Unix style by default; presumably used when
// composing file paths - confirm against the callers).
#define DIR_SEP "/"
//#define DIR_SEP "\\" //use this statement instead if compiling for DOS or Windows 

// Integer stand-ins for the boolean literals.
// NOTE(review): in standard C++ `true` and `false` are keywords; redefining
// them as macros is not permitted in a translation unit that includes standard
// library headers, and it turns both literals into plain ints. Presumably kept
// for pre-standard compilers without a built-in bool - confirm before removing.
#define true 1
#define false 0

// Random-number routines. Only declared here; the definitions are not part of this header.
// NOTE(review): names and signatures match the Numerical Recipes routines of the
// same names, where idum points to the generator's seed/state - confirm against
// the implementation.
float gasdev(long *idum); //routine for sampling from Gaussians
float ran1(long *idum);//routine for uniform sampling in [0,1] 

// STATE AND ACTION REPRESENTATION

struct State{ //implementation in sarepr.cpp
  /* A state, described by an array of real-valued variables.
   */

  static int dimensionality;// number of variables (static: shared by all State objects)
  double* x;//array of variables describing a state

  State();
  /* Default constructor.
     NOTE(review): presumably allocates x with `dimensionality` entries - confirm in sarepr.cpp.
  */

  State(int n);
  /* Constructs a "super"-state that combines n states with current dimensionality.
     Used by random MDP generator mainly.
  */

  State(State& s);
  /* Copy constructor. Takes a non-const reference, so it cannot copy
     from a const State or from a temporary.
  */

  void operator = (const State& s);
  /* Assignment from another state. Returns void, so assignments
     cannot be chained (a = b = c).
  */

  ~State();
};

ostream& operator << (ostream& file, const State& s);
/* Overloaded stream-insertion operator: writes state s to a file stream or to cout.
   file : destination stream
   s    : state to output
   Returns a reference to an ostream (presumably `file`, to allow chaining - confirm in the implementation).
*/

////////////////////////////////////////////////////////////////////////////////////

struct Subset{//implementation in sarepr.cpp
  /* Hypercube in the state space, given by per-variable lower ("left")
     and upper ("right") bounds.
   */
  double* left;//left bounds on state variables
  double* right;//right bounds on state variables
	
  Subset();
  /* Default constructor.
     NOTE(review): array sizes for this overload are not stated here - confirm in sarepr.cpp.
   */

  Subset(int N);
  /* Size of the allocated arrays is (State::dimensionality * N)
   */

  ~Subset();
};
///////////////////////////////////////////////////////////////////////

struct Action{//implementation in sarepr.cpp
  /* Representation of a discrete action.
   */
		
  int id; /* by default is assigned the ordinal number 
	     as the actions are added to some action set.
	     In other words, its value coincides with the action's array 
	     index in the action set to which it belongs 
	     (see ActionSet declaration below).
	  */

  char* description;//optional "name" of the action (C string)
  double value;	//numerical value of the action
  static int count; //total number of Action objects created

  Action();
  /* Default constructor.
   */

  Action(const char* d);
  /* General constructor.
     Parameters: 
     d : description of an action
  */

  Action(const char* d, double v);
  /* General constructor.
     Parameters:
     d : description of an action
     v : numerical value of an action
  */
  
  void operator = (const Action& b);
  /* Assignment from another action. Returns void, so assignments
     cannot be chained.
  */

  Action(Action& a);
  /* Copy constructor. Takes a non-const reference, so it cannot copy
     from a const Action or from a temporary.
  */

  ~Action();
};

ostream& operator << (ostream& file, const Action& a);
/* Overloaded stream-insertion operator: writes action a to a file stream or to cout.
   file : destination stream
   a    : action to output
   Returns a reference to an ostream (presumably `file`, to allow chaining - confirm in the implementation).
*/

///////////////////////////////////////////////////////////////////////

struct ActionSet{//implementation in sarepr.cpp
  /* Groups together all actions for an RL system.
   */

  int size;//number of actions in the set
  Action* action;//array of actions that belong to this set	
  int added;//indicates how many actions have already been added to the set

  ActionSet();
  /* Default constructor; create() can be called afterwards to set the size.
   */

  ActionSet(int n);
  /* General constructor.
     n : size of the action set
  */

  void create(int n);//acts as a constructor; can be called after the object is created with the default constructor
  /* n : size of the action set (same meaning as in ActionSet(int n))
   */

  void addAction(Action& a);
  /* Add action to the action set.
     a : action to be added (passed by non-const reference).
  */

  void operator = (const ActionSet& a);
  /* Assignment from another action set. Returns void, so assignments
     cannot be chained.
  */

  ~ActionSet();
};

////////////////////////////////////////////////////////////////////////////////

struct TransitionSamples{//implemented in environment.cpp
  /* This structure is used by the function ComputeAttributes() implemented in the base class Environment. 
     It holds samples of transitions made from one state with one action.
   */

  int Transitions;//number of transitions made from currentState with action a 
  State currentState;	//state from which the sampled transitions start
  Action a;//action taken in currentState
  State* nextState;//array of collected samples of next states
  double* reward;//array of collected samples of rewards
  int* binIndexNS;//used to calculate an index of a state in a discretized space
  int B;//number of bins in which the state space is discretized
  double* prob;//estimates of the transition probabilities (for each bin)

  TransitionSamples();

  void setTransitionNumber(int T);
  /* NOTE(review): presumably sets Transitions to T and sizes the sample
     arrays accordingly - confirm in environment.cpp.
  */

  void computeTransitionProbabilities(int b);
  /* NOTE(review): presumably fills prob using b bins (cf. member B) -
     confirm in environment.cpp.
  */

  ~TransitionSamples();
};

//////////////////////////////////////////////////////////////////

class Approximator{
  /* Abstract class - base for all FA methods.
     Declares the prediction / learning / eligibility-trace interface that
     every concrete approximator must implement, plus two bookkeeping
     counters that derived classes may update during learning.
   */

 protected:
  //these data members may be used to trace changes during learning
  double MaxParameterChange;
  int NumberParametersChanged;

 public:

  Approximator() : MaxParameterChange(0), NumberParametersChanged(0) {}
  /* Zero-initializes the change-tracking members, so that the getters
     below never read indeterminate values even when a derived class
     does not set them in its own constructor.
   */
  
  virtual int getSize()=0;
  /* Return the number of parameters in this architecture
   */

  virtual void predict(const State& s, double& output) =0;
  /* Predicts an output value for a given input.
     s : reference to the input (state)
     output : returned value of the predicted output
  */

  virtual void learn(const State& s, const double target)=0;
  /* Learns an input-output pair.
     s : input (state)
     target: target output value
  */

  virtual void computeGradient(const State& s, double* GradientVector)=0;
  /* Compute the gradient w.r.t. architecture parameters at the current parameters' values and input s.
     s : input (state)
     GradientVector : caller-supplied array that receives the gradient
                      (presumably getSize() entries - confirm in the derived classes)
   */

  virtual void updateParameters(double* delta)=0;
  /* Update parameters by amounts in delta array, 
     possibly multiplied by appropriate learning steps.
  */

  virtual void replaceTraces(const State& s, double replace)=0;
  /* Replace traces of parameters, activated by input state s, with the value replace.
   */

  virtual void decayTraces(double factor)=0;
  /* Decay (multiply) traces by factor.
   */

  virtual void accumulateTraces(const State& s, double amount)=0;
  /* Increment traces by amount for parameters activated by input s.
   */

  virtual void setArchitectureParameters(int argc, char *argv[])=0;
  /* Loads parameters of the architecture from a text file.
     argc : number of supplied arguments
     argv : array of arguments
  */

  virtual void saveArchitectureParameters(int argc, char *argv[])=0;
  /* Saves parameters of the architecture into a text file.
     argc : number of supplied arguments
     argv : array of arguments
  */

  virtual void setLearningParameters(int argc, char *argv[])=0;
  /* Sets learning parameters (e.g. learning step).
     argc : number of supplied arguments
     argv : array of arguments
  */
  
  double getMaxParameterChange(){
    /* Returns the recorded maximum parameter change and resets the
       record to zero, so each call reports the change since the previous call.
    */
    double c=MaxParameterChange;
    MaxParameterChange=0;
    return c;
  }

  int getNumberParametersChanged(){
    /* Returns the recorded number of changed parameters (not reset by this call).
    */
    return NumberParametersChanged;
  }

 virtual ~Approximator(){}
 /* Virtual destructor: derived approximators can be deleted through a base pointer.
  */

};

////////////////////////////////////////////////////////////////

class StateActionFA {//implemented in safa.cpp
  /* Contains separate approximator for each action.
   */
	
  int A;//Number of actions (architectures)
  Approximator** fa;/* Array of pointers to approximators.	
		       Each element is a base pointer to a derived approximator object.
		    */
 public:

  StateActionFA();
  ~StateActionFA();

  StateActionFA(int n, Approximator** f);
  /* General constructor.
     n : number of actions (architectures)
     f : pointer to the array of pointers to approximator objects
  */

  int getSize();
  /* Return number of parameters in one of the component architectures 
     (assuming that all of them have the same number of parameters). 
  */

  void getMaxParameterChange(double* changes);
  /* Returns an array of MaxParameterChanges for all component architectures */

  void getNumberParametersChanged(int* changes);
  /* Returns an array of the number of changed parameters for each component architecture
   */

  void predict(const Action& a, const State& s,  double& output);
  /* Predicts an output value with an approximator corresponding to a given action.
     a : reference to an action
     s : reference to the input (state)
     output : returned value of the predicted output
  */
	
  void learn(const Action& a, const State& s, double target);	
  /* Learns an input-output pair with an approximator corresponding to a given action.
     a : reference to an action
     s : reference to the input (state)
     target : target output value
  */

  void computeGradient(const Action& a, const State& s, double* GradientVector);
  /* Compute the gradient w.r.t. architecture parameters at the current parameters' values and input s.
   */

  void updateParameters(const Action& a, double* delta);
  /* Update parameters by amounts in delta array (possibly multiplied with appropriate learning step).
   */

  void clearTraces(const Action& a, const State& s, double replace);
  /* Clears traces for those actions that were not taken in state s.
     a : action for which traces should NOT be replaced
     s : input (state)
     replace : value to which traces should be replaced (usually zero)
  */

  void replaceTraces(const Action& a, const State& s, double trace);
  /* Replaces traces of the architecture for action a for parameters activated by input s
   */

  void decayTraces(double factor);
  /* Decay (multiply) traces of all architectures by factor.
   */
	
  void accumulateTraces(const Action& a, const State& s, double amount);
  /* Increment traces by amount for the architecture of action a for parameters activated by s.
   */

  void setArchitectureParameters(const Action& a, int argc, char *argv[]);
  /* Loads parameters of the architecture corresponding to a given action.
     a : action
     argc : number of supplied arguments
     argv : array of arguments
     What parameters exactly you send in argv depends on the implementation 
     of the class inherited from Approximator class: you send parameters 
     exactly as to the setArchitectureParameters() function of that class.
  */
	
  void saveArchitectureParameters(const Action& a, int argc, char *argv[]);
  /* Saves parameters of the architecture corresponding to a given action.
     a : action
     argc : number of supplied arguments
     argv : array of arguments
     What parameters exactly you send in argv depends on the implementation 
     of the class inherited from Approximator class: you send parameters 
     exactly as to the saveArchitectureParameters() function of that class.
  */

  void saveAllArchitectureParameters(char** fileNames);
	
  void setLearningParameters(const Action& a, int argc, char *argv[]);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -