📄 ctheoreticalmodel.h
字号:
virtual rlt_real getPropability(int oldFeature, int action, int duration, int newFeature) = 0;
/// Calculates the probability for a list of start features and a list of end features
virtual rlt_real getPropability(CFeatureList *oldList, CAction *action, CFeatureList *newList);
/// Interface Function
/**
Has to return a list of all transitions containing the states which can be reached from the given state by executing the action.
*/
virtual CTransitionList* getForwardTransitions(int action, int state) = 0;
/// Interface Function
/**
Has to return a list of all transitions containing the states from which the given state can be reached by executing the action.
*/
virtual CTransitionList* getBackwardTransitions(int action, int state) = 0;
virtual unsigned int getNumFeatures();
};
/// Class for loading and storing a fixed Model
/**The class CFeatureStochasticModel implements all functions from the interface CAbstractFeatureStochasticModel, therefore it maintains a CStateActionTransitions for every state-action pair.
For storing the CStateActionTransitions object a 2 dimensional array is used.
The class provides additional functions for setting the probability of a transition.
If the transition doesn't exist, a new transition object is created with the specified probability, otherwise the probability is just set.
For the semi-MDP case, the duration of the transition can be specified as well. The given probability is then added to the existing transition probability and the duration factors
are all adopted so that there sum is again one. The model can only be used for loading, saving and directly setting the probabilities, its not able to learn anything from a learning trial.
*/
class CFeatureStochasticModel : public CAbstractFeatureStochasticModel
{
protected:
/// The array of state-action Transitions
CMyArray2D<CStateActionTransitions *> *stateTransitions;
/// Load the model from file, can only be used at the construktor of the class.
void loadASCII(FILE *stream);
/// Returns a new Transition object for the specified action
/** If the action is a multistep action, a CSemiMDPTransition is returned, otherwise a CTransition object. The transition
gets initialised with the given values.
*/
CTransition *getNewTransition(int startState, int endState, CAction *action, rlt_real propability);
public:
/// Loads the model from file.
CFeatureStochasticModel(CActionSet *actions, int numFeatures, FILE *file);
/// Creates a new model
CFeatureStochasticModel(CActionSet *actions, int numFeatures);
virtual ~CFeatureStochasticModel();
/// returns the Propability of the transition
/** Looks in the forward transitions of <oldFeature, action> wether a Transition to newFeature exists, if not 0 is returned, odtherwise
the propybility of the transition.
*/
virtual rlt_real getPropability(int oldFeature, int action, int newFeature);
// returns the Propability of the transition
/** Looks in the forward transitions of <oldFeature, action> wether a Transition to newFeature exists with the specified duration, if not 0 is returned, odtherwise
the propybility of the transition.
*/
virtual rlt_real getPropability(int oldFeature, int action, int duration, int newFeature);
void setPropability(rlt_real propability, int oldFeature, int action, int newFeature);
void setPropability(rlt_real propability, int oldFeature, int action, int duration, int newFeature);
/// Just returns thet forward trnasition list for the given state-action Pair.
virtual CTransitionList* getForwardTransitions(int action, int state);
/// Just returns thet forward trnasition list for the given state-action Pair.
virtual CTransitionList* getBackwardTransitions(int action, int state);
virtual void saveASCII(FILE *stream);
};
/*
class CFeatureStateVisitCounter : public CLearnDataObject, public CStateObject, public CSemiMDPListener
{
protected:
};
class CFeatureStateActionVisitCounter : public CLearnDataObject, public CStateObject, public CSemiMDPListener
{
protected:
};*/
/// Base class for all estimated models.
/**
Estimated models estimate the probability of a state transition by counting the number of transitions from a specific state-action pair
to a specific state, and the number of visits of that state-action pair. The estimated model is therefore built on the fly, during learning.
\par
This is done by the class CAbstractFeatureStochasticEstimatedModel. The class is a subclass of CFeatureStochasticModel, so it stores the
transition probabilities in the transition lists. In addition it holds the visits of the state-action pairs in stateActionVisits (the visits
are rlt_real valued because feature visits can be rlt_real valued). The transition visits are not stored explicitly, but they can be
recovered by multiplying the transition probability with the visits of the state-action pair.
\par
The function updateStep updates the transitions and the visit table when a specific feature is visited (with a given factor). It first
calculates the visits of the transitions (state-action visits multiplied by the transition probability), updates the visits of the
state-action pair (the factor of the feature is added) and then recalculates the new probabilities of the transitions (transition visits
divided by the state-action visits). Before this is done, the feature factor is added to the specified transition's visits, or a new
transition object is created if the transition hasn't existed until now.
\par
The class is also able to forget transitions from the past, so the probabilities can adapt to changing models more quickly. This is done by
the timeFaktor: each time an update occurs, the state-action visits are multiplied by the timeFaktor before updating. By default the time
factor is 1.0, so nothing is forgotten.
<p>
There are additional functions for retrieving the transition visits, the state-action visits and the state visits.
<p>
Subclasses only have to implement the function nextStep(...) of the CSemiMDPListener interface. Intermediate steps don't need a special
treatment and are updated like normal steps.
*/
class CAbstractFeatureStochasticEstimatedModel : public CFeatureStochasticModel, public CSemiMDPListener, public CStateObject, public CLearnDataObject
{
protected:
    /// Visits of the state-action pairs
    CFeatureQFunction *stateActionVisits;

    /// Updates the probabilities of the transitions starting in oldFeature for the given action.
    /**
    The function first calculates the visits of the transitions (state-action visits multiplied by the transition probability), updates
    the visits of the state-action pair (the factor of the feature is added) and then recalculates the new probabilities of the
    transitions (transition visits divided by the state-action visits). Before this is done, the feature factor is added to the specified
    transition's visits, or a new transition object is created if the transition hasn't existed until now. For the semi-MDP case the
    duration is added to the transition after the updates.
    */
    virtual void updateStep(int oldFeature, CAction *action, int newFeature, rlt_real Faktor);

public:
    /// Creates a new estimated model
    CAbstractFeatureStochasticEstimatedModel(CStateProperties *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions, int numFeatures);

    /// Loads an estimated model from a file
    CAbstractFeatureStochasticEstimatedModel(CStateProperties *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions, int numFeatures, FILE *file);

    virtual ~CAbstractFeatureStochasticEstimatedModel();

    /// The nextStep method; must be implemented by the subclasses.
    virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState) = 0;

    /// Intermediate steps can be treated as normal steps in the model based case.
    virtual void intermediateStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);

    /// Saves the learned data to the stream (presumably part of the CLearnDataObject interface; confirm).
    virtual void saveData(FILE *stream);

    /// Loads the learned data from the stream (presumably part of the CLearnDataObject interface; confirm).
    virtual void loadData(FILE *stream);

    /// Resets the learned data (presumably part of the CLearnDataObject interface; confirm).
    virtual void resetData();

    /// Returns the transition visits of the specified transition
    /**
    The transition visits show how often a specific transition has occurred. They are calculated by multiplying the probability of the
    transition with the visits of the state-action pair.
    */
    rlt_real getTransitionsVisits(int oldFeature, CAction *action, int newFeature);

    /// Returns the state-action visits
    /**
    Returns how often the given action was chosen in the given state. The state-action visits are stored in stateActionVisits.
    */
    rlt_real getStateActionVisits(int Feature, int action);

    /// Returns how often the agent visited the given state
    /**
    This is calculated by summing up the state-action visits.
    */
    rlt_real getStateVisits(int Feature);
};
/// Estimated model for discrete states
/**
Implements the function nextStep for updating. CDiscreteStochasticEstimatedModel updates the transitions of the specified discrete
state number with visit factor 1.0. The discretizer given in the constructor is used to retrieve the state from the state collection.
@see CAbstractFeatureStochasticEstimatedModel
*/
class CDiscreteStochasticEstimatedModel : public CAbstractFeatureStochasticEstimatedModel
{
protected:
    /// Discretizer used to retrieve the discrete state number from a state collection
    CAbstractStateDiscretizer *discretizer;

public:
    /// Creates the model for the states of the given discretizer
    CDiscreteStochasticEstimatedModel(CAbstractStateDiscretizer *discState, CFeatureQFunction *stateActionVisits, CActionSet *actions);

    virtual ~CDiscreteStochasticEstimatedModel() {}

    /// Updates the discrete state transition
    virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);

    // NOTE(review): the two members below take the same parameters as the non-virtual
    // rlt_real getStateActionVisits(int, int) / rlt_real getStateVisits(int) of
    // CAbstractFeatureStochasticEstimatedModel but return int, so they hide the base
    // versions for calls made through this class. Confirm this is intended.

    /// Returns the visits of the given state-action pair as an integer
    int getStateActionVisits(int Feature, int action);

    /// Returns the visits of the given state as an integer
    int getStateVisits(int Feature);
};
/// Estimated model for feature states
/**
Implements the function nextStep for updating. CFeatureStochasticEstimatedModel updates all transitions for all combinations of
start and end states. The visit factor is calculated by multiplying the two feature factors of the specific start and end features.
The feature calculator given in the constructor is used to retrieve the state from the state collection.
*/
class CFeatureStochasticEstimatedModel : public CAbstractFeatureStochasticEstimatedModel
{
protected:
    /// Feature calculator used to retrieve the feature state from a state collection
    CFeatureCalculator *featCalc;

public:
    /// Creates the model for the features of the given feature calculator
    CFeatureStochasticEstimatedModel(CFeatureCalculator *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions);

    virtual ~CFeatureStochasticEstimatedModel() {}

    /// Updates the transitions for feature states
    /**
    Updates all transitions for all combinations of start and end states. The visit factor passed to updateStep is calculated by
    multiplying the two feature factors of the specific start and end features. The feature calculator given in the constructor
    is used to retrieve the state from the state collection.
    */
    virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -