// cpolicies.h


	/// Interface function for calculating the action ratings, has to be implemented by the subclasses
	virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues, CActionDataSet *actionDataSet = NULL) = 0;


	virtual bool isDifferentiable() {return false;};

	virtual void getActionProbabilityGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
	virtual void getActionProbabilityLnGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);

	/// Interface function for calculating the derivative of an action factor.
	/** 
	The function has to calculate d_actionratings(action)/dw, which is for example dQ(s,a)/dw.
	*/
	virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState) {};
};
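/*
Illustrative sketch (not part of the original header): a minimal subclass implementing
the pure virtual getActionValues interface above. The class name CUniformRatingPolicy
is hypothetical, and it is assumed, only for this sketch, that the base class is
CStochasticPolicy (as CQStochasticPolicy below derives from it), that its constructor
takes (actions, distribution), and that CActionSet provides size(). A real subclass
would derive its ratings from a value function or a model, as CQStochasticPolicy does.

	class CUniformRatingPolicy : public CStochasticPolicy
	{
	public:
		CUniformRatingPolicy(CActionSet *actions, CActionDistribution *distribution)
			: CStochasticPolicy(actions, distribution) {}

		// Rate every available action equally, so the action distribution
		// falls back to its own prior (e.g. uniform for a soft-max).
		virtual void getActionValues(CStateCollection *state, CActionSet *availableActions,
			rlt_real *actionValues, CActionDataSet *actionDataSet = NULL)
		{
			for (unsigned int i = 0; i < availableActions->size(); i++)
			{
				actionValues[i] = 0.0;
			}
		}
	};
*/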

/// Stochastic policy which computes its probabilities from the Q-Values of a Q-Function
/**
This stochastic policy calculates its action ratings according to the given Q-Function. The getActionValues function writes the Q-Values into the actionValues array. 
Q-Stochastic policies also support gradient calculation. The policy is differentiable if both the distribution and the Q-Function are differentiable. The gradient d_actionratings(action) / dw calculated in getActionGradient is simply dQ(s,a)/dw. (An illustrative usage sketch follows after the class declaration below.)
*/


class CQStochasticPolicy : public CStochasticPolicy
{
protected:
	/// QFunction of the policy, needed for action decision
	CAbstractQFunction *qfunction;
	/// returns the action statistics object from the q-function
	virtual void getActionStatistics(CStateCollection *state, CAction *action, CActionStatistics *stat);

public:
	CQStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractQFunction *qfunction);
	~CQStochasticPolicy();

	virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues, CActionDataSet *actionDataSet = NULL);

	virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
	virtual bool isDifferentiable();

	virtual CAbstractQFunction *getQFunction() {return qfunction;};
};
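/*
Illustrative usage sketch (not part of the original header): creating a stochastic
policy on top of an existing Q-Function. Only the CQStochasticPolicy constructor
declared above is taken from this header; the names actions, softMax and qFunction
stand for objects assumed to have been created elsewhere, and the choice of a
soft-max distribution is just an example.

	CActionSet *actions;             // the agent's discrete actions (created elsewhere)
	CActionDistribution *softMax;    // e.g. a soft-max / Boltzmann distribution (created elsewhere)
	CAbstractQFunction *qFunction;   // a learned or model-based Q-Function (created elsewhere)

	// The policy rates each available action with Q(s,a) via getActionValues and
	// lets the distribution turn those ratings into action probabilities.
	CQStochasticPolicy *policy = new CQStochasticPolicy(actions, softMax, qFunction);
*/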

class CQFunctionFromTransitionFunction;


/// Stochastic policy which calculates its action ratings from a dynamic model and a V-Function
/** 
The policy calculates its action ratings with a forward view of one or more steps using the dynamic model. For every action the successor state s' is calculated and then the value of that state is determined. The rating of that action is then Q(s,a) = R(s,a,s') + gamma * V(s'); an illustrative sketch of this rating follows after the class declaration below. This calculation is done by a dedicated Q-Function class, CQFunctionFromTransitionFunction. This Q-Function also supports a search depth larger than 1; the search depth can be set with the parameter "SearchDepth". Be aware that large search depths (> 3) have large (exponentially growing) computational costs.
<p>
The policy is differentiable if both the distribution and the V-Function are differentiable. The action rating gradient is easy to calculate since the reward doesn't depend on the weights; the derivative for action a is just dV(s')/dw, where s' is the successor state when taking action a.
<p>
CVMStochasticPolicy has the following parameters:
- inherits all Parameters from the action distribution
- "SearchDepth" : number of forward search steps in the value function.
- "DiscountFactor" : gamma
*/

class CVMStochasticPolicy : public CQStochasticPolicy
{
protected:
	CStateCollectionImpl *nextState;
	CStateCollectionImpl *intermediateState;

	CAbstractVFunction *vFunction;
	CQFunctionFromTransitionFunction *qFunctionFromTransitionFunction;
	CTransitionFunction *model;
	CRewardFunction *reward;
public:
	
	CVMStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractVFunction *vFunction, CTransitionFunction *model, CRewardFunction *reward, std::list<CStateModifier *> *modifiers);
	~CVMStochasticPolicy();

	virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);

	virtual bool isDifferentiable();
};
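/*
Illustrative sketch (not part of the original header) of the one-step rating used by
CVMStochasticPolicy: Q(s,a) = R(s,a,s') + gamma * V(s'). The types State and Action
and the helpers predictSuccessor, immediateReward and stateValue are hypothetical
stand-ins for the transition model, the reward function and the V-Function; their
names and signatures are assumptions made only for this sketch.

	struct State {};   // problem-specific state representation (placeholder)
	struct Action {};  // problem-specific action representation (placeholder)

	State  predictSuccessor(const State &s, const Action &a);                    // s' = model(s, a)
	double immediateReward(const State &s, const Action &a, const State &sNext); // R(s, a, s')
	double stateValue(const State &sNext);                                       // V(s')

	// One-step look-ahead rating of a single action.
	double rateAction(const State &s, const Action &a, double gamma)
	{
		State sNext = predictSuccessor(s, a);
		return immediateReward(s, a, sNext) + gamma * stateValue(sNext);
	}

	// With "SearchDepth" d > 1 the same expansion is applied recursively to s',
	// so the number of evaluated states grows exponentially with d.
*/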

/*
/// Calculates the Exploration Gain for a given state action pair, OLD DOCUMENTATION
/**The exploration gain is the expected gain of information about the model from executing an action. 
Exploration gains usually depend on how often the action has been chosen in the current state. 
\par
CExplorationGain is the interface for all exploration gain calculators. It provides an estimated model to get
the state-action visits. It has two functions: rlt_real getExplorationGain(CAction *action, CStateCollection *state) decomposes the
state into its features and calls rlt_real getExplorationGain(int action, int feature), which is the interface function implemented by all
subclasses.
@see CQExplorationGreedyPolicy


class CExplorationGain : public CStateObject, virtual public CParameterObject
{
protected:
	CAbstractFeatureStochasticEstimatedModel *model;
public:
/// Initializes the object with the estimated model and a state modifier to fetch the feature state from the state collection.
	CExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc);

/// Decomposes the state into its features and calls getExplorationGain(int action, int feature)
	virtual rlt_real getExplorationGain(CAction *action, CStateCollection *state);
/// Interface function
	virtual rlt_real getExplorationGain(int action, int feature) = 0;
};

/// Calculates the exploration gain with a logarithmic function
class CLogExplorationGain : public CExplorationGain
{
protected:

public:
	CLogExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc);
/**
The exploration gain is calculated as 1 / (log(saVisits + 1) + 1). (A small sketch of both gain formulas follows after this commented-out block.)

	virtual rlt_real getExplorationGain(int action, int feature);
};

/// Calculates the exploration gain with a power function.
/**
You can set the exponent in the constructor. The class uses the formula 
1 / (saVisits + 1) ^ power.

class CPowExplorationGain : public CExplorationGain
{
protected:
	//rlt_real power;
public:
	CPowExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc, rlt_real power);

/**
The function uses the formula 1 / (saVisits + 1) ^ power.

	virtual rlt_real getExplorationGain(int action, int feature);

	rlt_real getPower();
	void setPower(rlt_real power);
};

/// Policy whose aim is to collect information about the model.
/*Exploration policies choose their action according to an exploration gain factor, 
i.e. the additional information about the model gained by executing this action. 
Exploration is very important for finding good policies and, in model-based 
learning, for estimating the model correctly. The drawback is obviously that you execute bad actions very often in favour 
of the information gain. This can be fatal if you need a good policy even during learning (e.g. some states can be fatal 
for your agent (a robot), so it must avoid entering these states), 
but usually only the learned policy is important, so you should do much exploration.
This class calculates all exploration gains of the actions in the current state, 
and then chooses, with probability max_a exploration_gain(a), 
the action with the maximum gain, and with probability 1 - maxgain it 
chooses the greedy action according to a Q-Function. 
@see CExplorationGain


class CExplorationDistribution: public CActionDistribution
{
protected:
	/// the exploration gain object
	CExplorationGain *explorationGain;
	CActionDistribution *distribution;
	//rlt_real alpha;
	rlt_real *exploration;
public:
	/// creates an exploration policy which acts greedily on the Q-Function if no exploration action has been chosen
	CExplorationDistribution(CActionSet *actions, CActionDistribution *distribution, CExplorationGain *explorationGain);
    ~CExplorationDistribution();

	CExplorationGain *getExplorationGain();
	void setExplorationGain(CExplorationGain *explorationGain);

	void setAlpha(rlt_real alpha);
	virtual rlt_real getAlpha(CStateCollection *state);

	virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues);

	virtual bool isDifferentiable() {return distribution->isDifferentiable();};

	virtual void getGradientFactors(CStateCollection *state, CAction *usedAction, CActionSet *actions, rlt_real *actionFactors, CMyVector *gradientFactors);

};
*/
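/*
Illustrative sketch (not part of the original header) of the two exploration-gain
formulas documented in the commented-out classes above. The free functions below are
hypothetical stand-ins written only to make the formulas concrete; saVisits is the
number of times the given action has been chosen for the given feature. Both formulas
return 1 for an unvisited pair and decay towards 0 as the visit count grows.

	#include <cmath>

	// 1 / (log(saVisits + 1) + 1): slow decay, as in CLogExplorationGain.
	double logExplorationGain(double saVisits)
	{
		return 1.0 / (std::log(saVisits + 1.0) + 1.0);
	}

	// 1 / (saVisits + 1)^power: faster decay for larger exponents, as in CPowExplorationGain.
	double powExplorationGain(double saVisits, double power)
	{
		return 1.0 / std::pow(saVisits + 1.0, power);
	}
*/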


#endif
