📄 cqfunction.h

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 H
📖 第 1 页 / 共 2 页
字号:
上一页 12
	virtual void updateValue(CState *state, CAction *action, rlt_real td, CActionData *data = NULL);
/// Sets the value of the value function assigned to the given action
/** Calls the setValue function of the specified value function. The given state must be the state
used by the value function (or at least a state that can be used by the value function).
*/	
	virtual void setValue(CState *state, CAction *action, rlt_real qValue, CActionData *data = NULL); 
/// Returns the value of the value function assigned to the given action
/** Returns the result of the getValue function of the specified value function. The given state must be the state
used by the value function (or at least a state that can be used by the value function).
*/
	virtual rlt_real getValue(CState *state, CAction *action, CActionData *data = NULL);

/// Saves the value functions
/**
Calls the saveValues method of all value functions, passing on the given file.
*/
	virtual void saveData(FILE *file);
/// Loads the value functions
/** Calls the loadValues method of all value functions. The value function list must therefore already be initialized,
and the V-Functions must be in the same order as they were when the Q-Function was saved.*/
	virtual void loadData(FILE *file);
/// Prints the values: calls saveValues with stdout as the output stream
	virtual void printValues();

/// Returns the value function assigned to the given action
/** NOTE(review): what is returned for an action that has no value function assigned is not visible
in this header -- confirm against the implementation. */
	CAbstractVFunction *getVFunction(CAction *action);
/// Returns the index-th value function (i.e. the value function assigned to the index-th action).
/** NOTE(review): presumably index must lie in [0, getNumVFunctions()) -- range checking is not visible here, confirm. */
	CAbstractVFunction *getVFunction(int index);
/// Sets the value function of the specified action.
/**
If bDeleteOld is true (the default), the old value function is deleted.
*/
	void setVFunction(CAction *action, CAbstractVFunction *vfunction, bool bDeleteOld = true);
/// Sets the value function of the index-th action.
/**
If bDeleteOld is true (the default), the old value function is deleted.
*/
	void setVFunction(int index, CAbstractVFunction *vfunction, bool bDeleteOld = true);
/// Returns the number of V-Functions, which is always the number of actions
	int getNumVFunctions();

	/// Returns the standard ETraces object (a CAbstractQETraces) for this Q-Function.
	/** NOTE(review): whether a new object is allocated on each call, and who owns the returned pointer,
	is not visible in this header -- confirm against the implementation. */
	virtual CAbstractQETraces *getStandardETraces();

	//virtual CStateProperties *getGradientCalculator(CAction *action);
	/// Gradient interface for the given state collection, action and action data.
	/** The non-const CFeatureList pointer `gradient` is presumably the output the gradient is written to
	-- TODO confirm against the implementation. */
	virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient);

	/// Returns the number of weights of this Q-Function.
	/** NOTE(review): presumably the total over all V-Functions -- confirm against the implementation. */
	virtual int getNumWeights();

	/// Weight getter taking a caller-supplied rlt_real array.
	/** Presumably fills `weights` with the current weights, in which case the array would need room for
	getNumWeights() entries -- TODO confirm against the implementation. */
	virtual void getWeights(rlt_real *weights);
	/// Weight setter taking a caller-supplied rlt_real array.
	/** Presumably reads getNumWeights() entries from `weights`, in the same order getWeights uses
	-- TODO confirm against the implementation. */
	virtual void setWeights(rlt_real *weights);

	/// Resets the data of this Q-Function.
	/** NOTE(review): declared without `virtual` here, unlike saveData/loadData; it is only virtual if a base
	class already declares it so -- confirm. What exactly is reset is not visible in this header. */
	void resetData();
};

/// Converts a V-Function and a model to a Q-Function
/** The class calculates the Q-Value by combining the information from a model and a feature V-Function.
So the class obviously only provides the functions for getting a Q-Value. The Q-Value of an action is calculated
the following way: Q(s,a) = sum_{s'} P(s'|s,a) * (R(s,a,s') + gamma * V(s')).
<p>
This class is used for the policies if you only have a V-Function (e.g. model based learning), since policies can only handle 
Q-Functions.
<p>
NOTE(review): the formula uses gamma, but the constructor declared here takes no gamma argument; where the
discount factor comes from is not visible in this header -- confirm in the implementation.
*/

class CQFunctionFromStochasticModel :  public CAbstractQFunction, public CStateObject
{
protected:

/// The given V-Function
	CFeatureVFunction *vfunction;
/// The model
	CAbstractFeatureStochasticModel *model;
/// Discretizer used by the V-Function
	CStateProperties *discretizer;
/// Feature reward function for the learning problem.
	CFeatureRewardFunction *rewardfunction;

/// State buffer
	CState *discState;

public:
/// Creates a new Q-Function-from-V-Function object for the given V-Function and the given model; the discretizer is taken from the V-Function.
	CQFunctionFromStochasticModel(CFeatureVFunction *vfunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardfunction);

	virtual ~CQFunctionFromStochasticModel();

// Writes the Action-Values in the actionValues Array.
//	void getActionValues(CStateCollection *state, rlt_real *actionValues, CActionSet *actions);

/// Does nothing (empty body): this class only provides the functions for getting a Q-Value
	virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL) {};
/// Does nothing (empty body): this class only provides the functions for getting a Q-Value
	virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {}; 

/// getValue function for state collections
/** Calls the getValue function for the specific state (retrieved from the collection by the discretizer)*/
	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);

/// getValue function for states
/**
Decomposes the feature state into its discrete state variables and calls the getValue(int, CAction *) function. The results are
weighted by the feature factors and summed up. For discrete states obviously just the getValue(int, CAction *) with the discrete state
number is called.
*/
	virtual rlt_real getValue(CState *featState, CAction *action, CActionData *data = NULL);
/// Calculates the Action-Value for a specific discrete state number
/**
The Q-Value of an action is calculated the following way: Q(s,a) = sum_{s'} P(s'|s,a) * (R(s,a,s') + gamma * V(s')).
*/
	virtual rlt_real getValue(int feature, CAction *action, CActionData *data = NULL);

/// Always returns NULL: no ETraces object is provided by this class
	virtual CAbstractQETraces *getStandardETraces() {return NULL;};
};



/// Composed feature Q-Function
/**
The class CFeatureQFunction is a composed Q-Function which consists of feature value functions with the same feature calculator (or the same discretizer).
Very often this is all you need for learning, and CFeatureQFunction objects are easier to create. It is also needed because some learning algorithms expect the value functions to all be of the same kind (prioritized sweeping).
<p>
The class also provides methods for manipulating the features directly, without accessing the value functions explicitly. 
<p>
For the creation of a feature Q-Function object you only need a state properties object of a feature calculator or discretizer. 
<p>
In addition you have the possibility to initialise your Q-Function with the values from a V-Function combined with a theoretical model (see CQFunctionFromStochasticModel). The difference to CQFunctionFromStochasticModel
is that all Action-Values for all states get calculated and stored in the value function. With CQFunctionFromStochasticModel the value of the state gets calculated directly by the model and V-Function.
So you can convert a value function to a Q-Function.
<p>
NOTE(review): the non-virtual updateValue/setValue/getValue overloads declared below hide any base-class overloads of the
same names for calls made through a CFeatureQFunction (no using-declaration is present in this class) -- confirm this is intended.
*/

class CFeatureQFunction : public CQFunction
{
protected:
/// Discretizer used for retrieving the state from the state collection
	CStateModifier *discretizer;
/// Number of features of the value functions
	unsigned int features;

/// List of CFeatureVFunction pointers (presumably the same V-Functions created by init(), one per action -- TODO confirm)
	std::list<CFeatureVFunction *> *featureVFunctions;

/// Initializes the V-Function list with CFeatureVFunction objects
	virtual void init();

/// Initializes the V-Functions with the values calculated by a V-Function, a theoretical model and a reward function
/**
The action values are calculated for each action in each state by the function CDynamicProgramming::getActionValue and then they are stored
in the V-Functions.
*/
	void initVFunctions(CFeatureVFunction *vfunction, CAbstractFeatureStochasticModel *model,  CFeatureRewardFunction *rewardFunction, rlt_real gamma);

public:
/// Creates a Q-Function with the specified discretizer.
	CFeatureQFunction(CActionSet *actions, CStateModifier *discretizer);
/// Initializes the value functions with the values coming from a V-Function combined with a model and a reward function
/**
The action set is taken from the model, the discretizer from the feature V-Function. The V-Functions are initialized by the function
initVFunctions.
*/
	CFeatureQFunction(CFeatureVFunction *vfunction, CAbstractFeatureStochasticModel *model,  CFeatureRewardFunction *rewardFunction,rlt_real gamma);
	
	virtual ~CFeatureQFunction();
	
/// Calls updateValue from the specified V-Function
/**
Allows direct feature manipulation without the need of state objects.
Note that the parameter named `state` is a CFeature pointer here, not a state object.
*/
	void updateValue(CFeature *state, CAction *action, rlt_real td, CActionData *data = NULL);
/// Calls setValue from the specified V-Function
/**
Allows direct feature manipulation without the need of state objects.
The int parameter named `state` is presumably the feature index, as in getValue(int feature, ...) -- TODO confirm.
*/
	void setValue(int state, CAction *action, rlt_real qValue, CActionData *data = NULL); 
/// Returns the value of a feature for a specific action.
/**
Allows direct feature manipulation without the need of state objects.
*/
	rlt_real getValue(int feature, CAction *action, CActionData *data = NULL);

/// Sets the feature calculator (a CStateModifier; the parameter is named discretizer like the member above)
	void setFeatureCalculator(CStateModifier *discretizer);
/// Returns the feature calculator (a CStateModifier)
	CStateModifier *getFeatureCalculator();


/// Returns the number of features (presumably the `features` member -- TODO confirm)
	int getNumFeatures();

/// Saves the values of the actions for each state in a readable tabular form
/** Tool for debugging and tracing the learning results*/
	void saveFeatureActionValueTable(FILE *stream);
/// Saves the index of the best action for each state in a readable tabular form
/** Tool for debugging and tracing the learning results*/
	void saveFeatureActionTable(FILE *stream);
};

/// Q-Function composed of several other Q-Functions
/**
Holds a list of CAbstractQFunction pointers (qFunctions) to which further Q-Functions can be added with addQFunction.
NOTE(review): how a call is dispatched to the individual Q-Functions (e.g. by the action's owner), and whether the
listed Q-Functions are owned and deleted by this class, is not visible in this header -- confirm against the implementation.
*/
class CComposedQFunction : public  CGradientQFunction
{
protected:
/// The Q-Functions this object is composed of
	std::list<CAbstractQFunction *> *qFunctions;

/// Returns the weights offset for the given action (presumably the position of that action's Q-Function weights within the combined weight vector -- TODO confirm)
	virtual int getWeightsOffset(CAction *action);
/// Weight update taking a feature list (presumably forwarded to the individual Q-Functions -- TODO confirm)
	virtual void updateWeights(CFeatureList *features);

public:
	CComposedQFunction();
	virtual ~CComposedQFunction();

/// Saves the data to the given file (presumably for every Q-Function in the list -- TODO confirm)
	virtual void saveData(FILE *file);
/// Loads the data from the given file (presumably in the same order as it was saved -- TODO confirm)
	virtual void loadData(FILE *file);
/// Prints the values
	virtual void printValues();

/// Statistics interface for the given state collection, action and action set; `statistics` is presumably the output object -- TODO confirm
	virtual void getStatistics(CStateCollection *state, CAction *action, CActionSet *actions, CActionStatistics* statistics);

	/// Interface for updating a Q-Value
	virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL);
	/// Interface for setting a Q-Value
	virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL); 
	/// Interface for getting a Q-Value
	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);

/// Adds a Q-Function to the composition
	void addQFunction(CAbstractQFunction *qFunction);

/// Returns the list of Q-Functions
	std::list<CAbstractQFunction *> *getQFunctions();
/// Returns the number of Q-Functions
	int getNumQFunctions();

/// Returns the standard ETraces object (a CAbstractQETraces); ownership of the returned pointer is not visible here -- confirm
	virtual CAbstractQETraces *getStandardETraces();

	//virtual CStateProperties *getGradientCalculator(CAction *action);

/// Gradient interface; the non-const CFeatureList pointer `gradient` is presumably the output -- TODO confirm
	virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient);


/// Returns the number of weights (presumably the total over all contained Q-Functions -- TODO confirm)
	virtual int getNumWeights();
/// Weight getter taking a caller-supplied array (presumably getNumWeights() entries are written -- TODO confirm)
	virtual void getWeights(rlt_real *weights);
/// Weight setter taking a caller-supplied array (presumably getNumWeights() entries are read -- TODO confirm)
	virtual void setWeights(rlt_real *weights);

/// Resets the data of this Q-Function (what exactly is reset is not visible in this header)
	virtual void resetData();
};

/*
class CQTable : public CFeatureQFunction
{
	CAbstractStateDiscretizer *discretizer;
	virtual void init(int states);

public:
	CQTable(CActionSet *actions, CAbstractStateDiscretizer *state);
	
	~CQTable();
	
	void setDiscretizer(CAbstractStateDiscretizer *discretizer);
	CAbstractStateDiscretizer *getDiscretizer();
	
	int getNumStates();
};*/
#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -