// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef CRIABSTRACTQFUNCTION_H
#define CRIABSTRACTQFUNCTION_H

#include <stdio.h>
#include "caction.h"
#include "cactionstatistics.h"
#include "cenvironmentmodel.h"
#include "cvfunction.h"
#include "cepisode.h"
#include "crewardfunction.h"
#include "cqetraces.h"
#include "cgradientfunction.h"
#include "ril_debug.h"

class CAbstractFeatureStochasticModel;
class CAbstractQETraces;

#define GRADIENTQFUNCTION 1
#define CONTINUOUSACTIONQFUNCTION 2


class CGradientQETraces;

/// Interface for all Q-Functions
/** Q-Functions depend on the state and on the action that can be chosen in the current state, so the policy is
able to decide which action is best (usually the action with the highest Q-Value). CAbstractQFunction is the base
class of all Q-Functions. It just provides the interface for getting, setting and updating (adding a value to the
current value) Q-Values. These functions are again getValue, setValue and updateValue, but now an action object is
always passed as an additional parameter. The class maintains an action set of all actions which are stored in the
Q-Function. In addition the class provides a function to retrieve the action with the best Q-Value (getMax), a
function to retrieve the value of this best action (getMaxValue), and a function which writes all Q-Values of a
state (i.e. for all actions) into an rlt_real array (getActionValues).
\par
The class also maintains a gamma factor, which should be the same as the gamma factor of its value functions.
In addition the interface provides functions for storing and loading the values of the Q-Function (saveData and
loadData). An illustrative usage sketch follows the class declaration below.
@see CQFunction
*/

class CAbstractQFunction : public CActionObject, virtual public CLearnDataObject
{
protected:
	int type;
public:
	bool mayDiverge;

	int getType();
	bool isType(int type);
	void addType(int Type);


/// Creates a QFunction, handling Q-Values for all actions in the action set.
	CAbstractQFunction(CActionSet *actions);
	virtual ~CAbstractQFunction();

	virtual void saveData(FILE *file);
	virtual void loadData(FILE *file);
	virtual void printValues (){};

	virtual void resetData() {};

/// Writes the Q-Values of the specified actions into the actionValues array.
/** The size of the array has to be at least the size of the given action set.*/
	void getActionValues(CStateCollection *state, CActionSet *actions, rlt_real *actionValues, CActionDataSet *data = NULL);

/// Calculates the best action from a given action set.
/** Returns the best action from the availableActions action set. If several actions share the same
best Q-Value, the first action in the action set which has this value is chosen.*/
	virtual CAction* getMax(CStateCollection *state, CActionSet *availableActions, CActionDataSet *data = NULL);
/// Returns the best action value from a given action set.
/** Returns the best action value from the availableActions action set.*/
	virtual rlt_real getMaxValue(CStateCollection *state, CActionSet *availableActions);
/// Returns the statistics for a given action.
    virtual void getStatistics(CStateCollection *state, CAction *action, CActionSet *actions, CActionStatistics* statistics);
	
/// Interface for updating a Q-Value
	virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL) {};
/// Interface for setting a Q-Value
	virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL){}; 
/// Interface for getting a Q-Value
	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL) = 0;

	virtual CAbstractQETraces *getStandardETraces() = 0;

protected:
};
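
// Usage sketch (illustrative addition, not part of the original RL Toolbox header):
// shows how a learning algorithm might use the CAbstractQFunction interface
// declared above - compute a one-step Q-Learning style temporal difference error
// with getValue/getMaxValue, apply it with updateValue and select the greedy
// action with getMax. The helper name, the explicit gamma parameter and the
// assumption that any learning rate is handled inside updateValue are
// illustrative choices, not Toolbox API.
inline CAction *qLearningStyleUpdate(CAbstractQFunction *qFunction,
                                     CStateCollection *state, CAction *action,
                                     rlt_real reward, CStateCollection *nextState,
                                     CActionSet *availableActions, rlt_real gamma)
{
	// temporal difference error: r + gamma * max_a' Q(s', a') - Q(s, a)
	rlt_real maxNextValue = qFunction->getMaxValue(nextState, availableActions);
	rlt_real td = reward + gamma * maxNextValue - qFunction->getValue(state, action);

	// updateValue adds the correction to the stored Q-Value of (s, a)
	qFunction->updateValue(state, action, td);

	// return the greedy action for the next state (the first action with the
	// highest Q-Value, as documented for getMax)
	return qFunction->getMax(nextState, availableActions);
}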


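/// Q-Function representing a weighted sum of other Q-Functions
/** (Description added by the editor, inferred from the interface below.) The class maintains a map of
Q-Functions and their weighting factors. Q-Functions are added together with a factor via addQFunction; getValue
then returns the weighted sum of the Q-Values of all added Q-Functions. The factors can be queried and changed
with getQFunctionFactor/setQFunctionFactor and can be rescaled with normFactors.
*/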
class CQFunctionSum : public CAbstractQFunction
{
protected:
	std::map<CAbstractQFunction *, rlt_real> *qFunctions;
public:
	CQFunctionSum(CActionSet *actions);
	~CQFunctionSum();


	/// Interface for getting a Q-Value
	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);

	virtual CAbstractQETraces *getStandardETraces() {return NULL;};

	rlt_real getQFunctionFactor(CAbstractQFunction *qFunction);
	void setQFunctionFactor(CAbstractQFunction *qFunction, rlt_real factor);

	void addQFunction(CAbstractQFunction *qFunction, rlt_real factor);
	void removeQFunction(CAbstractQFunction *qFunction);


	void normFactors(rlt_real factor);

};
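
// Usage sketch (illustrative addition, not part of the original RL Toolbox header):
// combines two Q-Functions into a weighted sum with CQFunctionSum. The factor
// values are arbitrary examples; qFunction1/qFunction2 stand for any concrete
// CAbstractQFunction objects defined for the same action set. The exact
// semantics of normFactors is not visible in this header and is only hinted at
// in the comment below.
inline CQFunctionSum *makeWeightedQFunctionSum(CActionSet *actions,
                                               CAbstractQFunction *qFunction1,
                                               CAbstractQFunction *qFunction2)
{
	CQFunctionSum *qFunctionSum = new CQFunctionSum(actions);

	// add both Q-Functions with their weighting factors
	qFunctionSum->addQFunction(qFunction1, 0.7);
	qFunctionSum->addQFunction(qFunction2, 0.3);

	// a factor can be changed later ...
	qFunctionSum->setQFunctionFactor(qFunction2, 0.5);
	// ... and the factors can be rescaled (presumably normalized with respect
	// to the given value)
	qFunctionSum->normFactors(1.0);

	return qFunctionSum;
}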

/// This exception is thrown if a Q-Function has become divergent
/** 
There can be many reasons why a Q-Function becomes divergent, for example a learning rate that is too high.
*/
class CDivergentQFunctionException : public CMyException
{
protected:
	virtual string getInnerErrorMsg();
public:
	string qFunctionName;
	CAbstractQFunction *qFunction;
	CState *state;
	rlt_real value;

	CDivergentQFunctionException(string qFunctionName, CAbstractQFunction *qFunction, CState *state, rlt_real value);
	virtual ~CDivergentQFunctionException(){};
};
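
// Usage sketch (illustrative addition, not part of the original RL Toolbox header):
// illustrates handling CDivergentQFunctionException around an update. Whether
// updateValue actually throws this exception, and whether the Toolbox throws it
// by value or by pointer, is not visible in this header; catching by reference
// is an assumption made for this sketch. The public members used below are
// taken from the declaration above.
inline bool updateAndReportDivergence(CAbstractQFunction *qFunction,
                                      CStateCollection *state, CAction *action,
                                      rlt_real td)
{
	try
	{
		// an update that may drive the Q-Function towards divergence,
		// e.g. when the learning rate is too high
		qFunction->updateValue(state, action, td);
	}
	catch (CDivergentQFunctionException &exception)
	{
		printf("Q-Function %s diverged (value %f)\n",
		       exception.qFunctionName.c_str(), (double) exception.value);
		return false;
	}
	return true;
}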

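/// Q-Function with a parameterized, differentiable representation
/** (Description added by the editor, inferred from the interface below.) In addition to the CAbstractQFunction
interface, subclasses have to provide the gradient of the Q-Value with respect to the function's weights
(getGradient); the weight handling itself is inherited from CGradientUpdateFunction.
*/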
class CGradientQFunction : public CAbstractQFunction, virtual public CGradientUpdateFunction
{
protected:
	CFeatureList *localGradientQFunctionFeatures;

public:
	CGradientQFunction(CActionSet *actions);
	~CGradientQFunction();

	virtual int getWeightsOffset(CAction *action) {return 0;};

	virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient) = 0;

	/// Interface for updating a Q-Value
	virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL);
	
	virtual void resetData() {CAbstractQFunction::resetData();};
	virtual void loadData(FILE *stream) {CGradientUpdateFunction::loadData(stream);};
	virtual void saveData(FILE *stream) {CGradientUpdateFunction::saveData(stream);};

	virtual CAbstractQETraces *getStandardETraces();
};

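/// Gradient Q-Function wrapper that delays the weight updates of another gradient Q-Function
/** (Description added by the editor, inferred from the interface below.) Values and gradients are taken from
the wrapped Q-Function, while the weight access and the delayed application of the updates are delegated to
CGradientDelayedUpdateFunction.
*/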
class CGradientDelayedUpdateQFunction : public CGradientQFunction, public CGradientDelayedUpdateFunction
{
protected:
	virtual void updateWeights(CFeatureList *dParams) {CGradientDelayedUpdateFunction::updateWeights(dParams);};

	CGradientQFunction *qFunction;
public:
	/// Constructor, takes the gradient Q-Function whose weight updates are to be delayed.
	CGradientDelayedUpdateQFunction(CGradientQFunction *qFunction);
	virtual ~CGradientDelayedUpdateQFunction() {};

	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
	virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientFeatures);

	virtual void resetData() {CGradientDelayedUpdateFunction::resetData();};

	///  Returns the number of weights.
	virtual int getNumWeights(){return CGradientDelayedUpdateFunction::getNumWeights();};

	virtual void getWeights(rlt_real *parameters) {CGradientDelayedUpdateFunction::getWeights(parameters);};
	virtual void setWeights(rlt_real *parameters) {CGradientDelayedUpdateFunction::setWeights(parameters);};

	virtual void loadData(FILE *stream) {CGradientQFunction::loadData(stream);};
	virtual void saveData(FILE *stream) {CGradientQFunction::saveData(stream);};

};

/// Composed Q-Function consisting of V-Functions
/**
For Q-Functions there is one value function for each action of the Q-Function, so it is natural to compose a
Q-Function of value functions. The RL Toolbox gives you the possibility to do this. This has the advantage that you
can choose a separate value function for each action, so it is possible to use different discretizations or even
different kinds of value functions for each action. This possibility is only available for certain algorithms;
e.g. the model based prioritized sweeping algorithm expects a Q-Function consisting of feature value functions with
the same feature calculator (or at least with the same number of features).
The composition of value functions is modeled by the class CQFunction. The class maintains a list of value
functions which has the same size as the action set of the Q-Function. You can assign a specific value function to
each action; this is done with setVFunction.
\par
If one of the functions for accessing the Q-Values is called (getValue, setValue, updateValue), the composed
Q-Function forwards the call to the value function of the specified action. There are two subclasses of CQFunction,
one for feature Q-Functions and one for Q-Tables.
*/
class CQFunction : public CGradientQFunction
{
protected:
/// The V-Functions of the Q-Function
/** Maps each action of the action set to its assigned V-Function.
*/
	std::map<CAction *, CAbstractVFunction *> *vFunctions;

	virtual int getWeightsOffset(CAction *action);
   
	virtual void updateWeights(CFeatureList *features);

public:
/// Creates a composed Q-Function for the given actions
/**
The value function of each action is initialized with NULL, so the V-Functions
have to be set by the user with the function setVFunction.*/
	CQFunction(CActionSet *actions);
	virtual ~CQFunction();

/// Updates the Value of the value function assigned to the given action
/** Calls the updateValue Function of the specified value function.
*/
	virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL);
/// Sets the Value of the value function assigned to the given action
/** Calls the setValue Function of the specified value function.
*/
	virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL); 
/// Returns the Value of the value function assigned to the given action
/** Returns the value of  the getValue Function of the specified value function.
*/
	virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);

/// Updates the Value of the value function assigned to the given action
/** Calls the updateValue Function of the specified value function. The given state must be the state
used by the value function (at least a state that can be used by the value function)
*/
