// cvfunction.h
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef C_ABSTRACTVFUNCTION_H
#define C_ABSTRACTVFUNCTION_H
#include <stdio.h>
#include <map>
#include <string>

#include "cstate.h"
#include "cfeaturefunction.h"
#include "cvetraces.h"
#include "cstatemodifier.h"
#include "cstatecollection.h"
#include "cdiscretizer.h"
#include "clearndataobject.h"
#include "cgradientfunction.h"
#include "cmyexception.h"
#include "ril_debug.h"
// Forward declarations (CAbstractVETraces is used below only as a pointer return type).
class CAbstractVETraces;
class CFeatureQFunction;
class CStochasticPolicy;
/// Magnitude constant related to divergence detection of value functions.
/// NOTE(review): presumably the absolute value above which CDivergentVFunctionException is thrown - confirm in the implementation file.
#define DIVERGENTVFUNCTIONVALUE 1000000
/// Type constants for value functions.
/// NOTE(review): presumably the values passed to CAbstractVFunction::addType() / isType() - confirm in the implementation file.
#define CONTINUOUSVFUNCTION 1
#define GRADIENTVFUNCTION 2
/// Interface representing a Value Function
/**Value functions return for each state the total discounted reward which they expect to get from that state following a specific policy. Usually this is a greedy policy, greedy on the value function.
In the RIL toolbox the Q-Functions are composed of V-Functions, one V-Function for each action, so the value function is the essential part of each learning algorithm.
The kind of value function is irrelevant for most learning algorithms, so it doesn't matter which value functions you use for the Q-Function; the algorithm just works with the interface of its V-Functions.
\par
The class CAbstractVFunction is the interface for all value functions. The interface provides a gamma-value for each value function, serving as discount factor.
The value functions have to implement functions for getting V-Values, setting V-Values and updating V-Values for a specific state. These three functions are:
<ul>
<li> getValue(CState *). The function returns the expected total discounted reward for the given state. </li>
<li> setValue(CState *, rlt_real value). This function is usually used for initialisation of the value function. It has to set the total discounted reward of the state to the specified value as well as possible (for function approximators). </li>
<li> updateValue(CState *, rlt_real td) is the function usually used for learning. It adds the td value to the current value of the function. </li>
</ul>
All these functions have a companion function taking a state collection, which is the one used by the learning algorithms. Each value function therefore maintains its own state properties object to retrieve the required state from the given state collection and call the requested function with that state as parameter.
In the RIL toolbox there are 3 different kinds of value functions:
V-Tables, V-FeatureFunctions and V-Functions using Neural Networks (the Torch toolbox is used for the neural networks). All these
value functions support the possibility to save and load the learned values.
The class is a subclass of CStateObject; with the resulting state properties object the desired state is fetched from the
state collection.
<p>
The class also has a function getStandardETraces to determine which E-Traces should be used. The function has to return a
newly instantiated CAbstractVETraces object for the V-Function, which is used to compose the CQETtraces object. The function returns
CStateVETraces as standard.
*/
class CAbstractVFunction : public CStateObject, public virtual CLearnDataObject
{
protected:
    /// Type information of this value function, queried through getType() and isType().
    /// NOTE(review): presumably a combination of CONTINUOUSVFUNCTION / GRADIENTVFUNCTION - confirm in the implementation.
    int type;

    /// Adds the given type to the type information of this value function.
    void addType(int newType);

public:
    /// NOTE(review): presumably tells whether this value function is allowed to (or can) diverge - confirm in the implementation.
    bool mayDiverge;

    /// Constructor; the properties are needed to fetch the state from a state collection.
    CAbstractVFunction(CStateProperties *properties);
    virtual ~CAbstractVFunction();

    /// Resets the learned data; the default implementation does nothing.
    virtual void resetData() {}

    /// State-collection variant: calls updateValue(CState *state, rlt_real td) with the state assigned to the value function.
    virtual void updateValue(CStateCollection *state, rlt_real td);

    /// State-collection variant: calls setValue(CState *state, rlt_real qValue) with the state assigned to the value function.
    virtual void setValue(CStateCollection *state, rlt_real qValue);

    /// State-collection variant: calls getValue(CState *state) with the state assigned to the value function.
    virtual rlt_real getValue(CStateCollection *state);

    /// Sets the value of the state to the current value + td.
    virtual void updateValue(CState *state, rlt_real td);

    /// Sets the value of the state. The default implementation is empty, so concrete V-Functions have to override it.
    virtual void setValue(CState *state, rlt_real qValue) {}

    /// Returns the value of the state; pure virtual, every concrete V-Function has to implement it.
    virtual rlt_real getValue(CState *state) = 0;

    /// Saves the parameters of the value function to the given file.
    virtual void saveData(FILE *file);

    /// Loads the parameters of the value function from the given file.
    virtual void loadData(FILE *file);

    /// Prints the parameters of the value function; the default implementation prints nothing.
    virtual void printValues() {}

    /// Returns the type information of this value function.
    int getType();

    /// Checks the value function against the given type.
    bool isType(int isT);

    /// Returns a standard VETraces object
    /**
    The function has to return a newly instantiated CAbstractVETraces object, which is used to compose the CQETtraces object.
    The default implementation returns CStateVETraces.
    NOTE(review): since a new object is created on each call, the caller presumably takes ownership - confirm.
    */
    virtual CAbstractVETraces *getStandardETraces();
};
/// Value Function always returning zero
class CZeroVFunction : public CAbstractVFunction
{
public:
    /// Default constructor; no state properties have to be supplied by the caller.
    CZeroVFunction();

    /// Returns the value of the given state (this class is documented as always returning zero).
    virtual rlt_real getValue(CState *state);
};
/// Value function composed of several other value functions, each associated with a factor
/**
NOTE(review): the class name suggests that the value is the factor-weighted sum of the registered value functions - confirm in the implementation.
*/
class CVFunctionSum : public CAbstractVFunction
{
protected:
    /// Registered value functions, each mapped to its factor.
    std::map<CAbstractVFunction *, rlt_real> *vFunctions;

public:
    CVFunctionSum();
    ~CVFunctionSum();

    /// Returns the value for the given state collection.
    virtual rlt_real getValue(CStateCollection *state);

    /// Single-state variant; casts the state to a state collection and forwards to the overload above.
    virtual rlt_real getValue(CState *state)
    {
        return getValue((CStateCollection *) state);
    }

    /// This value function provides no standard E-Traces object; always returns NULL.
    virtual CAbstractVETraces *getStandardETraces()
    {
        return NULL;
    }

    /// Returns the factor associated with the given value function.
    rlt_real getVFunctionFactor(CAbstractVFunction *vFunction);

    /// Sets the factor associated with the given value function.
    void setVFunctionFactor(CAbstractVFunction *vFunction, rlt_real factor);

    /// Registers a value function with the given factor (1.0 if omitted).
    void addVFunction(CAbstractVFunction *vFunction, rlt_real factor = 1.0);

    /// Removes the given value function again.
    void removeVFunction(CAbstractVFunction *vFunction);

    /// Normalizes the factors.
    /// NOTE(review): presumably rescales all factors so that they sum up to the given factor - confirm in the implementation.
    void normFactors(rlt_real factor);
};
/// This exception is thrown if a value function has become divergent
/**
There can be many reasons why a value function becomes divergent, for example a learning rate that is too high. The exception is thrown if the absolute value of a state gets higher than DIVERGENTVFUNCTIONVALUE (1000000). If your value function is really that high, please scale your reward function.
*/
class CDivergentVFunctionException : public CMyException
{
protected:
virtual string getInnerErrorMsg();
public:
string vFunctionName;
CAbstractVFunction *vFunction;
CState *state;
rlt_real value;
CDivergentVFunctionException(string vFunctionName, CAbstractVFunction *vFunction, CState *state, rlt_real value);
virtual ~CDivergentVFunctionException(){};
};
///Interface for all classes that can use gradients for updating
/**
Gradient V-Functions are able to calculate the gradient of the V-Function with respect to the weights in the current state and can also be updated by a gradient object (represented as a CFeatureList object). In the toolbox all gradients are represented as feature lists, where the feature index is the weight index and the feature factor represents the gradient value of that weight. All weights that are not listed in the feature list have a zero gradient.
\par
For the gradient calculation all subclasses have to implement the function getGradient(CStateCollection *state, CFeatureList *gradientFeatures), where the gradient in the current state is calculated and written in the given feature list. The feature list is supposed to be empty.
\par
All gradient-VFunctions implement the interface CGradientUpdateFunction as the interface for updating a gradient function, so additionally the subclasses have to implement the functions:
- updateWeights(CFeatureList *gradient): Update the weights according to the gradient.
- getWeights(rlt_real *parameters), write all weights in the rlt_real array
- setWeights(rlt_real *parameters), set the weights according to the rlt_real array
- resetData(): reset all weights, needed when a new learning process is started
- getNumWeights(): return the number of weights.
As the V-Functions implement the gradient update interface, they can use variable learning rates for different weights (see CAdaptiveEtaCalculator).
*/
class CGradientVFunction : public CAbstractVFunction, virtual public CGradientUpdateFunction
{
protected:
public:
/// constructor, the properties are needed to fetch the state from the state collection.
CGradientVFunction(CStateProperties *properties);
virtual ~CGradientVFunction();
/// Calls updateValue(CState *state, rlt_real td) with the state assigned to the value function
virtual void updateValue(CStateCollection *state, rlt_real td);
/// sets the value of the state to the current value + td
virtual void updateValue(CState *state, rlt_real td);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -