// File: ccontinuousactions.h
// (code-viewer page residue: "Font size:" control label)
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef __CCONTINUOUSRL_H
#define __CCONTINUOUSRL_H
#include "cqfunction.h"
#include "caction.h"
#include "cdynamicprogramming.h"
#include "ctheoreticalmodel.h"
#include "cutility.h"
#include "cpolicies.h"
#include "ril_debug.h"
// Forward declarations of the Q-function types; within this header they are
// only referenced through pointers.
class CAbstractQFunction;
class CQFunction;
// Forward declaration: the class is defined further down in this header but
// is already referenced by pointer in CContinuousActionData.
class CContinuousActionProperties;

/// Action data holding the continuous values of a CContinuousAction.
/**
Derives from both CMultiStepActionData and CMyVector and additionally exposes
a raw `actionValues` pointer and a `properties` pointer. All member functions
are defined in the implementation file; the notes below are based on the
declarations only.
@see CActionData
*/
class CContinuousActionData : public CMultiStepActionData, public CMyVector
{
public:
    CContinuousActionData(CContinuousActionProperties *properties);

    /// Declared virtual because the class has virtual members and may be
    /// destroyed through a base-class pointer.
    virtual ~CContinuousActionData();

    /// Properties associated with this data (presumably the constructor argument).
    CContinuousActionProperties *properties;

    /// Raw pointer to the action values.
    /// NOTE(review): ownership and its relation to the CMyVector storage are
    /// not visible in this header — confirm in the implementation.
    rlt_real *actionValues;

    /// Write / read the value of the single action dimension `dim`.
    virtual void setActionValue(int dim, rlt_real value);
    rlt_real getActionValue(int dim);

    /// Access to all action values as a raw array.
    rlt_real *getActionValues();

    /// NOTE(review): presumably normalizes the values against the limits in
    /// `properties` — confirm in the implementation.
    void normalizeAction();

    /// Text and binary (de)serialization using a C stdio stream.
    virtual void saveASCII(FILE *stream);
    virtual void loadASCII(FILE *stream);
    virtual void saveBIN(FILE *stream);
    virtual void loadBIN(FILE *stream);

    /// Takes over the content of another action data object.
    virtual void setData(CActionData *actionData);
};
/// Description of a continuous action space.
/**
Stores the number of action values together with one minimum and one maximum
array (see the protected members). The accessors are defined in the
implementation file.
*/
class CContinuousActionProperties
{
public:
    CContinuousActionProperties(int numActionValues);
    ~CContinuousActionProperties();

    /// Number of action values (dimensions).
    unsigned int getNumActionValues();

    /// Lower / upper limit getters for dimension `dim`.
    rlt_real getMinActionValue(int dim);
    rlt_real getMaxActionValue(int dim);

    /// Lower / upper limit setters for dimension `dim`.
    void setMinActionValue(int dim, rlt_real value);
    void setMaxActionValue(int dim, rlt_real value);

protected:
    unsigned int numActionValues;
    // NOTE(review): presumably arrays with numActionValues entries owned by
    // this object — confirm against the constructor/destructor.
    rlt_real *minValues;
    rlt_real *maxValues;
};
/// Primitive action carrying continuous action values.
/**
Keeps a CContinuousActionData object and the CContinuousActionProperties of
the action space. Member functions without an inline body are defined in the
implementation file.
*/
class CContinuousAction : public CPrimitiveAction
{
protected:
    CContinuousActionData *continuousActionData;
    CContinuousActionProperties *properties;

    /// Protected constructor taking an explicit action data object; only
    /// reachable from derived classes.
    CContinuousAction(CContinuousActionProperties *properties, CContinuousActionData *actionData);

public:
    CContinuousAction(CContinuousActionProperties *properties);

    /// Declared virtual because the class is polymorphic and may be destroyed
    /// through a base-class pointer.
    virtual ~CContinuousAction();

    CContinuousActionProperties *getContinuousActionProperties();

    /// Returns the stored action data pointer.
    virtual CContinuousActionData *getContinuousActionData() { return continuousActionData; }

    /// NOTE(review): presumably allocates a fresh CContinuousActionData that
    /// the caller owns — confirm in the implementation.
    virtual CActionData *getNewActionData();

    /// Value of action dimension `dim`.
    rlt_real getActionValue(int dim);
    int getNumDimensions();

    virtual void loadActionData(CActionData *data);

    /// Comparison helpers; the exact comparison rules are defined in the
    /// implementation file.
    virtual bool equals(CAction *action);
    virtual bool isSameAction(CAction *action, CActionData *data);
};
// Integer mode constants; presumably the values accepted by
// CContinuousActionController::setRandomControllerMode() (the controller's
// constructor defaults its mode to 1).
// NOTE(review): the exact meaning of each mode is defined in the
// implementation file — confirm before relying on the names.
#define NO_RANDOM_CONTROLLER 0
#define EXTERN_RANDOM_CONTROLLER 1
#define INTERN_RANDOM_CONTROLLER 2
/// Abstract agent controller that produces continuous actions.
/**
Derived classes must implement getNextContinuousAction(). The controller
references the CContinuousAction it works on and can be combined with a
second controller (`randomController`) according to `randomControllerMode`.
Functions without an inline body are defined in the implementation file.
*/
class CContinuousActionController : public CAgentController
{
protected:
    CContinuousAction *contAction;
    CContinuousActionController *randomController;
    CContinuousActionData *noise;
    /// Presumably one of NO_RANDOM_CONTROLLER, EXTERN_RANDOM_CONTROLLER or
    /// INTERN_RANDOM_CONTROLLER.
    int randomControllerMode;

public:
    /// The default mode is EXTERN_RANDOM_CONTROLLER (numeric value 1).
    CContinuousActionController(CContinuousAction *contAction, int randomControllerMode = EXTERN_RANDOM_CONTROLLER);

    /// Declared virtual because the class is an abstract polymorphic base and
    /// may be destroyed through a base-class pointer.
    virtual ~CContinuousActionController();

    virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);

    /// Pure virtual hook for derived classes; `action` is the output object.
    virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action) = 0;

    /// Forwards to contAction->getContinuousActionProperties().
    virtual CContinuousActionProperties *getContinuousActionProperties() { return contAction->getContinuousActionProperties(); }

    /// Returns the stored continuous action pointer.
    virtual CContinuousAction *getContinuousAction() { return contAction; }

    virtual void setRandomController(CContinuousActionController *randomController);
    virtual CContinuousActionController *getRandomController();

    void setRandomControllerMode(int randomControllerMode);
    int getRandomControllerMode();

    /// NOTE(review): presumably writes the exploration noise for `action`
    /// into `noise` — confirm in the implementation.
    virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
};
/// Continuous action that refers to another CContinuousAction.
/**
Constructed from a CContinuousAction pointer and an array of action values.
loadActionData() and setData() are overridden with bodies that do not modify
the object. Functions without an inline body are defined in the
implementation file.
*/
class CStaticContinuousAction : public CContinuousAction
{
protected:
    CContinuousAction *contAction;

public:
    CStaticContinuousAction(CContinuousAction *properties, rlt_real *actionValues);

    /// Declared virtual because the class is polymorphic and may be destroyed
    /// through a base-class pointer.
    virtual ~CStaticContinuousAction();

    /// NOTE(review): presumably copies this action's values into `contAction`
    /// — confirm in the implementation.
    virtual void setContinuousAction(CContinuousActionData *contAction);

    /// NOTE(review): presumably adds this action's values, scaled by `factor`,
    /// to `contAction` — confirm in the implementation.
    virtual void addToContinuousAction(CContinuousActionData *contAction, rlt_real factor);

    CContinuousAction *getContinuousAction();

    /// Intentionally empty: the given action data is ignored.
    virtual void loadActionData(CActionData *actionData) {}

    /// NOTE(review): assert(true) never fires, so this is a no-op; if the
    /// intent was to forbid calling setData() on a static action,
    /// assert(false) would be required — confirm before changing.
    virtual void setData(CActionData *) { assert(true); }

    virtual bool equals(CAction *action);
    virtual bool isSameAction(CAction *action, CActionData *data);
};
/// Abstract static continuous action that provides an action factor.
/**
Adds the pure virtual getActionFactor() to CStaticContinuousAction; the
meaning of the factor is defined by the derived classes.
*/
class CLinearFAContinuousAction : public CStaticContinuousAction
{
public:
    CLinearFAContinuousAction(CContinuousAction *properties, rlt_real *actionValues);

    /// Pure virtual: factor of this action for the given continuous action data.
    virtual rlt_real getActionFactor(CContinuousActionData *contAction) = 0;
};
/// CLinearFAContinuousAction constructed from an RBF center and sigma array.
/**
Implements getActionFactor(); the computation is defined in the
implementation file.
*/
class CContinuousRBFAction : public CLinearFAContinuousAction
{
protected:
    // NOTE(review): presumably one sigma per action dimension; whether the
    // array is copied or only referenced is not visible here — confirm.
    rlt_real *rbfSigma;

public:
    CContinuousRBFAction(CContinuousAction *properties, rlt_real *rbfCenter, rlt_real *rbfSigma);

    /// Declared virtual because the class is polymorphic and may be destroyed
    /// through a base-class pointer.
    virtual ~CContinuousRBFAction();

    virtual rlt_real getActionFactor(CContinuousActionData *contAction);
};
/// Helper relating a continuous action to a set of actions via action factors.
/**
Holds a CActionSet and the CContinuousActionProperties of the action space.
All functions are defined in the implementation file; the notes below are
based on the declarations only.
*/
class CContinuousActionLinearFA
{
protected:
    CActionSet *contActions;
    CContinuousActionProperties *actionProperties;

public:
    CContinuousActionLinearFA(CActionSet *contActions, CContinuousActionProperties *properties);
    ~CContinuousActionLinearFA();

    /// Fills `actionFactors` for the given action.
    /// NOTE(review): presumably one factor per entry of `contActions` — confirm.
    void getActionFactors(CContinuousActionData *action, rlt_real *actionFactors);

    /// Writes a continuous action into `action`, selected by `index`.
    void getContinuousAction(unsigned int index, CContinuousActionData *action);

    /// Writes a continuous action into `action`, derived from `actionFactors`.
    void getContinuousAction(CContinuousActionData *action, rlt_real *actionFactors);

    int getNumContinuousActionFA();
};
// Forward declaration; the class itself is defined later in this header.
class CCALinearFAQETraces;
/// Abstract gradient Q-function for a single continuous action.
/**
Declares the CAction-based interface (getMax, updateValue, setValue, getValue,
getGradient) next to pure virtual counterparts that take a
CContinuousActionData directly (the *CA* functions), which derived classes
must implement.
NOTE(review): presumably the CAction-based functions forward to the *CA*
counterparts — confirm in the implementation.
*/
class CContinuousActionQFunction : public CGradientQFunction
{
protected:
    CContinuousAction *contAction;

public:
    CContinuousActionQFunction(CContinuousAction *contAction);

    /// Declared virtual because the class is an abstract polymorphic base and
    /// may be destroyed through a base-class pointer.
    virtual ~CContinuousActionQFunction();

    virtual CAction *getMax(CStateCollection *, CActionSet *availableActions, CActionDataSet *actionDatas);

    /// Pure virtual: writes the best continuous action for `state` into `actionData`.
    virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData) = 0;

    virtual void updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data = NULL);

    /// Sets the value of the value function assigned to the given action.
    /** Calls the setValue function of the specified value function.
    */
    virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL);

    /// Returns the value of the value function assigned to the given action.
    /** Returns the value of the getValue function of the specified value function.
    */
    virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);

    /// Pure virtual value interface working directly on continuous action data.
    virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real td) = 0;
    virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real qValue) = 0;
    virtual rlt_real getCAValue(CStateCollection *state, CContinuousActionData *data) = 0;

    virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient);

    /// Pure virtual: gradient for the given continuous action data, written to `gradient`.
    virtual void getCAGradient(CStateCollection *state, CContinuousActionData *data, CFeatureList *gradient) = 0;

    /// Returns the stored continuous action pointer.
    CContinuousAction *getContinuousActionObject() { return contAction; }

    /// Pure virtual weight access for derived classes.
    virtual int getNumWeights() = 0;
    virtual void getWeights(rlt_real *parameters) = 0;
    virtual void setWeights(rlt_real *parameters) = 0;

    //virtual CAbstractQETraces* getStandardETraces() = 0;
};
/// Continuous-action Q-function built on top of a CQFunction.
/**
Combines CContinuousActionQFunction with CContinuousActionLinearFA and
implements the pure virtual *CA* interface. Functions without an inline body
are defined in the implementation file.
*/
class CCALinearFAQFunction : public CContinuousActionQFunction, public CContinuousActionLinearFA
{
protected:
    // NOTE(review): presumably scratch buffers used by the member functions;
    // sizes and ownership are not visible here — confirm.
    rlt_real *actionFactors;
    rlt_real *CAactionValues;

    CQFunction *qFunction;
    CFeatureList *tempGradient;

    virtual void updateWeights(CFeatureList *features);

public:
    CCALinearFAQFunction(CQFunction *qFunction, CContinuousAction *returnAction);

    /// Declared virtual because the class is polymorphic and may be destroyed
    /// through a base-class pointer.
    virtual ~CCALinearFAQFunction();

    virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData);

    virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real td);
    virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real qValue);
    virtual rlt_real getCAValue(CStateCollection *state, CContinuousActionData *data);

    CQFunction *getQFunctionForCA();

    virtual CAbstractQETraces* getStandardETraces();

    virtual void getCAGradient(CStateCollection *state, CContinuousActionData *action, CFeatureList *gradient);

    virtual int getNumWeights();
    virtual void getWeights(rlt_real *weights);
    virtual void setWeights(rlt_real *weights);

    /// Always returns 0, independent of `action`.
    virtual int getWeightsOffset(CAction *action) { return 0; }
};
/// Q-eligibility-traces class for a CCALinearFAQFunction.
/**
Extends CQETraces and overrides addETrace(); the behavior is defined in the
implementation file.
*/
class CCALinearFAQETraces : public CQETraces
{
protected:
    // NOTE(review): presumably a scratch buffer for action factors — confirm.
    rlt_real *actionFactors;
    CCALinearFAQFunction *contQFunc;

public:
    CCALinearFAQETraces(CCALinearFAQFunction *qfunction);
    virtual ~CCALinearFAQETraces();

    /// `factor` defaults to 1.0 and `data` to NULL.
    virtual void addETrace(CStateCollection *State, CAction *action, rlt_real factor = 1.0, CActionData *data = NULL);
};
// Forward declaration; only used through a pointer by CContinuousActionPolicy below.
class CActionDistribution;
/// Continuous action controller configured with an action distribution, a
/// Q-function and a set of static continuous actions.
/**
Implements getNextContinuousAction(); how the constructor arguments are
combined is defined in the implementation file.
*/
class CContinuousActionPolicy : public CContinuousActionController
{
protected:
    CActionDistribution *distribution;
    // NOTE(review): presumably a scratch buffer — size and ownership are not
    // visible here; confirm.
    rlt_real *actionValues;
    CAbstractQFunction *continuousActionQFunc;
    CActionSet *continuousStaticActions;
    rlt_real maximumDistance;

public:
    CContinuousActionPolicy(CContinuousAction *contAction,
                            CActionDistribution *distribution,
                            CAbstractQFunction *continuousActionQFunc,
                            CActionSet *continuousStaticActions,
                            rlt_real maximumDistance);
    virtual ~CContinuousActionPolicy();

    virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
};
/// Continuous action controller that is also a CSemiMDPListener.
/**
Constructed with `sigma` and `alpha` and keeps a `lastNoise` vector.
NOTE(review): presumably generates random (noise) actions, with newEpisode()
resetting the stored noise — confirm in the implementation.
*/
class CContinuousActionRandomPolicy : public CContinuousActionController, public CSemiMDPListener
{
protected:
    CMyVector *lastNoise;

public:
    CContinuousActionRandomPolicy(CContinuousAction *action, rlt_real sigma, rlt_real alpha);
    virtual ~CContinuousActionRandomPolicy();

    /// Listener callback for the start of a new episode.
    virtual void newEpisode();

    virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
};
/// Continuous action controller composed of several weighted controllers.
/**
Keeps a list of CContinuousActionController pointers and a weight per
controller.
NOTE(review): presumably getNextContinuousAction() produces the weighted sum
of the registered controllers' actions — confirm in the implementation.
*/
class CContinuousActionAddController : public CContinuousActionController
{
protected:
    std::list<CContinuousActionController *> *controllers;
    std::map<CContinuousActionController *, rlt_real> *controllerWeights;
    CMyVector *actionValues;

public:
    CContinuousActionAddController(CContinuousAction *action);

    /// Declared virtual because the class is polymorphic and may be destroyed
    /// through a base-class pointer.
    virtual ~CContinuousActionAddController();

    virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);

    /// Registers `controller`; `weight` defaults to 1.0.
    void addContinuousActionController(CContinuousActionController *controller, rlt_real weight = 1.0);

    void setControllerWeight(CContinuousActionController *controller, rlt_real weight);
    rlt_real getControllerWeight(CContinuousActionController *controller);
};
#endif
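// Code-viewer page residue (not part of the header) — keyboard shortcuts:
//   copy code: Ctrl + C
//   search code: Ctrl + F
//   full-screen mode: F11
//   toggle theme: Ctrl + Shift + D
//   show shortcuts: ?
//   increase font size: Ctrl + =
//   decrease font size: Ctrl + -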