// File: ctransitionfunction.h
// (code-viewer page residue, not part of the header: "font size:" label)
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef __CTransitionFunction_H
#define __CTransitionFunction_H
#include "caction.h"
#include "cstatecollection.h"
#include "cstateproperties.h"
#include "ccontinuousactions.h"
#include "cenvironmentmodel.h"
#include "cqfunction.h"
#include "cutility.h"
#include "ril_debug.h"
#include "cregions.h"
// Model type constants used with CTransitionFunction::getType()/addType()/isType().
// NOTE(review): the values are distinct powers of two, so they look like
// combinable bit flags - confirm against the implementation of addType()/isType().
#define DM_CONTINUOUSMODEL     0x1
#define DM_DERIVATIONUMODEL    0x2
#define DM_EXTENDEDACTIONMODEL 0x4

// Reset type constants used with CTransitionFunction::setResetType(),
// listed in ascending numeric order.
#define DM_RESET_TYPE_ZERO       0
#define DM_RESET_TYPE_RANDOM     1
#define DM_RESET_TYPE_ALL_RANDOM 2
/// Abstract base class of all transition functions declared in this header.
///
/// Subclasses must implement transitionFunction() (pure virtual). The other
/// virtual hooks have defaults defined right here: getDerivationU() does
/// nothing, isResetState() and isFailedState() return false.
class CTransitionFunction : public CStateObject, public CActionObject, virtual public CParameterObject
{
protected:
    /// Model type value, accessed through getType()/addType()/isType().
    /// NOTE(review): presumably a combination of the DM_* model type
    /// constants (1, 2, 4) - confirm in the implementation.
    int type;
    /// Reset type, set through setResetType().
    /// NOTE(review): presumably one of the DM_RESET_TYPE_* constants - confirm.
    int resetType;

public:
    CTransitionFunction(CStateProperties *properties, CActionSet *actions);

    /// Virtual destructor: this class is a polymorphic base and the derived
    /// classes in this header declare their own destructors, so deleting a
    /// derived object through a CTransitionFunction pointer must dispatch to
    /// the most-derived destructor.
    virtual ~CTransitionFunction() {}

    int getType();
    void addType(int Type);
    bool isType(int type);

    /// Pure virtual transition step; must be implemented by subclasses.
    /// NOTE(review): presumably writes the successor of `oldstate` under
    /// `action` into `newState` - confirm in the implementations.
    /// @param data optional action data, defaults to NULL.
    virtual void transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data = NULL) = 0;

    /// Default implementation is empty: `derivation` is left untouched.
    virtual void getDerivationU(CState *oldstate, CMyMatrix *derivation) {}

    /// Default implementation always returns false.
    virtual bool isResetState(CState *state) {return false;}

    /// Default implementation always returns false.
    virtual bool isFailedState(CState *state) {return false;}

    virtual void getResetState(CState *resetState);
    virtual void setResetType(int resetType);
};
/// Transition function that holds a pointer to another CTransitionFunction
/// (`dynModel`) and overrides every virtual hook of the base class.
/// NOTE(review): judging by the name, this presumably executes extended
/// (multi-step) actions on the wrapped model - confirm in the implementation.
class CExtendedActionTransitionFunction : public CTransitionFunction
{
protected:
    /// The other transition function.
    /// NOTE(review): presumably the `model` constructor argument - confirm.
    CTransitionFunction *dynModel;
    /// State collection buffers. NOTE(review): ownership and usage are not
    /// visible in this header - confirm in the implementation.
    CStateCollectionImpl *intermediateState;
    CStateCollectionImpl *nextState;
    CActionDataSet *actionDataSet;

public:
    CExtendedActionTransitionFunction(CActionSet *actions, CTransitionFunction *model, std::list<CStateModifier *> *modifiers);

    /// Declared virtual explicitly, consistent with the other polymorphic
    /// classes in this header, so that destruction through a pointer to this
    /// class always runs the most-derived destructor.
    virtual ~CExtendedActionTransitionFunction();

    /// @param data optional action data, defaults to NULL.
    virtual void transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data = NULL);

    /// Variant of the transition that additionally takes a reward function
    /// and `gamma` and returns an rlt_real.
    /// NOTE(review): presumably the (discounted) reward collected during the
    /// transition - confirm in the implementation.
    virtual rlt_real transitionFunctionAndReward(CState *oldState, CAction *action, CState *newState, CActionData *data, CRewardFunction *reward, rlt_real gamma);

    virtual void getDerivationU(CState *oldstate, CMyMatrix *derivation);
    virtual bool isResetState(CState *state);
    virtual bool isFailedState(CState *state);
    virtual void getResetState(CState *resetState);
    virtual void setResetType(int resetType);
};
/// Transition function that keeps a list of other transition functions.
/// NOTE(review): presumably transitionFunction() applies the listed
/// functions in sequence - confirm in the implementation.
class CComposedTransitionFunction : public CTransitionFunction
{
protected:
    /// Pointer to the list of member transition functions.
    /// NOTE(review): ownership of the list and of its elements is not
    /// visible in this header - confirm in the implementation.
    std::list<CTransitionFunction *> *TransitionFunction;

public:
    CComposedTransitionFunction(CStateProperties *properties);

    /// Declared virtual explicitly, consistent with the other polymorphic
    /// classes in this header, so that destruction through a pointer to this
    /// class always runs the most-derived destructor.
    virtual ~CComposedTransitionFunction();

    /// NOTE(review): presumably appends `model` to the internal list - confirm.
    void addTransitionFunction(CTransitionFunction *model);

    /// @param data optional action data, defaults to NULL.
    virtual void transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data = NULL);
};
/// Abstract transition function for continuous-time models.
/// Subclasses must implement getDerivationX(), which is pure virtual here.
/// NOTE(review): presumably transitionFunction() integrates getDerivationX()
/// over `dt`, split into `simulationSteps` calls of doSimulationStep() -
/// confirm in the implementation.
class CDynamicContinuousTimeModel : public CTransitionFunction
{
protected:
/// Time interval. NOTE(review): presumably set from the constructor argument
/// and setTimeIntervall() - confirm.
rlt_real dt;
/// Number of simulation steps, see setSimulationSteps()/getSimulationSteps().
int simulationSteps;
/// Vector buffer. NOTE(review): presumably holds the derivative computed by
/// getDerivationX() - confirm; ownership is not visible in this header.
CMyVector *derivation;
/// Overridable single simulation step using the given time step.
virtual void doSimulationStep(CState *oldState, rlt_real timeStep, CAction *action, CActionData *data);
public:
CDynamicContinuousTimeModel(CStateProperties *properties, CActionSet *actions, rlt_real dt);
virtual ~CDynamicContinuousTimeModel();
/// Overrides CTransitionFunction::transitionFunction(); `data` defaults to NULL.
virtual void transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data = NULL);
/// Accessors for the time interval (spelling "Intervall" is part of the API).
rlt_real getTimeIntervall();
void setTimeIntervall(rlt_real dt);
void setSimulationSteps(int steps);
int getSimulationSteps();
/// Pure virtual; must be implemented by subclasses. `data` defaults to NULL.
/// NOTE(review): presumably writes the time derivative of the state into
/// `derivation` - confirm in the implementations.
virtual void getDerivationX(CState *oldstate, CAction *action, CMyVector *derivation, CActionData *data = NULL) = 0;
};
// Forward declarations of the continuous action types used below.
class CContinuousAction;
class CContinuousActionData;
/// Continuous-time model for a continuous action.
/// Implements getDerivationX() and introduces the pure virtual
/// getCADerivationX(), which takes CContinuousActionData instead.
/// NOTE(review): presumably getDerivationX() forwards to getCADerivationX()
/// after converting the action/data - confirm in the implementation.
class CDynamicContinuousTimeAndActionModel : public CDynamicContinuousTimeModel
{
protected:
/// Properties of the continuous action.
/// NOTE(review): presumably obtained from `contAction` - confirm.
CContinuousActionProperties *actionProp;
/// The continuous action, see getContinuousAction().
CContinuousAction *contAction;
public:
CDynamicContinuousTimeAndActionModel(CStateProperties *properties, CContinuousAction *action, rlt_real dt);
virtual ~CDynamicContinuousTimeAndActionModel();
/// Overrides CDynamicContinuousTimeModel::getDerivationX(); `data` defaults to NULL.
virtual void getDerivationX(CState *oldState, CAction *action, CMyVector *derivationX, CActionData *data = NULL);
/// Pure virtual; must be implemented by subclasses.
virtual void getCADerivationX(CState *oldState, CContinuousActionData *action, CMyVector *derivationX) = 0;
CContinuousAction *getContinuousAction();
};
/// Continuous-time model described through a state-dependent vector A and
/// matrix B, which subclasses supply via the pure virtual getA()/getB().
/// Implements getCADerivationX() and getDerivationU().
/// NOTE(review): the naming suggests dynamics that are linear in the action
/// (dx/dt = A(x) + B(x) * u) - confirm in the implementation.
class CDynamicLinearActionContinuousTimeModel : public CDynamicContinuousTimeAndActionModel
{
protected:
/// NOTE(review): usage and ownership of these two pointers are not visible
/// in this header - confirm in the implementation.
CMyVector *A;
CMyMatrix *B;
public:
CDynamicLinearActionContinuousTimeModel(CStateProperties *properties, CContinuousAction *action, rlt_real dt);
// The destructor is virtual because the base class destructor is declared virtual.
~CDynamicLinearActionContinuousTimeModel();
virtual void getCADerivationX(CState *oldState, CContinuousActionData *action, CMyVector *derivationX);
virtual void getDerivationU(CState *oldstate, CMyMatrix *derivation);
/// Pure virtual; returns a matrix for the given state.
virtual CMyMatrix *getB(CState *state) = 0;
/// Pure virtual; returns a vector for the given state.
virtual CMyVector *getA(CState *state) = 0;
};
/// Concrete subclass of CDynamicLinearActionContinuousTimeModel that
/// implements getA()/getB() and is constructed from two matrices A and B.
/// NOTE(review): the naming suggests fully linear dynamics
/// (dx/dt = A * x + B * u) - confirm in the implementation.
class CDynamicLinearContinuousTimeModel : public CDynamicLinearActionContinuousTimeModel
{
protected:
// NOTE(review): this member hides the inherited
// CDynamicLinearActionContinuousTimeModel::B of the same name and type.
// Check in the implementation which of the two each access refers to before
// changing either declaration.
CMyMatrix *B;
/// NOTE(review): presumably the `A` constructor argument - confirm.
CMyMatrix *AMatrix;
public:
CDynamicLinearContinuousTimeModel(CStateProperties *properties, CContinuousAction *action, rlt_real dt, CMyMatrix *A, CMyMatrix *B);
// The destructor is virtual because a base class destructor is declared virtual.
~CDynamicLinearContinuousTimeModel();
virtual CMyMatrix *getB(CState *state);
virtual CMyVector *getA(CState *state);
};
// class CEnvironmentModel;  (disabled forward declaration; cenvironmentmodel.h is included above)
/// Environment model that is backed by a CTransitionFunction.
/// NOTE(review): presumably doNextState() advances `modelState` by calling
/// the transition function - confirm in the implementation.
class CTransitionFunctionEnvironment : public CEnvironmentModel
{
protected:
    /// The transition function, returned by getTransitionFunction().
    CTransitionFunction *TransitionFunction;
    /// State buffers. NOTE(review): presumably the current and the successor
    /// state of the model - confirm in the implementation.
    CState *modelState;
    CState *nextState;
    /// Start states, see the setStartStates() overloads.
    CStateList *startStates;
    int nEpisode;
    /// NOTE(review): presumably true when `startStates` was created by this
    /// object (file overload) and must be released by it - confirm.
    bool createdStartStates;
    /// Regions set through setFailedRegion()/setSampleRegion()/setTargetRegion().
    CRegion *failedRegion;
    CRegion *sampleRegion;
    CRegion *targetRegion;

public:
    CTransitionFunctionEnvironment(CTransitionFunction *model);
    virtual ~CTransitionFunctionEnvironment();

    virtual void doNextState(CPrimitiveAction *action);
    virtual void doResetModel();
    virtual void getState(CState *state);
    virtual void setState(CState *state);

    /// Sets the start states from an existing list.
    virtual void setStartStates(CStateList *startStates);
    /// Sets the start states from a file name.
    /// NOTE(review): file format is not visible in this header.
    virtual void setStartStates(char *filename);

    /// Returns the stored transition function pointer.
    CTransitionFunction *getTransitionFunction() {return TransitionFunction;}

    void setSampleRegion(CRegion *sampleRegion);
    void setFailedRegion(CRegion *failedRegion);
    /// The parameter is named after the region it sets (it was previously
    /// declared as `sampleRegion`, copied from setSampleRegion()).
    void setTargetRegion(CRegion *targetRegion);
};
/// Q-function that stores no values of its own: setValue() and updateValue()
/// are empty. It holds a V-function, a transition function (model) and a
/// reward function.
/// NOTE(review): presumably getValue() simulates the action with the model
/// and combines the reward with the V-function value of the successor state -
/// confirm in the implementation.
class CQFunctionFromTransitionFunction : public CAbstractQFunction, public CStateModifiersObject
{
protected:
/// The given V-function.
CAbstractVFunction *vfunction;
/// The model (transition function).
CTransitionFunction *model;
/// Reward function for the learning problem.
CRewardFunction *rewardfunction;
/// State buffers.
CStateCollectionImpl *intermediateState;
CStateCollectionImpl *nextState;
/// NOTE(review): presumably used by getValueDepthSearch() - confirm.
CStateCollectionList *stateCollectionList;
CActionDataSet *actionDataSet;
public:
/// Creates a new Q-function for the given actions, V-function, model, reward function and list of state modifiers.
CQFunctionFromTransitionFunction(CActionSet *actions, CAbstractVFunction *vfunction, CTransitionFunction *model, CRewardFunction *rewardfunction, std::list<CStateModifier *> *modifiers);
virtual ~CQFunctionFromTransitionFunction();
// Disabled declaration (was documented as: writes the action values into the actionValues array):
//void getActionValues(CStateCollection *state, rlt_real *actionValues, CActionSet *actions);
/// Does nothing (empty body).
virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// Does nothing (empty body).
virtual void updateValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// Returns the value of the given action for the given state collection.
/** `data` is optional and defaults to NULL. */
virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
/// NOTE(review): presumably a look-ahead of `depth` steps over the model - confirm.
rlt_real getValueDepthSearch(CStateCollectionList *state, CAction *action, CActionData *data, int depth);
/// Always returns NULL.
virtual CAbstractQETraces *getStandardETraces() {return NULL;};
virtual void addStateModifier(CStateModifier *modifier);
};
/*
class CContinuousActionQFunctionFromTransitionFunction : public CContinuousActionQFunction, public CStateModifiersObject
{
protected:
/// The given V-Function
CAbstractVFunction *vfunction;
/// The model
CDynamicContinuousTimeAndActionModel *model;
/// feature Reward Function for the learning problem.
CRewardFunction *rewardfunction;
/// state buffer
CStateCollectionImpl *nextState;
public:
/// Creates a new QFunction from VFunction object for the given V-Function and the given model, the discretizer is taken from the V-Function
CContinuousActionQFunctionFromTransitionFunction(CContinuousAction *contAction, CAbstractVFunction *vfunction, CDynamicContinuousTimeAndActionModel *model, CRewardFunction *rewardfunction, std::list<CStateModifier *> *modifiers);
virtual ~CContinuousActionQFunctionFromTransitionFunction();
/// Writes the Action-Values in the actionValues Array.
//void getActionValues(CStateCollection *state, rlt_real *actionValues, CActionSet *actions);
/// Does nothing
virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// Does nothing
virtual void updateValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// getValue function for state collections
/** Calls the getValue function for the specific state (retrieved from the collection by the discretizer)
virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
virtual CAbstractQETraces *getStandardETraces() {return NULL;};
};
*/
/// Q-function for continuous-time models that stores no values of its own:
/// setValue() and updateValue() are empty. It holds a V-function input
/// derivation calculator, a continuous-time model and a reward function.
/// NOTE(review): presumably the Q-values are computed from the reward, the
/// derivative of the V-function with respect to its input and the model's
/// state derivative - confirm in the implementation.
class CContinuousTimeQFunctionFromTransitionFunction : public CAbstractQFunction, public CStateModifiersObject
{
protected:
/// Calculator for the derivative of the V-function with respect to its input.
CVFunctionInputDerivationCalculator *vfunction;
/// The continuous-time model.
CDynamicContinuousTimeModel *model;
/// Reward function for the learning problem.
CRewardFunction *rewardfunction;
/// State buffer.
CStateCollectionImpl *nextState;
/// Buffers. NOTE(review): presumably hold the model's state derivative and
/// the V-function's input derivative - confirm.
CState *derivationXModel;
CState *derivationXVFunction;
/// Overridable helper. Note that the parameter `derivationXVFunction`
/// (a CMyVector*) has the same name as the CState* member above and hides
/// it inside the function.
virtual rlt_real getValueVDerivation(CStateCollection *state, CAction *action, CActionData *data, CMyVector *derivationXVFunction);
public:
/// Creates a new Q-function for the given actions, V-function derivation calculator, model, reward function and list of state modifiers.
CContinuousTimeQFunctionFromTransitionFunction(CActionSet *actions, CVFunctionInputDerivationCalculator *vfunction, CDynamicContinuousTimeModel *model, CRewardFunction *rewardfunction, std::list<CStateModifier *> *modifiers);
/// Same as above, without a list of state modifiers.
CContinuousTimeQFunctionFromTransitionFunction(CActionSet *actions, CVFunctionInputDerivationCalculator *vfunction, CDynamicContinuousTimeModel *model, CRewardFunction *rewardfunction);
virtual ~CContinuousTimeQFunctionFromTransitionFunction();
/// NOTE(review): presumably writes one value per action of `actions` into `actionValues` - confirm.
virtual void getActionValues(CStateCollection *state, CActionSet *actions, rlt_real *actionValues, CActionDataSet *actionDataSet);
/// Does nothing (empty body).
virtual void setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// Does nothing (empty body).
virtual void updateValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data = NULL) {};
/// Returns the value of the given action for the given state collection.
/** `data` is optional and defaults to NULL. */
virtual rlt_real getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
/// Always returns NULL.
virtual CAbstractQETraces *getStandardETraces() {return NULL;};
virtual void addStateModifier(CStateModifier *modifier);
};
#endif
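// (code-viewer page residue, not part of the header - keyboard shortcut legend:
//  copy code: Ctrl + C; search code: Ctrl + F; full-screen mode: F11;
//  toggle theme: Ctrl + Shift + D; show shortcuts: ?;
//  increase font size: Ctrl + =; decrease font size: Ctrl + -)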