📄 chierarchicbehaviours.h

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 H
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef C_HIERARCHICBEHAVIOUR__H
#define C_HIERARCHICBEHAVIOUR__H

#include "cagent.h"
#include "crewardfunction.h"
#include "cvfunction.h"
#include "ctransitionfunction.h"
#include "cregions.h"

/*class COptimalValueBehaviour : public CHierarchicalSemiMarkovDecisionProcess
{
protected:
	CAbstractVFunction *rewardFunction;
public:
	COptimalValueBehaviour(CEpisode *currentEpisode, CAbstractVFunction *rewardFunction);

	virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);
};*/


class CSubGoalBehaviour : public CHierarchicalSemiMarkovDecisionProcess, public CStateReward
{
protected:
	std::map<CRegion *, std::pair<rlt_real, rlt_real> > *rewardFactors;
	
	std::list<CRegion *> *targetRegions;
	std::list<CRegion *> *failRegions;

	rlt_real standardReward;

	CRegion *availableRegion;
	CStateProperties *modelProperties;

	string subgoalName;

public:
	CSubGoalBehaviour(CStateProperties *modelProperties, CRegion *avialableRegion, char *subgoalName = "");
	virtual ~CSubGoalBehaviour();

	virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);
	virtual bool isAvailable(CStateCollection *currentState);

	virtual bool isInGoalRegion(CState *state);
	virtual bool isInFailRegion(CState *state);

	virtual rlt_real getStateReward(CState *modelState);
	virtual void getInputDerivation(CState *modelState, CMyVector *targetState);

	virtual void addTargetRegion(CRegion *target, rlt_real rewardFactor = 1.0, rlt_real rewardTau = 10);
	virtual void addFailRegion(CRegion *target, rlt_real rewardFactor = - 1.0, rlt_real rewardTau = 10);

	void setRewardFactor(CRegion *region, rlt_real rewardFactor);
	void setRewardTau(CRegion *region, rlt_real rewardTau);

	void setStandardReward(rlt_real l_standardReward) {this->standardReward = l_standardReward;};

	virtual CRegion *getAvailAbleRegion() {return availableRegion;};

	virtual void sendNextStep(CAction *action);

	string getSubGoalName() {return subgoalName;};
};

class CSubGoalController : public CAgentController
{
protected:
public:
	CSubGoalController(CActionSet *hierarchicActions);

	virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
};


class CSubGoalOutput : public CSemiMDPListener
{
protected:
	CSubGoalBehaviour *lastAction;
	CAgentController *policy;

public:
	CSubGoalOutput(CAgentController *policy);

	virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState);
	virtual void newEpisode();
};
/*
class CSubGoalTrainer : public CTransitionFunctionEnvironment
{
protected:
	CSubGoalBehaviour *subGoal;
	CRegion *sampleRegion;
public:
	CSubGoalTrainer(CTransitionFunction *transitionFunction, CSubGoalBehaviour *subGoal);

	virtual void doNextState(CPrimitiveAction *action);
	virtual void doResetModel();

	virtual void setSubGoal(CSubGoalBehaviour *subGoal);
	virtual void setSampleRegion(CRegion *l_sampleRegion);

};
*/

class CExtendedPrimitiveAction : public CExtendedAction
{
protected:
	CAction *primitiveAction;
public:
	int extendedActionDuration;

	CExtendedPrimitiveAction(CAction *primitiveAction, int extendedActionDuration);

	virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);

	virtual CAction* getNextHierarchyLevel(CStateCollection *state, CActionDataSet *actionDataSet = NULL);


};

/// This class represents an primitive action which gets executed until a specific (mostly discrete) state changes.
/**
This extended action subclass executes a primitive action until a specified state changes. This can be useful for example in gridworlds with local states. The state which has to change is given by "stateToChange" an will be most time a discrete state (continuous states or features normally always change). 
The isFinished method returns as long false as the 2 states (oldState and newState) are the same. 


*/
class CPrimitiveActionStateChange : public CExtendedAction
{
protected:
	/// The Properties of the state which has to change 
	CStateProperties *stateToChange;

	CPrimitiveAction *primitiveAction;

public:
	CPrimitiveActionStateChange(CPrimitiveAction *action, CStateProperties *stateToChange);

	/// Always returns primitiveAction.
	virtual CAction* getNextHierarchyLevel(CStateCollection *state, CActionDataSet *actionDataSet = NULL);

	// Returns true if the 2 states are not equal
	virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);

	// Sets the state which has to change
	void setStateToChange(CStateProperties *stateToChange);

};


#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -