📄 ctransitionfunction.cpp

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "ctransitionfunction.h"
#include <math.h>


// Creates a transition function over the given state properties and action set.
// The type mask starts out empty and the reset strategy is DM_RESET_TYPE_ZERO.
CTransitionFunction::CTransitionFunction(CStateProperties *properties, CActionSet *actions) : CStateObject(properties), CActionObject(actions)
{
	// Default reset strategy; can be changed later through setResetType().
	this->resetType = DM_RESET_TYPE_ZERO;

	// No type bits are set until addType() is called.
	this->type = 0;
}

int CTransitionFunction::getType()
{
	return type;
}

// ORs the bits of Type into the model's type mask; bits that are already set stay set.
void CTransitionFunction::addType(int Type)
{
	type |= Type;
}

// Returns true when every bit of the given mask is set in this model's type mask.
bool CTransitionFunction::isType(int type)
{
	// The parameter shadows the member, hence the explicit this->.
	const int matchedBits = this->type & type;
	return matchedBits == type;
}

// Writes a start state into modelState according to the configured reset type.
//   DM_RESET_TYPE_ZERO
//       every continuous state is set to 0.0 and every discrete state to 0.
//   DM_RESET_TYPE_RANDOM, DM_RESET_TYPE_ALL_RANDOM
//       every continuous state is drawn with rand() from the interval
//       [getMinValue(i), getMaxValue(i)] of this model's state properties, and
//       every discrete state is rand() modulo getDiscreteStateSize(i).
// Any other reset type leaves modelState untouched.
void CTransitionFunction::getResetState(CState *modelState)
{
	const bool resetToZero = (resetType == DM_RESET_TYPE_ZERO);
	const bool resetToRandom = (resetType == DM_RESET_TYPE_ALL_RANDOM) || (resetType == DM_RESET_TYPE_RANDOM);

	if (resetToZero)
	{
		for (unsigned int dim = 0; dim < modelState->getNumContinuousStates(); dim++)
		{
			modelState->setContinuousState(dim, 0.0);
		}
		for (unsigned int dim = 0; dim < modelState->getNumDiscreteStates(); dim++)
		{
			modelState->setDiscreteState(dim, 0);
		}
	}
	else if (resetToRandom)
	{
		// Continuous dimensions first, then discrete ones, so the sequence of
		// rand() calls stays the same for a given seed.
		for (unsigned int dim = 0; dim < modelState->getNumContinuousStates(); dim++)
		{
			rlt_real range = properties->getMaxValue(dim) - properties->getMinValue(dim);
			// Uniform sample in [0, 1].
			rlt_real unitSample = static_cast<rlt_real>(rand()) / RAND_MAX;
			modelState->setContinuousState(dim, unitSample * range + properties->getMinValue(dim));
		}
		for (unsigned int dim = 0; dim < modelState->getNumDiscreteStates(); dim++)
		{
			modelState->setDiscreteState(dim, rand() % properties->getDiscreteStateSize(dim));
		}
	}
}

// Selects the reset strategy that getResetState() applies (e.g. DM_RESET_TYPE_ZERO
// or DM_RESET_TYPE_RANDOM).
void CTransitionFunction::setResetType(int resetType)
{
	// Qualified name, because the parameter hides the member of the same name.
	CTransitionFunction::resetType = resetType;
}

// Wraps an existing transition function and marks the result as DM_EXTENDEDACTIONMODEL.
//
// actions    action set handed to the CTransitionFunction base
// model      wrapped transition function; must not be NULL because its state
//            properties are read in the initializer list
// modifiers  state modifier list passed to the two internal state collections
//
// The parameters of the wrapped model are added to this object, together with
// "MaxHierarchicExecution" (default 50).
CExtendedActionTransitionFunction::CExtendedActionTransitionFunction(CActionSet *actions, CTransitionFunction *model, std::list<CStateModifier *> *modifiers) : CTransitionFunction(model->getStateProperties(), actions)
{
	addType(DM_EXTENDEDACTIONMODEL);

	dynModel = model;

	addParameters(model);
	addParameter("MaxHierarchicExecution", 50);

	// Two state buffers that transitionFunctionAndReward() works on.
	intermediateState = new CStateCollectionImpl(properties, modifiers);
	nextState = new CStateCollectionImpl(properties, modifiers);
}

// Frees the two state buffers allocated in the constructor. The wrapped model is
// not deleted here.
CExtendedActionTransitionFunction::~CExtendedActionTransitionFunction()
{
	// transitionFunctionAndReward() swaps the two pointers, so which one is
	// released first carries no meaning.
	delete nextState;
	delete intermediateState;
}

// Computes the state transition only: forwards to transitionFunctionAndReward()
// with no reward function and a discount factor of 0, and discards the returned reward.
void CExtendedActionTransitionFunction::transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data)
{
	CRewardFunction *noRewardFunction = NULL;

	transitionFunctionAndReward(oldstate, action, newState, data, noRewardFunction, 0);
}

// Executes `action` on the wrapped model and returns the collected reward.
//
// A primitive action is forwarded to the wrapped model once. An extended action is
// unrolled: in every iteration the action hierarchy is descended until a
// non-extended action is found, that action is executed on the wrapped model, and
// the loop repeats until the extended action reports isFinished() or the summed
// duration reaches the "MaxHierarchicExecution" parameter.
//
// oldState        state in which the action is applied
// action          action to execute (PRIMITIVEACTION and/or EXTENDEDACTION)
// newState        receives the state reached after the action
// data            optional; for an extended action a CMultiStepActionData passed here
//                 receives the total duration of the executed primitive steps
// rewardFunction  optional; when NULL no reward is evaluated and 0 is returned
// gamma           discount factor; in the extended case the running reward is
//                 multiplied by gamma^stepDuration before each step's reward is added
rlt_real CExtendedActionTransitionFunction::transitionFunctionAndReward(CState *oldState, CAction *action, CState *newState, CActionData *data, CRewardFunction *rewardFunction, rlt_real gamma)
{
	rlt_real reward = 0;

	intermediateState->getState(properties)->setState(oldState);

	if (action->isType(PRIMITIVEACTION))
	{
		dynModel->transitionFunction(oldState, action, newState, data);
		
		nextState->getState(properties)->setState(newState);

		if (rewardFunction)
		{
			reward = rewardFunction->getReward(intermediateState, action, nextState);
		}
	}

	if (action->isType(EXTENDEDACTION))
	{
		int duration = 0;

		CExtendedAction *extAction = dynamic_cast<CExtendedAction *>(action);
		
		do
		{
			// Descend the hierarchy from the current state down to a non-extended action.
			CAction *primAction = action;
			while (primAction->isType(EXTENDEDACTION))
			{
				CExtendedAction *currentLevel = dynamic_cast<CExtendedAction *>(primAction);
				primAction = currentLevel->getNextHierarchyLevel(intermediateState);
			}

			CActionData *primActionData = actionDataSet->getActionData(primAction);
			if (primActionData)
			{
				primActionData->setData(primAction->getActionData());
			}

			// The wrapped model executes the primitive action that was just selected
			// (together with its own data), not the enclosing extended action.
			dynModel->transitionFunction(intermediateState->getState(dynModel->getStateProperties()), primAction, nextState->getState(dynModel->getStateProperties()), primActionData);
			nextState->newModelState();

			// Duration of this step: taken from the primitive action's multi-step data
			// when it has any, otherwise from the action itself. The cast is checked so
			// that data of another type falls back to getDuration() instead of crashing.
			CMultiStepActionData *stepData = NULL;
			if (primActionData && primAction->isType(MULTISTEPACTION))
			{
				stepData = dynamic_cast<CMultiStepActionData *>(primActionData);
			}

			int lduration = 1;
			if (stepData)
			{
				lduration = stepData->duration;
			}
			else
			{
				lduration = primAction->getDuration();
			}

			duration += lduration;
			
			if (rewardFunction)
			{
				reward = reward * pow(gamma, lduration) + rewardFunction->getReward(intermediateState, primAction, nextState);
			}

			// Exchange the buffers: from here on intermediateState holds the state that
			// was just reached and nextState holds the state before this step.
			CStateCollectionImpl *swapped = intermediateState;
			intermediateState = nextState;
			nextState = swapped;
		}
		// Execute the action until it is finished or the duration limit is reached.
		// NOTE(review): because of the swap above, isFinished() receives the newest state
		// first and the previous state second; confirm this matches the parameter order
		// that CExtendedAction::isFinished expects.
		while (duration < 	getParameter("MaxHierarchicExecution") && !extAction->isFinished(intermediateState, nextState));
		
		if (data)
		{
			// Report the total duration only when the caller's data can actually hold it.
			CMultiStepActionData *resultData = dynamic_cast<CMultiStepActionData *>(data);
			if (resultData)
			{
				resultData->duration = duration;
			}
		}

		// After the final swap the most recent state lives in intermediateState;
		// nextState is one primitive step behind.
		newState->setState(intermediateState->getState(properties));
	}

	return reward;
}

// Delegates to the wrapped model, which fills `derivation` for the given state.
void CExtendedActionTransitionFunction::getDerivationU(CState *oldstate, CMyMatrix *derivation)
{
	this->dynModel->getDerivationU(oldstate, derivation);
}

// Returns whatever the wrapped model reports from isResetState() for `state`.
bool CExtendedActionTransitionFunction::isResetState(CState *state) 
{
	const bool modelSaysReset = dynModel->isResetState(state);
	return modelSaysReset;
}
	
// Returns whatever the wrapped model reports from isFailedState() for `state`.
bool CExtendedActionTransitionFunction::isFailedState(CState *state)
{
	const bool modelSaysFailed = dynModel->isFailedState(state);
	return modelSaysFailed;
}

// Lets the wrapped model write its reset state into `resetState`; this object's own
// reset type is not consulted.
void CExtendedActionTransitionFunction::getResetState(CState *resetState)
{
	this->dynModel->getResetState(resetState);
}

void CExtendedActionTransitionFunction::setResetType(int resetType)
{
	dynModel->setResetType(resetType);
}

// Creates an empty composition. A fresh CActionSet is allocated for the base class
// and released again in the destructor; sub-models are added later through
// addTransitionFunction().
CComposedTransitionFunction::CComposedTransitionFunction(CStateProperties *properties) : CTransitionFunction(properties, new CActionSet())
{
	// List of the composed sub-models, initially empty.
	TransitionFunction = new std::list<CTransitionFunction *>;
}

// Releases the action set allocated in the constructor and the list object.
// Only the list itself is deleted; the transition functions it points to are not.
CComposedTransitionFunction::~CComposedTransitionFunction()
{
	delete actions;
	delete TransitionFunction;
}

// Registers a sub-model: it is appended to the list of composed transition functions
// and its actions are added to this object's action set.
void CComposedTransitionFunction::addTransitionFunction(CTransitionFunction *model)
{
	this->TransitionFunction->push_back(model);
	this->actions->add(model->getActions());
}

// Applies `action` through every registered sub-model whose action set contains it.
// The scan does not stop at the first match, so each matching sub-model is invoked in
// registration order with the same oldstate/newState pair.
void CComposedTransitionFunction::transitionFunction(CState *oldstate, CAction *action, CState *newState, CActionData *data)
{
	std::list<CTransitionFunction *>::iterator current = TransitionFunction->begin();

	while (current != TransitionFunction->end())
	{
		CTransitionFunction *subModel = *current;

		if (subModel->getActions()->isMember(action))
		{
			subModel->transitionFunction(oldstate, action, newState, data);
		}

		++current;
	}
}


// Creates a continuous-time model that advances the state by `dt` per transition.
// The model is tagged DM_CONTINUOUSMODEL, starts with a single simulation step per
// transition, and allocates a derivative buffer with one entry per continuous state.
CDynamicContinuousTimeModel::CDynamicContinuousTimeModel(CStateProperties *properties, CActionSet *actions, rlt_real dt) : CTransitionFunction(properties, actions)
{
	addType(DM_CONTINUOUSMODEL);

	this->simulationSteps = 1;
	this->dt = dt;

	// Scratch vector that doSimulationStep() fills through getDerivationX().
	derivation = new CMyVector(getStateProperties()->getNumContinuousStates());
}

// Frees the derivative buffer allocated in the constructor.
CDynamicContinuousTimeModel::~CDynamicContinuousTimeModel()
{
	delete this->derivation;
}
	
// Advances `state` in place by one integration step of length `timeStep`:
//     x_i <- x_i + timeStep * dx_i   for every continuous state component i,
// where dx is obtained from getDerivationX() for the given action and action data.
//
// state     state to advance; only its continuous components are modified
// timeStep  length of the integration step
// action    action applied during the step
// data      optional action data, passed through to getDerivationX()
void CDynamicContinuousTimeModel::doSimulationStep(CState *state, rlt_real timeStep, CAction *action, CActionData *data)
{
	// Evaluate the derivative once, at the state the step starts from. Evaluating it
	// again inside the loop would mix derivatives of partially updated states, making
	// the result depend on the order of the components and costing one derivative
	// evaluation per component.
	this->getDerivationX(state, action, derivation, data);

	for (unsigned int i = 0; i < state->getNumContinuousStates(); i++)
	{
		state->setContinuousState(i, state->getContinuousState(i) + timeStep * derivation->getElement(i));
	}
}



// Computes newState from oldState by copying the old state and then applying
// `simulationSteps` integration steps, each of length dt / simulationSteps.
void CDynamicContinuousTimeModel::transitionFunction(CState *oldState, CAction *action, CState *newState, CActionData *data)
{
	// The time interval is split evenly across the sub-steps.
	rlt_real subStepLength = dt / simulationSteps;

	// Integration happens in place on newState, starting from a copy of oldState.
	newState->setState(oldState);

	int stepsDone = 0;
	while (stepsDone < simulationSteps)
	{
		doSimulationStep(newState, subStepLength, action, data);
		stepsDone++;
	}
}

// Returns the time interval dt covered by one call of transitionFunction().
rlt_real CDynamicContinuousTimeModel::getTimeIntervall()
{
	return this->dt;
}
	
// Sets the time interval dt covered by one call of transitionFunction().
void CDynamicContinuousTimeModel::setTimeIntervall(rlt_real dt)
{
	// Qualified name, because the parameter hides the member of the same name.
	CDynamicContinuousTimeModel::dt = dt;
} 

void CDynamicContinuousTimeModel::setSimulationSteps(int steps)
{
	assert(steps > 0);
	this->simulationSteps = steps;
}

int CDynamicContinuousTimeModel::getSimulationSteps()
{
	return simulationSteps;
}


// Creates a continuous-time model that is driven by a single continuous action.
// A new CActionSet containing only `action` is allocated for the base class and
// released in the destructor; the action and its properties are remembered.
CDynamicContinuousTimeAndActionModel::CDynamicContinuousTimeAndActionModel(CStateProperties *properties, CContinuousAction *action, rlt_real dt) : CDynamicContinuousTimeModel(properties, new CActionSet(), dt)
{
	contAction = action;
	actionProp = action->getContinuousActionProperties();

	// The freshly created action set holds exactly this one action.
	actions->add(action);
}

// Releases the action set that the constructor allocated for the base class.
CDynamicContinuousTimeAndActionModel::~CDynamicContinuousTimeAndActionModel()
{
	delete this->actions;
}


// Computes the state derivative for a continuous action by forwarding to
// getCADerivationX(). The action values come from `data` when it is given, otherwise
// from the action's own action data; either one is cast to CContinuousActionData.
// `action` must be of type CONTINUOUSACTION (asserted).
void CDynamicContinuousTimeAndActionModel::getDerivationX(CState *oldState, CAction *action, CMyVector *derivationX, CActionData *data)
{
	assert(action->isType(CONTINUOUSACTION));

	CContinuousActionData *actionValues = NULL;

	if (data)
	{
		// Explicit data passed by the caller takes precedence.
		actionValues = dynamic_cast<CContinuousActionData *>(data);
	}
	else
	{
		actionValues = dynamic_cast<CContinuousActionData *>(action->getActionData());
	}

	getCADerivationX(oldState, actionValues, derivationX);
}

// Returns the continuous action that was passed to the constructor.
CContinuousAction *CDynamicContinuousTimeAndActionModel::getContinuousAction()
{
	return this->contAction;
}

// Creates a continuous-time model tagged DM_DERIVATIONUMODEL and allocates its two
// buffers:
//   A - vector with one entry per continuous state
//   B - matrix of (number of continuous states) x (number of action values)
CDynamicLinearActionContinuousTimeModel::CDynamicLinearActionContinuousTimeModel(CStateProperties *properties, CContinuousAction *action, rlt_real dt) : CDynamicContinuousTimeAndActionModel(properties, action, dt)
{
	addType(DM_DERIVATIONUMODEL);

	this->A = new CMyVector(properties->getNumContinuousStates());
	this->B = new CMyMatrix(properties->getNumContinuousStates(), actionProp->getNumActionValues());
}

// Frees the A vector and B matrix allocated in the constructor.
CDynamicLinearActionContinuousTimeModel::~CDynamicLinearActionContinuousTimeModel()
{
	delete this->B;
	delete this->A;
}

void CDynamicLinearActionContinuousTimeModel::getCADerivationX(CState *oldState, CContinuousActionData *contAction, CMyVector *derivationX)
{
	assert(oldState->getNumDimensions() == derivationX->getNumDimensions());
	CMyMatrix *B = getB(oldState);
	CMyVector *a = getA(oldState);


	B->multVector(contAction, derivationX); // x ' = B(x) * u
	derivationX->addVector(a); // x' = B(x) * u + a(x)
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -