📄 clearneddynamicmodel.cpp

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 CPP
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "clearneddynamicmodel.h"
#include "caction.h"
#include "ccontinuousactions.h"

#include <math.h>


// Creates a learned model for a single action.
//   properties    - state properties handed to the base class and used for the
//                   internal model state.
//   action        - the only action this model learns; it is added to the
//                   (freshly created) action set of the base class.
//   learner       - supervised learner doing the actual function approximation;
//                   it is stored, not copied, and is not deleted by this class.
//   learnDuration - if true, the duration of multi-step actions is treated as
//                   an additional learner output.
CLearnedTransitionFunction::CLearnedTransitionFunction(CStateProperties *properties, CAction *action, CSupervisedLearner *learner, bool learnDuration) : CTransitionFunction(properties, new CActionSet())
{
	// Remember the collaborators first.
	this->learner = learner;
	this->learnDuration = learnDuration;
	actionToLearn = action;
	this->actions->add(actionToLearn);

	// Work buffers, dimensioned by the learner's input and output sizes.
	input = new CMyVector(learner->getNumInputs());
	target = new CMyVector(learner->getNumOutputs());
	outputError = new CMyVector(learner->getNumOutputs());

	// Scratch state and scratch action data used while learning.
	modelState = new CState(properties);
	lActionData = action->getNewActionData();

	addParameters(learner);
}

// Frees the buffers allocated in the constructor. The learner and the learned
// action are not deleted here.
CLearnedTransitionFunction::~CLearnedTransitionFunction()
{
	// Scratch action data and model state.
	delete lActionData;
	delete modelState;

	// Learner work vectors.
	delete outputError;
	delete target;
	delete input;
}

// Listener callback: learns from the observed transition if it was caused by
// the action this model is responsible for. Transitions of other actions are
// ignored.
void CLearnedTransitionFunction::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
	CAction *effectiveAction = action;

	// A static continuous action is replaced by the continuous action it
	// returns from getContinuousAction() before the comparison below.
	if (effectiveAction->isType(CONTINUOUSSTATICACTION) && action != actionToLearn)
	{
		CStaticContinuousAction *staticAction = dynamic_cast<CStaticContinuousAction *>(effectiveAction);
		effectiveAction = staticAction->getContinuousAction();
	}

	if (effectiveAction != actionToLearn)
	{
		return;
	}

	doLearnStep(oldState->getState(properties), effectiveAction, effectiveAction->getActionData(), newState->getState(properties));
}

// Predicts the successor state with the learned model.
//   oldState - state the prediction starts from.
//   action   - executed action; its own action data is used when 'data' is
//              not given.
//   newState - receives the predicted continuous state variables. Discrete
//              state variables are not written.
//   data     - optional action data. If given, it supplies the action values
//              for the model input and, when the duration is learned for a
//              multi-step action, receives the predicted duration.
// Note: the member vectors 'input' and 'target' are overwritten by this call.
void CLearnedTransitionFunction::transitionFunction(CState *oldState, CAction *action, CState *newState, CActionData *data)
{
	// Use the explicitly passed action data for the input; callers such as
	// doLearnStep() hand over the data to use via 'data'. Fall back to the
	// action's own data only if none was passed.
	CActionData *inputData = data ? data : action->getActionData();
	getInputVector(oldState, inputData, input);

	learner->testExample(input, target);

	const unsigned int numContinuous = newState->getNumContinuousStates();
	for (unsigned int i = 0; i < numContinuous; i++)
	{
		newState->setContinuousState(i, target->getElement(i));
	}

	if (learnDuration && actionToLearn->isType(MULTISTEPACTION) && data)
	{
		// The predicted duration is stored directly behind the continuous
		// state variables in the output vector.
		CMultiStepActionData *multAction = dynamic_cast<CMultiStepActionData *>(data);
		if (multAction)
		{
			multAction->duration = my_round(target->getElement(numContinuous));
		}
	}
}

// Builds the learner input for a state/action pair.
// Layout of 'input': the continuous state variables of 'oldState' first,
// followed (only if the learned action is a continuous action) by the values
// of 'data'. All remaining elements are zero. Discrete state variables are
// not part of the input.
void CLearnedTransitionFunction::getInputVector(CState *oldState, CActionData *data, CMyVector *input)
{
	input->initVector(0.0);

	const unsigned int numStateInputs = oldState->getNumContinuousStates();
	for (unsigned int dim = 0; dim < numStateInputs; ++dim)
	{
		input->setElement(dim, oldState->getContinuousState(dim));
	}

	if (!actionToLearn->isType(CONTINUOUSACTION))
	{
		return;
	}

	// Append the continuous action values behind the state variables.
	CContinuousActionData *actionValues = dynamic_cast<CContinuousActionData *>(data);
	for (unsigned int dim = 0; dim < actionValues->getNumDimensions(); ++dim)
	{
		input->setElement(numStateInputs + dim, actionValues->getElement(dim));
	}
}

// Builds the training target for an observed transition.
// Layout of 'target': the continuous state variables of 'newState' first; if
// the duration is learned and the learned action is a multi-step action, the
// duration stored in 'data' follows. All remaining elements are zero.
// Discrete state variables are not part of the target.
void CLearnedTransitionFunction::getTargetVector(CState *newState, CActionData *data, CMyVector *target)
{
	target->initVector(0.0);

	const unsigned int numStateOutputs = newState->getNumContinuousStates();
	for (unsigned int dim = 0; dim < numStateOutputs; ++dim)
	{
		target->setElement(dim, newState->getContinuousState(dim));
	}

	const bool withDuration = learnDuration && actionToLearn->isType(MULTISTEPACTION);
	if (withDuration)
	{
		CMultiStepActionData *multiStepData = dynamic_cast<CMultiStepActionData *>(data);
		target->setElement(numStateOutputs, multiStepData->duration);
	}
}

// Performs one supervised learning step for an observed transition.
//   oldState - state before the action.
//   action   - executed action.
//   data     - action data that was used for the transition (not modified).
//   newState - observed successor state.
// The current model prediction is computed first, the output error is derived
// from prediction and observation, and the learner is then trained with the
// observed successor state as target.
void CLearnedTransitionFunction::doLearnStep(CState *oldState, CAction *action, CActionData *data, CState*newState)
{
	getInputVector(oldState, data, input);

	// Predict on a private copy of the action data so that the predicted
	// duration does not overwrite the observed one in 'data'.
	lActionData->setData(data);
	transitionFunction(oldState, action, modelState, lActionData);

	// transitionFunction() lets the learner write its prediction into the
	// member vector 'target'. The observed target therefore has to be built
	// AFTER that call; otherwise learnExample() would be trained on the
	// model's own prediction instead of the observation.
	getTargetVector(newState, data, target);

	for (unsigned int i = 0; i < modelState->getNumContinuousStates(); i++)
	{
		outputError->setElement(i, - modelState->getSingleStateDifference(i, newState->getContinuousState(i)));
	}
	if (learnDuration && actionToLearn->isType(MULTISTEPACTION))
	{
		CMultiStepActionData *multAction1 = dynamic_cast<CMultiStepActionData *>(data);
		CMultiStepActionData *multAction2 = dynamic_cast<CMultiStepActionData *>(lActionData);

		// NOTE(review): the duration error is (predicted - observed), while the
		// state errors above are negated differences - confirm that the sign
		// conventions match what the learner expects.
		if (multAction1 && multAction2)
		{
			outputError->setElement(modelState->getNumContinuousStates(), multAction2->duration - multAction1->duration);
		}
	}

	learner->learnExample(input, target, outputError);
}

void CLearnedTransitionFunction::saveData(FILE *stream)
{
	learner->saveData(stream);
}

void CLearnedTransitionFunction::loadData(FILE *stream)
{
	learner->loadData(stream);
}

void CLearnedTransitionFunction::resetData()
{
	learner->resetData();
}

// Creates an empty composed model; learned sub-models are registered later
// through addTransitionFunction().
CComposedLearnedTransitionFunction::CComposedLearnedTransitionFunction(CStateProperties *properties) : CComposedTransitionFunction(properties)
{
	// Lookup table from the base-class pointer of a registered sub-model to
	// the same object typed as learned model.
	typedef std::map<CTransitionFunction *, CLearnedTransitionFunction *> LearnedModelMap;
	learnedModels = new LearnedModelMap();
}

// Frees the lookup table only; the registered sub-models themselves are not
// deleted here.
CComposedLearnedTransitionFunction::~CComposedLearnedTransitionFunction()
{
	delete this->learnedModels;
}

// Registers a learned sub-model: it is added to the base class' list of
// transition functions, stored in the lookup table under its own pointer, and
// its parameters are added to this object.
void CComposedLearnedTransitionFunction::addTransitionFunction(CLearnedTransitionFunction *model)
{
	CComposedTransitionFunction::addTransitionFunction(model);

	std::map<CTransitionFunction *, CLearnedTransitionFunction *> &registry = *learnedModels;
	registry[model] = model;

	addParameters(model);
}

// Listener callback: forwards the observed transition to every registered
// learned sub-model whose action set contains 'action'.
// Transition functions that are in the list but were not registered as
// learned models are skipped. Looking them up with operator[] would insert a
// null entry into the table and then dereference it.
void CComposedLearnedTransitionFunction::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
	std::list<CTransitionFunction *>::iterator it;

	for (it = TransitionFunction->begin(); it != TransitionFunction->end(); ++it)
	{
		if (!(*it)->getActions()->isMember(action))
		{
			continue;
		}

		std::map<CTransitionFunction *, CLearnedTransitionFunction *>::iterator entry = learnedModels->find(*it);
		if (entry != learnedModels->end() && entry->second)
		{
			entry->second->nextStep(oldState, action, newState);
		}
	}
}

void CComposedLearnedTransitionFunction::saveData(FILE *stream)
{
	std::list<CTransitionFunction *>::iterator it;

	for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
	{
		(*learnedModels)[(*it)]->saveData(stream);
	}
}

void CComposedLearnedTransitionFunction::loadData(FILE *stream)
{
	std::list<CTransitionFunction *>::iterator it;

	for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
	{
		(*learnedModels)[(*it)]->loadData(stream);
	}
}

void CComposedLearnedTransitionFunction::resetData()
{
	std::list<CTransitionFunction *>::iterator it;

	for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
	{
		(*learnedModels)[(*it)]->resetData();
	}
}

// Creates a trainer that generates random transitions with 'trainModel' and
// sends them to 'learnModel'.
//   trainModel - transition function used to generate the transitions.
//   actions    - actions to sample from.
//   learnModel - listener that receives the generated steps; it is registered
//                as semi-MDP listener of this object.
CStochasticModelTrainer::CStochasticModelTrainer(CTransitionFunction *trainModel, CActionSet *actions, CSemiMDPListener *learnModel) : CActionObject(actions)
{
	this->learnModel = learnModel;
	this->trainModel = trainModel;

	// Private action data per action, so sampled values do not have to be
	// written into the actions before the transition is computed.
	actionDataSet = new CActionDataSet(actions);

	// Scratch states described by the training model's state properties.
	nextState = new CState(trainModel->getStateProperties());
	currentState = new CState(trainModel->getStateProperties());

	addSemiMDPListener(learnModel);
}

// Frees the scratch states and the action data set allocated in the
// constructor. The models passed to the constructor are not deleted here.
CStochasticModelTrainer::~CStochasticModelTrainer()
{
	delete actionDataSet;

	delete nextState;
	delete currentState;
}

void CStochasticModelTrainer::doRandomLearnStep()
{
	for (unsigned int i = 0; i < currentState->getNumContinuousStates(); i++)
	{
		rlt_real width = currentState->getStateProperties()->getMaxValue(i) - currentState->getStateProperties()->getMinValue(i);
		currentState->setContinuousState(i, currentState->getStateProperties()->getMinValue(i) + (rlt_real) rand() / RAND_MAX * width); 
	}
	for (unsigned int i = 0; i < currentState->getNumDiscreteStates(); i++)
	{
		currentState->setDiscreteState(i,  rand() % currentState->getStateProperties()->getDiscreteStateSize(i)); 
	}

	int index = rand() % actions->size();

	CAction *action = actions->get(index);
	CActionData *data = actionDataSet->getActionData(action);

	if (action->isType(CONTINUOUSACTION))
	{
		CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(data);
		CContinuousAction *contAction = dynamic_cast<CContinuousAction *>(action);

		for (unsigned int i = 0; i < contData->getNumDimensions(); i++)
		{
			rlt_real width = contAction->getContinuousActionProperties()->getMaxActionValue(i) - contAction->getContinuousActionProperties()->getMinActionValue(i);
			currentState->setContinuousState(i, contAction->getContinuousActionProperties()->getMinActionValue(i) + (rlt_real) rand() / RAND_MAX * width); 
		}
	}

	trainModel->transitionFunction(currentState, action, nextState, data);
	action->loadActionData(data);
	this->sendNextStep(currentState, action, nextState);
}

// Runs doRandomLearnStep() 'nSteps' times; does nothing for nSteps <= 0.
void CStochasticModelTrainer::doNLearnSteps(int nSteps)
{
	int remaining = nSteps;

	while (remaining > 0)
	{
		doRandomLearnStep();
		--remaining;
	}
}

// Creates a comparer that measures how well 'dynModel' reproduces observed
// transitions.
//   dynModel     - model whose predictions are evaluated.
//   compareState - state properties used to fetch the states to compare from
//                  the state collections passed to nextStep().
CTransitionFunctionComparer::CTransitionFunctionComparer(CTransitionFunction *dynModel, CStateProperties *compareState)
{
	compareModel = dynModel;
	compareState2Properties = compareState;

	// Buffer for the model's prediction, described by the model's own state
	// properties.
	compareState1 = new CState(dynModel->getStateProperties());

	resetStatistics();
}

// Listener callback: lets the model predict the successor of 'oldState' and
// compares the prediction with the observed 'nextState'.
// Updates the number of examples, the summed and maximum continuous error
// (half the squared state distance) and the summed and maximum number of
// differing discrete state variables.
void CTransitionFunctionComparer::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState)
{
	++nExamples;

	// Prediction of the model goes into compareState1.
	compareModel->transitionFunction(oldState->getState(compareState2Properties), action, compareState1);

	CState *observed = nextState->getState(compareState2Properties);

	// Continuous part of the error.
	rlt_real squaredError = 0.5 * pow(compareState1->getDistance(observed),2);
	contMse += squaredError;
	if (squaredError > maxMse)
	{
		maxMse = squaredError;
	}

	// Discrete part: count the state variables that were predicted wrongly.
	int mismatches = 0;
	for (unsigned int dim = 0; dim < observed->getNumDiscreteStates(); ++dim)
	{
		if (compareState1->getDiscreteState(dim) != observed->getDiscreteState(dim))
		{
			++mismatches;
		}
	}

	discreteFailures += mismatches;
	if (mismatches > maxDiscreteFailure)
	{
		maxDiscreteFailure = mismatches;
	}
}

void CTransitionFunctionComparer::resetStatistics()
{
	nExamples = 0;

	this->contMse = 0;
	this->discreteFailures = 0;

	this->maxMse = 0;
	this->maxDiscreteFailure = 0;
}

// Returns the average continuous error per example collected since the last
// resetStatistics(). Returns 0 if no example has been collected yet, instead
// of dividing by zero.
rlt_real CTransitionFunctionComparer::getContinuousMSE()
{
	if (nExamples == 0)
	{
		return 0;
	}

	return contMse / nExamples;
}

// Returns the largest continuous error of a single example recorded since the
// last resetStatistics().
rlt_real CTransitionFunctionComparer::getContinuousMaxSquaredError()
{
	return this->maxMse;
}

// Returns the average number of wrongly predicted discrete state variables
// per example collected since the last resetStatistics(). Returns 0 if no
// example has been collected yet, instead of dividing by zero.
rlt_real CTransitionFunctionComparer::getDiscreteFailures()
{
	if (nExamples == 0)
	{
		return 0;
	}

	// Convert before dividing so the average is not truncated in case both
	// counters are integers.
	return static_cast<rlt_real>(discreteFailures) / nExamples;
}

// Returns the largest number of wrongly predicted discrete state variables in
// a single example recorded since the last resetStatistics().
rlt_real CTransitionFunctionComparer::getMaxDiscreteFailure()
{
	return this->maxDiscreteFailure;
}
💿 文件大小 12117 K
👤 上传用户 5201314
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#R-Learning #学习算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -