📄 clearneddynamicmodel.cpp
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "clearneddynamicmodel.h"
#include "caction.h"
#include "ccontinuousactions.h"
#include <math.h>
// Creates a learned model for the transitions caused by a single action.
//
// properties    - state properties handed to the base class and used for the internal model state.
// action        - the one action whose transitions are learned; it is added to the action set
//                 that is freshly allocated for the base class.
// learner       - supervised learner; its input/output counts size the scratch vectors.
// learnDuration - stored flag; when set, the duration of multi-step actions is handled as an
//                 additional learner output.
CLearnedTransitionFunction::CLearnedTransitionFunction(CStateProperties *properties, CAction *action, CSupervisedLearner *learner, bool learnDuration) : CTransitionFunction(properties, new CActionSet())
{
    this->learner = learner;
    this->learnDuration = learnDuration;

    // This model is responsible for exactly one action.
    actionToLearn = action;
    this->actions->add(actionToLearn);

    // Scratch vectors, sized by the learner's interface.
    input = new CMyVector(learner->getNumInputs());
    target = new CMyVector(learner->getNumOutputs());
    outputError = new CMyVector(learner->getNumOutputs());

    // State buffer and action-data object owned by this model (released in the destructor).
    modelState = new CState(properties);
    lActionData = action->getNewActionData();

    addParameters(learner);
}
// Frees the buffers allocated in the constructor. The learner and the learned action
// are not deleted here.
CLearnedTransitionFunction::~CLearnedTransitionFunction()
{
    // Action data and state buffer.
    delete lActionData;
    delete modelState;

    // Scratch vectors.
    delete outputError;
    delete target;
    delete input;
}
// Listener callback: performs one learning step when the executed action is the action
// this model learns. Any other action is ignored.
void CLearnedTransitionFunction::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
    CAction *candidate = action;

    // A static continuous action that is not the learned action itself is replaced by the
    // continuous action it refers to before the comparison below.
    if (candidate->isType(CONTINUOUSSTATICACTION) && action != actionToLearn)
    {
        CStaticContinuousAction *staticAction = dynamic_cast<CStaticContinuousAction *>(candidate);
        candidate = staticAction->getContinuousAction();
    }

    if (candidate != actionToLearn)
    {
        return;
    }

    doLearnStep(oldState->getState(properties),
                candidate,
                candidate->getActionData(),
                newState->getState(properties));
}
// Predicts the successor state with the learner.
//
// The learner input is assembled from oldState and the action's own action data; the
// learner output is written into the member vector `target`. The leading outputs are copied
// into the continuous state variables of newState. Discrete state variables are not set.
// When duration learning is enabled, the learned action is a multi-step action and `data`
// is non-null, the output following the continuous states is rounded and stored as the
// duration in `data`.
void CLearnedTransitionFunction::transitionFunction(CState *oldState, CAction *action, CState *newState, CActionData *data)
{
    getInputVector(oldState, action->getActionData(), input);
    learner->testExample(input, target);

    const unsigned int numContinuous = newState->getNumContinuousStates();
    for (unsigned int dim = 0; dim < numContinuous; ++dim)
    {
        newState->setContinuousState(dim, target->getElement(dim));
    }

    // Nothing more to do unless a duration has to be reported back.
    if (!learnDuration || !actionToLearn->isType(MULTISTEPACTION) || !data)
    {
        return;
    }

    CMultiStepActionData *multiStepData = dynamic_cast<CMultiStepActionData *>(data);
    multiStepData->duration = my_round(target->getElement(numContinuous));
}
// Fills `input` with the learner input for (oldState, data).
//
// The vector is zeroed first, then the continuous state variables are written to the
// leading elements. If the learned action is a continuous action, the elements of its
// action data follow directly after the state variables. Discrete state variables are
// not part of the input.
void CLearnedTransitionFunction::getInputVector(CState *oldState, CActionData *data, CMyVector *input)
{
    input->initVector(0.0);

    const unsigned int stateDims = oldState->getNumContinuousStates();
    for (unsigned int s = 0; s < stateDims; ++s)
    {
        input->setElement(s, oldState->getContinuousState(s));
    }

    if (!actionToLearn->isType(CONTINUOUSACTION))
    {
        return;
    }

    // Append the continuous action values behind the state block.
    CContinuousActionData *actionValues = dynamic_cast<CContinuousActionData *>(data);
    for (unsigned int a = 0; a < actionValues->getNumDimensions(); ++a)
    {
        input->setElement(a + stateDims, actionValues->getElement(a));
    }
}
// Fills `target` with the training target for the observed successor state.
//
// The vector is zeroed first, then the continuous state variables of newState are written
// to the leading elements. When duration learning is enabled and the learned action is a
// multi-step action, the duration stored in `data` is written to the element following the
// continuous states. Discrete state variables are not part of the target.
void CLearnedTransitionFunction::getTargetVector(CState *newState, CActionData *data, CMyVector *target)
{
    target->initVector(0.0);

    const unsigned int stateDims = newState->getNumContinuousStates();
    for (unsigned int s = 0; s < stateDims; ++s)
    {
        target->setElement(s, newState->getContinuousState(s));
    }

    if (!learnDuration || !actionToLearn->isType(MULTISTEPACTION))
    {
        return;
    }

    CMultiStepActionData *multiStepData = dynamic_cast<CMultiStepActionData *>(data);
    target->setElement(stateDims, multiStepData->duration);
}
// Trains the learner with one observed transition (oldState, data) -> newState.
//
// oldState - state before the action was executed.
// action   - the executed action (passed on to transitionFunction()).
// data     - action data that was used for the executed action.
// newState - observed successor state.
//
// The model's current prediction is computed first so the per-output error can be derived,
// then the learner is given the input, the observed target and that error.
void CLearnedTransitionFunction::doLearnStep(CState *oldState, CAction *action, CActionData *data, CState*newState)
{
    // Predict before assembling the training vectors: transitionFunction() refills the member
    // vector `input` and lets the learner write its prediction into the member vector `target`,
    // so anything stored in them beforehand would be overwritten.
    lActionData->setData(data);
    transitionFunction(oldState, action, modelState, lActionData);

    // Now build the actual training example from the observed transition.
    getInputVector(oldState, data, input);
    getTargetVector(newState, data, target);

    const unsigned int numContinuous = modelState->getNumContinuousStates();
    for (unsigned int i = 0; i < numContinuous; i++)
    {
        outputError->setElement(i, - modelState->getSingleStateDifference(i, newState->getContinuousState(i)));
    }

    if (learnDuration && actionToLearn->isType(MULTISTEPACTION))
    {
        // Duration error: predicted duration (written into lActionData by transitionFunction)
        // minus the observed duration.
        CMultiStepActionData *observed = dynamic_cast<CMultiStepActionData *>(data);
        CMultiStepActionData *predicted = dynamic_cast<CMultiStepActionData *>(lActionData);
        outputError->setElement(numContinuous, predicted->duration - observed->duration);
    }

    learner->learnExample(input, target, outputError);
}
void CLearnedTransitionFunction::saveData(FILE *stream)
{
learner->saveData(stream);
}
void CLearnedTransitionFunction::loadData(FILE *stream)
{
learner->loadData(stream);
}
void CLearnedTransitionFunction::resetData()
{
learner->resetData();
}
// Creates an empty composed model; the lookup table from registered transition functions
// to their learned-model pointers starts out empty.
CComposedLearnedTransitionFunction::CComposedLearnedTransitionFunction(CStateProperties *properties) : CComposedTransitionFunction(properties)
{
    typedef std::map<CTransitionFunction *, CLearnedTransitionFunction *> LearnedModelMap;
    learnedModels = new LearnedModelMap();
}
// Releases the lookup table only; the registered models it points to are not deleted here.
CComposedLearnedTransitionFunction::~CComposedLearnedTransitionFunction()
{
    delete this->learnedModels;
}
// Registers a learned model: adds it to the base class, records it in the lookup table
// (keyed by its own address) and adds it as a parameter source.
void CComposedLearnedTransitionFunction::addTransitionFunction(CLearnedTransitionFunction *model)
{
    CComposedTransitionFunction::addTransitionFunction(model);

    // operator[] creates the entry if needed; the slot then refers to the model itself.
    CLearnedTransitionFunction *&slot = (*learnedModels)[model];
    slot = model;

    addParameters(model);
}
// Forwards the observed step to every registered learned model whose action set contains
// the executed action.
//
// Transition functions that have no entry in the lookup table are skipped. The table is
// searched with find() because operator[] would insert a null entry for an unknown key,
// which would then be dereferenced.
void CComposedLearnedTransitionFunction::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
    typedef std::map<CTransitionFunction *, CLearnedTransitionFunction *> LearnedModelMap;

    std::list<CTransitionFunction *>::iterator it;
    for (it = TransitionFunction->begin(); it != TransitionFunction->end(); ++it)
    {
        if (!(*it)->getActions()->isMember(action))
        {
            continue;
        }

        LearnedModelMap::iterator entry = learnedModels->find(*it);
        if (entry != learnedModels->end() && entry->second)
        {
            entry->second->nextStep(oldState, action, newState);
        }
    }
}
void CComposedLearnedTransitionFunction::saveData(FILE *stream)
{
std::list<CTransitionFunction *>::iterator it;
for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
{
(*learnedModels)[(*it)]->saveData(stream);
}
}
void CComposedLearnedTransitionFunction::loadData(FILE *stream)
{
std::list<CTransitionFunction *>::iterator it;
for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
{
(*learnedModels)[(*it)]->loadData(stream);
}
}
void CComposedLearnedTransitionFunction::resetData()
{
std::list<CTransitionFunction *>::iterator it;
for (it = TransitionFunction->begin(); it != TransitionFunction->end(); it ++)
{
(*learnedModels)[(*it)]->resetData();
}
}
// Sets up a trainer that samples transitions from `trainModel` and reports them to
// `learnModel`.
//
// trainModel - transition function used to generate the samples.
// actions    - action set the random actions are drawn from.
// learnModel - listener registered to receive every generated step.
CStochasticModelTrainer::CStochasticModelTrainer(CTransitionFunction *trainModel, CActionSet *actions, CSemiMDPListener *learnModel) : CActionObject(actions)
{
    this->learnModel = learnModel;
    this->trainModel = trainModel;

    // Per-action data storage for the sampled actions.
    actionDataSet = new CActionDataSet(actions);

    // State buffers share the state properties of the sampled model.
    nextState = new CState(trainModel->getStateProperties());
    currentState = new CState(trainModel->getStateProperties());

    addSemiMDPListener(learnModel);
}
// Frees the state buffers and the action data set allocated in the constructor.
// The sampled model and the listener are not deleted here.
CStochasticModelTrainer::~CStochasticModelTrainer()
{
    delete actionDataSet;
    delete nextState;
    delete currentState;
}
void CStochasticModelTrainer::doRandomLearnStep()
{
for (unsigned int i = 0; i < currentState->getNumContinuousStates(); i++)
{
rlt_real width = currentState->getStateProperties()->getMaxValue(i) - currentState->getStateProperties()->getMinValue(i);
currentState->setContinuousState(i, currentState->getStateProperties()->getMinValue(i) + (rlt_real) rand() / RAND_MAX * width);
}
for (unsigned int i = 0; i < currentState->getNumDiscreteStates(); i++)
{
currentState->setDiscreteState(i, rand() % currentState->getStateProperties()->getDiscreteStateSize(i));
}
int index = rand() % actions->size();
CAction *action = actions->get(index);
CActionData *data = actionDataSet->getActionData(action);
if (action->isType(CONTINUOUSACTION))
{
CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(data);
CContinuousAction *contAction = dynamic_cast<CContinuousAction *>(action);
for (unsigned int i = 0; i < contData->getNumDimensions(); i++)
{
rlt_real width = contAction->getContinuousActionProperties()->getMaxActionValue(i) - contAction->getContinuousActionProperties()->getMinActionValue(i);
currentState->setContinuousState(i, contAction->getContinuousActionProperties()->getMinActionValue(i) + (rlt_real) rand() / RAND_MAX * width);
}
}
trainModel->transitionFunction(currentState, action, nextState, data);
action->loadActionData(data);
this->sendNextStep(currentState, action, nextState);
}
// Runs doRandomLearnStep() nSteps times; a non-positive nSteps does nothing.
void CStochasticModelTrainer::doNLearnSteps(int nSteps)
{
    int remaining = nSteps;
    while (remaining > 0)
    {
        doRandomLearnStep();
        --remaining;
    }
}
// Sets up a comparer that measures how well `dynModel` predicts observed transitions.
//
// dynModel     - model whose predictions are evaluated.
// compareState - state properties used to pick the states out of the state collections.
CTransitionFunctionComparer::CTransitionFunctionComparer(CTransitionFunction *dynModel, CStateProperties *compareState)
{
    compareModel = dynModel;
    compareState2Properties = compareState;

    // Buffer that receives the model's predicted successor state.
    compareState1 = new CState(dynModel->getStateProperties());

    resetStatistics();
}
// Listener callback: compares the model's prediction for (oldState, action) with the
// observed successor state and updates the running statistics.
//
// Continuous part: half the squared distance between predicted and observed state is
// added to the accumulated error and tracked as a maximum.
// Discrete part: the number of differing discrete state variables is added to the
// accumulated failure count and tracked as a maximum.
void CTransitionFunctionComparer::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState)
{
    ++nExamples;

    // Let the model predict into the internal buffer.
    compareModel->transitionFunction(oldState->getState(compareState2Properties), action, compareState1);

    CState *observed = nextState->getState(compareState2Properties);

    rlt_real mse = 0.5 * pow(compareState1->getDistance(observed),2);
    contMse += mse;
    if (mse > maxMse)
    {
        maxMse = mse;
    }

    // Count mismatching discrete state variables.
    int failures = 0;
    const unsigned int numDiscrete = observed->getNumDiscreteStates();
    for (unsigned int d = 0; d < numDiscrete; ++d)
    {
        if (compareState1->getDiscreteState(d) != observed->getDiscreteState(d))
        {
            ++failures;
        }
    }

    discreteFailures += failures;
    if (failures > maxDiscreteFailure)
    {
        maxDiscreteFailure = failures;
    }
}
void CTransitionFunctionComparer::resetStatistics()
{
nExamples = 0;
this->contMse = 0;
this->discreteFailures = 0;
this->maxMse = 0;
this->maxDiscreteFailure = 0;
}
// Returns the accumulated continuous error divided by the number of compared examples.
// Returns 0 when no example has been compared yet (e.g. directly after resetStatistics()),
// instead of dividing by zero.
rlt_real CTransitionFunctionComparer::getContinuousMSE()
{
    if (nExamples == 0)
    {
        return 0;
    }
    return contMse / nExamples;
}
// Returns the largest per-example continuous error recorded since the last reset.
rlt_real CTransitionFunctionComparer::getContinuousMaxSquaredError()
{
    return this->maxMse;
}
// Returns the average number of mismatching discrete state variables per compared example.
// Returns 0 when no example has been compared yet, instead of dividing by zero.
rlt_real CTransitionFunctionComparer::getDiscreteFailures()
{
    if (nExamples == 0)
    {
        return 0;
    }
    // Convert before dividing so the average is not truncated should the counters be integers.
    return static_cast<rlt_real>(discreteFailures) / nExamples;
}
// Returns the largest per-example count of mismatching discrete state variables recorded
// since the last reset.
rlt_real CTransitionFunctionComparer::getMaxDiscreteFailure()
{
    return this->maxDiscreteFailure;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -