// cvfunctionlearner.cpp
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cassert>

#include "ril_debug.h"
#include "cvfunctionlearner.h"
// Constructs a calculator that reads its target value from a V-function.
//
// l_vFunction is stored as a pointer (not copied) and is queried in nextStep().
// All remaining arguments are forwarded unchanged to the
// CAdaptiveParameterBoundedValuesCalculator base constructor.
CAdaptiveParameterFromValueCalculator::CAdaptiveParameterFromValueCalculator(
    CAbstractVFunction *l_vFunction,
    int functionKind,
    rlt_real param0,
    rlt_real paramScale,
    rlt_real targetMin,
    rlt_real targetMax)
    : CAdaptiveParameterBoundedValuesCalculator(functionKind, param0, paramScale, targetMin, targetMax)
{
    vFunction = l_vFunction;
}
// Empty destructor body: the stored V-function pointer is not deleted here.
CAdaptiveParameterFromValueCalculator::~CAdaptiveParameterFromValueCalculator() {}
// Step callback: sets targetValue to the V-function's value of newState.
// oldState and action are not used by this implementation.
void CAdaptiveParameterFromValueCalculator::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
    this->targetValue = this->vFunction->getValue(newState);
}
// Constructs a learner that uses caller-supplied e-traces.
//
// rewardFunction is passed to the CSemiMDPRewardListener base.
// vFunction is the value function to be learned.
// eTraces is marked as external (bExternETraces = true), so the destructor
// of this class leaves it alone.
//
// Registers the parameters "VLearningRate" (0.2) and "DiscountFactor" (0.95)
// and then adds the parameters of the V-function and of the e-traces.
CVFunctionLearner::CVFunctionLearner(
    CRewardFunction *rewardFunction,
    CAbstractVFunction *vFunction,
    CAbstractVETraces *eTraces)
    : CSemiMDPRewardListener(rewardFunction)
{
    // Externally owned traces: must not be deleted by this object.
    bExternETraces = true;
    this->vFunction = vFunction;
    this->eTraces = eTraces;

    // Own parameters first, then those of the collaborating objects.
    addParameter("VLearningRate", 0.2);
    addParameter("DiscountFactor", 0.95);

    addParameters(vFunction);
    addParameters(eTraces);
}
// Constructs a learner that uses the V-function's standard e-traces.
//
// rewardFunction is passed to the CSemiMDPRewardListener base.
// vFunction is the value function to be learned; its getStandardETraces()
// result becomes this learner's e-traces. They are marked as internal
// (bExternETraces = false), so the destructor deletes them.
//
// Registers the parameters "VLearningRate" (0.2) and "DiscountFactor" (0.95)
// and then adds the parameters of the V-function and of the e-traces.
CVFunctionLearner::CVFunctionLearner(
    CRewardFunction *rewardFunction,
    CAbstractVFunction *vFunction)
    : CSemiMDPRewardListener(rewardFunction)
{
    // Traces obtained below are released in ~CVFunctionLearner().
    bExternETraces = false;
    this->vFunction = vFunction;
    this->eTraces = vFunction->getStandardETraces();

    // Own parameters first, then those of the collaborating objects.
    addParameter("VLearningRate", 0.2);
    addParameter("DiscountFactor", 0.95);

    addParameters(vFunction);
    addParameters(this->eTraces);
}
// Deletes the e-traces unless they were supplied by the caller
// (bExternETraces set by the three-argument constructor).
CVFunctionLearner::~CVFunctionLearner()
{
    if (bExternETraces)
    {
        return;
    }

    delete eTraces;
}
// Returns the current value of the "VLearningRate" parameter.
rlt_real CVFunctionLearner::getLearningRate()
{
    return this->getParameter("VLearningRate");
}
// Stores learningRate in the "VLearningRate" parameter.
void CVFunctionLearner::setLearningRate(rlt_real learningRate)
{
    this->setParameter("VLearningRate", learningRate);
}
// Step callback: computes the temporal difference for the transition
// oldState --action--> nextState with the given reward, logs it on debug
// channel 't', and updates the V-function with it.
void CVFunctionLearner::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
    rlt_real temporalDifference = getTemporalDifference(oldState, action, reward, nextState);

    DebugPrint('t', "TD %f\n", temporalDifference);

    updateVFunction(oldState, nextState, action->getDuration(), temporalDifference);
}
// Intermediate-step callback: adds e-traces for the transition and then
// updates the value of oldState directly by (temporal difference * learning
// rate). Unlike nextStep(), it neither calls eTraces->updateETraces() nor
// applies the update through the e-traces.
void CVFunctionLearner::intermediateStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
    // Traces are added before the temporal difference is evaluated.
    this->addETraces(oldState, nextState, action->getDuration());

    this->vFunction->updateValue(
        oldState,
        getTemporalDifference(oldState, action, reward, nextState) * getLearningRate());
}
// Applies one learning update through the e-traces.
//
// Order matters: the existing traces are first updated for `duration`, then
// the traces for the current transition are added, and only then the
// V-function is updated with td scaled by the learning rate.
void CVFunctionLearner::updateVFunction(CStateCollection *oldState, CStateCollection *newState, int duration, rlt_real td)
{
    this->eTraces->updateETraces(duration);

    this->addETraces(oldState, newState, duration);

    this->eTraces->updateVFunction(td * getLearningRate());
}
// Adds an e-trace for oldState. newState and duration are ignored by this
// implementation; CVFunctionGradientLearner::addETraces makes use of them.
void CVFunctionLearner::addETraces(CStateCollection *oldState, CStateCollection *newState, int duration)
{
    this->eTraces->addETrace(oldState);
}
// Returns the e-traces object used by this learner (no ownership transfer
// happens here; the pointer is simply handed out).
CAbstractVETraces *CVFunctionLearner::getVETraces()
{
    return this->eTraces;
}
// Computes the temporal difference of a transition:
//
//   reward + DiscountFactor^duration * V(nextState) - V(oldState)
//
// where duration is action->getDuration(). The result is also passed to
// sendErrorToListeners() (with NULL as the last argument) before being
// returned.
rlt_real CVFunctionLearner::getTemporalDifference(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
    // State values before and after the transition.
    rlt_real valueBefore = vFunction->getValue(oldState);
    rlt_real valueAfter = vFunction->getValue(nextState);

    DebugPrint('t',"VFunctionLearner: oldQ %f,newQ %f, reward %f, ",valueBefore, valueAfter, reward);

    rlt_real tdError = reward + pow(getParameter("DiscountFactor"), action->getDuration()) * valueAfter - valueBefore;

    sendErrorToListeners(tdError, oldState, action, NULL);

    return tdError;
}
// Returns the V-function this learner updates.
CAbstractVFunction *CVFunctionLearner::getVFunction()
{
    return this->vFunction;
}
void CVFunctionLearner::newEpisode()
{
eTraces->resetETraces();
}
// Constructs a gradient-based V-function learner.
//
// rewardFunction and vFunction are forwarded to the two-argument
// CVFunctionLearner constructor, which obtains the e-traces from
// vFunction->getStandardETraces().
// residual computes the error value used in getTemporalDifference().
// residualGradientFunction combines the old/new state gradients in
// addETraces().
//
// The parameters of residual and residualGradientFunction are added to this
// learner. Three scratch CFeatureList objects are allocated here and released
// in the destructor.
CVFunctionGradientLearner::CVFunctionGradientLearner(CRewardFunction *rewardFunction, CGradientVFunction *vFunction, CResidualFunction *residual, CResidualGradientFunction *residualGradientFunction) : CVFunctionLearner(rewardFunction, vFunction)
{
    this->residual = residual;
    this->residualGradientFunction = residualGradientFunction;
    addParameters(residual);
    addParameters(residualGradientFunction);

    this->gradientVFunction = vFunction;

    this->oldGradient = new CFeatureList();
    this->newGradient = new CFeatureList();
    this->residualGradient = new CFeatureList();

    // dynamic_cast yields a null pointer when the e-traces created by the base
    // class are not CGradientVETraces. gradientETraces is dereferenced without
    // any check in addETraces() (and in CVFunctionResidualLearner), so catch
    // the mismatch here instead of crashing later in the learning loop.
    this->gradientETraces = dynamic_cast<CGradientVETraces *>(eTraces);
    assert(this->gradientETraces != 0);
}
// Releases the three scratch feature lists allocated in the constructor.
CVFunctionGradientLearner::~CVFunctionGradientLearner()
{
    delete this->oldGradient;
    delete this->newGradient;
    delete this->residualGradient;
}
// Adds the residual gradient of the transition to the gradient e-traces.
//
// The gradients of the V-function at oldState and newState are written into
// the scratch lists, combined by residualGradientFunction (together with
// duration) into residualGradient, and that result is added to the
// e-traces with factor -1.0.
void CVFunctionGradientLearner::addETraces(CStateCollection *oldState, CStateCollection *newState, int duration)
{
    // Scratch lists are reused between calls, so empty them first.
    this->oldGradient->clear();
    this->newGradient->clear();
    this->residualGradient->clear();

    // Gradients of V at both ends of the transition.
    this->gradientVFunction->getGradient(oldState, this->oldGradient);
    this->gradientVFunction->getGradient(newState, this->newGradient);

    this->residualGradientFunction->getResidualGradient(
        this->oldGradient, this->newGradient, duration, this->residualGradient);

    this->gradientETraces->addGradientETrace(this->residualGradient, -1.0);
}
// Computes the error of a transition via the residual function:
// residual->getResidual(V(oldState), reward, action duration, V(nextState)).
// The result is also passed to sendErrorToListeners() (with NULL as the last
// argument) before being returned.
rlt_real CVFunctionGradientLearner::getTemporalDifference(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
    rlt_real residualError = residual->getResidual(
        vFunction->getValue(oldState),
        reward,
        action->getDuration(),
        vFunction->getValue(nextState));

    sendErrorToListeners(residualError, oldState, action, NULL);

    return residualError;
}
// Constructs a residual learner.
//
// rewardFunction, vFunction, residual and residualGradient are forwarded to
// the CVFunctionGradientLearner constructor. betaCalc is stored and later
// asked for the beta weight in updateVFunction().
//
// Three additional CGradientVETraces objects are created on gradientVFunction
// (released in the destructor). The parameters of betaCalc and
// residualETraces are added directly; those of the two gradient trace objects
// are added with the "Gradient" argument, after which
// "GradientReplacingETraces" is set to 0.0.
CVFunctionResidualLearner::CVFunctionResidualLearner(
    CRewardFunction *rewardFunction,
    CGradientVFunction *vFunction,
    CResidualFunction *residual,
    CResidualGradientFunction *residualGradient,
    CAbstractBetaCalculator *betaCalc)
    : CVFunctionGradientLearner(rewardFunction, vFunction, residual, residualGradient)
{
    betaCalculator = betaCalc;

    // Extra trace objects owned by this learner.
    residualETraces = new CGradientVETraces(gradientVFunction);
    directGradientTraces = new CGradientVETraces(gradientVFunction);
    residualGradientTraces = new CGradientVETraces(gradientVFunction);

    addParameters(betaCalc);
    addParameters(residualETraces);
    addParameters(directGradientTraces, "Gradient");
    addParameters(residualGradientTraces, "Gradient");

    setParameter("GradientReplacingETraces", 0.0);
}
// Releases the three trace objects created in the constructor.
CVFunctionResidualLearner::~CVFunctionResidualLearner()
{
    delete this->residualETraces;
    delete this->residualGradientTraces;
    delete this->directGradientTraces;
}
void CVFunctionResidualLearner::newEpisode()
{
CVFunctionGradientLearner::newEpisode();
residualETraces->resetETraces();
residualGradientTraces->resetETraces();
directGradientTraces->resetETraces();
}
// Adds the gradients of the current transition to all four trace objects.
//
// The old-state gradient and the residual gradient (computed by
// residualGradientFunction from the old/new state gradients and duration)
// are added as follows:
//   directGradientTraces   += oldGradient      *  td
//   residualGradientTraces += residualGradient * -td
//   gradientETraces        += oldGradient      *  1.0
//   residualETraces        += residualGradient * -1.0
// When debug channel 'v' is enabled, the residual gradient is dumped to that
// channel's file handle.
void CVFunctionResidualLearner::addETraces(CStateCollection *oldState, CStateCollection *newState, int duration, rlt_real td)
{
    // Scratch lists are reused between calls, so empty them first.
    this->oldGradient->clear();
    this->newGradient->clear();
    this->residualGradient->clear();

    this->gradientVFunction->getGradient(oldState, this->oldGradient);
    this->gradientVFunction->getGradient(newState, this->newGradient);

    this->residualGradientFunction->getResidualGradient(
        this->oldGradient, this->newGradient, duration, this->residualGradient);

    if (DebugIsEnabled('v'))
    {
        DebugPrint('v', "Residual Gradient: ");
        this->residualGradient->saveASCII(DebugGetFileHandle('v'));
        DebugPrint('v', "\n");
    }

    // td-weighted traces (these are the inputs to the beta calculation in updateVFunction()).
    this->directGradientTraces->addGradientETrace(this->oldGradient, td);
    this->residualGradientTraces->addGradientETrace(this->residualGradient, -td);

    // Unweighted traces used for the actual V-function updates.
    this->gradientETraces->addGradientETrace(this->oldGradient, 1.0);
    this->residualETraces->addGradientETrace(this->residualGradient, -1.0);
}
// Applies one learning update that mixes the two trace-based updates with
// the weight beta supplied by betaCalculator.
//
// Order matters: all traces are updated first, beta is then computed from
// the two td-weighted trace objects *before* the current transition is added
// to them, and finally the V-function is updated through gradientETraces with
// td * learningRate * (1 - beta) and through residualETraces with
// td * learningRate * beta.
void CVFunctionResidualLearner::updateVFunction(CStateCollection *oldState, CStateCollection *newState, int duration, rlt_real td)
{
    this->gradientETraces->updateETraces(duration);
    this->residualETraces->updateETraces(duration);

    // The td-weighted traces are updated with a fixed duration of 1.
    this->residualGradientTraces->updateETraces(1);
    this->directGradientTraces->updateETraces(1);

    rlt_real mixWeight = this->betaCalculator->getBeta(
        this->directGradientTraces->getGradientETraces(),
        this->residualGradientTraces->getGradientETraces());

    this->addETraces(oldState, newState, duration, td);

    rlt_real stepSize = getLearningRate();

    this->gradientETraces->updateVFunction(td * stepSize * (1 - mixWeight));
    this->residualETraces->updateVFunction(td * stepSize * mixWeight);
}
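// (Code-viewer page residue, not part of the original source.)
// Keyboard shortcuts:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Switch theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -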