📄 cpendulummodel.cpp
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpendulummodel.h"
#include <math.h>
CPendulumModel::CPendulumModel(rlt_real dt, rlt_real uMax , rlt_real dPhiMax, rlt_real length, rlt_real mass, rlt_real mu, rlt_real g) : CDynamicLinearActionContinuousTimeModel(new CStateProperties(2,0), new CContinuousAction(new CContinuousActionProperties(1)), dt)
{
this->uMax = uMax;
this->dPhiMax = dPhiMax;
this->length = length;
this->mass = mass;
this->mu = mu;
this->g = g;
/*addParameter("UMax", uMax);
addParameter("DPhiMax",dPhiMax);
addParameter("Length", length);
addParameter("Mass", mass);
addParameter("Friction", mu);
addParameter("Gravity", g);*/
actionProp->setMaxActionValue(0, uMax);
actionProp->setMinActionValue(0, -uMax);
properties->setMaxValue(0, M_PI);
properties->setMinValue(0, - M_PI);
properties->setPeriodicity(0, true);
properties->setMaxValue(1, dPhiMax);
properties->setMinValue(1, - dPhiMax);
}
CPendulumModel::~CPendulumModel()
{
delete properties;
delete actionProp;
delete contAction;
}
CMyMatrix *CPendulumModel::getB(CState *state)
{
/* rlt_real mass = getParameter("Mass");
rlt_real length = getParameter("Length");*/
B->setElement(0,0, 0.0);
B->setElement(1,0, 1 / (mass * pow(length,2)));
return B;
}
CMyVector *CPendulumModel::getA(CState *state)
{
/*rlt_real mass = getParameter("Mass");
rlt_real length = getParameter("Length");
rlt_real mu = getParameter("Friction");
rlt_real g = getParameter("Gravity");
*/
rlt_real dphi = state->getContinuousState(1);
A->setElement(0, dphi);
rlt_real ddphi = 1 / (mass * pow(length,2)) * (- mu * state->getContinuousState(1) + mass * g * length * sin(state->getContinuousState(0)));
A->setElement(1, ddphi);
return A;
}
void CPendulumModel::setParameter(string paramName, rlt_real value)
{
if (paramName == "UMax")
{
actionProp->setMaxActionValue(0, value);
actionProp->setMinActionValue(0, -value);
}
if (paramName == "DPhiMax")
{
properties->setMaxValue(1, value);
properties->setMinValue(1, - value);
}
CParameterObject::setParameter(paramName, value);
}
bool CPendulumModel::isFailedState(CState *state)
{
return false;
}
void CPendulumModel::getResetState(CState *resetState)
{
CTransitionFunction::getResetState(resetState);
if (resetType != DM_RESET_TYPE_ALL_RANDOM)
{
resetState->setContinuousState(1, 0);
}
}
void CPendulumModel::doSimulationStep(CState *state, rlt_real timestep, CAction *action, CActionData *data)
{
getDerivationX(state, action, derivation, data);
rlt_real ddPhi = derivation->getElement(1);
state->setContinuousState(0, state->getContinuousState(0) + timestep * derivation->getElement(0) + pow(timestep,2) / 2 * ddPhi);
state->setContinuousState(1, state->getContinuousState(1) + timestep * derivation->getElement(1));
}
CPendulumRewardFunction::CPendulumRewardFunction(CPendulumModel *model) : CStateReward(model->getStateProperties())
{
rewardFactor = 1.0;
}
rlt_real CPendulumRewardFunction::getStateReward(CState *state)
{
rlt_real Phi = state->getContinuousState(0);
return rewardFactor * (cos(Phi) - 1);
}
void CPendulumRewardFunction::getInputDerivation(CState *modelState, CMyVector *targetState)
{
rlt_real Phi = modelState->getState(properties)->getContinuousState(0);
targetState->setElement(1, 0);
targetState->setElement(0, - sin(Phi));
}
#ifdef RL_TOOLBOX_USE_QT
CQTPendulumVisualizer::CQTPendulumVisualizer( CPendulumModel *pendModel, QWidget *parent, const char *name) : CQTModelVisualizer(parent, name)
{
this->pendModel = pendModel;
phi = 0;
dphi = 0;
this->setCaption("Pendulum Model Visualizer");
}
void CQTPendulumVisualizer::doDrawState( QPainter *painter)
{
//QString s1 = "Phi = " + QString::number( phi );
//QString s2 = "Phi' = " + QString::number( dphi );
//painter->drawText(10,20, s1);
//painter->drawText(10,40, s2);
painter->drawLine(0, drawWidget->height() / 2, drawWidget->width(), drawWidget->height() / 2);
painter->translate(drawWidget->width() / 2, drawWidget->height() / 2);
painter->rotate(phi + 180);
painter->setBrush(black);
painter->drawRect(- 3, -5, 6, (pendModel->length * 100) + 10);
painter->translate(0,(pendModel->length * 100) + 5);
painter->drawEllipse(-6,-6,12,12);
painter->flush();
}
void CQTPendulumVisualizer::newDrawState(CStateCollection *state)
{
phi = state->getState()->getContinuousState(0) * 180 / M_PI;
dphi = state->getState()->getContinuousState(1) * 180 / M_PI;
}
#endif
CPendulumUpTimeCalculator::CPendulumUpTimeCalculator(rlt_real phi_up, rlt_real dt)
{
this->phi_up = phi_up;
this->dt = dt;
this->up_steps = 0;
}
void CPendulumUpTimeCalculator::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState)
{
if (fabs(oldState->getState()->getContinuousState(0)) < phi_up)
{
up_steps ++;
}
}
void CPendulumUpTimeCalculator::newEpisode()
{
up_steps = 0;
}
rlt_real CPendulumUpTimeCalculator::getUpTime()
{
return up_steps * dt;
}
int CPendulumUpTimeCalculator::getUpSteps()
{
return up_steps;
}
/*
bool CTestSuitePendulumUpTimeCalculatorEvaluator::isEpisodeSuccessFull(FILE *stream)
{
printf("Upsteps %d (needed %d)\n", upTimeCalc->getUpSteps(), neededUpSteps);
if (stream)
{
fprintf(stream,"Upsteps %d\n", upTimeCalc->getUpSteps());
}
return upTimeCalc->getUpSteps() >= neededUpSteps;
}
CTestSuitePendulumUpTimeCalculatorEvaluator::CTestSuitePendulumUpTimeCalculatorEvaluator(CAgent *agent, int neededSuccEpisodes, int maxEpisodes, int stepsPerEpisode, int neededUpSteps, rlt_real phi_up) : CTestSuiteEpisodesToLearnEvaluator(agent, neededSuccEpisodes, maxEpisodes, stepsPerEpisode)
{
this->neededUpSteps = neededUpSteps;
upTimeCalc = new CPendulumUpTimeCalculator(phi_up, 1.0);
agent->addSemiMDPListener(upTimeCalc);
}
CTestSuitePendulumUpTimeCalculatorEvaluator::~CTestSuitePendulumUpTimeCalculatorEvaluator()
{
agent->removeSemiMDPListener(upTimeCalc);
delete upTimeCalc;
}
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -