📄 cagent.cpp

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
// Copyright (C) 200
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cagent.h"
#include "cepisode.h"
#include "ril_debug.h"
#include "ccontinuousactions.h"
#include <assert.h>

#ifdef WIN32

#include <conio.h>

/**
 * WIN32 variant: reports whether a key press is waiting in the console input
 * buffer. When one is, every pending key press is read and discarded before
 * returning.
 *
 * @return true if at least one key press was pending, false otherwise.
 */
bool RIL_Toolbox_KeyboardHit()
{
	if (_kbhit() == 0)
	{
		return false;
	}

	// Drain the buffer so that a single key press is reported only once.
	while (_kbhit() != 0)
	{
		_getch();
	}
	return true;
}

// WIN32 variant: intentionally empty. No terminal setup is performed here;
// the WIN32 RIL_Toolbox_KeyboardHit() queries the console via _kbhit()/_getch().
void RIL_Toolbox_Set_Keypress()
{
}

// WIN32 variant: intentionally empty. RIL_Toolbox_Set_Keypress() changes
// nothing on this platform, so there is nothing to restore.
void RIL_Toolbox_Reset_Keypress()
{
}

#else // UNIX

#include <poll.h>
#include <termios.h>
#include <unistd.h>

// Terminal settings captured by RIL_Toolbox_Set_Keypress() and written back
// by RIL_Toolbox_Reset_Keypress().
static struct termios RIL_Toolbox_stored_settings;

/**
 * UNIX variant: saves the current terminal settings of standard input and
 * then switches it to non-canonical mode, so that reads return as soon as a
 * single byte is available.
 *
 * The saved settings are kept in RIL_Toolbox_stored_settings.
 */
void RIL_Toolbox_Set_Keypress()
{
    // Remember the settings that are active right now.
    tcgetattr(STDIN_FILENO, &RIL_Toolbox_stored_settings);

    // Work on a copy: canonical mode off, a read needs one byte and no timeout.
    struct termios keypress_settings = RIL_Toolbox_stored_settings;
    keypress_settings.c_lflag &= ~ICANON;
    keypress_settings.c_cc[VMIN] = 1;
    keypress_settings.c_cc[VTIME] = 0;

    tcsetattr(STDIN_FILENO, TCSANOW, &keypress_settings);
}

/**
 * UNIX variant: applies the contents of RIL_Toolbox_stored_settings to
 * standard input immediately (TCSANOW).
 */
void RIL_Toolbox_Reset_Keypress()
{
    tcsetattr(STDIN_FILENO, TCSANOW, &RIL_Toolbox_stored_settings);
}

/**
 * UNIX variant: reports whether standard input has an event pending.
 *
 * Polls STDIN_FILENO for POLLIN with a timeout of 1 millisecond. The pending
 * input is not consumed by this function.
 *
 * @return true if poll() reported an event on standard input; false if the
 *         poll timed out or failed.
 */
bool RIL_Toolbox_KeyboardHit()
{
	struct pollfd p;
	p.fd = STDIN_FILENO;
	p.events = POLLIN;
	// revents is not guaranteed to be written when poll() fails, so give it a
	// defined value instead of reading an indeterminate one.
	p.revents = 0;

	const int numfds = poll(&p, 1, 1);

	// poll() returns -1 on error; only a positive count means an event was
	// actually reported for our descriptor.
	return numfds > 0 && p.revents != 0;
}

#endif // WIN32

/// Creates the sender with an empty, heap-allocated listener list.
CSemiMDPSender::CSemiMDPSender()
{
	typedef std::list<CSemiMDPListener *> ListenerList;

	SMDPListeners = new ListenerList;
}

/// Frees the listener list itself; the listener objects it points to are not deleted.
CSemiMDPSender::~CSemiMDPSender()
{
	delete this->SMDPListeners;
}

/**
 * Appends a listener to the end of the listener list.
 * A listener that is already registered is not added a second time.
 *
 * @param listener listener to register.
 */
void CSemiMDPSender::addSemiMDPListener(CSemiMDPListener *listener)
{
	if (isListenerAdded(listener))
	{
		// Already registered: keep the list free of duplicates.
		return;
	}

	SMDPListeners->push_back(listener);
}

/**
 * Removes every entry equal to the given listener from the listener list.
 *
 * @param listener listener to unregister.
 */
void CSemiMDPSender::removeSemiMDPListener(CSemiMDPListener *listener)
{
	this->SMDPListeners->remove(listener);
}

/**
 * Checks whether the given listener pointer is in the listener list.
 *
 * @param listener listener to look for (compared by pointer value).
 * @return true if the listener is registered, false otherwise.
 */
bool CSemiMDPSender::isListenerAdded(CSemiMDPListener *listener)
{
	std::list<CSemiMDPListener *>::iterator pos = SMDPListeners->begin();
	const std::list<CSemiMDPListener *>::iterator last = SMDPListeners->end();

	// Advance until we either run off the end or hit the listener.
	while (pos != last && *pos != listener)
	{
		++pos;
	}

	return pos != last;
}

void CSemiMDPSender::startNewEpisode()
{
	for (std::list<CSemiMDPListener *>::iterator it = SMDPListeners->begin(); it != SMDPListeners->end(); it++) 
	{
		if ((*it)->enabled)
		{
			(*it)->newEpisode();
		}
	}
}

/**
 * Forwards a step to every enabled listener, in list order.
 *
 * For each enabled listener the processor time spent in its nextStep() call
 * is measured with clock() and reported through DebugPrint on channel 't'.
 *
 * @param lastState    state collection handed to the listeners as the old state.
 * @param action       action handed to the listeners.
 * @param currentState state collection handed to the listeners as the new state.
 */
void CSemiMDPSender::sendNextStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
	// Position of the listener in the list; disabled listeners are counted too.
	int listenerIndex = 0;

	for (std::list<CSemiMDPListener *>::iterator it = SMDPListeners->begin(); it != SMDPListeners->end(); ++it, ++listenerIndex)
	{
		if (!(*it)->enabled)
		{
			continue;
		}

		const clock_t start = clock();
		(*it)->nextStep(lastState, action, currentState);
		const clock_t elapsed = clock() - start;

		// clock_t is not necessarily int. DebugPrint is assumed to be a
		// printf-style variadic function, so the value is converted
		// explicitly to match the %d conversion.
		DebugPrint('t', "Time needed for listener %d: %d\n", listenerIndex, static_cast<int>(elapsed));
	}
}

/**
 * Forwards an intermediate step to every registered listener, in list order.
 *
 * Note: unlike sendNextStep() and startNewEpisode(), the listeners' `enabled`
 * flag is not consulted here.
 *
 * @param lastState    state collection handed to the listeners as the old state.
 * @param action       action handed to the listeners.
 * @param currentState state collection handed to the listeners as the new state.
 */
void CSemiMDPSender::sendIntermediateStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
	typedef std::list<CSemiMDPListener *>::iterator ListenerIterator;

	for (ListenerIterator cur = SMDPListeners->begin(); cur != SMDPListeners->end(); ++cur)
	{
		CSemiMDPListener *listener = *cur;
		listener->intermediateStep(lastState, action, currentState);
	}
}


/**
 * Creates the process with a freshly allocated CActionSet handed to the
 * CDeterministicController base, zeroed step/episode counters and no last
 * action.
 */
CSemiMarkovDecisionProcess::CSemiMarkovDecisionProcess() : CDeterministicController(new CActionSet()) 
{
	// All counters start at zero.
	totalSteps = 0;
	currentSteps = 0;
	currentEpisodeNumber = 0;

	// Nothing has been executed yet.
	lastAction = NULL;
}

/// Deletes `actions` (presumably the CActionSet allocated in the constructor -- confirm against the header).
CSemiMarkovDecisionProcess::~CSemiMarkovDecisionProcess()
{
	delete this->actions;
}

/*
For the intermediate steps within an extended action, all states that occurred
while the extended action had not yet finished are also sent, as the tuple
(intermediate state, action, current state). The duration of the extended
action is reduced accordingly in the intermediate steps.
*/
/** Sends the step to all listeners once the given action has finished (only a
multi-step action can be unfinished). The method also updates currentSteps and
totalSteps.
@see CSemiMDPListener
*/
void CSemiMarkovDecisionProcess::sendNextStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
	++currentSteps;
	++totalSteps;

	// Every action counts as finished unless its multi-step data says otherwise.
	bool finished = true;

	if (action->isType(MULTISTEPACTION))
	{
		CMultiStepActionData *stepData = dynamic_cast<CMultiStepAction *>(action)->getMultiStepActionData();
		finished = stepData->finished;
		const int duration = stepData->duration;

		if (action->isType(PRIMITIVEACTION))
		{
			// A primitive action spanning several steps produced no
			// intermediate steps, so account for the remaining ones here.
			currentSteps += duration - 1;
		}
	}

	if (!finished)
	{
		// Still running: nothing is reported yet.
		return;
	}

	CDeterministicController::nextStep(lastState, action, currentState);	

	// Action is complete: send the normal step to the listeners.
	CSemiMDPSender::sendNextStep(lastState, action, currentState);
}


/// @return the value of the lastAction member (NULL right after construction).
CAction* CSemiMarkovDecisionProcess::getLastAction()
{
	return this->lastAction;
}


/**
 * Begins a new episode: notifies the controller base and the listeners, then
 * resets the per-episode step counter, advances the episode number and marks
 * the next step as the first one.
 */
void CSemiMarkovDecisionProcess::startNewEpisode()
{
	// Notifications go out first, before any counter is touched.
	CDeterministicController::newEpisode();
	CSemiMDPSender::startNewEpisode();

	// Per-episode bookkeeping.
	currentSteps = 0;
	++currentEpisodeNumber;
	isFirstStep = true;
}
	

/**
 * Adds an action to the action set and registers its data in the action data set.
 *
 * @param action action to add.
 */
void CSemiMarkovDecisionProcess::addAction(CAction *action)
{
	this->actions->add(action);
	this->actionDataSet->addActionData(action);
}

/**
 * Creates the process on top of a logged episode.
 *
 * The episode's state properties are used for the CStateModifiersObject base
 * and for both internal state collections; the episode's state modifiers are
 * then added to this object.
 *
 * @param loggedEpisode episode to attach to; it is dereferenced, so it must not be NULL.
 */
CHierarchicalSemiMarkovDecisionProcess::CHierarchicalSemiMarkovDecisionProcess(CEpisode *loggedEpisode) : CSemiMarkovDecisionProcess(), CStateModifiersObject(loggedEpisode->getStateProperties())
{
	currentEpisode = loggedEpisode;

	// Both state collections must exist before any modifier is added.
	pastState = new CStateCollectionImpl(loggedEpisode->getStateProperties());
	currentState = new CStateCollectionImpl(loggedEpisode->getStateProperties());

	addStateModifiers(loggedEpisode->getStateModifiers());
}

/**
 * Creates the process without a logged episode (currentEpisode is NULL).
 *
 * @param modelProperties state properties used for the CStateModifiersObject
 *                        base and for both internal state collections.
 * @param modifiers       optional list of state modifiers to add; may be NULL.
 */
CHierarchicalSemiMarkovDecisionProcess::CHierarchicalSemiMarkovDecisionProcess(CStateProperties *modelProperties, std::list<CStateModifier *> *modifiers) :CSemiMarkovDecisionProcess(), CStateModifiersObject(modelProperties)
{
	currentEpisode = NULL;

	// Both state collections must exist before any modifier is added.
	pastState = new CStateCollectionImpl(modelProperties);
	currentState = new CStateCollectionImpl(modelProperties);

	if (modifiers != NULL)
	{
		addStateModifiers(modifiers);
	}
}


/// Frees the two state collections allocated by the constructors.
CHierarchicalSemiMarkovDecisionProcess::~CHierarchicalSemiMarkovDecisionProcess()
{
	delete this->pastState;
	delete this->currentState;
}

/**
 * Adds a state modifier to both internal state collections and then to the
 * CStateModifiersObject base.
 *
 * @param modifier modifier to add.
 */
void CHierarchicalSemiMarkovDecisionProcess::addStateModifier(CStateModifier *modifier)
{
	// Internal state collections first ...
	this->pastState->addStateModifier(modifier);
	this->currentState->addStateModifier(modifier);

	// ... then the base class bookkeeping.
	CStateModifiersObject::addStateModifier(modifier);
}

/**
 * Removes a state modifier from both internal state collections and then from
 * the CStateModifiersObject base.
 *
 * @param modifier modifier to remove.
 */
void CHierarchicalSemiMarkovDecisionProcess::removeStateModifier(CStateModifier *modifier)
{
	// Internal state collections first ...
	this->pastState->removeStateModifier(modifier);
	this->currentState->removeStateModifier(modifier);

	// ... then the base class bookkeeping.
	CStateModifiersObject::removeStateModifier(modifier);
}

void CHierarchicalSemiMarkovDecisionProcess::sendNextStep(CAction *action)
{

	CDeterministicController::nextStep(pastState, action, currentState);	
	CSemiMarkovDecisionProcess::sendNextStep(pastState, action, currentState);


	if (action->isType(EXTENDEDACTION))
	{
		CExtendedAction *mAction = dynamic_cast<CExtendedAction *>(action);
		if (mAction->getMultiStepActionData()->finished && mAction->sendIntermediateSteps && currentEpisode != NULL)
		{
			// send the Intermediate Steps and the "rlt_real" Step of the ExtendedAction
		
			int oldDuration = mAction->getDuration();
			int episodeIndex = currentEpisode->getNumSteps() - 1;

			CAction *interAction = currentEpisode->getAction(episodeIndex);

			// set new duration of the extendedAction
			mAction->getMultiStepActionData()->duration = interAction->getDuration();

			// Send intermediate Steps
			if (mAction->sendIntermediateSteps)
			{
			
				interAction = currentEpisode->getAction(episodeIndex);

				// set new duration of the extendedAction
				mAction->getMultiStepActionData()->duration = interAction->getDuration();

				while (mAction->getMultiStepActionData()->duration < oldDuration)
				{
					assert(episodeIndex > 0);

					currentEpisode->getStateCollection(episodeIndex, pastState);
					CSemiMDPSender::sendIntermediateStep(pastState, mAction, currentState);	
					
					episodeIndex --;

					// set new duration of the extendedAction
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -