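// File: crewardmodel.cpp
//
// Folder: Reinforcement learning algorithm (R-Learning) -- rare and valuable material
// Language: CPP
// Page 1 of 2
// (Web viewer controls: font size; pages 1 2, next page)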
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "ril_debug.h"
#include "crewardmodel.h"

#include <math.h>

CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
	this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (int i = 0; i < rewardTable->getSize(); i++)
	{
		rewardTable->set1D(i, new CFeatureMap());
	}	
	
	this->model = model;

	this->bExternVisitSparse = true;
}

CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
	int i;

	this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (i = 0; i < rewardTable->getSize(); i++)
	{
		rewardTable->set1D(i, new CFeatureMap());
	}	
	
	this->visitTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (i = 0; i < visitTable->getSize(); i++)
	{
		visitTable->set1D(i, new CFeatureMap());
	}
	this->bExternVisitSparse = false;
}
	

CFeatureRewardModel::~CFeatureRewardModel()
{
	for (int i = 0; i < rewardTable->getSize(); i++)
	{
		delete rewardTable->get1D(i);
	}
	delete rewardTable;

	if (!bExternVisitSparse)
	{
		for (int i = 0; i < visitTable->getSize(); i++)
		{
			delete visitTable->get1D(i);
		}
		delete visitTable;
	}
}

rlt_real CFeatureRewardModel::getTransitionVisits(int oldState, int action, int newState)
{
	rlt_real visits = 0.0;
	if (!this->bExternVisitSparse)
	{	
		visits = visitTable->get(action, oldState)->getValue(newState);
	}
	else
	{
		CTransition *trans = model->getForwardTransitions(action, oldState)->getTransition(newState);
		if (trans == NULL) 
		{
			visits = 0;
		}
		else 
		{
			visits = trans->getPropability() * model->getStateActionVisits(oldState, action);
		}
	}
	return visits;
}

rlt_real CFeatureRewardModel::getReward(int oldState, CAction *action, int newState)
{
	int actionIndex = getActions()->getIndex(action);
	rlt_real transVisits = getTransitionVisits(oldState, actionIndex, newState);

	//assert(visitSparse->getFaktor(oldState, actionIndex, newState) > 0);


	if (transVisits > 0)
	{
		return rewardTable->get(actionIndex, oldState)->getValue(newState) / transVisits;
	}
	else
	{
		return 0.0;
	}
}

void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *newState)
{
	CFeatureMap *featMap;

	CState *oldS = oldState->getState(properties);
	CState *newS = newState->getState(properties);

	rlt_real oldreward = 0.0;
	rlt_real visits = 0.0;

	int actionIndex = getActions()->getIndex(action);
	
	int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE);
	switch (type)
	{
		case FEATURESTATE:
		{
			for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); oldIndex++)
			{
				int oldFeature = oldS->getDiscreteState(oldIndex);
				featMap = rewardTable->get(actionIndex, oldFeature);

				for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); newIndex++)
				{
					int newFeature = newS->getDiscreteState(newIndex);

					oldreward = featMap->getValue(newFeature);
				

					(*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);
					
					if (!bExternVisitSparse)
					{
						visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature);
					
						(*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);;
					}
				}
			}
			break;
		}
		case DISCRETESTATE:
		{
			featMap = rewardTable->get(actionIndex, oldS->getDiscreteState(0));

			oldreward = featMap->getValue(newS->getDiscreteState(0));
		
			int feata = oldS->getDiscreteState(0);
			int featb = newS->getDiscreteState(0);

			(*featMap)[featb] = oldreward + reward;
			
			if (!bExternVisitSparse)
			{
				visits = visitTable->get(actionIndex, feata)->getValue(featb);
			
				(*visitTable->get(actionIndex, feata))[featb] = visits + 1.0;
			}
			break;
		}
	}
}


void CFeatureRewardModel::saveData(FILE *stream)
{
	CFeatureMap::iterator mapIt;
	CFeatureMap *featMap;
	fprintf(stream, "Reward Table\n");

	for (unsigned int action = 0; action < getNumActions(); action ++)
	{
		fprintf(stream, "Action %d:\n", action);
		for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
		{
			featMap = rewardTable->get(action, startState);

			fprintf(stream, "Startstate %d [%d]: ", startState, featMap->size());
			
			for (mapIt = featMap->begin(); mapIt != featMap->end(); mapIt ++)
			{
				fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second);			
			}
			fprintf(stream, "\n");
		}
		fprintf(stream, "\n");
	}

	if (!this->bExternVisitSparse)
	{
		fprintf(stream, "Visit Table\n");

		for (unsigned int action = 0; action < getNumActions(); action ++)
		{
			fprintf(stream, "Action %d:\n", action);
			for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
			{
				featMap = visitTable->get(action, startState);
	
				fprintf(stream, "Startstate %d [%d]: ", startState, featMap->size());
			
				for (mapIt = featMap->begin(); mapIt != featMap->end(); mapIt ++)
				{
					fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second);			
				}
				fprintf(stream, "\n");
			}
			fprintf(stream, "\n");
		}
	}
}

void CFeatureRewardModel::loadData(FILE *stream)
{
	CFeatureMap *featMap;
	fscanf(stream, "Reward Table\n");

	int buf, numVal = 0, endState;
	rlt_real reward;

	for (unsigned int action = 0; action < getNumActions(); action ++)
	{
		fscanf(stream, "Action %d:\n", &buf);
		for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
		{
			featMap = rewardTable->get(action, startState);

			featMap->clear();

			fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);
			
			for (int i = 0; i < numVal; i ++)
			{
				fscanf(stream, "(%d %lf)", &endState, &reward);
				(*featMap)[endState] = reward;
			}
			fscanf(stream, "\n");
		}
		fscanf(stream, "\n");
	}

	if (!this->bExternVisitSparse)
	{
		fprintf(stream, "Visit Table\n");

		for (unsigned int action = 0; action < getNumActions(); action ++)
		{
			fscanf(stream, "Action %d:\n", &buf);
			for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
			{
				featMap = visitTable->get(action, startState);
	
				featMap->clear();

				fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);
			
				for (int i = 0; i < numVal; i ++)
				{
					fscanf(stream, "(%d %lf)", &endState, &reward);
					(*featMap)[endState] = reward;
				}
				fscanf(stream, "\n");
			}
			fscanf(stream, "\n");
		}
	}
}

void CFeatureRewardModel::resetData()
{
	CFeatureMap *featMap;

	for (unsigned int action = 0; action < getNumActions(); action ++)
	{
		for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
		{
			featMap = rewardTable->get(action, startState);

			featMap->clear();
		}
	}

	if (!this->bExternVisitSparse)
	{
		for (unsigned int action = 0; action < getNumActions(); action ++)
		{
			for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
			{
				featMap = visitTable->get(action, startState);

				featMap->clear();
			}
		}
	}
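// --- End of page 1 of 2; the listing continues on the next page. ---
// (Web viewer keyboard shortcuts: copy code Ctrl + C, search code Ctrl + F,
//  full screen F11, toggle theme Ctrl + Shift + D, show shortcuts ?,
//  increase font size Ctrl + =, decrease font size Ctrl + -)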