⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ccontinuousactions.cpp

📁 强化学习算法(R-Learning)难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

#include "ccontinuousactions.h"
#include "ril_debug.h"
#include <assert.h>
#include <math.h>

// Creates action data with one entry per action value described by
// `properties` (the base CMyVector is sized with getNumActionValues()).
// `properties` is stored as a plain pointer and is not copied.
CContinuousActionData::CContinuousActionData(CContinuousActionProperties *properties)
	: CMyVector(properties->getNumActionValues())
{
	this->properties = properties;

	// actionValues aliases the pointer returned by getData(); no separate
	// buffer is allocated here.
	actionValues = getData();
}

// Destructor: deletes the buffer that actionValues points to.
// NOTE(review): actionValues is the pointer obtained from getData() in the
// constructor, not memory allocated by this class. Confirm against CMyVector
// (a) that the base class does not free the same buffer again (double delete)
// and (b) whether the buffer was allocated with new[], in which case delete[]
// would be required here.
CContinuousActionData::~CContinuousActionData()
{
	delete actionValues;
}

// Copies the action values held by `actionData` into this object.
//
// Nothing is copied when this object is not changeable (isChangeAble() is
// false) or when `actionData` is this very object.
// `actionData` must be a CContinuousActionData: this is asserted in debug
// builds, and in release builds a non-matching (or NULL) argument is ignored
// instead of being dereferenced.
// The number of values copied is this object's
// properties->getNumActionValues(); `actionData` is assumed to hold at least
// that many values.
void CContinuousActionData::setData(CActionData *actionData)
{
	if (!isChangeAble())
	{
		return;
	}

	CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(actionData);

	// dynamic_cast yields NULL for a foreign action-data type.
	assert(contData != NULL);
	if (contData == NULL)
	{
		return;
	}

	// memcpy must not be handed overlapping ranges, so a self-copy is a no-op.
	if (contData == this)
	{
		return;
	}

	memcpy(this->actionValues, contData->actionValues, sizeof(rlt_real) * properties->getNumActionValues());
}

// Stores `value` in dimension `dim`. The call is ignored when the object is
// not changeable. Neither `dim` nor `value` is range-checked.
void CContinuousActionData::setActionValue(int dim, rlt_real value)
{
	if (!isChangeAble())
	{
		return;
	}

	// The bounds assertion below is commented out in the original source:
	//assert(value <= properties->getMaxActionValue(dim) + 0.01 && value >= properties->getMinActionValue(dim) - 0.01);
	actionValues[dim] = value;
}

// Clamps every action value into the [min, max] interval given by the
// properties object: values above the maximum are set to the maximum, values
// below the minimum are set to the minimum, all others are left untouched.
void CContinuousActionData::normalizeAction()
{
	for (unsigned int d = 0; d < getNumDimensions(); ++d)
	{
		// The upper bound is tested first; the lower bound is only consulted
		// when the value did not exceed the maximum.
		if (actionValues[d] > properties->getMaxActionValue(d))
		{
			actionValues[d] = properties->getMaxActionValue(d);
			continue;
		}

		if (actionValues[d] < properties->getMinActionValue(d))
		{
			actionValues[d] = properties->getMinActionValue(d);
		}
	}
}

// Returns the action value stored for dimension `dim`.
// `dim` is not range-checked.
rlt_real CContinuousActionData::getActionValue(int dim)
{
	return *(actionValues + dim);
}

// Returns the internal actionValues pointer itself (not a copy of the data).
rlt_real *CContinuousActionData::getActionValues()
{
	rlt_real *values = actionValues;
	return values;
}

// Writes the action values to `stream` as text in the form "[v0 v1 ... ]":
// an opening bracket, each value printed with "%lf " (so every value is
// followed by a space), and a closing bracket. No newline is written.
void CContinuousActionData::saveASCII(FILE *stream)
{
	const unsigned int valueCount = properties->getNumActionValues();

	fprintf(stream,"[");

	unsigned int idx = 0;
	while (idx < valueCount)
	{
		fprintf(stream, "%lf ", actionValues[idx]);
		++idx;
	}

	fprintf(stream, "]");
}

void CContinuousActionData::loadASCII(FILE *stream)
{
	fscanf(stream,"[");
	for (unsigned int i = 0; i < properties->getNumActionValues(); i++)
	{
		fscanf(stream, "%lf ", &actionValues[i]);
	}
	fscanf(stream, "]");
}

// Writes the raw action values to `stream` with a single fwrite call:
// getNumActionValues() elements of sizeof(rlt_real) bytes each.
// The fwrite result is not checked.
void CContinuousActionData::saveBIN(FILE *stream)
{
	const unsigned int valueCount = properties->getNumActionValues();
	fwrite(actionValues, sizeof(rlt_real), valueCount, stream);
}

// Reads the raw action values from `stream` with a single fread call:
// getNumActionValues() elements of sizeof(rlt_real) bytes each.
// The fread result is not checked.
void CContinuousActionData::loadBIN(FILE *stream)
{
	const unsigned int valueCount = properties->getNumActionValues();
	fread(actionValues, sizeof(rlt_real), valueCount, stream);
}

// Allocates the per-dimension bound arrays for `numActionValues` dimensions
// and initialises every dimension to the range [0.0, 1.0].
CContinuousActionProperties::CContinuousActionProperties(int numActionValues)
{
	this->numActionValues = numActionValues;

	minValues = new rlt_real[numActionValues];
	maxValues = new rlt_real[numActionValues];

	// Fill order does not matter; every slot receives the same defaults.
	int dim = numActionValues;
	while (dim-- > 0)
	{
		minValues[dim] = 0.0;
		maxValues[dim] = 1.0;
	}
}

// Releases the bound arrays allocated in the constructor.
// Both arrays are created with new[], so they must be released with delete[];
// using scalar delete on them is undefined behaviour.
CContinuousActionProperties::~CContinuousActionProperties()
{
	delete[] minValues;
	delete[] maxValues;
}

// Returns the number of action dimensions passed to the constructor.
unsigned int CContinuousActionProperties::getNumActionValues()
{
	const unsigned int count = numActionValues;
	return count;
}

// Returns the lower bound of dimension `dim`. `dim` is not range-checked.
rlt_real CContinuousActionProperties::getMinActionValue(int dim)
{
	return *(minValues + dim);
}

// Returns the upper bound of dimension `dim`. `dim` is not range-checked.
rlt_real CContinuousActionProperties::getMaxActionValue(int dim)
{
	return *(maxValues + dim);
}

// Sets the lower bound of dimension `dim` to `value`.
// `dim` is not range-checked and the bound is not compared with the maximum.
void CContinuousActionProperties::setMinActionValue(int dim, rlt_real value)
{
	*(minValues + dim) = value;
}

// Sets the upper bound of dimension `dim` to `value`.
// `dim` is not range-checked and the bound is not compared with the minimum.
void CContinuousActionProperties::setMaxActionValue(int dim, rlt_real value)
{
	*(maxValues + dim) = value;
}

// Creates a continuous action around an already existing action-data object.
// `actionData` is handed to the CPrimitiveAction base and also remembered as
// continuousActionData; `properties` is stored as a plain pointer.
// The action is tagged with the CONTINUOUSACTION type.
CContinuousAction::CContinuousAction(CContinuousActionProperties *properties, CContinuousActionData *actionData)
	: CPrimitiveAction(actionData)
{
	this->properties = properties;
	continuousActionData = actionData;

	addType(CONTINUOUSACTION);
}

// Creates a continuous action with a freshly allocated CContinuousActionData
// built from `properties`. The new data object is passed to the
// CPrimitiveAction base; continuousActionData is then obtained by casting the
// base's actionData member. The action is tagged with CONTINUOUSACTION.
CContinuousAction::CContinuousAction(CContinuousActionProperties *properties)
	: CPrimitiveAction(new CContinuousActionData(properties))
{
	this->properties = properties;
	continuousActionData = dynamic_cast<CContinuousActionData *>(actionData);

	addType(CONTINUOUSACTION);
}

// Destructor: this class releases nothing itself.
CContinuousAction::~CContinuousAction()
{
	// intentionally empty
}

// Returns the action value of dimension `dim`, read from this action's
// continuousActionData.
rlt_real CContinuousAction::getActionValue(int dim)
{
	const rlt_real value = continuousActionData->getActionValue(dim);
	return value;
}

// Returns the dimension count reported by this action's continuousActionData.
int CContinuousAction::getNumDimensions()
{
	const int dimensions = continuousActionData->getNumDimensions();
	return dimensions;
}

// Identity comparison between this action and `action`.
// For an action of type CONTINUOUSSTATICACTION the comparison is made against
// the continuous action it wraps (obtained via getContinuousAction());
// otherwise `action` itself is compared with `this`.
bool CContinuousAction::equals(CAction *action)
{
	if (!action->isType(CONTINUOUSSTATICACTION))
	{
		return this == (action);
	}

	CStaticContinuousAction *wrapper = dynamic_cast<CStaticContinuousAction *>(action);
	return this == (wrapper->getContinuousAction());
}

// Value comparison: returns true when `action` is a continuous action that
// shares this action's properties object and whose data lies within a
// distance of 0.0001 (as computed by getDistance) of this action's data.
// When `data` is non-null it is used as the other action's data; otherwise
// the data is taken from `action` itself.
bool CContinuousAction::isSameAction(CAction *action, CActionData *data)
{
	if (!action->isType(CONTINUOUSACTION))
	{
		return false;
	}

	CContinuousAction *other = dynamic_cast<CContinuousAction *>(action);
	if (other->getContinuousActionProperties() != getContinuousActionProperties())
	{
		// Different properties objects: never considered the same action.
		return false;
	}

	CContinuousActionData *otherData;
	if (!data)
	{
		otherData = other->getContinuousActionData();
	}
	else
	{
		otherData = dynamic_cast<CContinuousActionData *>(data);
	}

	return continuousActionData->getDistance(otherData) < 0.0001;
}

// Loads `data` through the CPrimitiveAction implementation and afterwards
// clamps the values of continuousActionData to the bounds of its properties
// (normalizeAction()).
void CContinuousAction::loadActionData(CActionData *data)
{
	// Base class load first, then clamp the result.
	CPrimitiveAction::loadActionData(data);

	continuousActionData->normalizeAction();
}

// Returns the properties pointer this action was constructed with.
CContinuousActionProperties *CContinuousAction::getContinuousActionProperties()
{
	CContinuousActionProperties *current = properties;
	return current;
}

// Allocates a new CContinuousActionData for this action's properties and
// returns it as a CActionData pointer. The object is created with `new`;
// nothing in this function releases it.
CActionData *CContinuousAction::getNewActionData()
{
	CContinuousActionData *fresh = new CContinuousActionData(properties);
	return dynamic_cast<CActionData *>(fresh);
}

// Creates a controller for the single continuous action `contAction`.
// A new CActionSet is passed to the CAgentController base and `contAction` is
// added to it. No random controller is set initially; the random-controller
// mode is taken from `l_randomControllerMode`. A scratch action-data object
// (`noise`) is allocated through contAction->getNewActionData().
CContinuousActionController::CContinuousActionController(CContinuousAction *contAction, int l_randomControllerMode)
	: CAgentController(new CActionSet())
{
	this->contAction = contAction;
	actions->add(contAction);

	this->randomControllerMode = l_randomControllerMode;
	randomController = NULL;

	CActionData *scratch = contAction->getNewActionData();
	noise = dynamic_cast<CContinuousActionData *>(scratch);
}

// Destructor: deletes the action set and the noise scratch object.
// `noise` is allocated in this class's constructor via getNewActionData().
// NOTE(review): `actions` is the CActionSet created with `new` in the
// constructor's base-initializer; confirm that CAgentController does not
// delete it as well.
CContinuousActionController::~CContinuousActionController()
{
	delete actions;
	delete noise;
}

// Installs `randomController` as this controller's random controller and
// registers it through addParameters(). The pointer is stored as is.
void CContinuousActionController::setRandomController(CContinuousActionController *randomController)
{
	this->randomController = randomController;

	addParameters(randomController);
}

// Returns the random controller currently installed (NULL right after
// construction).
CContinuousActionController *CContinuousActionController::getRandomController()
{
	CContinuousActionController *current = randomController;
	return current;
}

void CContinuousActionController::setRandomControllerMode(int l_randomControllerMode)
{
	this->randomControllerMode = l_randomControllerMode;	
}

// Returns the current random-controller mode.
int CContinuousActionController::getRandomControllerMode()
{
	const int mode = randomControllerMode;
	return mode;
}


// Computes the next action for `state` and writes it into the entry that
// `dataSet` holds for contAction. `dataSet` must not be NULL (asserted).
// When a random controller is installed and the mode is
// EXTERN_RANDOM_CONTROLLER, the random controller's output is written into
// `noise` and added to the computed action data.
// Always returns contAction.
CAction *CContinuousActionController::getNextAction(CStateCollection *state, CActionDataSet *dataSet)
{
	assert(dataSet != NULL);

	CContinuousActionData *target = dynamic_cast<CContinuousActionData *>(dataSet->getActionData(contAction));

	getNextContinuousAction(state, target);

	if (randomController)
	{
		if (randomControllerMode == EXTERN_RANDOM_CONTROLLER)
		{
			randomController->getNextContinuousAction(state, noise);
			target->addVector(noise);
		}
	}

	return contAction;
}

// Fills `l_noise` from `action` and this controller's own output for `state`.
// The controller output is computed into `l_noise` with the mode temporarily
// forced to NO_RANDOM_CONTROLLER; `l_noise` is then scaled by -1.0 and
// `action` is added to it (presumably yielding action minus controller
// output -- depends on multScalar/addVector semantics).
// The previous random-controller mode is restored before returning.
void CContinuousActionController::getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *l_noise)
{
	const int savedMode = randomControllerMode;

	// Evaluate the controller without the random controller.
	randomControllerMode = NO_RANDOM_CONTROLLER;
	getNextContinuousAction(state, l_noise);
	randomControllerMode = savedMode;

	l_noise->multScalar(-1.0);
	l_noise->addVector(action);
}

// Creates a fixed-valued action that shares the properties of `contAction`.
// getNumActionValues() entries are copied from `actionValues` into this
// action's own data, the action is tagged CONTINUOUSSTATICACTION, and its
// data is then marked as not changeable.
CStaticContinuousAction::CStaticContinuousAction(CContinuousAction *contAction, rlt_real *actionValues)
	: CContinuousAction(contAction->getContinuousActionProperties())
{
	this->contAction = contAction;

	// Copy straight into the value array; this happens before the data is
	// locked below.
	const size_t byteCount = sizeof(rlt_real) * properties->getNumActionValues();
	memcpy(continuousActionData->actionValues, actionValues, byteCount);

	addType(CONTINUOUSSTATICACTION);

	actionData->setIsChangeAble(false);
}

// Destructor: this class releases nothing itself.
CStaticContinuousAction::~CStaticContinuousAction()
{
	// intentionally empty
}

// Overwrites the values of `contAction` with this static action's values.
// The copy is a raw memcpy on the pointers returned by getData(), covering
// getNumActionValues() entries; it does not go through setActionValue().
void CStaticContinuousAction::setContinuousAction(CContinuousActionData *contAction)
{
	const size_t byteCount = sizeof(rlt_real) * properties->getNumActionValues();

	memcpy(contAction->getData(), continuousActionData->getData(), byteCount);
}

// Adds `factor` times this action's values to `contAction`, dimension by
// dimension. Writes go through contAction->setActionValue().
void CStaticContinuousAction::addToContinuousAction(CContinuousActionData *contAction, rlt_real factor)
{
	const unsigned int valueCount = properties->getNumActionValues();

	for (unsigned int dim = 0; dim < valueCount; ++dim)
	{
		contAction->setActionValue(dim, contAction->getActionValue(dim) + factor * getActionValue(dim));
	}
}

// Returns the wrapped continuous action after loading this static action's
// data into it. Note that calling this therefore modifies the wrapped action.
CContinuousAction *CStaticContinuousAction::getContinuousAction()
{
	CContinuousAction *wrapped = contAction;
	wrapped->loadActionData(getActionData());
	return wrapped;
}

// Identity comparison. When `action` is a continuous action that is not
// itself a static one, it is compared with the wrapped continuous action
// (via getContinuousAction(), which also loads this action's data into it).
// In every other case `action` is compared with `this`.
bool CStaticContinuousAction::equals(CAction *action)
{
	const bool plainContinuous = action->isType(CONTINUOUSACTION) && !action->isType(CONTINUOUSSTATICACTION);

	if (!plainContinuous)
	{
		return this == (action);
	}

	return getContinuousAction() == action;
}

// Value comparison against `action`.
//
// When `action` is a continuous action that is not itself a static one, the
// result is true if it shares this action's properties object and its data
// lies within a distance of 0.0001 (getDistance) of this action's values.
// The data compared is `data` when non-null, otherwise action->getActionData().
// For any other action type the result is the identity test `this == action`.
//
// The properties check is made against the properties of `action`, as the
// corresponding check in CContinuousAction::isSameAction does. The previous
// code compared this object's wrapped action with this object itself, which
// always holds because the constructor takes the properties from that
// wrapped action.
// Failed casts yield false instead of a null dereference.
bool CStaticContinuousAction::isSameAction(CAction *action, CActionData *data)
{
	if (!(action->isType(CONTINUOUSACTION) && !action->isType(CONTINUOUSSTATICACTION)))
	{
		return this == (action);
	}

	CContinuousActionData *lcontData;
	if (data)
	{
		lcontData = dynamic_cast<CContinuousActionData *>(data);
	}
	else
	{
		lcontData = dynamic_cast<CContinuousActionData *>(action->getActionData());
	}

	CContinuousAction *lcontAction = dynamic_cast<CContinuousAction *>(action);
	if (lcontAction == NULL || lcontData == NULL)
	{
		return false;
	}

	if (lcontAction->getContinuousActionProperties() != getContinuousActionProperties())
	{
		return false;
	}

	return continuousActionData->getDistance(lcontData) < 0.0001;
}


// Stores the set of basis actions and the action properties. Both pointers
// are kept as given; nothing is copied.
CContinuousActionLinearFA::CContinuousActionLinearFA(CActionSet *contActions, CContinuousActionProperties *properties)
{
	this->contActions = contActions;
	this->actionProperties = properties;
}

// Destructor: neither contActions nor actionProperties is released here.
CContinuousActionLinearFA::~CContinuousActionLinearFA()
{
	// intentionally empty
}

// Builds `contAction` as a weighted sum of the basis actions in contActions:
// `contAction` is reset with initVector(0.0), then each basis action adds
// itself scaled by the matching entry of `actionFactors`.
// `actionFactors` must provide one factor per element of contActions.
void CContinuousActionLinearFA::getContinuousAction(CContinuousActionData *contAction, rlt_real *actionFactors)
{
	contAction->initVector(0.0);

	unsigned int factorIdx = 0;
	for (CActionSet::iterator cur = contActions->begin(); cur != contActions->end(); ++cur, ++factorIdx)
	{
		CLinearFAContinuousAction *basis = dynamic_cast<CLinearFAContinuousAction *>(*cur);

		basis->addToContinuousAction(contAction, actionFactors[factorIdx]);
	}
}


// Computes one factor per basis action for `action` (via getActionFactor())
// and normalises the factors by dividing each by their sum.
// `actionFactors` must have room for one entry per element of contActions.
// The sum must be positive; this is only enforced by an assert.
void CContinuousActionLinearFA::getActionFactors(CContinuousActionData *action, rlt_real *actionFactors)
{
	rlt_real total = 0.0;
	unsigned int filled = 0;

	// First pass: raw factors and their sum.
	for (CActionSet::iterator cur = contActions->begin(); cur != contActions->end(); ++cur)
	{
		const rlt_real factor = dynamic_cast<CLinearFAContinuousAction *>(*cur)->getActionFactor(action);
		actionFactors[filled] = factor;
		total += factor;
		++filled;
	}

	assert(total > 0);

	// Second pass: normalise every factor written above.
	for (unsigned int k = 0; k < filled; ++k)
	{
		actionFactors[k] = actionFactors[k] / total;
	}
}
	
void CContinuousActionLinearFA::getContinuousAction(unsigned int index, CContinuousActionData *action)
{
	assert(index < contActions->size());
	unsigned int i = 0;

	CActionSet::iterator it;
	for (i = 0, it = contActions->begin(); it != contActions->end(), i < index; it++, i++);
	
	if (it != contActions->end())
	{
		dynamic_cast<CLinearFAContinuousAction *>((*it))->setContinuousAction(action);
	}
}

// Returns the number of basis actions in contActions, converted to int.
int CContinuousActionLinearFA::getNumContinuousActionFA()
{
	const int basisCount = contActions->size();
	return basisCount;
}


// Forwards both arguments unchanged to the CStaticContinuousAction
// constructor; no additional state is set up here.
CLinearFAContinuousAction::CLinearFAContinuousAction(CContinuousAction *contAction, rlt_real *actionValues)
	: CStaticContinuousAction(contAction, actionValues)
{
	// nothing further to initialise
}

// Creates an RBF basis action. `rbfCenter` is passed to the
// CLinearFAContinuousAction base as the action values; `rbfSigma` is copied
// into a newly allocated array with getNumActionValues() entries, so the
// caller keeps ownership of the array passed in.
CContinuousRBFAction::CContinuousRBFAction(CContinuousAction *contAction, rlt_real *rbfCenter, rlt_real *rbfSigma)
	: CLinearFAContinuousAction(contAction, rbfCenter)
{
	const unsigned int valueCount = properties->getNumActionValues();

	this->rbfSigma = new rlt_real[valueCount];
	memcpy(this->rbfSigma, rbfSigma, sizeof(rlt_real) * valueCount);
}

// Releases the sigma array allocated in the constructor.
// The array is created with new[], so it must be released with delete[];
// using scalar delete on it is undefined behaviour.
CContinuousRBFAction::~CContinuousRBFAction()
{
	delete[] rbfSigma;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -