⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ctestsuit.cpp

📁 强化学习算法(R-Learning)难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 4 页
字号:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <time.h>
#include <stdio.h>

#include "cpolicies.h"
#include "cagent.h"
#include "ril_debug.h"
#include "ctdlearner.h"
#include "clinearfafeaturecalculator.h"
#include "cvfunctionlearner.h"
#include "crewardmodel.h"
#include "ccontinuoustime.h"
#include <map>
#include "ctestsuit.h"
#include "cvfunction.h"

#include <math.h>

#include <iostream>
#include <sstream>
#include <string>



// Creates a test suite that uses one controller for both learning and
// evaluation.
//
//   agent            stored in this->agent
//   controller       handed to addParameters() and stored as both
//                    controller and evaluationController
//   learnDataObject  becomes the first entry of the learn-data list
//   l_testSuiteName  initial name of the suite
CTestSuite::CTestSuite(CAgent *agent, CAgentController *controller, CLearnDataObject *learnDataObject, char *l_testSuiteName) :testSuiteName(l_testSuiteName)
{
	// Start the list with exactly one element: the given learn-data object.
	learnDataObjects = new std::list<CLearnDataObject *>(1, learnDataObject);
	this->agent = agent;

	addParameters(controller);

	// No separate evaluation controller was given, so reuse the learning one.
	this->evaluationController = controller;
	this->controller = controller;
}

// Creates a test suite with distinct controllers for learning and evaluation.
//
//   agent                 stored in this->agent
//   controller            controller stored for learning
//   evaluationController  controller stored for evaluation
//   learnDataObject       becomes the first entry of the learn-data list
//   l_testSuiteName       initial name of the suite
//
// Both controllers are handed to addParameters(), learning controller first.
CTestSuite::CTestSuite(CAgent *agent, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *learnDataObject, char *l_testSuiteName) : testSuiteName(l_testSuiteName)
{
	// Start the list with exactly one element: the given learn-data object.
	learnDataObjects = new std::list<CLearnDataObject *>(1, learnDataObject);
	this->agent = agent;

	addParameters(controller);
	addParameters(evaluationController);

	this->evaluationController = evaluationController;
	this->controller = controller;
}

// Frees the list container created by the constructors. The
// CLearnDataObject instances referenced by the list are not deleted here.
CTestSuite::~CTestSuite()
{
	delete this->learnDataObjects;
}

// Returns the controller currently stored for evaluation.
CAgentController *CTestSuite::getEvaluationController()
{
	return this->evaluationController;
}

// Replaces the stored evaluation controller. Unlike the constructors, this
// setter does not pass the new controller to addParameters().
void CTestSuite::setEvaluationController(CAgentController *l_evaluationController)
{
	evaluationController = l_evaluationController;
}


void CTestSuite::saveLearnedData(FILE *stream)
{
	std::list<CLearnDataObject *>::iterator it = learnDataObjects->begin();

	for (; it != learnDataObjects->end(); it ++)
	{
		(*it)->saveData(stream);
	}
}

void CTestSuite::loadLearnedData(FILE *stream)
{
	std::list<CLearnDataObject *>::iterator it = learnDataObjects->begin();

	for (; it != learnDataObjects->end(); it ++)
	{
		(*it)->loadData(stream);
	}	
}

void CTestSuite::resetLearnedData()
{
	std::list<CLearnDataObject *>::iterator it = learnDataObjects->begin();


	for (; it != learnDataObjects->end(); it ++)
	{
		(*it)->resetData();
	}

	resetParameterCalculators();
}

// Returns a copy of the suite's name.
string CTestSuite::getTestSuiteName()
{
	return this->testSuiteName;
}

// Replaces the suite's name with `name`.
void CTestSuite::setTestSuiteName(string name)
{
	this->testSuiteName = name;
}


// Appends another learn-data object to the end of the list that
// saveLearnedData(), loadLearnedData() and resetLearnedData() iterate over.
void CTestSuite::addLearnDataObject(CLearnDataObject *learnDataObject)
{
	this->learnDataObjects->push_back(learnDataObject);
}

// Returns the controller currently stored for learning.
CAgentController *CTestSuite::getController()
{
	return this->controller;
}

// Replaces the stored learning controller. The evaluation controller is left
// unchanged, and the new controller is not passed to addParameters().
void CTestSuite::setController(CAgentController *controller)
{
	// `this->` is required: the parameter shadows the member of the same name.
	this->controller = controller;
}


// Creates a listener-based test suite that uses `controller` for learning
// and evaluation (see the CTestSuite base constructor it forwards to).
//
//   learner  optional first listener; when non-NULL it is appended to the
//            listener list, flagged to be added to the agent, and handed to
//            addParameters()
CListenerTestSuite::CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName) : CTestSuite(agent, controller, vFunction, testSuiteName)
{
	addToAgent = new std::map<CSemiMDPListener *, bool>();
	learnerObjects = new std::list<CSemiMDPListener *>();

	// Nothing more to register when no initial learner was supplied.
	if (learner == NULL)
	{
		return;
	}

	learnerObjects->push_back(learner);
	(*addToAgent)[learner] = true;
	addParameters(learner);
}

// Creates a listener-based test suite with separate learning and evaluation
// controllers (both forwarded to the CTestSuite base constructor).
//
//   learner  optional first listener; when non-NULL it is appended to the
//            listener list, flagged to be added to the agent, and handed to
//            addParameters()
CListenerTestSuite::CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName) : CTestSuite(agent, controller, evaluationController, vFunction, testSuiteName)
{
	addToAgent = new std::map<CSemiMDPListener *, bool>();
	learnerObjects = new std::list<CSemiMDPListener *>();

	// Nothing more to register when no initial learner was supplied.
	if (learner == NULL)
	{
		return;
	}

	learnerObjects->push_back(learner);
	(*addToAgent)[learner] = true;
	addParameters(learner);
}

// Frees the two containers allocated by the constructors. The listeners they
// refer to are not deleted here.
CListenerTestSuite::~CListenerTestSuite()
{
	delete this->learnerObjects;
	delete this->addToAgent;
}

// Registers an additional listener with the suite.
//
//   listener   listener to register
//   addParams  when true, the listener is handed to addParameters()
//   addBack    true: append to the end of the listener list;
//              false: insert at the front
//   addAgent   stored per listener; addLearnersToAgent() and
//              removeLearnersFromAgent() only touch the agent for listeners
//              whose flag is true
void CListenerTestSuite::addLearnerObject(CSemiMDPListener *listener, bool addParams,bool addBack, bool addAgent)
{
	if (addParams)
	{
		addParameters(listener);
	}

	// Only the insertion position depends on addBack.
	if (!addBack)
	{
		learnerObjects->push_front(listener);
	}
	else
	{
		learnerObjects->push_back(listener);
	}

	(*addToAgent)[listener] = addAgent;
}

void CListenerTestSuite::addLearnersToAgent()
{
	std::list<CSemiMDPListener *>::iterator it = learnerObjects->begin();

	for (; it != learnerObjects->end(); it ++)
	{
		(*it)->enabled = true;
		if ((*addToAgent)[(*it)])
		{
			agent->addSemiMDPListener(*it);
		}
	}
}

void CListenerTestSuite::removeLearnersFromAgent()
{
	std::list<CSemiMDPListener *>::iterator it = learnerObjects->begin();

	for (; it != learnerObjects->end(); it ++)
	{
		if ((*addToAgent)[(*it)])
		{
			agent->removeSemiMDPListener(*it);
		}
		(*it)->enabled = false;
	}
}

// Runs the learning phase: attaches the listeners, installs the learning
// controller on the agent, runs `nEpisodes` episodes and detaches the
// listeners again.
//
//   nEpisodes         number of times startNewEpisode() /
//                     doControllerEpisode() are invoked
//   nStepsPerEpisode  forwarded as second argument of doControllerEpisode()
//                     (presumably the step limit per episode - confirm
//                     against CAgent)
void CListenerTestSuite::learn(int nEpisodes, int nStepsPerEpisode)
{
	addLearnersToAgent();
	agent->setController(controller);

	for (int remaining = nEpisodes; remaining > 0; --remaining)
	{
		agent->startNewEpisode();
		agent->doControllerEpisode(1, nStepsPerEpisode);
	}

	removeLearnersFromAgent();
}

// Creates a policy-gradient test suite that uses `controller` for learning
// and evaluation (see the CTestSuite base constructor it forwards to).
//
//   learner             stored and handed to addParameters()
//   maxGradientUpdates  registered as the "MaxGradientUpdates" parameter
CPolicyGradientTestSuite::CPolicyGradientTestSuite(CAgent *agent, CPolicyGradientLearner *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName, int maxGradientUpdates) : CTestSuite(agent, controller, vFunction, testSuiteName)
{
	// `this->` is required: the parameter shadows the member of the same name.
	this->learner = learner;

	addParameters(learner);
	addParameter("MaxGradientUpdates", maxGradientUpdates);
}

// Creates a policy-gradient test suite with separate learning and evaluation
// controllers (both forwarded to the CTestSuite base constructor).
//
//   learner             stored and handed to addParameters()
//   maxGradientUpdates  currently unused by this overload (see note below)
CPolicyGradientTestSuite::CPolicyGradientTestSuite(CAgent *agent, CPolicyGradientLearner *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName, int maxGradientUpdates) : CTestSuite(agent, controller, evaluationController, vFunction, testSuiteName)
{
	// `this->` is required: the parameter shadows the member of the same name.
	this->learner = learner;

	addParameters(learner);

	// NOTE(review): the other constructor overload registers
	// "MaxGradientUpdates"; here the call is disabled, so the argument is
	// ignored. Confirm whether this asymmetry is intentional.
//	addParameter("MaxGradientUpdates", maxGradientUpdates);
}
	
// Nothing to release here: this class allocates nothing in its constructors,
// and the stored learner pointer is not deleted.
CPolicyGradientTestSuite::~CPolicyGradientTestSuite()
{
}

// Runs the policy-gradient learner until the agent has performed at least
// nEpisodes * nStepsPerEpisode additional steps, measured with
// agent->getTotalSteps().
//
// The loop condition is only re-checked between learnPolicy() calls, so the
// final call may push the step count past the budget.
void CPolicyGradientTestSuite::learn(int nEpisodes, int nStepsPerEpisode)
{
	const int stepBudget = nEpisodes * nStepsPerEpisode;
	int actualSteps = agent->getTotalSteps();

	agent->setController(controller);

	for (;;)
	{
		if (!(agent->getTotalSteps() - actualSteps < stepBudget))
		{
			break;
		}
		learner->learnPolicy(1, NULL, true);
	}
}

// Creates an empty collection; the name -> suite map is heap-allocated and
// released in the destructor.
CTestSuiteCollection::CTestSuiteCollection()
{
	this->testSuiteMap = new std::map<string, CTestSuite *>;
}

// Frees the map container only. The test suites it refers to are not deleted
// here; removeAllTestSuites() is the function that deletes them.
CTestSuiteCollection::~CTestSuiteCollection()
{
	delete this->testSuiteMap;
}

// Registers `testSuite` under its current name. A suite already registered
// under the same name is replaced in the map (the old pointer is not deleted).
void CTestSuiteCollection::addTestSuite(CTestSuite *testSuite)
{
	const string suiteName = testSuite->getTestSuiteName();
	(*testSuiteMap)[suiteName] = testSuite;
}

// Unregisters the suite stored under testSuite's current name. The suite
// object itself is not deleted.
//
// The entry is erased rather than overwritten with NULL, so that
// getNumTestSuites() and getTestSuite(int) no longer see a placeholder for a
// removed suite, and removing a suite that was never added does not create
// an entry. Erasing a missing key is a no-op.
void CTestSuiteCollection::removeTestSuite(CTestSuite *testSuite)
{
	testSuiteMap->erase(testSuite->getTestSuiteName());
}

// Returns the number of entries currently held in the name -> suite map.
int CTestSuiteCollection::getNumTestSuites()
{
	return static_cast<int>(testSuiteMap->size());
}

// Looks up a suite by name.
//
// Returns the registered suite, or NULL when no entry exists for
// `testSuiteName`. The lookup uses find() instead of operator[] so that
// asking for an unknown name does not insert a NULL entry into the map
// (which would inflate getNumTestSuites()).
CTestSuite *CTestSuiteCollection::getTestSuite(string testSuiteName)
{
	std::map<string, CTestSuite *>::iterator entry = testSuiteMap->find(testSuiteName);

	if (entry == testSuiteMap->end())
	{
		return NULL;
	}
	return entry->second;
}

// Returns the suite at position `index` in the map's iteration order (ordered
// by name, since the map is keyed by name), or NULL when `index` is negative
// or not smaller than the number of entries.
CTestSuite *CTestSuiteCollection::getTestSuite(int index)
{
	std::map<string, CTestSuite *>::iterator entry = testSuiteMap->begin();
	int position = 0;

	while (entry != testSuiteMap->end())
	{
		if (position == index)
		{
			return entry->second;
		}
		++entry;
		++position;
	}

	return NULL;
}

void CTestSuiteCollection::removeAllTestSuites()
{
	std::map<string, CTestSuite *>::iterator it = testSuiteMap->begin();

	for (int i = 0; it != testSuiteMap->end(); it ++, i++)
	{
		CTestSuite *testSuite = (*it).second;
		delete testSuite;
	}
	testSuiteMap->clear();
}

/*
CTestSuite::CTestSuite(int Type)
{
	this->Type = Type;
	properties = new CTestSuiteProperties();

	properties->addParameter("Alpha", 0.2);

	controller = NULL;
	learner = NULL;
}

CTestSuite::~CTestSuite()
{
	delete properties;
}

void CTestSuite::addLearner(CAgent *agent)
{
	agent->addSemiMDPListener(learner);
}

void CTestSuite::removeLearner(CAgent *agent)
{
	agent->removeSemiMDPListener(learner);
}

CAgentController *CTestSuite::getController()
{
	return controller;
}


CTestSuiteProperties *CTestSuite::getTestSuiteProperties()
{
	return properties;
}
*/
// Creates an evaluator.
//
//   agent                     stored in this->agent
//   ltestSuiteCollectionName  stored as testSuiteCollectionName; used by
//                             getEvaluatorDirectory() when building paths
//   nTrials                   stored in this->nTrials
//   numValuesPerTrial         stored; saveMatlabData() writes this many rows
//
// The evaluator directory starts out empty, the value list starts out empty,
// and the "DivergentEvaluationValue" parameter is registered with
// -1000000000.0.
CTestSuiteEvaluator::CTestSuiteEvaluator(CAgent *agent, string ltestSuiteCollectionName, int nTrials, int numValuesPerTrial)  : evaluatorDirectory(""), testSuiteCollectionName(ltestSuiteCollectionName)
{
	this->agent = agent;
	addParameter("DivergentEvaluationValue", -1000000000.0);

	// Plain bookkeeping members.
	this->numValuesPerTrial = numValuesPerTrial;
	this->nTrials = nTrials;
	exception = false;

	values = new std::list<rlt_real *>;
}

// Deletes every value buffer still held in the list, then the list itself.
//
// NOTE(review): the buffers are indexed like arrays elsewhere in this file
// but released with scalar `delete`; their allocation is not visible here -
// confirm it is not `new[]`.
CTestSuiteEvaluator::~CTestSuiteEvaluator()
{
	typedef std::list<rlt_real *>::iterator ValueIterator;

	ValueIterator current = values->begin();
	while (current != values->end())
	{
		delete *current;
		++current;
	}

	delete values;
}


// Builds the directory path this evaluator works in.
//
//   WIN32:                          <collectionName>\<evaluatorDirectory>
//   EVALUATION_DIRECTORY defined:   <EVALUATION_DIRECTORY>/<collectionName>/<evaluatorDirectory>
//   otherwise:                      <evaluatorDirectory>
//
// The path is assembled with string concatenation instead of sprintf() into
// a fixed 256-byte buffer, so long names can no longer overflow the stack
// buffer.
string CTestSuiteEvaluator::getEvaluatorDirectory()
{
#ifdef WIN32
	return testSuiteCollectionName + "\\" + evaluatorDirectory;
#else
#ifdef EVALUATION_DIRECTORY
	return string(EVALUATION_DIRECTORY) + "/" + testSuiteCollectionName + "/" + evaluatorDirectory;
#else
	return evaluatorDirectory;
#endif
#endif
}

// Returns the path of the parameter/evaluation file of `testSuite`:
//   <evaluatorDirectory><sep><suiteName>_params.txt
// where <sep> is '\' on WIN32 and '/' elsewhere.
//
// The path is assembled with string concatenation instead of sprintf() into
// a fixed 255-byte buffer, so long directory or suite names can no longer
// overflow the stack buffer.
string CTestSuiteEvaluator::getEvaluationFileName(CTestSuite *testSuite)
{
#ifdef WIN32
	const char *separator = "\\";
#else
	const char *separator = "/";
#endif

	return getEvaluatorDirectory() + separator + testSuite->getTestSuiteName() + "_params.txt";
}

// Returns the first learn-data file name of the form
//   <evaluatorDirectory><sep>LearnData<sep><suiteName>_<k>.data
// (k = 0, 1, 2, ...) that cannot be opened for reading, i.e. the next unused
// slot. <sep> is '\' on WIN32 and '/' elsewhere.
//
// At most 30000 candidates are probed; if all of them exist, the name of the
// last candidate (k = 29999) is returned even though that file exists.
//
// The name is assembled with string concatenation instead of sprintf() into
// a fixed 256-byte buffer, so long directory or suite names can no longer
// overflow the stack buffer. The loop-invariant prefix is built once.
string CTestSuiteEvaluator::getLearnDataFileName(CTestSuite *testSuite)
{
#ifdef WIN32
	const char *separator = "\\";
#else
	const char *separator = "/";
#endif
	const int maxLearnDataFiles = 30000;

	const string prefix = getEvaluatorDirectory() + separator + "LearnData" + separator
		+ testSuite->getTestSuiteName() + "_";

	string learnDataFileName;

	for (int learnDataFileNumber = 0; learnDataFileNumber < maxLearnDataFiles; ++learnDataFileNumber)
	{
		// The number is below 30000, so it needs at most 5 digits plus the
		// terminator and always fits into this buffer.
		char numberText[16];
		sprintf(numberText, "%d", learnDataFileNumber);

		learnDataFileName = prefix + numberText + ".data";

		FILE *learnDataFile = fopen(learnDataFileName.c_str(), "r");
		if (learnDataFile == NULL)
		{
			// Candidate does not exist (or is unreadable): use this name.
			break;
		}
		fclose(learnDataFile);
	}

	return learnDataFileName;
}

void CTestSuiteEvaluator::clearValues()
{
	std::list<rlt_real *>::iterator it = values->begin();
	for (; it != values->end(); it ++)
	{
		delete *it;
	}
	values->clear();

}

// Writes the label of row `i` into `xLabel`; this implementation uses the
// decimal representation of the index itself.
//
//   xLabel  caller-provided buffer; it must be large enough for the decimal
//           text of an int plus the terminating NUL (saveMatlabData() in
//           this file passes an 80-byte buffer)
//   i       row index
void CTestSuiteEvaluator::getXLabel(char *xLabel, int i)
{
	const char *labelFormat = "%d";
	sprintf(xLabel, labelFormat, i);
}

// Loads the evaluation data stored in `inFileName` (relative to the
// evaluator directory) and appends it to `outFileName` as a text table.
//
// One row is written per value index 0 .. numValuesPerTrial-1: the row label
// from getXLabel(), the value of every loaded trial at that index, and
// finally the mean over all trials. A blank line terminates the table.
//
//   testSuite    parameters forwarded to loadEvaluationData()
//   outFileName  file the table is appended to (opened in "a" mode)
//   inFileName   trial file name inside the evaluator directory; when NULL a
//                message is printed and nothing is written
//
// Changes compared with the previous version:
//   - the trial path is built with a bounded snprintf(); an over-long path
//     is reported instead of overflowing the 255-byte buffer
//   - a NULL outFileName or a failed fopen() is reported instead of being
//     passed on to fprintf()/fclose()
//
// Assumes every loaded value buffer holds at least numValuesPerTrial
// entries - TODO confirm against loadEvaluationData().
void CTestSuiteEvaluator::saveMatlabData(CParameters *testSuite, char *outFileName, char *inFileName)
{
	checkDirectories();

	exception = false;

	clearValues();

	if (inFileName == NULL)
	{
		printf("No input file given !!!\n");
		return;
	}

	char trialFile[255];
	const int pathLength = snprintf(trialFile, sizeof(trialFile), "%s/%s", getEvaluatorDirectory().c_str(), inFileName);
	if (pathLength < 0 || pathLength >= static_cast<int>(sizeof(trialFile)))
	{
		// snprintf() reports the length it needed; anything that does not
		// fit was truncated and must not be used as a file name.
		printf("Path of input file %s is too long !!!\n", inFileName);
		return;
	}

	loadEvaluationData(testSuite, trialFile);

	FILE *matlabFile = NULL;
	if (outFileName != NULL)
	{
		matlabFile = fopen(outFileName, "a");
	}
	if (matlabFile == NULL)
	{
		printf("Could not open output file %s !!!\n", (outFileName != NULL) ? outFileName : "(null)");
		return;
	}

	for (int i = 0; i < numValuesPerTrial; i ++)
	{
		char xLabel[80];
		getXLabel(xLabel, i);

		fprintf(matlabFile, "%s ", xLabel);

		// Column per trial, followed by the mean of the row.
		rlt_real sum = 0.0;
		std::list<rlt_real *>::iterator it = values->begin();
		for (; it != values->end(); ++it)
		{
			sum += (*it)[i];
			fprintf(matlabFile, " %1.4f", (*it)[i]);
		}
		fprintf(matlabFile, " %1.4f\n", sum / values->size());
	}
	fprintf(matlabFile, "\n");

	fclose(matlabFile);
}

rlt_real CTestSuiteEvaluator::evaluateTestSuite(CTestSuite *testSuite, bool loadEvaluationTrial)
{
	checkDirectories();

	exception = false;

	clearValues();
	

	printf("Evaluating TestSuite %s with Parameters:\n", testSuite->getTestSuiteName().c_str());
	testSuite->saveParameters(stdout);

	if (loadEvaluationTrial)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -