⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ctestsuit.cpp

📁 强化学习算法(R-Learning)难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 4 页
字号:
	itParam ++;

	if (it != beginValues->end())
	{
		values[2] = *it;
		parameters[2] = *itParam;
	}
	else
	{
		values[2] = values[1];
		parameters[2] = parameters[1];
	}


	while (iterations < maxIterations)
	{
		iterations ++;
		printf("Iteration %d, Found maximum, trying to isolate maximum\n", iterations);
		
		if (parameters[1] - parameters[0] > parameters[2] - parameters[1])
		{
			rlt_real newParameter = (parameters[0] + parameters[1]) / 2;
			testSuite->setParameter(paramName, newParameter);
			rlt_real newValue = evaluateParameters(testSuite);

			if (newValue > values[1])
			{
				parameters[2] = parameters[1];
				values[2] = values[1];
				parameters[1] = newParameter;
				values[1] = newValue;
			}
			else
			{
				parameters[0] = newParameter;
				values[0] = newValue;
			}
		}
		else
		{
			rlt_real newParameter = (parameters[2] + parameters[1]) / 2;
			testSuite->setParameter(paramName, newParameter);
			rlt_real newValue = evaluateParameters(testSuite);

			if (newValue > values[1])
			{
				parameters[0] = parameters[1];
				values[0] = values[1];
				parameters[1] = newParameter;
				values[1] = newValue;
			}
			else
			{
				parameters[2] = newParameter;
				values[2] = newValue;
			}
		}
		printf("Values after %d iteration: (%f: %f) (%f: %f) (%f: %f)\n",iterations, parameters[0],values[0], parameters[1],values[1], parameters[2], values[2]);
	}

	rlt_real maxParameter = parameters[1];
	maxValue = values[1];
	if (values[0] > values[1])
	{
		maxParameter = parameters[0];
		maxValue = values[0];
	}
	if (values[2] > maxValue)
	{
		maxParameter = parameters[2];
		maxValue = values[2];
	}
	
	testSuite->setParameter(paramName, maxParameter);

	delete beginParameters;
	delete beginValues;

	return maxValue;
}

/**
 * Evaluates the test suite once for every candidate value of a single
 * parameter and leaves the suite configured with the best-scoring value.
 *
 * @param parameters candidate values to try for the parameter
 * @param paramName  name of the parameter to vary
 * @return the candidate value with the highest evaluation result, or 0.0 when
 *         the test suite does not know paramName or the candidate list is empty
 */
rlt_real CTestSuiteParameterCalculator::calculateSingleBestParameter(std::list<rlt_real> *parameters, string paramName)
{
	rlt_real max = -100.0;
	rlt_real maxParameter = 0.0;

	if (testSuite->getParameterIndex(paramName) >= 0)
	{
		// size() yields an unsigned size type; convert explicitly so the
		// argument really matches the %d conversion.
		printf("Testing Parameter %s, Values to Test %d\n", paramName.c_str(), static_cast<int>(parameters->size()));

		if (parameters->empty())
		{
			// Nothing to test: do not overwrite the suite's current value
			// with the 0.0 placeholder.
			return maxParameter;
		}

		for (std::list<rlt_real>::iterator paramIterator = parameters->begin(); paramIterator != parameters->end(); ++paramIterator)
		{
			// Casts keep the variadic arguments in sync with %f whatever
			// floating point type rlt_real is configured as.
			printf("Value for Parameter %s: %f\n", paramName.c_str(), static_cast<double>(*paramIterator));
			testSuite->setParameter(paramName, *paramIterator);
			rlt_real val = evaluateParameters(testSuite);

			// The first candidate always becomes the initial maximum, so the
			// -100.0 start value never decides the result.
			if (val > max || (paramIterator == parameters->begin()))
			{
				max = val;
				maxParameter = *paramIterator;
			}
		}
		testSuite->setParameter(paramName, maxParameter);
		printf("Best Value %f for Parameter %s with Value %f\n", static_cast<double>(maxParameter), paramName.c_str(), static_cast<double>(max));
	}
	return maxParameter;
}


/**
 * Builds every combination (Cartesian product) of the supplied per-parameter
 * value arrays.
 *
 * @param parameters one value array per parameter
 * @param paramSize  paramSize[k] is the number of usable entries in the k-th
 *                   value array
 * @param paramNames one name per parameter, in the same order as parameters
 *                   (assumed to hold at least as many entries as parameters;
 *                   this is not checked here)
 * @return a newly allocated list holding one newly allocated CParameters per
 *         combination; nothing in this function frees them. The list is empty
 *         when there are no parameters or any dimension has no values.
 */
std::list<CParameters *> * CTestSuiteParameterCalculator::getParameterList(std::list<rlt_real *> *parameters, int *paramSize, std::list<string> *paramNames)
{
	std::list<CParameters *> *parameterList = new std::list<CParameters *>();

	int numParams = static_cast<int>(parameters->size());

	// Without parameters the index numParams - 1 used below would be -1.
	if (numParams <= 0)
	{
		return parameterList;
	}

	// A dimension without values makes the whole product empty; continuing
	// would read element 0 of an array that has no elements.
	for (int k = 0; k < numParams; k++)
	{
		if (paramSize[k] <= 0)
		{
			return parameterList;
		}
	}

	// Odometer-style counter: actualSteps[k] is the index currently selected
	// in the k-th value array.
	int *actualSteps = new int[numParams];
	for (int i = 0; i < numParams; i++)
	{
		actualSteps[i] = 0;
	}

	// The last digit overflowing means every combination has been emitted.
	while (actualSteps[numParams - 1] < paramSize[numParams - 1])
	{
		CParameters *paramElem = new CParameters();

		std::list<rlt_real *>::iterator it = parameters->begin();
		std::list<string>::iterator itNames = paramNames->begin();
		for (int k = 0; k < numParams; k++, ++it, ++itNames)
		{
			paramElem->addParameter((*itNames), (*it)[actualSteps[k]]);
		}

		parameterList->push_back(paramElem);

		// Advance the lowest digit and carry into the higher ones.
		int j = 0;
		actualSteps[j] ++;

		while (j < numParams - 1 && actualSteps[j] >= paramSize[j])
		{
			actualSteps[j] = 0;
			j ++;
			actualSteps[j] ++;
		}
	}

	// The counter is only needed while enumerating; release it.
	delete [] actualSteps;

	return parameterList;
}


/**
 * Applies the given parameter set to the test suite and returns the score the
 * evaluator reports for it.
 *
 * @param parameters    parameter set handed to the test suite
 * @param newEvaluation not read by this implementation
 * @return the result of evaluator->evaluateTestSuite for the test suite
 */
rlt_real CTestSuiteParameterCalculator::evaluateParameters(CParameters *parameters, bool newEvaluation)
{
	testSuite->setParameters(parameters);
	return evaluator->evaluateTestSuite(testSuite);
}

/** Returns the test suite this calculator operates on. */
CTestSuite *CTestSuiteParameterCalculator::getTestSuite()
{
	return testSuite;
}
/*
rlt_real CTestSuiteParameterCalculator::getBestCalculatedValue()
{
	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	rlt_real max = 0;

	for (; it != evaluatedTestsuiteParameters->end();it++)
	{
		if ((*it).second.value > max || it == evaluatedTestsuiteParameters->begin())
		{
			max = (*it).second.value;
		}
	}
	return max;
}

CParameters *CTestSuiteParameterCalculator::getBestCalculatedParameters()
{
	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	rlt_real max = 0;
	CParameters *maxParams = NULL;

	for (; it != evaluatedTestsuiteParameters->end();it++)
	{
		if ((*it).second.value > max || it == evaluatedTestsuiteParameters->begin())
		{
			max = (*it).second.value;
			maxParams = (*it).first;
		}
	}
	return maxParams;
}

rlt_real CTestSuiteParameterCalculator::getParametersValue(CParameters *parameters)
{
	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	for (; it != evaluatedTestsuiteParameters->end();it++)
	{
		if ((*(*it).first) == (*parameters))
		{
			return (*it).second.value;
		}
	}
	return -1000;
}

CParameters *CTestSuiteParameterCalculator::getParametersFromIndex(int index)
{
	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	CParameters *parameters = NULL;

	for (int i = 0; it != evaluatedTestsuiteParameters->end();it++, i++)
	{
		if (i == index)
		{
			parameters = (*it).first;
		}
	}
	return parameters;
}

int CTestSuiteParameterCalculator::getParametersIndex(CParameters *parameters)
{
	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	for (int i = 0; it != evaluatedTestsuiteParameters->end();it++, i++)
	{
		if ((*(*it).first) == (*parameters))
		{
			return i;
		}
	}
	return -1;
}

int CTestSuiteParameterCalculator::getNumEvaluatedParameters()
{
	return evaluatedTestsuiteParameters->size();
}

string CTestSuiteParameterCalculator::getDataFileName(CParameters *parameters)
{

	std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();

	for (; it != evaluatedTestsuiteParameters->end();it++)
	{
		if ((*(*it).first) == (*parameters))
		{
			return (*it).second.learnDataFile;
		}
	}
	return string("");
}
*/

/**
 * Creates the residual function selected by a menu number.
 *
 * @param resNum        1 = CDiscreteResidual (constructed with 0.95),
 *                      2 = CContinuousEulerResidual,
 *                      3 = CContinuousCoulomResidual
 * @param timeIntervall forwarded to the two continuous residuals
 * @return a newly allocated residual, or NULL for any other resNum
 */
CResidualFunction *CResidualChooser::getResidual(int resNum, rlt_real timeIntervall)
{
	if (resNum == 1)
	{
		return new CDiscreteResidual(0.95);
	}
	if (resNum == 2)
	{
		return new CContinuousEulerResidual(timeIntervall, 1.0);
	}
	if (resNum == 3)
	{
		return new CContinuousCoulomResidual(timeIntervall, 1.0);
	}
	return NULL;
}

/**
 * Creates the residual gradient function selected by a menu number.
 *
 * @param resNum   1 = new CDirectGradient,
 *                 2 = the passed-in residual itself,
 *                 3 = CResidualBetaFunction with a CConstantBetaCalculator(0.4),
 *                 4 = CResidualBetaFunction with a CVariableBetaCalculator(0.01, 1.0)
 * @param residual gradient function returned as-is for 2 and wrapped for 3 and 4
 * @return the selected gradient function, or NULL for any other resNum
 */
CResidualGradientFunction *CResidualChooser::getResidualGradient(int resNum, CResidualGradientFunction *residual)
{
	if (resNum == 1)
	{
		return new CDirectGradient();
	}
	if (resNum == 2)
	{
		return residual;
	}
	if (resNum == 3)
	{
		CAbstractBetaCalculator *constantBeta = new CConstantBetaCalculator(0.4);
		return new CResidualBetaFunction(constantBeta, residual);
	}
	if (resNum == 4)
	{
		CAbstractBetaCalculator *variableBeta = new CVariableBetaCalculator(0.01, 1.0);
		return new CResidualBetaFunction(variableBeta, residual);
	}
	return NULL;
}

/**
 * Prompts on stdout and reads a residual selection from stdin, repeating the
 * prompt until a number in 1..3 has been entered.
 *
 * @return the selected number (1..3), or 0 when stdin reaches end-of-file
 *         before a valid selection was read
 */
int CResidualChooser::getResidualFromInput()
{
	for (;;)
	{
		printf ("** Residual Selection **\n\n");
		printf ("1 ... Discrete Residual\n");
		printf ("2 ... Euler Residual\n");
		printf ("3 ... Coulom Residual\n");

		int residual = 0;
		int parsed = scanf ("%d", &residual);
		if (parsed == EOF)
		{
			// No more input will arrive; asking again would loop forever.
			return 0;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Ask again on anything outside the menu. (The previous loop
		// condition was inverted and repeated the prompt on valid input.)
		if (parsed == 1 && residual >= 1 && residual <= 3)
		{
			return residual;
		}
	}
}

/**
 * Prompts on stdout and reads a residual gradient selection from stdin,
 * repeating the prompt until a number in 1..4 has been entered.
 *
 * @return the selected number (1..4), or 0 when stdin reaches end-of-file
 *         before a valid selection was read
 */
int CResidualChooser::getResidualGradientFromInput()
{
	for (;;)
	{
		printf("** Residual Gradient Selection **\n\n");
		printf("1 ... Direct Gradient\n");
		printf("2 ... Residual Gradient\n");
		printf("3 ... Residual Constant Beta\n");
		printf("4 ... Residual Variable Beta\n");

		int residualGradient = 0;
		int parsed = scanf ("%d", &residualGradient);
		if (parsed == EOF)
		{
			// No more input will arrive; asking again would loop forever.
			return 0;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Ask again on anything outside the menu. (The previous loop
		// condition was inverted and repeated the prompt on valid input.)
		if (parsed == 1 && residualGradient >= 1 && residualGradient <= 4)
		{
			return residualGradient;
		}
	}
}

/**
 * Creates the beta calculator selected by a menu number.
 *
 * @param resNum 1 = CConstantBetaCalculator(0.4),
 *               2 = CVariableBetaCalculator(0.01, 1.0)
 * @return a newly allocated beta calculator, or NULL for any other resNum
 */
CAbstractBetaCalculator *CResidualChooser::getBetaCalculator(int resNum)
{
	if (resNum == 1)
	{
		return new CConstantBetaCalculator(0.4);
	}
	if (resNum == 2)
	{
		return new CVariableBetaCalculator(0.01, 1.0);
	}
	return NULL;
}

/**
 * Prompts on stdout and reads a beta calculator selection from stdin,
 * repeating the prompt until a number in 1..2 has been entered.
 *
 * @return the selected number (1..2), or 0 when stdin reaches end-of-file
 *         before a valid selection was read
 */
int CResidualChooser::getBetaCalculatorFromInput()
{
	for (;;)
	{
		printf ("** Beta Calculator Selection **\n\n");
		printf ("1 ... Constant Beta Calculator\n");
		printf ("2 ... Variable Beta Calculator\n");

		int betaCalc = 0;
		int parsed = scanf ("%d", &betaCalc);
		if (parsed == EOF)
		{
			// No more input will arrive; asking again would loop forever.
			return 0;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Ask again on anything outside the menu. (The previous loop
		// condition was inverted and repeated the prompt on valid input.)
		if (parsed == 1 && betaCalc >= 1 && betaCalc <= 2)
		{
			return betaCalc;
		}
	}
}

/**
 * Creates the action distribution selected by a menu number.
 *
 * @param number 1 = CGreedyDistribution,
 *               2 = CEpsilonGreedyDistribution(0.01),
 *               3 = CSoftMaxDistribution(5)
 * @return a newly allocated distribution, or NULL for any other number
 */
CActionDistribution *CPolicyChooser::getDistribution(int number)
{
	if (number == 1)
	{
		return new CGreedyDistribution();
	}
	if (number == 2)
	{
		return new CEpsilonGreedyDistribution(0.01);
	}
	if (number == 3)
	{
		return new CSoftMaxDistribution(5);
	}
	return NULL;
}

/**
 * Prompts on stdout and reads a policy (action distribution) selection from
 * stdin, repeating the prompt until a number in 1..3 has been entered.
 *
 * @return the selected number (1..3), or 0 when stdin reaches end-of-file
 *         before a valid selection was read
 */
int CPolicyChooser::getDistributionFromInput()
{
	for (;;)
	{
		printf ("** Policy Selection **\n\n");
		printf ("1 ... Greedy Distribution\n");
		printf ("2 ... Epsilon Greedy Distribution\n");
		printf ("3 ... SoftMax Distribution\n");

		int distributionNum = 0;
		int parsed = scanf ("%d", &distributionNum);
		if (parsed == EOF)
		{
			// No more input will arrive; asking again would loop forever.
			return 0;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Accept all three offered choices and ask again otherwise. (The
		// previous check was inverted and only covered 1..2 although the menu
		// lists three entries.)
		if (parsed == 1 && distributionNum >= 1 && distributionNum <= 3)
		{
			return distributionNum;
		}
	}
}

/**
 * Assembles a TD learner for a Q-function from numeric selections.
 *
 * @param estimationPolicyNum selection passed to CPolicyChooser::getDistribution
 * @param learnModeNum        1 = CTDGradientLearner, 2 = CTDResidualLearner
 * @param residualNum         selection passed to CResidualChooser::getResidual
 * @param residualGradientNum in mode 1 the selection for getResidualGradient,
 *                            in mode 2 the selection for getBetaCalculator
 * @param qFunction           Q-function handed to the policy and the learner
 * @param rewardFunction      reward function handed to the learner
 * @param dt                  forwarded to getResidual
 * @return the new learner, or NULL when learnModeNum is neither 1 nor 2
 */
CTDLearner *CTDLearnerChooser::getQLearner(int estimationPolicyNum, int learnModeNum, int residualNum, int residualGradientNum, CGradientQFunction *qFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
	// These three objects are created regardless of the learner mode.
	// NOTE(review): for an unknown mode nothing visible here releases them -
	// confirm the intended ownership.
	CActionDistribution *actionDistribution = CPolicyChooser::getDistribution(estimationPolicyNum);
	CQStochasticPolicy *estimationPolicy = new CQStochasticPolicy(qFunction->getActions(), actionDistribution, qFunction);
	CResidualFunction *residualFunction = CResidualChooser::getResidual(residualNum, dt);

	switch (learnModeNum)
	{
	case 1:
		{
			CResidualGradientFunction *gradientFunction = CResidualChooser::getResidualGradient(residualGradientNum, residualFunction);
			return new CTDGradientLearner(rewardFunction, qFunction, estimationPolicy, residualFunction, gradientFunction);
		}
	case 2:
		{
			CAbstractBetaCalculator *betaCalculator = CResidualChooser::getBetaCalculator(residualGradientNum);
			// The same residual object is supplied for both residual arguments.
			return new CTDResidualLearner(rewardFunction, qFunction, estimationPolicy, residualFunction, residualFunction, betaCalculator);
		}
	default:
		return NULL;
	}
}

/**
 * Interactively asks on the console for the estimation policy, learner mode,
 * residual and residual gradient / beta calculator, then builds the matching
 * TD learner through getQLearner.
 *
 * @param qFunction      Q-function forwarded to getQLearner
 * @param rewardFunction reward function forwarded to getQLearner
 * @param dt             forwarded to getQLearner
 * @return the learner built by getQLearner, or NULL when stdin reaches
 *         end-of-file before a learner mode was chosen
 */
CTDLearner *CTDLearnerChooser::getQLearnerFromInput(CGradientQFunction *qFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
	printf("TD Learner Selection\n");
	printf("Estimation Policy:\n");
	int distributionNum = CPolicyChooser::getDistributionFromInput();

	int learnMode = 0;
	for (;;)
	{
		printf ("** Learner Mode **\n\n");
		printf ("1 ... Residual Gradient Learner\n");
		printf ("2 ... Residual Learner\n");

		learnMode = 0;
		int parsed = scanf ("%d", &learnMode);
		if (parsed == EOF)
		{
			// No mode can be read any more; getQLearner yields no learner for
			// an unknown mode, so report that directly.
			return NULL;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Ask again on anything outside the menu. (The previous loop
		// condition was inverted and repeated the prompt on valid input.)
		if (parsed == 1 && learnMode >= 1 && learnMode <= 2)
		{
			break;
		}
	}

	int residualNum = CResidualChooser::getResidualFromInput();

	// The fourth selection means "residual gradient" for the gradient learner
	// and "beta calculator" for the residual learner.
	int residualGradientNum = 0;
	if (learnMode == 1)
	{
		residualGradientNum = CResidualChooser::getResidualGradientFromInput();
	}
	else
	{
		residualGradientNum = CResidualChooser::getBetaCalculatorFromInput();
	}
	return getQLearner(distributionNum, learnMode, residualNum, residualGradientNum, qFunction, rewardFunction, dt);
}


/**
 * Assembles a V-function learner from numeric selections.
 *
 * @param learnModeNum        1 = CVFunctionGradientLearner,
 *                            2 = CVFunctionResidualLearner
 * @param residualNum         selection passed to CResidualChooser::getResidual
 * @param residualGradientNum in mode 1 the selection for getResidualGradient,
 *                            in mode 2 the selection for getBetaCalculator
 * @param vFunction           V-function handed to the learner
 * @param rewardFunction      reward function handed to the learner
 * @param dt                  forwarded to getResidual
 * @return the new learner, or NULL when learnModeNum is neither 1 nor 2
 */
CVFunctionLearner *CVFunctionLearnerChooser::getVFunctionLearner(int learnModeNum, int residualNum, int residualGradientNum, CGradientVFunction *vFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
	// The residual is created regardless of the learner mode.
	CResidualFunction *residualFunction = CResidualChooser::getResidual(residualNum, dt);

	switch (learnModeNum)
	{
	case 1:
		{
			CResidualGradientFunction *gradientFunction = CResidualChooser::getResidualGradient(residualGradientNum, residualFunction);
			return new CVFunctionGradientLearner(rewardFunction, vFunction, residualFunction, gradientFunction);
		}
	case 2:
		{
			CAbstractBetaCalculator *betaCalculator = CResidualChooser::getBetaCalculator(residualGradientNum);
			// The same residual object is supplied for both residual arguments.
			return new CVFunctionResidualLearner(rewardFunction, vFunction, residualFunction, residualFunction, betaCalculator);
		}
	default:
		return NULL;
	}
}

/**
 * Interactively asks on the console for the learner mode, residual and
 * residual gradient / beta calculator, then builds the matching V-function
 * learner through getVFunctionLearner.
 *
 * @param vFunction      V-function forwarded to getVFunctionLearner
 * @param rewardFunction reward function forwarded to getVFunctionLearner
 * @param dt             forwarded to getVFunctionLearner
 * @return the learner built by getVFunctionLearner, or NULL when stdin reaches
 *         end-of-file before a learner mode was chosen
 */
CVFunctionLearner *CVFunctionLearnerChooser::getVFunctionLearnerFromInput(CGradientVFunction *vFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
	printf("V- Learner Selection\n");
	// NOTE(review): no policy is asked for in this function; this heading
	// looks like a leftover - confirm before removing.
	printf("Estimation Policy:\n");

	int learnMode = 0;
	for (;;)
	{
		printf ("** Learner Mode **\n\n");
		printf ("1 ... Residual Gradient Learner\n");
		printf ("2 ... Residual Learner\n");

		learnMode = 0;
		int parsed = scanf ("%d", &learnMode);
		if (parsed == EOF)
		{
			// No mode can be read any more; getVFunctionLearner yields no
			// learner for an unknown mode, so report that directly.
			return NULL;
		}

		// Drop the rest of the line, including non-numeric text that scanf
		// refused to consume.
		int ch = getchar();
		while (ch != '\n' && ch != EOF)
		{
			ch = getchar();
		}

		// Ask again on anything outside the menu. (The previous loop
		// condition was inverted and repeated the prompt on valid input.)
		if (parsed == 1 && learnMode >= 1 && learnMode <= 2)
		{
			break;
		}
	}

	int residualNum = CResidualChooser::getResidualFromInput();

	// The third selection means "residual gradient" for the gradient learner
	// and "beta calculator" for the residual learner.
	int residualGradientNum = 0;
	if (learnMode == 1)
	{
		residualGradientNum = CResidualChooser::getResidualGradientFromInput();
	}
	else
	{
		residualGradientNum = CResidualChooser::getBetaCalculatorFromInput();
	}
	return getVFunctionLearner(learnMode, residualNum, residualGradientNum, vFunction, rewardFunction, dt);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -