📄 ctestsuit.cpp
字号:
itParam ++;
if (it != beginValues->end())
{
values[2] = *it;
parameters[2] = *itParam;
}
else
{
values[2] = values[1];
parameters[2] = parameters[1];
}
while (iterations < maxIterations)
{
iterations ++;
printf("Iteration %d, Found maximum, trying to isolate maximum\n", iterations);
if (parameters[1] - parameters[0] > parameters[2] - parameters[1])
{
rlt_real newParameter = (parameters[0] + parameters[1]) / 2;
testSuite->setParameter(paramName, newParameter);
rlt_real newValue = evaluateParameters(testSuite);
if (newValue > values[1])
{
parameters[2] = parameters[1];
values[2] = values[1];
parameters[1] = newParameter;
values[1] = newValue;
}
else
{
parameters[0] = newParameter;
values[0] = newValue;
}
}
else
{
rlt_real newParameter = (parameters[2] + parameters[1]) / 2;
testSuite->setParameter(paramName, newParameter);
rlt_real newValue = evaluateParameters(testSuite);
if (newValue > values[1])
{
parameters[0] = parameters[1];
values[0] = values[1];
parameters[1] = newParameter;
values[1] = newValue;
}
else
{
parameters[2] = newParameter;
values[2] = newValue;
}
}
printf("Values after %d iteration: (%f: %f) (%f: %f) (%f: %f)\n",iterations, parameters[0],values[0], parameters[1],values[1], parameters[2], values[2]);
}
rlt_real maxParameter = parameters[1];
maxValue = values[1];
if (values[0] > values[1])
{
maxParameter = parameters[0];
maxValue = values[0];
}
if (values[2] > maxValue)
{
maxParameter = parameters[2];
maxValue = values[2];
}
testSuite->setParameter(paramName, maxParameter);
delete beginParameters;
delete beginValues;
return maxValue;
}
/// Tries every candidate value for one test-suite parameter and keeps the best one.
///
/// Each value in @p parameters is written to the test suite under @p paramName
/// and scored with evaluateParameters(). After the scan the highest-scoring
/// value is written back to the test suite.
///
/// Nothing is evaluated when the test suite reports a negative index for
/// @p paramName. If the index is valid but the list is empty, the parameter is
/// set to 0.0 (the initial value of the result).
///
/// @param parameters candidate values, evaluated in list order
/// @param paramName  name of the test-suite parameter to tune
/// @return the candidate with the highest score, or 0.0 if nothing was evaluated
rlt_real CTestSuiteParameterCalculator::calculateSingleBestParameter(std::list<rlt_real> *parameters, string paramName)
{
    rlt_real max = -100.0;
    rlt_real maxParameter = 0.0;

    if (testSuite->getParameterIndex(paramName) >= 0)
    {
        // size() returns an unsigned size_type; convert it explicitly so the
        // argument really has the type the %d conversion expects.
        printf("Testing Parameter %s, Values to Test %d\n", (paramName).c_str(), static_cast<int>(parameters->size()));

        for (std::list<rlt_real>::iterator paramIterator = parameters->begin(); paramIterator != parameters->end(); ++paramIterator)
        {
            printf("Value for Parameter %s: %f\n", paramName.c_str(), *paramIterator);

            testSuite->setParameter(paramName, *paramIterator);
            rlt_real val = evaluateParameters(testSuite);

            // The first candidate is always accepted, so the -100.0 start
            // value can never be reported as the best score.
            if (val > max || (paramIterator == parameters->begin()))
            {
                max = val;
                maxParameter = *paramIterator;
            }
        }

        // Leave the test suite configured with the winning value.
        testSuite->setParameter(paramName, maxParameter);
        printf("Best Value %f for Parameter %s with Value %f\n", maxParameter, paramName.c_str(), max);
    }

    return maxParameter;
}
/// Builds one CParameters object for every combination of the given values.
///
/// @p parameters holds one value array per parameter, @p paramSize[k] is the
/// number of values in the k-th array and @p paramNames supplies the name for
/// the k-th parameter. The combinations are enumerated like an odometer: the
/// index of the first parameter advances fastest, the last one slowest.
///
/// @p paramNames is walked in lock-step with @p parameters, so it is expected
/// to hold at least as many entries.
///
/// @param parameters list of value arrays, one per parameter
/// @param paramSize  array with the length of each value array
/// @param paramNames parameter names, in the same order as @p parameters
/// @return a newly allocated list of newly allocated CParameters objects; this
///         function keeps no reference to either. The list is empty when there
///         are no parameters or any value array has no entries.
std::list<CParameters *> * CTestSuiteParameterCalculator::getParameterList(std::list<rlt_real *> *parameters, int *paramSize, std::list<string> *paramNames)
{
    std::list<CParameters *> *parameterList = new std::list<CParameters *>();
    int numParams = static_cast<int>(parameters->size());

    // Without any parameter there is no "last counter" to test in the loop
    // below; indexing [numParams - 1] would read out of bounds.
    if (numParams <= 0)
    {
        return parameterList;
    }

    // An empty dimension makes the whole product empty. Without this check the
    // first combination would read element 0 of an empty value array.
    for (int k = 0; k < numParams; k++)
    {
        if (paramSize[k] <= 0)
        {
            return parameterList;
        }
    }

    // Current index into each value array (the odometer digits).
    int *actualSteps = new int[numParams];
    for (int i = 0; i < numParams; i++)
    {
        actualSteps[i] = 0;
    }

    // The enumeration is finished once the slowest counter runs past its range.
    while (actualSteps[numParams - 1] < paramSize[numParams - 1])
    {
        CParameters *paramElem = new CParameters();

        std::list<rlt_real *>::iterator it = parameters->begin();
        std::list<string>::iterator itNames = paramNames->begin();
        for (int k = 0; k < numParams; k++, ++it, ++itNames)
        {
            paramElem->addParameter((*itNames), (*it)[actualSteps[k]]);
        }
        parameterList->push_back(paramElem);

        // Advance the fastest counter and carry overflow into the next one.
        // The last counter is never wrapped; it terminates the outer loop.
        int j = 0;
        actualSteps[j] ++;
        while (j < numParams - 1 && actualSteps[j] >= paramSize[j])
        {
            actualSteps[j] = 0;
            j ++;
            actualSteps[j] ++;
        }
    }

    // The counter array is purely local scratch space.
    delete [] actualSteps;

    return parameterList;
}
/// Applies @p parameters to the test suite and returns the evaluator's result
/// for the test suite.
///
/// @param parameters    parameter set handed to the test suite
/// @param newEvaluation not used by this implementation
/// @return the value reported by the evaluator
rlt_real CTestSuiteParameterCalculator::evaluateParameters(CParameters *parameters, bool newEvaluation)
{
    testSuite->setParameters(parameters);
    return evaluator->evaluateTestSuite(testSuite);
}
/// Returns the test suite this calculator works on.
CTestSuite *CTestSuiteParameterCalculator::getTestSuite()
{
    return testSuite;
}
/*
rlt_real CTestSuiteParameterCalculator::getBestCalculatedValue()
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
rlt_real max = 0;
for (; it != evaluatedTestsuiteParameters->end();it++)
{
if ((*it).second.value > max || it == evaluatedTestsuiteParameters->begin())
{
max = (*it).second.value;
}
}
return max;
}
CParameters *CTestSuiteParameterCalculator::getBestCalculatedParameters()
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
rlt_real max = 0;
CParameters *maxParams = NULL;
for (; it != evaluatedTestsuiteParameters->end();it++)
{
if ((*it).second.value > max || it == evaluatedTestsuiteParameters->begin())
{
max = (*it).second.value;
maxParams = (*it).first;
}
}
return maxParams;
}
rlt_real CTestSuiteParameterCalculator::getParametersValue(CParameters *parameters)
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
for (; it != evaluatedTestsuiteParameters->end();it++)
{
if ((*(*it).first) == (*parameters))
{
return (*it).second.value;
}
}
return -1000;
}
CParameters *CTestSuiteParameterCalculator::getParametersFromIndex(int index)
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
CParameters *parameters = NULL;
for (int i = 0; it != evaluatedTestsuiteParameters->end();it++, i++)
{
if (i == index)
{
parameters = (*it).first;
}
}
return parameters;
}
int CTestSuiteParameterCalculator::getParametersIndex(CParameters *parameters)
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
for (int i = 0; it != evaluatedTestsuiteParameters->end();it++, i++)
{
if ((*(*it).first) == (*parameters))
{
return i;
}
}
return -1;
}
int CTestSuiteParameterCalculator::getNumEvaluatedParameters()
{
return evaluatedTestsuiteParameters->size();
}
string CTestSuiteParameterCalculator::getDataFileName(CParameters *parameters)
{
std::map<CParameters *, ParameterData>::iterator it = evaluatedTestsuiteParameters->begin();
for (; it != evaluatedTestsuiteParameters->end();it++)
{
if ((*(*it).first) == (*parameters))
{
return (*it).second.learnDataFile;
}
}
return string("");
}
*/
/// Creates the residual function selected by @p resNum.
///
/// 1 creates a CDiscreteResidual, 2 a CContinuousEulerResidual and 3 a
/// CContinuousCoulomResidual. The numbering matches the menu printed by
/// getResidualFromInput().
///
/// @param resNum        selection number (1..3)
/// @param timeIntervall passed to the two continuous residuals
/// @return a newly allocated residual, or NULL for any other @p resNum
CResidualFunction *CResidualChooser::getResidual(int resNum, rlt_real timeIntervall)
{
    if (resNum == 1)
    {
        return new CDiscreteResidual(0.95);
    }
    if (resNum == 2)
    {
        return new CContinuousEulerResidual(timeIntervall, 1.0);
    }
    if (resNum == 3)
    {
        return new CContinuousCoulomResidual(timeIntervall, 1.0);
    }

    // Unknown selection.
    return NULL;
}
/// Creates the residual gradient function selected by @p resNum.
///
/// 1 creates a CDirectGradient, 2 returns @p residual itself, 3 and 4 wrap
/// @p residual in a CResidualBetaFunction driven by a constant or a variable
/// beta calculator. The numbering matches the menu printed by
/// getResidualGradientFromInput().
///
/// @param resNum   selection number (1..4)
/// @param residual gradient function that is returned or wrapped
/// @return the selected gradient function, or NULL for any other @p resNum
CResidualGradientFunction *CResidualChooser::getResidualGradient(int resNum, CResidualGradientFunction *residual)
{
    if (resNum == 1)
    {
        return new CDirectGradient();
    }
    if (resNum == 2)
    {
        // No new object here: the caller's residual is handed back as is.
        return residual;
    }
    if (resNum == 3)
    {
        CAbstractBetaCalculator *constantBeta = new CConstantBetaCalculator(0.4);
        return new CResidualBetaFunction(constantBeta, residual);
    }
    if (resNum == 4)
    {
        CAbstractBetaCalculator *variableBeta = new CVariableBetaCalculator(0.01, 1.0);
        return new CResidualBetaFunction(variableBeta, residual);
    }

    // Unknown selection.
    return NULL;
}
/// Asks on the console which residual to use and repeats the menu until a
/// valid number (1..3) has been entered.
///
/// @return the chosen number in 1..3, or 0 if standard input ended before a
///         valid number was read
int CResidualChooser::getResidualFromInput()
{
    int residual = 0;
    bool inputExhausted = false;

    do
    {
        printf ("** Residual Selection **\n\n");
        printf ("1 ... Discrete Residual\n");
        printf ("2 ... Euler Residual\n");
        printf ("3 ... Coulom Residual\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &residual) != 1)
        {
            residual = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (residual < 1 || residual > 3));

    return residual;
}
/// Asks on the console which residual gradient to use and repeats the menu
/// until a valid number (1..4) has been entered.
///
/// @return the chosen number in 1..4, or 0 if standard input ended before a
///         valid number was read
int CResidualChooser::getResidualGradientFromInput()
{
    int residualGradient = 0;
    bool inputExhausted = false;

    do
    {
        printf("** Residual Gradient Selection **\n\n");
        printf("1 ... Direct Gradient\n");
        printf("2 ... Residual Gradient\n");
        printf("3 ... Residual Constant Beta\n");
        printf("4 ... Residual Variable Beta\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &residualGradient) != 1)
        {
            residualGradient = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (residualGradient < 1 || residualGradient > 4));

    return residualGradient;
}
/// Creates the beta calculator selected by @p resNum.
///
/// 1 creates a CConstantBetaCalculator, 2 a CVariableBetaCalculator. The
/// numbering matches the menu printed by getBetaCalculatorFromInput().
///
/// @param resNum selection number (1..2)
/// @return a newly allocated beta calculator, or NULL for any other @p resNum
CAbstractBetaCalculator *CResidualChooser::getBetaCalculator(int resNum)
{
    if (resNum == 1)
    {
        return new CConstantBetaCalculator(0.4);
    }
    if (resNum == 2)
    {
        return new CVariableBetaCalculator(0.01, 1.0);
    }

    // Unknown selection.
    return NULL;
}
/// Asks on the console which beta calculator to use and repeats the menu
/// until a valid number (1..2) has been entered.
///
/// @return the chosen number in 1..2, or 0 if standard input ended before a
///         valid number was read
int CResidualChooser::getBetaCalculatorFromInput()
{
    int betaCalc = 0;
    bool inputExhausted = false;

    do
    {
        printf ("** Beta Calculator Selection **\n\n");
        printf ("1 ... Constant Beta Calculator\n");
        printf ("2 ... Variable Beta Calculator\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &betaCalc) != 1)
        {
            betaCalc = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (betaCalc < 1 || betaCalc > 2));

    return betaCalc;
}
/// Creates the action distribution selected by @p number.
///
/// 1 creates a CGreedyDistribution, 2 a CEpsilonGreedyDistribution and 3 a
/// CSoftMaxDistribution. The numbering matches the menu printed by
/// getDistributionFromInput().
///
/// @param number selection number (1..3)
/// @return a newly allocated distribution, or NULL for any other @p number
CActionDistribution *CPolicyChooser::getDistribution(int number)
{
    if (number == 1)
    {
        return new CGreedyDistribution();
    }
    if (number == 2)
    {
        return new CEpsilonGreedyDistribution(0.01);
    }
    if (number == 3)
    {
        return new CSoftMaxDistribution(5);
    }

    // Unknown selection.
    return NULL;
}
/// Asks on the console which action distribution to use and repeats the menu
/// until a valid number (1..3) has been entered.
///
/// All three menu entries are accepted; each of them is handled by
/// getDistribution().
///
/// @return the chosen number in 1..3, or 0 if standard input ended before a
///         valid number was read
int CPolicyChooser::getDistributionFromInput()
{
    int distributionNum = 0;
    bool inputExhausted = false;

    do
    {
        printf ("** Policy Selection **\n\n");
        printf ("1 ... Greedy Distribution\n");
        printf ("2 ... Epsilon Greedy Distribution\n");
        printf ("3 ... SoftMax Distribution\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &distributionNum) != 1)
        {
            distributionNum = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (distributionNum < 1 || distributionNum > 3));

    return distributionNum;
}
/// Assembles a TD learner for a Q-function from numeric selections.
///
/// A stochastic policy is built from the distribution chosen by
/// @p estimationPolicyNum, and a residual from @p residualNum and @p dt.
/// @p learnModeNum then picks the learner class:
///  - 1: CTDGradientLearner; @p residualGradientNum selects the residual gradient
///  - 2: CTDResidualLearner; @p residualGradientNum selects the beta calculator
///
/// For any other @p learnModeNum the policy and residual created here are not
/// released by this function.
///
/// @return the newly created learner, or NULL if @p learnModeNum is neither 1 nor 2
CTDLearner *CTDLearnerChooser::getQLearner(int estimationPolicyNum, int learnModeNum, int residualNum, int residualGradientNum, CGradientQFunction *qFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
    CActionDistribution *distribution = CPolicyChooser::getDistribution(estimationPolicyNum);
    CQStochasticPolicy *policy = new CQStochasticPolicy(qFunction->getActions(), distribution, qFunction);
    CResidualFunction *residual = CResidualChooser::getResidual(residualNum, dt);

    switch (learnModeNum)
    {
        case 1:
        {
            CResidualGradientFunction *residualGradient = CResidualChooser::getResidualGradient(residualGradientNum, residual);
            return new CTDGradientLearner(rewardFunction, qFunction, policy, residual, residualGradient);
        }
        case 2:
        {
            CAbstractBetaCalculator *betaCalc = CResidualChooser::getBetaCalculator(residualGradientNum);
            // The residual is passed twice.
            return new CTDResidualLearner(rewardFunction, qFunction, policy, residual, residual, betaCalc);
        }
        default:
        {
            break;
        }
    }

    // Unsupported learner mode.
    return NULL;
}
/// Builds a TD learner for a Q-function from selections made on the console.
///
/// Prompts in this order: estimation policy, learner mode, residual, and then
/// either the residual gradient (learner mode 1) or the beta calculator
/// (learner mode 2). The selections are forwarded to getQLearner().
///
/// @return the learner created by getQLearner(), or NULL if standard input
///         ended before a valid learner mode was read
CTDLearner *CTDLearnerChooser::getQLearnerFromInput(CGradientQFunction *qFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
    printf("TD Learner Selection\n");
    printf("Estimation Policy:\n");
    int distributionNum = CPolicyChooser::getDistributionFromInput();

    int learnMode = 0;
    bool inputExhausted = false;
    do
    {
        printf ("** Learner Mode **\n\n");
        printf ("1 ... Residual Gradient Learner\n");
        printf ("2 ... Residual Learner\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &learnMode) != 1)
        {
            learnMode = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (learnMode < 1 || learnMode > 2));

    if (learnMode < 1 || learnMode > 2)
    {
        // Only reachable when the input ended early. getQLearner() has no
        // learner for such a mode, so skip the remaining prompts.
        return NULL;
    }

    int residualNum = CResidualChooser::getResidualFromInput();

    // For learner mode 2 this slot carries the beta calculator selection.
    int residualGradientNum = 0;
    if (learnMode == 1)
    {
        residualGradientNum = CResidualChooser::getResidualGradientFromInput();
    }
    else
    {
        residualGradientNum = CResidualChooser::getBetaCalculatorFromInput();
    }

    return getQLearner(distributionNum, learnMode, residualNum, residualGradientNum, qFunction, rewardFunction, dt);
}
/// Assembles a V-function learner from numeric selections.
///
/// A residual is built from @p residualNum and @p dt. @p learnModeNum then
/// picks the learner class:
///  - 1: CVFunctionGradientLearner; @p residualGradientNum selects the residual gradient
///  - 2: CVFunctionResidualLearner; @p residualGradientNum selects the beta calculator
///
/// For any other @p learnModeNum the residual created here is not released by
/// this function.
///
/// @return the newly created learner, or NULL if @p learnModeNum is neither 1 nor 2
CVFunctionLearner *CVFunctionLearnerChooser::getVFunctionLearner(int learnModeNum, int residualNum, int residualGradientNum, CGradientVFunction *vFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
    CResidualFunction *residual = CResidualChooser::getResidual(residualNum, dt);

    switch (learnModeNum)
    {
        case 1:
        {
            CResidualGradientFunction *residualGradient = CResidualChooser::getResidualGradient(residualGradientNum, residual);
            return new CVFunctionGradientLearner(rewardFunction, vFunction, residual, residualGradient);
        }
        case 2:
        {
            CAbstractBetaCalculator *betaCalc = CResidualChooser::getBetaCalculator(residualGradientNum);
            // The residual is passed twice.
            return new CVFunctionResidualLearner(rewardFunction, vFunction, residual, residual, betaCalc);
        }
        default:
        {
            break;
        }
    }

    // Unsupported learner mode.
    return NULL;
}
/// Builds a V-function learner from selections made on the console.
///
/// Prompts in this order: learner mode, residual, and then either the residual
/// gradient (learner mode 1) or the beta calculator (learner mode 2). The
/// selections are forwarded to getVFunctionLearner().
///
/// @return the learner created by getVFunctionLearner(), or NULL if standard
///         input ended before a valid learner mode was read
CVFunctionLearner *CVFunctionLearnerChooser::getVFunctionLearnerFromInput(CGradientVFunction *vFunction, CRewardFunction *rewardFunction, rlt_real dt)
{
    printf("V- Learner Selection\n");
    // NOTE(review): this heading is printed although no policy is asked for
    // here. Looks like a leftover; confirm before removing.
    printf("Estimation Policy:\n");

    int learnMode = 0;
    bool inputExhausted = false;
    do
    {
        printf ("** Learner Mode **\n\n");
        printf ("1 ... Residual Gradient Learner\n");
        printf ("2 ... Residual Learner\n");

        // Anything that is not a number counts as an invalid selection.
        if (scanf ("%d", &learnMode) != 1)
        {
            learnMode = 0;
        }

        // Discard the rest of the line so leftover characters are not parsed
        // as the answer to the next prompt.
        int ch = getchar();
        while (ch != '\n' && ch != EOF)
        {
            ch = getchar();
        }
        inputExhausted = (ch == EOF);

        // Ask again while the selection is out of range. Stop at end of input,
        // because prompting again could never succeed.
    } while(!inputExhausted && (learnMode < 1 || learnMode > 2));

    if (learnMode < 1 || learnMode > 2)
    {
        // Only reachable when the input ended early. getVFunctionLearner() has
        // no learner for such a mode, so skip the remaining prompts.
        return NULL;
    }

    int residualNum = CResidualChooser::getResidualFromInput();

    // For learner mode 2 this slot carries the beta calculator selection.
    int residualGradientNum = 0;
    if (learnMode == 1)
    {
        residualGradientNum = CResidualChooser::getResidualGradientFromInput();
    }
    else
    {
        residualGradientNum = CResidualChooser::getBetaCalculatorFromInput();
    }

    return getVFunctionLearner(learnMode, residualNum, residualGradientNum, vFunction, rewardFunction, dt);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -