📄 cpegasus.cpp
字号:
}
delete dModelGradient;
delete episodeGradient;
delete dPolicy;
delete dModelInput;
}
/**
 * Runs one controller episode per start state and averages the per-episode
 * gradients into gradientFeatures.
 *
 * This object is registered as a semi-MDP listener on the agent for the
 * duration of the call; episodeGradient is the member that newEpisode()
 * clears and nextStep() accumulates into.
 *
 * @param gradientFeatures  receives the result. It is NOT cleared here: each
 *                          episode gradient is added to whatever it already
 *                          contains, then the whole list is scaled by
 *                          1 / number of start states.
 * @param startStates       list of start states; one episode of at most
 *                          "PEGASUSHorizon" steps is run from each of them.
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates)
{
    printf("Pegasus Gradient Evaluation\n");

    agent->addSemiMDPListener(this);

    int horizon = my_round(getParameter("PEGASUSHorizon"));
    const unsigned int numStartStates = startStates->getNumStates();

    // Scratch state buffer; kept on the stack so it is released on every exit path.
    CState startState(dynModel->getStateProperties());

    for (unsigned int i = 0; i < numStartStates; ++i)
    {
        // %u: the episode counter is unsigned.
        printf("Evaluate Episode %u\n", i);

        agent->startNewEpisode();

        // Put the model into the i-th start state before running the episode.
        startStates->getState(i, &startState);
        dynModel->setState(&startState);

        agent->doControllerEpisode(1, horizon);

        gradientFeatures->add(episodeGradient, 1.0);
    }

    // Average over the episodes. With an empty start-state list there is nothing
    // to average, and 1.0 / 0 would scale the list by infinity.
    if (numStartStates > 0)
    {
        gradientFeatures->multFactor(1.0 / numStartStates);
    }

    rlt_real norm = sqrt(gradientFeatures->multFeatureList(gradientFeatures));

    // Guard on the same 'p' flag that the output below is written to (as nextStep()
    // does), so the 'p' file handle is only requested when that channel is enabled.
    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "Calculated Pegasus Gradient Norm: %f\n", norm);
        DebugPrint('p', "Calculated Gradient:\n");
        gradientFeatures->saveASCII(DebugGetFileHandle('p'));
    }

    printf("Finished Gradient Calculation, Gradient Norm: %f\n", norm);

    agent->removeSemiMDPListener(this);
}
/**
 * Distributes one feature list over several target lists, weighted by a
 * single matrix column.
 *
 * For every feature f in `features` and for the k-th list in `newFeatures`
 * (k counted from 0 in list order) this calls
 *   newFeatures[k]->update(f->featureIndex, f->factor * matrix->getElement(k, index)).
 *
 * @param matrix       matrix the weights are read from
 * @param features     source feature list
 * @param index        column of `matrix` to read
 * @param newFeatures  target lists; the position in the list is used as matrix row
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::multMatrixFeatureList(CMyMatrix *matrix, CFeatureList *features, int index, std::list<CFeatureList *> *newFeatures)
{
    for (CFeatureList::iterator feature = features->begin(); feature != features->end(); ++feature)
    {
        // Walk the target lists and the matrix rows in lock-step.
        int row = 0;
        std::list<CFeatureList *>::iterator target = newFeatures->begin();
        while (target != newFeatures->end())
        {
            (*target)->update((*feature)->featureIndex, (*feature)->factor * matrix->getElement(row, index));
            ++target;
            ++row;
        }
    }
}
/**
 * Per-step update of the analytical gradient: advances the state gradient by
 * one step and adds this step's discounted reward contribution to
 * episodeGradient. Presumably invoked by the agent for every step while this
 * object is registered as a semi-MDP listener.
 *
 * Sequence, as implemented below:
 *  1. clear every list in stateGradient2 and in dModelGradient;
 *  2. query dReward (rewardFunction at nextState) and dModelInput
 *     (dynModeldInput at oldState / action data);
 *  3. spread each list of stateGradient1 into stateGradient2, weighted by
 *     dModelInput columns 0 .. getNumContinuousStates()-1;
 *  4. query dPolicy (policydInput at oldStateCol), call policy->getGradient()
 *     for every list in dModelGradient, then spread each list of
 *     stateGradient1 into dModelGradient, weighted by the dPolicy columns;
 *  5. spread each list of dModelGradient into stateGradient2, weighted by the
 *     dModelInput columns that follow the continuous-state columns;
 *  6. scale dReward by DiscountFactor^steps and add every list of
 *     stateGradient2, weighted by the matching dReward element, to
 *     episodeGradient;
 *  7. swap stateGradient1 / stateGradient2 and increment steps.
 *
 * Presumably this is the chain rule through model, policy and reward - the
 * math is not spelled out in this file, confirm against the class docs.
 *
 * @param oldStateCol  state collection before the step
 * @param action       executed action; its action data is cast to CContinuousActionData
 * @param newStateCol  state collection after the step
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::nextStep(CStateCollection *oldStateCol, CAction *action, CStateCollection *newStateCol)
{
CState *oldState = oldStateCol->getState(dynModel->getStateProperties());
CState *nextState = newStateCol->getState(dynModel->getStateProperties());
// NOTE(review): the dynamic_cast result is not checked; if the action data is not a
// CContinuousActionData, a null pointer is handed to dynModeldInput below.
CContinuousActionData *data = dynamic_cast<CContinuousActionData *>(action->getActionData());
// Clear the second state gradient list; it is rebuilt below for the new state
std::list<CFeatureList *>::iterator it = stateGradient2->begin();
for (; it != stateGradient2->end(); it ++)
{
(*it)->clear();
}
// Clear the model gradient lists; they are rebuilt below as well
it = dModelGradient->begin();
for (; it != dModelGradient->end(); it ++)
{
(*it)->clear();
}
// Derivative of the reward function, queried at the new state (result in dReward)
rewardFunction->getInputDerivation(nextState, dReward);
// Derivative of the model, queried at the old state and action data (result in dModelInput)
dynModeldInput->getInputDerivation(oldState, data, dModelInput);
// Debug dump of the inputs of this step (only when the 'p' debug flag is enabled)
if (DebugIsEnabled('p'))
{
DebugPrint('p', "Pegasus Gradient Calculation:\n ");
DebugPrint('p', "State Gradient:\n ");
for (it = stateGradient1->begin(); it != stateGradient1->end(); it ++)
{
(*it)->saveASCII(DebugGetFileHandle('p'));
DebugPrint('p', "\n");
}
DebugPrint('p', "dReward: ");
dReward->saveASCII(DebugGetFileHandle('p'));
DebugPrint('p', "\n");
DebugPrint('p',"dModel: ");
dModelInput->saveASCII(DebugGetFileHandle('p'));
}
// Spread the i-th list of stateGradient1 over stateGradient2, weighted by column i of dModelInput.
// NOTE(review): 'it' is advanced without an end check; this assumes stateGradient1 holds at
// least getNumContinuousStates() lists - TODO confirm where the lists are created.
it = stateGradient1->begin();
for (unsigned int i = 0; i < dynModel->getNumContinuousStates(); i ++, it ++)
{
multMatrixFeatureList(dModelInput, *it, i, stateGradient2);
}
// Derivative of the policy, queried at the old state collection (result in dPolicy)
policydInput->getInputDerivation(oldStateCol, dPolicy);
if (DebugIsEnabled('p'))
{
DebugPrint('p',"dPolicy: ");
dPolicy->saveASCII(DebugGetFileHandle('p'));
DebugPrint('p', "\n");
}
// One policy->getGradient() call per list in dModelGradient; i is the position in the list
// (presumably the action dimension, with the gradient written into *it - confirm).
it = dModelGradient->begin();
for (int i = 0; it != dModelGradient->end(); it++, i++)
{
policy->getGradient(oldStateCol, i, *it);
}
// Spread the i-th list of stateGradient1 over dModelGradient, weighted by column i of dPolicy
it = stateGradient1->begin();
for (int i = 0; it != stateGradient1->end(); i ++, it ++)
{
multMatrixFeatureList(dPolicy, *it, i, dModelGradient);
}
// Spread the i-th list of dModelGradient over stateGradient2, using the dModelInput columns
// located after the continuous-state columns (presumably the action-input columns).
it = dModelGradient->begin();
for (int i = 0; it != dModelGradient->end(); it++, i++)
{
multMatrixFeatureList(dModelInput, *it, i + dynModel->getNumContinuousStates(), stateGradient2);
}
// Debug dump of the results of this step
if (DebugIsEnabled('p'))
{
DebugPrint('p', "Model Gradients:\n ");
for (it = dModelGradient->begin(); it != dModelGradient->end(); it ++)
{
(*it)->saveASCII(DebugGetFileHandle('p'));
DebugPrint('p', "\n");
}
DebugPrint('p', "New State Gradient:\n ");
for (it = stateGradient2->begin(); it != stateGradient2->end(); it ++)
{
(*it)->saveASCII(DebugGetFileHandle('p'));
DebugPrint('p', "\n");
}
}
// Discount for this step: DiscountFactor raised to the number of steps taken so far
rlt_real discountFactor = pow(getParameter("DiscountFactor"), steps);
dReward->multScalar(discountFactor);
// Add every list of the new state gradient, weighted by the matching (discounted)
// dReward element, to the episode gradient
it = stateGradient2->begin();
for (int i = 0; it != stateGradient2->end(); i ++, it ++)
{
episodeGradient->add(*it, dReward->getElement(i));
}
// Swap the two buffers: the gradient just computed becomes the input of the next step
std::list<CFeatureList *> *tempGradient = stateGradient1;
stateGradient1 = stateGradient2;
stateGradient2 = tempGradient;
steps ++;
}
void CPEGASUSAnalyticalPolicyGradientCalculator::newEpisode()
{
std::list<CFeatureList *>::iterator it = stateGradient1->begin();
for (; it != stateGradient1->end(); it ++)
{
(*it)->clear();
}
episodeGradient->clear();
steps = 0;
}
/**
 * Builds the numeric (finite-difference) PEGASUS gradient calculator.
 *
 * policy, dynModel, startStates, horizon and gamma are forwarded to the
 * CPEGASUSPolicyGradientCalculator base constructor. A weight buffer with
 * policy->getNumWeights() entries is allocated here (released in the
 * destructor), and the parameters "PEGASUSNumericStepSize" (= stepSize) and
 * "DiscountFactor" (= gamma) are registered.
 */
CPEGASUSNumericPolicyGradientCalculator::CPEGASUSNumericPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, CRewardFunction *rewardFunction, rlt_real stepSize, int startStates, int horizon, rlt_real gamma)
    : CPEGASUSPolicyGradientCalculator(policy, dynModel, startStates, horizon, gamma)
{
    this->agent = agent;
    this->rewardFunction = rewardFunction;

    // Scratch copy of the policy weights, one entry per weight.
    weights = new rlt_real[policy->getNumWeights()];

    addParameter("PEGASUSNumericStepSize", stepSize);
    addParameter("DiscountFactor", gamma);
}
/** Releases the weight buffer allocated with new[] in the constructor. */
CPEGASUSNumericPolicyGradientCalculator::~CPEGASUSNumericPolicyGradientCalculator()
{
    delete[] weights;
}
/**
 * Estimates the policy gradient with central differences.
 *
 * For every policy weight i the policy is evaluated with the weight shifted
 * by -stepSize and by +stepSize ("PEGASUSNumericStepSize"), and
 * (vPlus - vMinus) / (2 * stepSize) is stored as feature i of
 * gradientFeatures. The policy is installed as the agent's controller and
 * keeps its original weights when the function returns.
 *
 * @param gradientFeatures  receives one entry per policy weight
 * @param startStates       start states handed to the policy evaluator
 */
void CPEGASUSNumericPolicyGradientCalculator::getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates)
{
    // The evaluator is only needed during this call. It used to be created with
    // `new` and never deleted, leaking one evaluator per gradient computation;
    // as a local object it is destroyed automatically on return.
    CValueSameStateCalculator sameStateEvaluator(agent, rewardFunction, dynModel, startStates, static_cast<int>(getParameter("PEGASUSHorizon")), getParameter("DiscountFactor"));
    // Calls still go through the base-class pointer, exactly as before.
    CPolicyEvaluator *evaluator = &sameStateEvaluator;

    policy->getWeights(weights);
    agent->setController(policy);

    rlt_real stepSize = getParameter("PEGASUSNumericStepSize");

    for (int i = 0; i < policy->getNumWeights(); i++)
    {
        // Remember the exact value: restoring it via "-= h; += 2h; -= h" can leave
        // floating-point rounding residue in the weight.
        const rlt_real originalWeight = weights[i];

        weights[i] = originalWeight - stepSize;
        policy->setWeights(weights);
        rlt_real vMinus = evaluator->evaluatePolicy();

        weights[i] = originalWeight + stepSize;
        policy->setWeights(weights);
        rlt_real vPlus = evaluator->evaluatePolicy();

        weights[i] = originalWeight;

        gradientFeatures->set(i, (vPlus - vMinus) / (2 * stepSize));
        printf("Calculated derivation for weight %d : %f\n", i, gradientFeatures->getFeatureFactor(i));
    }

    // Re-install the unmodified weights in the policy.
    policy->setWeights(weights);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -