// cpolicies.cpp
void CStochasticPolicy::getActionProbabilityLnGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState)
{
	rlt_real prop = 0.0;

	// Probability of the chosen action under the current distribution.
	getActionProbabilities(state, actions, actionValues);
	prop = actionValues[actions->getIndex(action)];

	// Gradient of the action probability, rescaled by 1/prop.
	getActionProbabilityGradient(state, action, data, gradientState);
	gradientState->multFactor(1 / prop);

	if (DebugIsEnabled('p'))
	{
		DebugPrint('p', "Policy Gradient Ln Factors:\n");
		gradientState->saveASCII(DebugGetFileHandle('p'));
		DebugPrint('p', "\n");
	}
}
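
// Note: getActionProbabilityLnGradient relies on the log-derivative identity
//   d/dw ln pi(a|s) = (d/dw pi(a|s)) / pi(a|s),
// which is why the probability gradient is multiplied by 1/prop above. If the
// distribution assigns (near-)zero probability to the chosen action, that
// factor blows up; this case is not guarded against here.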

CQStochasticPolicy::CQStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractQFunction *qfunction) : CStochasticPolicy(actions, distribution)
{
	this->qfunction = qfunction;
	addParameters(qfunction);
}

CQStochasticPolicy::~CQStochasticPolicy()
{
}

bool CQStochasticPolicy::isDifferentiable()
{
	return (distribution->isDifferentiable() && qfunction->isType(GRADIENTQFUNCTION));
}

void CQStochasticPolicy::getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState)
{
	gradientState->clear();

	if (isDifferentiable())
	{
		CGradientQFunction *gradQFunc = dynamic_cast<CGradientQFunction *>(qfunction);
		gradQFunc->getGradient(state, action, data, gradientState);
	}
}

void CQStochasticPolicy::getActionStatistics(CStateCollection *state, CAction *action, CActionStatistics *stat)
{
	this->qfunction->getStatistics(state, action, qfunction->getActions(), stat);
}

void CQStochasticPolicy::getActionValues(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues, CActionDataSet *actionDataSet)
{
	// Clear the value buffer, then let the Q-function fill in the values of the
	// available actions.
	for (unsigned int i = 0; i < availableActions->size(); i++)
	{
		actionValues[i] = 0.0;
	}
	qfunction->getActionValues(state, availableActions, actionValues);
}

/*
void CQStochasticPolicy::updateGradient(CFeatureList *gradient, rlt_real factor)
{
	if (qfunction->isType(GRADIENTQFUNCTION))
	{
		CGradientQFunction *gradQFunc = dynamic_cast<CGradientQFunction *>(qfunction);
		gradQFunc->updateGradient(gradient, factor);
	}
}

int CQStochasticPolicy::getNumWeights()
{
	if (qfunction->isType(GRADIENTQFUNCTION))
	{
		return dynamic_cast<CGradientQFunction *>(qfunction)->getNumWeights();
	}
	else
	{
		return 0;
	}
}
*/

CVMStochasticPolicy::CVMStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractVFunction *vFunction, CTransitionFunction *model, CRewardFunction *reward, std::list<CStateModifier *> *modifiers) : CQStochasticPolicy(actions, distribution, new CQFunctionFromTransitionFunction(actions, vFunction, model, reward, modifiers))
{
	this->vFunction = vFunction;
	this->model = model;
	this->reward = reward;

	addParameters(vFunction);
	// addParameters(model);
	addParameter("DiscountFactor", 0.95);

	// State buffers used for simulating transitions with the model.
	nextState = new CStateCollectionImpl(model->getStateProperties());
	intermediateState = new CStateCollectionImpl(model->getStateProperties());

	nextState->addStateModifiers(modifiers);
	intermediateState->addStateModifiers(modifiers);
}

CVMStochasticPolicy::~CVMStochasticPolicy()
{
	delete nextState;
	delete intermediateState;
}
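
// CVMStochasticPolicy rates each action by simulating it with the transition
// model and evaluating the V-function at the predicted successor state,
// roughly Q(s, a) = r(s, a, s') + gamma^d * V(s') with d the action duration
// (see the CQFunctionFromTransitionFunction passed to the base class and the
// commented-out getActionValues below).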

/*
void CVMStochasticPolicy::getActionValues(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues, CActionDataSet *actionDataSet)
{
	CActionSet::iterator it = availableActions->begin();

	for (int i = 0; it != availableActions->end(); it++, i++)
	{
		CPrimitiveAction *primAction = ((CPrimitiveAction *)(*it));
		int duration = 1;

		if (primAction->isStateToChange())
		{
			CStateCollectionImpl *buf = NULL;
			nextState->getState(model->getStateProperties())->setState(state->getState(model->getStateProperties()));

			CMultiStepActionData *data = NULL;
			if (actionDataSet)
			{
				data = dynamic_cast<CMultiStepActionData *>(actionDataSet->getActionData(*it));
			}

			duration = 0;
			do
			{
				// exchange the model state buffers
				buf = intermediateState;
				intermediateState = nextState;
				nextState = buf;

				model->transitionFunction(intermediateState->getState(model->getStateProperties()), (*it), nextState->getState(model->getStateProperties()));
				nextState->newModelState();

				duration += primAction->getSingleExecutionDuration();
			}
			// execute the action until the state has changed
			while (duration < primAction->maxStateToChangeDuration && !primAction->stateChanged(state, nextState));

			if (data)
			{
				data->duration = duration;
			}
		}
		else
		{
			model->transitionFunction(state->getState(model->getStateProperties()), *it, nextState->getState(model->getStateProperties()));
			nextState->newModelState();
		}

		if (actionDataSet && (*it)->isType(MULTISTEPACTION))
		{
			CActionData *actionData = actionDataSet->getActionData(*it);
			CMultiStepActionData *multiStepActionData = dynamic_cast<CMultiStepActionData *>(actionData);
			duration = multiStepActionData->duration;
		}
		else
		{
			duration = (*it)->getDuration();
		}

		rlt_real rewardValue = reward->getReward(state, *it, nextState);
		rlt_real value = vFunction->getValue(nextState);

		actionValues[i] = rewardValue + pow(getParameter("DiscountFactor"), duration) * value;

		if (DebugIsEnabled('p'))
		{
			DebugPrint('p', "VM Stochastic Policy: Action %d, State: ", i);
			nextState->getState()->saveASCII(DebugGetFileHandle('p'));
			DebugPrint('p', ", functionValue: %f, reward %f\n", value, rewardValue);
		}
	}
}
*/

void CVMStochasticPolicy::getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState)
{
	gradientState->clear();

	if (isDifferentiable())
	{
		// Simulate the action with the transition model and take the gradient of
		// the V-function at the predicted successor state.
		model->transitionFunction(state->getState(model->getStateProperties()), action, nextState->getState(model->getStateProperties()), data);
		nextState->newModelState();

		CGradientVFunction *gradVFunc = dynamic_cast<CGradientVFunction *>(this->vFunction);
		gradVFunc->getGradient(nextState, gradientState);

		int duration = 1;
		if (data && action->isType(MULTISTEPACTION))
		{
			duration = dynamic_cast<CMultiStepActionData *>(data)->duration;
		}
		else
		{
			duration = action->getDuration();
		}

		// Discount the gradient by gamma^duration.
		gradientState->multFactor(pow(getParameter("DiscountFactor"), duration));
	}
}
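
// The gradient above is taken with respect to the V-function weights. Since the
// model-based action value is r(s, a, s') + gamma^d * V(s') and the reward term
// does not depend on those weights, the gradient reduces to gamma^d * grad V(s'),
// which is exactly what getActionGradient computes.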

bool CVMStochasticPolicy::isDifferentiable()
{
	return (distribution->isDifferentiable() && vFunction->isType(GRADIENTVFUNCTION));
}

/*
CExplorationDistribution::CExplorationDistribution(CActionSet *actions, CActionDistribution *distribution, CExplorationGain *explorationGain) : CActionDistribution()
{
	this->explorationGain = explorationGain;
	this->distribution = distribution;

	addParameter("ExplorationFactor", 0.5);
	addParameters(distribution);
	addParameters(explorationGain);

	exploration = new rlt_real[actions->size()];
}

void CExplorationDistribution::getDistribution(CStateCollection *state, CActionSet *availableActions, rlt_real *actionValues)
{
	CActionSet::iterator it = availableActions->begin();
	rlt_real norm = 0.0;
	unsigned int i = 0;

	for (i = 0, it = availableActions->begin(); i < availableActions->size(); i++, it++)
	{
		exploration[i] = explorationGain->getExplorationGain(*it, state);
		norm += exploration[i];
	}

	distribution->getDistribution(state, availableActions, actionValues);

	rlt_real alpha = getAlpha(state);

	for (i = 0; i < availableActions->size(); i++)
	{
		actionValues[i] = alpha * exploration[i] + (1 - alpha) * actionValues[i];
	}
}

void CExplorationDistribution::getGradientFactors(CStateCollection *state, CAction *usedAction, CActionSet *actions, rlt_real *actionFactors, CMyVector *gradientFactors)
{
}

CExplorationDistribution::~CExplorationDistribution()
{
	// exploration was allocated with new[], so it must be released with delete[].
	delete [] exploration;
}

rlt_real CExplorationDistribution::getAlpha(CStateCollection *state)
{
	return getParameter("ExplorationFactor");
}

void CExplorationDistribution::setAlpha(rlt_real alpha)
{
	setParameter("ExplorationFactor", alpha);
}

CExplorationGain *CExplorationDistribution::getExplorationGain()
{
	return this->explorationGain;
}

void CExplorationDistribution::setExplorationGain(CExplorationGain *explorationGain)
{
	this->explorationGain = explorationGain;
}

CExplorationGain::CExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc) : CStateObject(calc)
{
	this->model = model;
}

rlt_real CExplorationGain::getExplorationGain(CAction *action, CStateCollection *state)
{
	CState *featState = state->getState(properties);
	int actionIndex = model->getActions()->getIndex(action);
	rlt_real infGain = 0.0;

	int type = featState->getStateProperties()->getType() & (FEATURESTATE | DISCRETESTATE);

	switch (type)
	{
		case FEATURESTATE:
		{
			for (unsigned int i = 0; i < featState->getNumContinuousStates(); i++)
			{
				infGain += featState->getContinuousState(i) * this->getExplorationGain(actionIndex, featState->getDiscreteState(i));
			}
			break;
		}
		case DISCRETESTATE:
		{
			infGain = this->getExplorationGain(actionIndex, featState->getDiscreteState(0));
			break;
		}
		default:
		{
		}
	}
	return infGain;
}

CLogExplorationGain::CLogExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc) : CExplorationGain(model, calc)
{
}

rlt_real CLogExplorationGain::getExplorationGain(int action, int feature)
{
	return 1.0 / (log(model->getStateActionVisits(feature, action) + 1) + 1);
}

CPowExplorationGain::CPowExplorationGain(CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *calc, rlt_real power) : CExplorationGain(model, calc)
{
	addParameter("PowExplorationExponent", power);
}

rlt_real CPowExplorationGain::getExplorationGain(int action, int feature)
{
	rlt_real expon = getParameter("PowExplorationExponent");
	return pow(model->getStateActionVisits(feature, action) + 1, expon);
}

void CPowExplorationGain::setPower(rlt_real power)
{
	setParameter("PowExplorationExponent", power);
}

rlt_real CPowExplorationGain::getPower()
{
	return getParameter("PowExplorationExponent");
}
*/
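
// The commented-out exploration classes above blend a base action distribution
// with a per-action exploration bonus,
//   p(a|s) = alpha * explorationGain(a, s) + (1 - alpha) * p_base(a|s),
// where alpha is the "ExplorationFactor" parameter and the gain decays with the
// state-action visit counts of the estimated model (logarithmically for
// CLogExplorationGain, as a power law for CPowExplorationGain).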