📄 ccontinuousactions.cpp
字号:
// Computes the activation factor of this action for the supplied action data:
//     exp(-0.5 * sum_i ((dynAction_i - thisAction_i) / rbfSigma[i])^2)
// where i runs over properties->getNumActionValues() action components.
//
// dynAction: action data whose components are compared against this action's.
// Returns:   the factor; it equals 1 when all components coincide.
rlt_real CContinuousRBFAction::getActionFactor(CContinuousActionData *dynAction)
{
    // Accumulate the sigma-scaled squared distance over all action components.
    rlt_real scaledSquaredDistance = 0.0;
    for (unsigned int dim = 0; dim < properties->getNumActionValues(); ++dim)
    {
        scaledSquaredDistance += pow((dynAction->getActionValue(dim) - getActionValue(dim)) / rbfSigma[dim], 2);
    }

    // Halve the distance and map it through a decaying exponential.
    const rlt_real exponent = scaledSquaredDistance / 2;
    return exp(- exponent);
}
/*
CContinuousActionVFunction::CContinuousActionVFunction(CStateProperties *properties, CContinuousActionProperties *actionProp) : CAbstractVFunction(properties)
{
this->actionProp = actionProp;
addType(CONTINUOUSVFUNCTION);
}
void CContinuousActionVFunction::updateValue(CStateCollection *state, CContinuousAction *action, rlt_real td)
{
updateValue(state->getState(getStateProperties()), action, td);
}
void CContinuousActionVFunction::setValue(CStateCollection *state, CContinuousAction *action, rlt_real qValue)
{
setValue(state->getState(getStateProperties()), action, qValue);
}
rlt_real CContinuousActionVFunction::getValue(CStateCollection *state, CContinuousAction *action)
{
return getValue(state->getState(getStateProperties()), action);
}
*/
// Creates a Q-function for a single continuous action.
//
// A freshly allocated CActionSet is handed to the CGradientQFunction base;
// contAction is then added to `actions` (presumably that same set, as stored
// by the base class -- confirm in the header) and the object is tagged with
// the CONTINUOUSACTIONQFUNCTION type.
CContinuousActionQFunction::CContinuousActionQFunction(CContinuousAction *contAction)
    : CGradientQFunction(new CActionSet())
{
    this->contAction = contAction;
    this->actions->add(contAction);

    addType(CONTINUOUSACTIONQFUNCTION);
}
// Destroys the Q-function and deletes the `actions` set.
// (The constructor passes a heap-allocated CActionSet to the base class.)
CContinuousActionQFunction::~CContinuousActionQFunction()
{
    delete this->actions;
}
// Determines the best continuous action for `state`.
//
// The result is written into the action-data entry that `actionDatas` holds
// for contAction (via getBestContinuousAction); contAction itself is returned.
// `availableActions` is not consulted by this implementation.
CAction *CContinuousActionQFunction::getMax(CStateCollection *state, CActionSet *availableActions, CActionDataSet *actionDatas)
{
    CContinuousActionData *bestActionData =
        dynamic_cast<CContinuousActionData *>(actionDatas->getActionData(contAction));

    getBestContinuousAction(state, bestActionData);

    return contAction;
}
// Applies the update `td` to the Q-value of a continuous action in `state`.
//
// If `data` is NULL the action data is taken from `action` itself (which is
// cast to CContinuousAction); otherwise `data` is used. The work is delegated
// to updateCAValue().
void CContinuousActionQFunction::updateValue(CStateCollection *state, CAction *action, rlt_real td, CActionData *data)
{
    if (data == NULL)
    {
        // No explicit action data supplied: fall back to the action's own data.
        updateCAValue(state, dynamic_cast<CContinuousAction*>(action)->getContinuousActionData(), td);
        return;
    }

    updateCAValue(state, dynamic_cast<CContinuousActionData*>(data), td);
}
// Sets the Q-value of a continuous action in `state` to `qValue`.
//
// If `data` is NULL the action data is taken from `action` itself (which is
// cast to CContinuousAction); otherwise `data` is used. The work is delegated
// to setCAValue().
void CContinuousActionQFunction::setValue(CStateCollection *state, CAction *action, rlt_real qValue, CActionData *data)
{
    if (data == NULL)
    {
        // No explicit action data supplied: fall back to the action's own data.
        setCAValue(state, dynamic_cast<CContinuousAction*>(action)->getContinuousActionData(), qValue);
        return;
    }

    setCAValue(state, dynamic_cast<CContinuousActionData*>(data), qValue);
}
// Returns the Q-value of a continuous action in `state`.
//
// If `data` is NULL the action data is taken from `action` itself (which is
// cast to CContinuousAction); otherwise `data` is used. The value is computed
// by getCAValue().
//
// Throws a heap-allocated CDivergentQFunctionException (thrown by pointer)
// when `mayDiverge` is false and the value lies outside
// [-DIVERGENTVFUNCTIONVALUE, DIVERGENTVFUNCTIONVALUE].
rlt_real CContinuousActionQFunction::getValue(CStateCollection *state, CAction *action, CActionData *data)
{
    rlt_real qValue = 0.0;

    if (data == NULL)
    {
        qValue = getCAValue(state, dynamic_cast<CContinuousAction *>(action)->getContinuousActionData());
    }
    else
    {
        qValue = getCAValue(state, dynamic_cast<CContinuousActionData *>(data));
    }

    // Divergence check, skipped entirely when divergence is tolerated.
    if (! mayDiverge)
    {
        if (qValue > DIVERGENTVFUNCTIONVALUE || qValue < - DIVERGENTVFUNCTIONVALUE)
        {
            throw new CDivergentQFunctionException("Continuous Action Q-Function", this, state->getState(), qValue);
        }
    }

    return qValue;
}
// Computes the gradient of the Q-function for a continuous action in `state`
// and stores it in `gradient` (via getCAGradient()).
//
// If `data` is NULL the action data is obtained from action->getActionData();
// otherwise `data` is used. In both cases the data is cast to
// CContinuousActionData.
void CContinuousActionQFunction::getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient)
{
    if (!data)
    {
        // No explicit action data supplied: fall back to the action's own data.
        getCAGradient(state, dynamic_cast<CContinuousActionData *>(action->getActionData()), gradient);
        return;
    }

    getCAGradient(state, dynamic_cast<CContinuousActionData *>(data), gradient);
}
// Wraps a discrete-action Q-function so it can be evaluated for a continuous
// action.
//
// qFunction:  underlying Q-function; its action set is passed to the
//             CContinuousActionLinearFA base.
// contAction: the continuous action; its properties are passed to the
//             CContinuousActionLinearFA base.
//
// Allocates two scratch arrays with one entry per action of `qFunction`, plus
// a temporary feature list. All three are released in the destructor.
CCALinearFAQFunction::CCALinearFAQFunction(CQFunction *qFunction, CContinuousAction *contAction)
    : CContinuousActionQFunction(contAction),
      CContinuousActionLinearFA(qFunction->getActions(), contAction->getContinuousActionProperties())
{
    this->qFunction = qFunction;

    // Per-action scratch buffers.
    actionFactors = new rlt_real[qFunction->getNumActions()];
    CAactionValues = new rlt_real[qFunction->getNumActions()];

    // Scratch list for gradient computations.
    tempGradient = new CFeatureList();
}
// Releases the scratch buffers allocated by the constructor: the two arrays
// with delete[] and the temporary feature list with delete.
CCALinearFAQFunction::~CCALinearFAQFunction()
{
    // Array buffers.
    delete [] actionFactors;
    delete [] CAactionValues;

    // Single object.
    delete tempGradient;
}
// Writes the best action for `state` into `actionData`.
//
// The best action is the maximising action of the underlying Q-function over
// its own action set; that action is cast to CStaticContinuousAction and its
// action data is copied into `actionData`.
void CCALinearFAQFunction::getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData)
{
    CStaticContinuousAction *bestStaticAction =
        dynamic_cast<CStaticContinuousAction *>(qFunction->getMax(state, qFunction->getActions()));

    actionData->setData(bestStaticAction->getActionData());

    // Disabled alternative, kept for reference: normalise the action factors
    // and blend the static actions into one continuous action.
    /*rlt_real sum = 0.0;
    rlt_real minVal = actionFactors[0];
    for (unsigned int i = 0; i < qFunction->getActions()->size(); i++)
    {
    sum += actionFactors[i];
    if (minVal > actionFactors[i])
    {
    minVal = actionFactors[i];
    }
    }
    for (unsigned int i = 0; i < qFunction->getActions()->size(); i++)
    {
    if (sum - minVal * qFunction->getActions()->size() == 0)
    {
    actionFactors[i] = 1 / qFunction->getActions()->size();
    }
    else
    {
    actionFactors[i] = (actionFactors[i] - minVal) / (sum - minVal * qFunction->getActions()->size());
    }
    }*/
    // getContinuousAction(actionData, actionFactors);
}
// Distributes the update `td` over the per-action V-functions of the
// underlying Q-function.
//
// The action factors for `data` are computed first; each V-function whose
// factor exceeds 0.0001 is then updated with td scaled by that factor.
void CCALinearFAQFunction::updateCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real td)
{
    getActionFactors(data, actionFactors);

    CActionSet::iterator actionIt = qFunction->getActions()->begin();
    unsigned int idx = 0;
    while (idx < qFunction->getNumActions())
    {
        // Skip actions whose contribution is negligible.
        if (actionFactors[idx] > 0.0001)
        {
            qFunction->getVFunction(*actionIt)->updateValue(state, td * actionFactors[idx]);
        }
        ++actionIt;
        ++idx;
    }
}
// Sets the value of the per-action V-functions of the underlying Q-function
// for the continuous action `data`.
//
// The action factors for `data` are computed first; each V-function whose
// factor exceeds 0.0001 is then set to qValue scaled by that factor. Every
// action is logged on debug channel 'q', including skipped ones.
void CCALinearFAQFunction::setCAValue(CStateCollection *state, CContinuousActionData *data, rlt_real qValue)
{
    getActionFactors(data, actionFactors);

    CActionSet::iterator actionIt = qFunction->getActions()->begin();
    unsigned int idx = 0;
    while (idx < qFunction->getNumActions())
    {
        DebugPrint('q', "Set LinearFAQFunction: Action %d, Value %f, ActionFactor %f\n", idx, qValue, actionFactors[idx]);

        // Skip actions whose contribution is negligible.
        if (actionFactors[idx] > 0.0001)
        {
            qFunction->getVFunction(*actionIt)->setValue(state, qValue * actionFactors[idx]);
        }
        ++actionIt;
        ++idx;
    }
}
// Returns the Q-value of the continuous action `data` in `state`.
//
// The value is the sum, over all actions of the underlying Q-function, of
// that action's value multiplied by its action factor for `data`.
rlt_real CCALinearFAQFunction::getCAValue(CStateCollection *state, CContinuousActionData *data)
{
    // Fill the two scratch buffers: factors first, then per-action values.
    getActionFactors(data, actionFactors);
    this->getQFunctionForCA()->getActionValues(state, getQFunctionForCA()->getActions(), CAactionValues);

    // Weighted sum of the per-action values.
    rlt_real weightedSum = 0.0;
    for (unsigned int idx = 0; idx < qFunction->getNumActions(); ++idx)
    {
        weightedSum += CAactionValues[idx] * actionFactors[idx];
    }
    return weightedSum;
}
// Accessor for the underlying Q-function that was passed to the constructor.
CQFunction *CCALinearFAQFunction::getQFunctionForCA()
{
    return this->qFunction;
}
// Forwards a weight update to the underlying Q-function by calling its
// updateGradient() with `features` and a factor of 1.0.
void CCALinearFAQFunction::updateWeights(CFeatureList *features)
{
    this->qFunction->updateGradient(features, 1.0);
}
// Accumulates into `gradient` the gradient of the Q-value for the continuous
// action `action` in `state`.
//
// For every action in `contActions`, the gradient of the underlying
// Q-function is computed into the scratch list, scaled by that action's
// factor, and each entry whose absolute value exceeds 0.00001 is added to
// `gradient` through gradient->update(). `gradient` is not cleared here.
void CCALinearFAQFunction::getCAGradient(CStateCollection *state, CContinuousActionData *action, CFeatureList *gradient)
{
    getActionFactors(action, actionFactors);

    int factorIdx = 0;
    for (CActionSet::iterator actionIt = this->contActions->begin(); actionIt != contActions->end(); ++actionIt, ++factorIdx)
    {
        // Gradient of the current static action, scaled by its factor.
        tempGradient->clear();
        qFunction->getGradient(state, *actionIt, NULL, tempGradient);
        tempGradient->multFactor(actionFactors[factorIdx]);

        // Merge the non-negligible entries into the caller's list.
        for (CFeatureList::iterator featIt = tempGradient->begin(); featIt != tempGradient->end(); ++featIt)
        {
            if (fabs((*featIt)->factor) > 0.00001)
            {
                gradient->update((*featIt)->featureIndex, (*featIt)->factor);
            }
        }
    }
}
int CCALinearFAQFunction::getNumWeights()
{
return qFunction->getNumWeights();
}
// Creates a new CCALinearFAQETraces object bound to this Q-function.
// The object is heap-allocated; the caller receives the raw pointer.
CAbstractQETraces* CCALinearFAQFunction::getStandardETraces()
{
    CCALinearFAQETraces *eTraces = new CCALinearFAQETraces(this);
    return eTraces;
}
// Copies the weights of the underlying Q-function into `weights`
// (delegates to qFunction->getWeights()).
void CCALinearFAQFunction::getWeights(rlt_real *weights)
{
    this->qFunction->getWeights(weights);
}
// Sets the weights of the underlying Q-function from `weights`
// (delegates to qFunction->setWeights()).
void CCALinearFAQFunction::setWeights(rlt_real *weights)
{
    this->qFunction->setWeights(weights);
}
// Creates e-traces for a CCALinearFAQFunction.
//
// The CQETraces base is initialised with the Q-function returned by
// qfunction->getQFunctionForCA(). A scratch array with
// qfunction->getNumContinuousActionFA() entries is allocated for the action
// factors.
CCALinearFAQETraces::CCALinearFAQETraces(CCALinearFAQFunction *qfunction)
    : CQETraces(qfunction->getQFunctionForCA())
{
    this->contQFunc = qfunction;
    this->actionFactors = new rlt_real[qfunction->getNumContinuousActionFA()];
}
// Releases the action-factor scratch buffer.
//
// The buffer is allocated with new[] in the constructor, so it has to be
// released with delete[]; releasing an array with scalar delete is undefined
// behaviour.
CCALinearFAQETraces::~CCALinearFAQETraces()
{
    delete [] actionFactors;
}
// Adds an e-trace for a continuous action.
//
// Does nothing unless `action` is of type CONTINUOUSACTION. Otherwise the
// action factors are computed for `data` (or for action->getActionData()
// when `data` is NULL), and each per-action V-e-trace receives `factor`
// scaled by its action factor. Progress is logged on debug channel 'e'.
void CCALinearFAQETraces::addETrace(CStateCollection *State, CAction *action, rlt_real factor, CActionData *data)
{
    if (!action->isType(CONTINUOUSACTION))
    {
        return;
    }

    // Pick the action data: explicit argument if given, else the action's own.
    CContinuousActionData *contData = NULL;
    if (data != NULL)
    {
        contData = dynamic_cast<CContinuousActionData *>(data);
    }
    else
    {
        contData = dynamic_cast<CContinuousActionData *>(action->getActionData());
    }

    contQFunc->getActionFactors(contData, actionFactors);

    std::list<CAbstractVETraces *>::iterator traceIt = vETraces->begin();
    DebugPrint('e', "Adding CALinearFA Etraces: %f factor\n", factor);

    // One V-e-trace per action of the underlying Q-function.
    unsigned int idx = 0;
    while (idx < qFunction->getNumActions())
    {
        (*traceIt)->addETrace(State, factor * actionFactors[idx]);
        DebugPrint('e', "%f ", actionFactors[idx]);
        ++idx;
        ++traceIt;
    }
    DebugPrint('e',"\n");
}
// Creates a policy that derives a continuous action from a set of static
// continuous actions.
//
// contAction:              passed to the CContinuousActionController base.
// distribution:            action distribution; its parameters are added to
//                          this object's parameters.
// continuousActionQFunc:   Q-function queried for the static action values.
// continuousStaticActions: the static actions; a scratch array with one
//                          entry per action is allocated here.
// maximumDistance:         initial value of the
//                          "CAPolicyMaximumActionDistance" parameter.
CContinuousActionPolicy::CContinuousActionPolicy(CContinuousAction *contAction, CActionDistribution *distribution, CAbstractQFunction *continuousActionQFunc, CActionSet *continuousStaticActions, rlt_real maximumDistance)
    : CContinuousActionController(contAction)
{
    this->distribution = distribution;
    this->continuousActionQFunc = continuousActionQFunc;
    this->continuousStaticActions = continuousStaticActions;

    // Scratch buffer: one value per static action.
    this->actionValues = new rlt_real[continuousStaticActions->size()];

    addParameter("CAPolicyMaximumActionDistance", maximumDistance);
    addParameters(distribution);
}
// Releases the per-action scratch array allocated by the constructor.
CContinuousActionPolicy::~CContinuousActionPolicy()
{
    delete [] this->actionValues;
}
// Chooses the next continuous action for `state` and writes it into `action`.
//
// Steps:
//   1. Query the Q-function for the value of every static action.
//   2. Let the distribution transform those values in place.
//   3. Sample one static action index from the transformed values.
//   4. If "CAPolicyMaximumActionDistance" is > 0, blend the sampled action
//      with every static action closer to it than that distance, weighting
//      each by its entry in actionValues and dividing by the weight sum;
//      otherwise copy the sampled action's data unchanged.
//
// Intermediate results are logged on debug channel 'p'.
void CContinuousActionPolicy::getNextContinuousAction(CStateCollection *state, CContinuousActionData *action)
{
    // Step 1: raw action values.
    continuousActionQFunc->getActionValues(state, continuousStaticActions, actionValues, NULL);

    DebugPrint('p', "ContinuousActionPolicy ActionValues: ");
    for (unsigned int k = 0; k < continuousStaticActions->size(); ++k)
    {
        DebugPrint('p', "%f ", actionValues[k]);
    }
    DebugPrint('p',"\n");

    // Step 2: actionValues is overwritten by the distribution.
    distribution->getDistribution(state, continuousStaticActions, actionValues);

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "ContinuousActionPolicy ActionFactors: ");
        for (unsigned int k = 0; k < continuousStaticActions->size(); ++k)
        {
            DebugPrint('p', "%f ", actionValues[k]);
        }
        DebugPrint('p',"\n");
    }

    // Step 3: sample an index and walk the iterator to that static action.
    CActionSet::iterator actionIt = continuousStaticActions->begin();
    int actionIndex = CDistributions::getSampledIndex(actionValues, continuousStaticActions->size());

    int stepsTaken = 0;
    while (stepsTaken < actionIndex)
    {
        ++actionIt;
        ++stepsTaken;
    }

    CMyVector *sampledActionData = dynamic_cast<CContinuousAction *>(*actionIt)->getContinuousActionData();

    // Step 4: build the resulting action.
    actionIt = continuousStaticActions->begin();
    action->initVector(0);

    rlt_real maximumDistance = getParameter("CAPolicyMaximumActionDistance");

    if (maximumDistance > 0)
    {
        rlt_real weightSum = 0.0;
        int idx = 0;
        for (; actionIt != continuousStaticActions->end(); ++actionIt, ++idx)
        {
            CStaticContinuousAction *candidate = dynamic_cast<CLinearFAContinuousAction *>(*actionIt);

            // The sampled action always takes part; others only when they
            // lie within the maximum distance of it.
            if (idx == actionIndex || candidate->getContinuousActionData()->getDistance(sampledActionData) < maximumDistance)
            {
                candidate->addToContinuousAction(action, actionValues[idx]);
                weightSum += actionValues[idx];
            }
        }
        action->multScalar(1.0 / weightSum);
    }
    else
    {
        action->setVector(sampledActionData);
    }

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "ContinuousActionPolicy Calculated Action: ");
        action->saveASCII(DebugGetFileHandle('p'));
        DebugPrint('p',"\n");
    }
}
// Creates a random controller for `action`.
//
// sigma: initial value of the "RandomPolicySigma" parameter.
// alpha: initial value of the "RandomPolicySmoothFactor" parameter.
//
// A noise vector with action->getNumDimensions() entries is allocated and
// zeroed; it is released in the destructor.
CContinuousActionRandomPolicy::CContinuousActionRandomPolicy(CContinuousAction *action, rlt_real sigma, rlt_real alpha)
    : CContinuousActionController(action)
{
    addParameter("RandomPolicySigma",sigma);
    addParameter("RandomPolicySmoothFactor", alpha);

    this->lastNoise = new CMyVector(action->getNumDimensions());
    this->lastNoise->initVector(0.0);
}
// Releases the noise vector allocated by the constructor.
CContinuousActionRandomPolicy::~CContinuousActionRandomPolicy()
{
    delete this->lastNoise;
}
void CContinuousActionRandomPolicy::newEpisode()
{
lastNoise->initVector(0.0);
}
// Fills `action` with smoothed random noise.
//
// For each action dimension a sample is drawn from a normal distribution
// with mean 0 and the "RandomPolicySigma" parameter (no sample is drawn, and
// 0 is used, when sigma is not greater than 0.00001). The stored noise is
// updated as
//     noise = noise * "RandomPolicySmoothFactor" + sample
// and written to the action. `state` is not used.
void CContinuousActionRandomPolicy::getNextContinuousAction(CStateCollection *state, CContinuousActionData *action)
{
    const rlt_real sigma = getParameter("RandomPolicySigma");
    const rlt_real alpha = getParameter("RandomPolicySmoothFactor");

    for (unsigned int dim = 0; dim < action->getNumDimensions(); ++dim)
    {
        rlt_real noiseSample = 0.0;
        if (sigma > 0.00001)
        {
            noiseSample = CDistributions::getNormalDistributionSample(0.0, sigma);
        }

        // Logged before the update, so the first value is the previous noise.
        DebugPrint('p', "Random Controller : %f, %f\n", lastNoise->getElement(dim), noiseSample);

        lastNoise->setElement(dim, lastNoise->getElement(dim) * alpha + noiseSample);
        action->setActionValue(dim, lastNoise->getElement(dim));
    }
}
// Creates a controller that combines the outputs of several continuous-action
// controllers.
//
// Allocates an empty controller list, an empty controller-to-weight map, and
// a scratch vector with action->getNumDimensions() entries. All three are
// released in the destructor.
CContinuousActionAddController::CContinuousActionAddController(CContinuousAction *action) : CContinuousActionController(action)
{
    this->controllers = new std::list<CContinuousActionController *>();
    // Name the map as std::map, matching std::list above, so that this does
    // not depend on a global using-directive.
    this->controllerWeights = new std::map<CContinuousActionController *, rlt_real>();
    actionValues = new CMyVector(action->getNumDimensions());
}
// Releases the containers and the scratch vector allocated by the
// constructor. Only the list and map objects themselves are deleted here,
// not the controllers they point to.
CContinuousActionAddController::~CContinuousActionAddController()
{
    delete this->controllers;
    delete this->controllerWeights;
    delete this->actionValues;
}
// Writes the weighted average of all registered controllers' actions into
// `action`.
//
// Each controller is asked for its action (using `action` as scratch space);
// the result is scaled by the controller's weight and accumulated. The
// accumulated vector is then divided by the sum of the weights.
//
// If the weights sum to zero (in particular, when no controller has been
// registered) the division is skipped, so `action` receives the unnormalised
// sum -- a zero vector when there are no controllers -- instead of NaN/inf.
void CContinuousActionAddController::getNextContinuousAction(CStateCollection *state, CContinuousActionData *action)
{
    actionValues->initVector(0.0);
    rlt_real weightsSum = 0.0;

    std::list<CContinuousActionController *>::iterator it = controllers->begin();
    for (; it != controllers->end(); ++it)
    {
        // Reset the scratch action before each controller writes into it.
        action->initVector(0.0);
        (*it)->getNextContinuousAction(state, action);

        rlt_real weight = getControllerWeight(*it);
        weightsSum += weight;

        action->multScalar(weight);
        actionValues->addVector(action);
    }

    // Normalise only when it is well-defined; 1 / 0 would poison the result.
    if (weightsSum != 0.0)
    {
        actionValues->multScalar(1 / weightsSum);
    }
    action->setVector(actionValues);
}
// Registers `controller` with the given `weight`: the controller is appended
// to the controller list and its weight is stored in the weight map.
void CContinuousActionAddController::addContinuousActionController(CContinuousActionController *controller, rlt_real weight)
{
    this->controllers->push_back(controller);
    (*this->controllerWeights)[controller] = weight;
}
// Stores `weight` for `controller` in the weight map, creating the entry if
// it does not exist yet. The controller list is not modified.
void CContinuousActionAddController::setControllerWeight(CContinuousActionController *controller, rlt_real weight)
{
    (*this->controllerWeights)[controller] = weight;
}
// Returns the weight stored for `controller`, or 0.0 if none is stored.
//
// The lookup uses find() rather than operator[], so asking for an unknown
// controller no longer inserts a zero entry into the weight map. The returned
// value is the same in both cases.
rlt_real CContinuousActionAddController::getControllerWeight(CContinuousActionController *controller)
{
    std::map<CContinuousActionController *, rlt_real>::iterator entry = controllerWeights->find(controller);
    if (entry == controllerWeights->end())
    {
        return 0.0;
    }
    return entry->second;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -