📄 ccontinuousactiongradientpolicy.cpp
字号:
targetVector->setElement(row, col, inputDerivation->getElement(col));
}
}
}
void CContinuousActionFeaturePolicy::resetData()
{
std::list<CFeatureVFunction *>::iterator it = featureFunctions->begin();
for (; it != featureFunctions->end(); it ++)
{
(*it)->resetData();
}
}
/**
 * Builds a sigmoid policy around an existing gradient policy.
 *
 * The base class is initialised with the wrapped policy's continuous action
 * and state properties.
 *
 * @param policy          inner policy whose calls are forwarded / post-processed
 * @param inputDerivation calculator used by getInputDerivation() for the inner policy
 */
CContinuousActionSigmoidPolicy::CContinuousActionSigmoidPolicy(CContinuousActionGradientPolicy *policy, CCAGradientPolicyInputDerivationCalculator *inputDerivation)
    : CContinuousActionGradientPolicy(policy->getContinuousAction(), policy->getStateProperties())
{
    // The policy starts in INTERN_RANDOM_CONTROLLER mode.
    randomControllerMode = INTERN_RANDOM_CONTROLLER;

    this->inputDerivation = inputDerivation;
    this->policy = policy;

    // Scratch buffer for the inner policy's output, sized from its action properties.
    contData = new CContinuousActionData(policy->getContinuousActionProperties());
}
/// Frees the contData buffer; the wrapped policy and the input-derivation
/// calculator are not deleted here.
CContinuousActionSigmoidPolicy::~CContinuousActionSigmoidPolicy()
{
    delete this->contData;
}
/// Forwards the parameter update to the wrapped policy via
/// updateGradient(dParams, 1.0).
void CContinuousActionSigmoidPolicy::updateWeights(CFeatureList *dParams)
{
    this->policy->updateGradient(dParams, 1.0);
}
int CContinuousActionSigmoidPolicy::getNumWeights()
{
return policy->getNumWeights();
}
/// Delegates to the wrapped policy's getWeights(), passing `parameters` through unchanged.
void CContinuousActionSigmoidPolicy::getWeights(rlt_real *parameters)
{
    this->policy->getWeights(parameters);
}
/// Delegates to the wrapped policy's setWeights(), passing `parameters` through unchanged.
void CContinuousActionSigmoidPolicy::setWeights(rlt_real *parameters)
{
    this->policy->setWeights(parameters);
}
void CContinuousActionSigmoidPolicy::resetData()
{
policy->resetData();
}
/**
 * Reconstructs the noise that led from the inner policy's output to `action`.
 *
 * In INTERN_RANDOM_CONTROLLER mode the given action is mapped back through
 * the inverse of the sigmoid squashing used by getNextContinuousAction()
 * (after normalising to [0.0001, 0.9999] so the logarithm stays finite), and
 * the inner policy's output for `state` is subtracted from it. In any other
 * mode the call is forwarded to CContinuousActionController::getNoise().
 *
 * @param state   state for which the inner policy is evaluated
 * @param action  the (squashed) action that was taken
 * @param l_noise receives the reconstructed noise
 */
void CContinuousActionSigmoidPolicy::getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *l_noise)
{
    if (randomControllerMode != INTERN_RANDOM_CONTROLLER)
    {
        CContinuousActionController::getNoise(state, action, l_noise);
        return;
    }

    CMyVector unsquashed(this->contAction->getNumDimensions());

    // l_noise temporarily holds the inner policy's noise-free output.
    policy->getNextContinuousAction(state, l_noise);
    unsquashed.setVector(action);

    for (int dim = 0; dim < unsquashed.getNumDimensions(); dim++)
    {
        rlt_real upper = getContinuousActionProperties()->getMaxActionValue(dim);
        rlt_real lower = getContinuousActionProperties()->getMinActionValue(dim);
        rlt_real range = upper - lower;

        // Normalise the action into the unit interval.
        rlt_real value = unsquashed.getElement(dim);
        value = (value - lower) / (upper - lower);

        // Keep away from 0 and 1, where the inverse sigmoid diverges.
        if (value <= 0.0001)
        {
            value = 0.0001;
        }
        else if (value >= 0.9999)
        {
            value = 0.9999;
        }

        // Inverse sigmoid, then undo the [-2, 2] rescaling.
        value = (- log(1 / value - 1) + 2) * range / 4 + lower;
        unsquashed.setElement(dim, value);
    }

    // l_noise = unsquashed action - inner policy output
    l_noise->multScalar(-1.0);
    l_noise->addVector(&unsquashed);
}
/**
 * Computes the squashed action for `state`.
 *
 * Steps: query the inner policy, add noise from randomController (only when
 * one is set and the mode is INTERN_RANDOM_CONTROLLER; otherwise the noise
 * stays zero), then map every dimension through
 * min + width * sigmoid(-2 + 4 * (a - min) / width).
 *
 * @param state  state for which the action is computed
 * @param action receives the resulting action values
 */
void CContinuousActionSigmoidPolicy::getNextContinuousAction(CStateCollection *state, CContinuousActionData *action)
{
    // Raw output of the wrapped policy.
    policy->getNextContinuousAction(state, action);

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "Sigmoid Policy, Action Values:");
        action->saveASCII(DebugGetFileHandle('p'));
        DebugPrint('p', "\n");
    }

    // Noise defaults to zero and is only sampled in intern mode.
    noise->initVector(0.0);
    if (randomController)
    {
        if (this->randomControllerMode == INTERN_RANDOM_CONTROLLER)
        {
            randomController->getNextContinuousAction(state, noise);
        }
    }

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "Sigmoid Policy, Noise Values:");
        noise->saveASCII(DebugGetFileHandle('p'));
        DebugPrint('p', "\n");
    }

    action->addVector(noise);

    unsigned int dim = 0;
    while (dim < action->getNumDimensions())
    {
        rlt_real lower = contAction->getContinuousActionProperties()->getMinActionValue(dim);
        rlt_real range = contAction->getContinuousActionProperties()->getMaxActionValue(dim) - lower;

        // Rescale [min, max] to [-2, 2] ...
        action->setElement(dim, - 2 + (action->getElement(dim) - lower) / range * 4);
        // ... and squash back into the action range with a sigmoid.
        action->setElement(dim, lower + range * (1.0 / (1.0 + my_exp(-action->getElement(dim)))));

        ++dim;
    }
}
/**
 * Computes the gradient of the squashed output `outputDimension` with respect
 * to the inner policy's weights.
 *
 * The inner policy's gradient is scaled by 4 * sigmoid'(x), where
 * x = -2 + 4 * (a - min) / width is the rescaled inner action value a. This is
 * the chain-rule factor of the squashing done in getNextContinuousAction().
 *
 * @param inputState       state at which the gradient is evaluated
 * @param outputDimension  index of the action dimension
 * @param gradientFeatures receives the inner gradient multiplied by 4 * dSig
 */
void CContinuousActionSigmoidPolicy::getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures)
{
    policy->getNextContinuousAction(inputState, contData);
    policy->getGradient(inputState, outputDimension, gradientFeatures);

    rlt_real min = contAction->getContinuousActionProperties()->getMinActionValue(outputDimension);
    rlt_real width = contAction->getContinuousActionProperties()->getMaxActionValue(outputDimension) - min;

    DebugPrint('p', "Sigmoid Gradient Calculation: Action Value %f\n", contData->getActionValue(outputDimension));

    // Rescale the inner action from [min, max] to [-2, 2].
    contData->setElement(outputDimension, - 2 + (contData->getElement(outputDimension) - min) / width * 4);

    // sigmoid'(x) = e^-x / (1 + e^-x)^2 is an even function of x, so it is
    // evaluated with t = e^-|x| in (0, 1]. The direct form turns into
    // 0 * inf = NaN once e^-x overflows for very negative x; this form cannot
    // overflow and does not depend on how my_exp treats large arguments.
    rlt_real x = contData->getElement(outputDimension);
    rlt_real t = my_exp(- fabs(x));
    rlt_real dSig = t / ((1 + t) * (1 + t));

    // Written as a negated "<=" so that a NaN (e.g. from a NaN action value)
    // is reported as well; a plain "> limit" test is false for NaN.
    // NOTE(review): the original followed the printf with assert(true), which
    // can never fire. If aborting in debug builds was intended, add
    // assert(false) here - confirm before doing so.
    if (!(fabs(dSig) <= 10000000))
    {
        printf("Infintity gradient!! : %f, %f, %f,%f \n", dSig, contData->getElement(outputDimension), min,width);
    }

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "ContinuousActionPolicyGradient: ");
        gradientFeatures->saveASCII(DebugGetFileHandle('p'));
        DebugPrint('p', "\nSaturationFactor; %f\n", 4* dSig);
    }

    gradientFeatures->multFactor(4 * dSig);
}
/**
 * Computes the derivative of the squashed policy output with respect to the
 * input state.
 *
 * The inner policy's input derivation (obtained from the inputDerivation
 * calculator) is written to `targetVector`; afterwards row i is multiplied by
 * 4 * sigmoid'(x_i), where x_i = -2 + 4 * (a_i - min_i) / width_i is the
 * rescaled inner action value. This is the chain-rule factor of the squashing
 * done in getNextContinuousAction().
 *
 * @param inputState   state at which the derivative is evaluated
 * @param targetVector receives the scaled derivative; rows are indexed by
 *                     action dimension
 */
void CContinuousActionSigmoidPolicy::getInputDerivation(CStateCollection *inputState, CMyMatrix *targetVector)
{
    policy->getNextContinuousAction(inputState, contData);
    inputDerivation->getInputDerivation(inputState, targetVector);

    if (DebugIsEnabled('p'))
    {
        DebugPrint('p', "Inner Policy Input Derivation: ");
        targetVector->saveASCII(DebugGetFileHandle('p'));
        DebugPrint('p', "Action Values: ");
        contData->saveASCII(DebugGetFileHandle('p'));
    }

    for (unsigned int i = 0; i < contData->getNumDimensions(); i ++)
    {
        rlt_real min = contAction->getContinuousActionProperties()->getMinActionValue(i);
        rlt_real width = contAction->getContinuousActionProperties()->getMaxActionValue(i) - min;

        // Keep the inner policy's raw output for the debug message below.
        rlt_real actionValue = contData->getElement(i);

        // Rescale the inner action from [min, max] to [-2, 2].
        contData->setElement(i, - 2 + (actionValue - min) / width * 4);

        // sigmoid'(x) = e^-x / (1 + e^-x)^2 is even in x; using t = e^-|x|
        // in (0, 1] avoids the 0 * inf = NaN of the direct form when e^-x
        // overflows for very negative x.
        rlt_real x = contData->getElement(i);
        rlt_real t = my_exp(- fabs(x));
        rlt_real dSig = t / ((1 + t) * (1 + t));

        if (DebugIsEnabled('p'))
        {
            // The format has three conversions, so all three values must be
            // supplied; %u matches the unsigned loop index.
            DebugPrint('p', "SaturationFactor for dimension %u: %f (actionValue %f)\n", i, dSig, actionValue);
        }

        for (unsigned int j = 0; j < targetVector->getNumColumns(); j++)
        {
            targetVector->setElement(i, j, 4 * dSig * targetVector->getElement(i, j));
        }
    }
}
/**
 * Sets up a calculator that differentiates `policy` numerically.
 *
 * @param policy    policy whose output is differentiated with respect to its input state
 * @param stepSize  stored as parameter "NumericInputDerivationStepSize";
 *                  getInputDerivation() multiplies it by each state
 *                  dimension's value range to obtain the perturbation
 * @param modifiers passed on to the internal state buffer
 */
CCAGradientPolicyNumericInputDerivationCalculator::CCAGradientPolicyNumericInputDerivationCalculator(CContinuousActionGradientPolicy *policy, rlt_real stepSize, std::list<CStateModifier *> *modifiers)
{
    this->policy = policy;

    // Output buffers for the forward (+h) and backward (-h) evaluations.
    this->contDataPlus = new CContinuousActionData(policy->getContinuousActionProperties());
    this->contDataMinus = new CContinuousActionData(policy->getContinuousActionProperties());

    // Private state collection that holds the perturbed copies of the input state.
    stateBuffer = new CStateCollectionImpl(policy->getStateProperties(), modifiers);

    addParameter("NumericInputDerivationStepSize", stepSize);
}
/// Frees the two action buffers and the state buffer created in the
/// constructor; the policy pointer is not deleted.
CCAGradientPolicyNumericInputDerivationCalculator::~CCAGradientPolicyNumericInputDerivationCalculator()
{
    delete this->contDataPlus;
    delete this->contDataMinus;
    delete this->stateBuffer;
}
/**
 * Approximates the derivative of the policy output with respect to each
 * continuous state variable by central differences.
 *
 * For every continuous state dimension `col` the policy is evaluated at the
 * input state shifted by +h and -h along that dimension, with
 * h = (max - min) * "NumericInputDerivationStepSize", and
 * (plus - minus) / (2 * h) is written to targetVector(row, col) for every
 * policy output `row`.
 *
 * @param inputStateCol state collection supplying the point of evaluation;
 *                      it is copied into the internal buffer before use
 * @param targetVector  receives the derivative (rows: policy outputs,
 *                      columns: continuous state dimensions)
 */
void CCAGradientPolicyNumericInputDerivationCalculator::getInputDerivation(CStateCollection *inputStateCol, CMyMatrix *targetVector)
{
    CStateProperties *modelState = policy->getStateProperties();

    // Work on a private copy of the model state.
    CState *inputState = stateBuffer->getState(modelState);
    inputState->setState(inputStateCol->getState(modelState));

    rlt_real stepSize = getParameter("NumericInputDerivationStepSize");

    DebugPrint('p', "Calculating Numeric Policy Input Derivation\n");

    for (unsigned int col = 0; col < modelState->getNumContinuousStates(); col++)
    {
        rlt_real stepSize_i = (modelState->getMaxValue(col) - modelState->getMinValue(col)) * stepSize;

        if (stepSize_i == 0)
        {
            // Degenerate dimension (zero range or zero step): no perturbation
            // is possible and dividing by 2 * h would yield NaN/inf, so the
            // derivative along this dimension is reported as 0.
            for (int row = 0; row < policy->getNumOutputs(); row ++)
            {
                targetVector->setElement(row, col, 0.0);
            }
            continue;
        }

        // Remember the unperturbed coordinate so it can be restored exactly.
        // Undoing the shifts arithmetically (+h, -2h, +h) is subject to
        // rounding and would also drift if setContinuousState adjusts the
        // value it stores (NOTE(review): its behaviour is not visible here).
        rlt_real originalValue = inputState->getContinuousState(col);

        // Forward evaluation at +h.
        inputState->setContinuousState(col, inputState->getContinuousState(col) + stepSize_i);
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataPlus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataPlus->saveASCII(DebugGetFileHandle('p'));
        }

        // Backward evaluation: step back by 2h from the forward point.
        inputState->setContinuousState(col, inputState->getContinuousState(col) - 2 * stepSize_i);
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataMinus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataMinus->saveASCII(DebugGetFileHandle('p'));
        }

        // Put back the exact original coordinate before the next dimension.
        inputState->setContinuousState(col, originalValue);

        for (int row = 0; row < policy->getNumOutputs(); row ++)
        {
            targetVector->setElement(row, col, (contDataPlus->getActionValue(row) - contDataMinus->getActionValue(row)) / (2 * stepSize_i));
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -