📄 cpegasus.cpp

📁 强化学习算法（R-Learning）难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
	}

	delete dModelGradient;

	delete episodeGradient;
	delete dPolicy;
	delete dModelInput;
}

/**
 * Runs one controller episode per start state and accumulates the per-episode
 * gradient into gradientFeatures, then scales the result by 1 / (number of
 * start states).
 *
 * This object is registered as a semi-MDP listener on the agent for the
 * duration of the call and removed again before returning; episodeGradient is
 * filled by the listener callbacks (nextStep / newEpisode) while the episode
 * runs.
 *
 * @param gradientFeatures  Receives the accumulated gradient. It is not
 *                          cleared here, so any existing content is kept and
 *                          scaled together with the new contributions.
 * @param startStates       Start states to evaluate. If the list is empty no
 *                          episode is run and no scaling is applied.
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates)
{
	printf("Pegasus Gradient Evaluation\n");
	agent->addSemiMDPListener(this);
	int horizon = my_round(getParameter("PEGASUSHorizon"));
	const unsigned int numStartStates = startStates->getNumStates();
	CState *startState = new CState(dynModel->getStateProperties());
	for (unsigned int i = 0; i < numStartStates; i ++)
	{
		// i is unsigned, so it has to be printed with %u.
		printf("Evaluate Episode %u\n", i);
		agent->startNewEpisode();
		startStates->getState(i, startState);
		dynModel->setState(startState);
		
		agent->doControllerEpisode(1, horizon);

		gradientFeatures->add(episodeGradient, 1.0);
	}

	// Guard against an empty start-state list: 1.0 / 0 would scale the
	// gradient by infinity.
	if (numStartStates > 0)
	{
		gradientFeatures->multFactor(1.0 / numStartStates);
	}
	rlt_real norm = sqrt(gradientFeatures->multFeatureList(gradientFeatures));

	if (DebugIsEnabled())
	{
		DebugPrint('p', "Calculated Pegasus Gradient Norm: %f\n", norm);
		DebugPrint('p', "Calculated Gradient:\n");

		gradientFeatures->saveASCII(DebugGetFileHandle('p'));
	}
	printf("Finished Gradient Calculation, Gradient Norm: %f\n", norm);

	delete startState;
	agent->removeSemiMDPListener(this);
}

/**
 * For every feature in `features` and every list in `newFeatures`, calls
 * update() on that list with the feature's index and the feature's factor
 * multiplied by matrix(row, index), where `row` is the position of the list
 * inside `newFeatures`.
 *
 * @param matrix       Matrix providing the column of coefficients.
 * @param features     Feature list whose entries are distributed.
 * @param index        Column of `matrix` that is read.
 * @param newFeatures  Target lists; the n-th list uses matrix row n.
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::multMatrixFeatureList(CMyMatrix *matrix, CFeatureList *features, int index, std::list<CFeatureList *> *newFeatures)
{
	for (CFeatureList::iterator featIt = features->begin(); featIt != features->end(); ++featIt)
	{
		// Walk the target lists in parallel with the matrix rows.
		int row = 0;
		std::list<CFeatureList *>::iterator targetIt = newFeatures->begin();

		while (targetIt != newFeatures->end())
		{
			(*targetIt)->update((*featIt)->featureIndex, (*featIt)->factor * matrix->getElement(row, index));

			++targetIt;
			++row;
		}
	}
}

/**
 * Listener callback invoked for one transition of the running episode.
 *
 * Recomputes stateGradient2 and dModelGradient from stateGradient1 and the
 * derivatives supplied by the reward function, the model and the policy,
 * adds the discounted reward-weighted contribution to episodeGradient, then
 * swaps stateGradient1 / stateGradient2 and increments the step counter.
 *
 * @param oldStateCol  State collection before the action.
 * @param action       Executed action; its action data is cast to
 *                     CContinuousActionData (the cast result is not checked).
 * @param newStateCol  State collection after the action.
 */
void CPEGASUSAnalyticalPolicyGradientCalculator::nextStep(CStateCollection *oldStateCol, CAction *action, CStateCollection *newStateCol)
{
	typedef std::list<CFeatureList *>::iterator GradientIterator;

	CState *oldState = oldStateCol->getState(dynModel->getStateProperties());
	CState *nextState = newStateCol->getState(dynModel->getStateProperties());

	CContinuousActionData *data = dynamic_cast<CContinuousActionData *>(action->getActionData());

	GradientIterator gradIt;

	// Reset the scratch state-gradient lists that this step will fill.
	for (gradIt = stateGradient2->begin(); gradIt != stateGradient2->end(); ++gradIt)
	{
		(*gradIt)->clear();
	}

	// Reset the model-gradient lists as well.
	for (gradIt = dModelGradient->begin(); gradIt != dModelGradient->end(); ++gradIt)
	{
		(*gradIt)->clear();
	}

	// Reward derivative, evaluated at the successor state.
	rewardFunction->getInputDerivation(nextState, dReward);

	// Model derivative, evaluated at the previous state and the action data.
	dynModeldInput->getInputDerivation(oldState, data, dModelInput);

	if (DebugIsEnabled('p'))
	{
		DebugPrint('p', "Pegasus Gradient Calculation:\n ");
		DebugPrint('p', "State Gradient:\n ");

		for (gradIt = stateGradient1->begin(); gradIt != stateGradient1->end(); ++gradIt)
		{
			(*gradIt)->saveASCII(DebugGetFileHandle('p'));
			DebugPrint('p', "\n");
		}

		DebugPrint('p', "dReward: ");
		dReward->saveASCII(DebugGetFileHandle('p'));

		DebugPrint('p', "\n");
		DebugPrint('p',"dModel: ");
		dModelInput->saveASCII(DebugGetFileHandle('p'));
	}

	// Feed each previous state gradient through the matching column of the
	// model derivative; one column per continuous state variable.
	gradIt = stateGradient1->begin();
	unsigned int stateIndex = 0;
	while (stateIndex < dynModel->getNumContinuousStates())
	{
		multMatrixFeatureList(dModelInput, *gradIt, stateIndex, stateGradient2);

		++stateIndex;
		++gradIt;
	}

	// Policy derivative with respect to its input.
	policydInput->getInputDerivation(oldStateCol, dPolicy);
	
	if (DebugIsEnabled('p'))
	{
		DebugPrint('p',"dPolicy: ");
		dPolicy->saveASCII(DebugGetFileHandle('p'));
		DebugPrint('p', "\n");
	}

	// Ask the policy for its gradient, one list per entry of dModelGradient.
	int outputIndex = 0;
	for (gradIt = dModelGradient->begin(); gradIt != dModelGradient->end(); ++gradIt)
	{
		policy->getGradient(oldStateCol, outputIndex, *gradIt);
		++outputIndex;
	}

	// Combine the previous state gradients with the policy derivative.
	int column = 0;
	for (gradIt = stateGradient1->begin(); gradIt != stateGradient1->end(); ++gradIt)
	{
		multMatrixFeatureList(dPolicy, *gradIt, column, dModelGradient);
		++column;
	}

	// Feed the model gradients through the model-derivative columns located
	// after the continuous-state columns.
	column = 0;
	for (gradIt = dModelGradient->begin(); gradIt != dModelGradient->end(); ++gradIt)
	{
		multMatrixFeatureList(dModelInput, *gradIt, column + dynModel->getNumContinuousStates(), stateGradient2);
		++column;
	}

	if (DebugIsEnabled('p'))
	{
		DebugPrint('p', "Model Gradients:\n ");
		for (gradIt = dModelGradient->begin(); gradIt != dModelGradient->end(); ++gradIt)
		{
			(*gradIt)->saveASCII(DebugGetFileHandle('p'));
			DebugPrint('p', "\n");
		}

		DebugPrint('p', "New State Gradient:\n ");
		for (gradIt = stateGradient2->begin(); gradIt != stateGradient2->end(); ++gradIt)
		{
			(*gradIt)->saveASCII(DebugGetFileHandle('p'));
			DebugPrint('p', "\n");
		}
	}

	// Scale the reward derivative by DiscountFactor^steps.
	rlt_real discountFactor = pow(getParameter("DiscountFactor"), steps);
	dReward->multScalar(discountFactor);

	// Accumulate the new state gradients, each weighted by the matching
	// component of the discounted reward derivative.
	int rewardIndex = 0;
	for (gradIt = stateGradient2->begin(); gradIt != stateGradient2->end(); ++gradIt)
	{
		episodeGradient->add(*gradIt, dReward->getElement(rewardIndex));
		++rewardIndex;
	}

	// The freshly computed gradients become the "previous" ones for the next step.
	std::list<CFeatureList *> *previousGradient = stateGradient1;
	stateGradient1 = stateGradient2;
	stateGradient2 = previousGradient;

	steps ++;
}

void CPEGASUSAnalyticalPolicyGradientCalculator::newEpisode()
{
	std::list<CFeatureList *>::iterator it = stateGradient1->begin();

	for (; it != stateGradient1->end(); it ++)
	{
		(*it)->clear();
	}
	episodeGradient->clear();
	steps = 0;
}


/**
 * Builds a numeric (finite-difference) PEGASUS gradient calculator.
 *
 * Stores the agent and reward function, allocates a weight buffer with one
 * entry per policy weight, and registers the "PEGASUSNumericStepSize" and
 * "DiscountFactor" parameters. The remaining arguments are forwarded to the
 * CPEGASUSPolicyGradientCalculator base constructor.
 */
CPEGASUSNumericPolicyGradientCalculator::CPEGASUSNumericPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, CRewardFunction *rewardFunction, rlt_real stepSize, int startStates, int horizon, rlt_real gamma) : CPEGASUSPolicyGradientCalculator(policy, dynModel, startStates, horizon, gamma)
{
	this->agent = agent;
	this->rewardFunction = rewardFunction;

	// Scratch buffer for the policy weights; released in the destructor.
	this->weights = new rlt_real[policy->getNumWeights()];

	addParameter("PEGASUSNumericStepSize", stepSize);
	addParameter("DiscountFactor", gamma);
}

/**
 * Releases the weight buffer allocated with new[] in the constructor.
 */
CPEGASUSNumericPolicyGradientCalculator::~CPEGASUSNumericPolicyGradientCalculator()
{
	delete[] this->weights;
}

/**
 * Estimates the policy gradient by central finite differences.
 *
 * For each policy weight w_i the policy is evaluated at w_i - stepSize and
 * w_i + stepSize, and (vPlus - vMinus) / (2 * stepSize) is stored as feature i
 * of gradientFeatures. The policy is installed as the agent's controller for
 * the evaluation, and the original weights are written back to the policy
 * before returning.
 *
 * @param gradientFeatures  Receives one entry per policy weight.
 * @param startStates       Start states handed to the policy evaluator.
 */
void CPEGASUSNumericPolicyGradientCalculator::getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates)
{
	CPolicyEvaluator *evaluator = new CValueSameStateCalculator(agent, rewardFunction, dynModel, startStates, (int)getParameter("PEGASUSHorizon"),getParameter("DiscountFactor"));
	policy->getWeights(weights);

	agent->setController(policy);

	rlt_real stepSize = getParameter("PEGASUSNumericStepSize");

	for (int i = 0; i < policy->getNumWeights(); i ++)
	{
		// Remember the unperturbed value so it can be restored exactly;
		// undoing the perturbation arithmetically can accumulate rounding error.
		const rlt_real originalWeight = weights[i];

		weights[i] = originalWeight - stepSize;
		policy->setWeights(weights);
		rlt_real vMinus = evaluator->evaluatePolicy();

		weights[i] = originalWeight + stepSize;
		policy->setWeights(weights);
		rlt_real vPlus = evaluator->evaluatePolicy();

		weights[i] = originalWeight;

		gradientFeatures->set(i, (vPlus - vMinus) / (2 * stepSize));
		printf("Calculated derivation for weight %d : %f\n", i, gradientFeatures->getFeatureFactor(i));
	}
	policy->setWeights(weights);

	// The evaluator is owned by this function; it was previously leaked on every call.
	// NOTE(review): relies on CPolicyEvaluator having a virtual destructor — confirm.
	delete evaluator;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -