⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ctestscripts.cpp

📁 强化学习算法(R-Learning)难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 5 页
字号:

	CVFunctionLearner *vNNLearnerDiscDirect = new CVFunctionLearner(rewardFunction, nnVFunction, vETraces);

	CAgentController *NNdiscVMPolicy = new CVMStochasticPolicy(this->staticContActions, new CSoftMaxDistribution(10), nnVFunction, dynModel, rewardFunction, list);

	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, NNdiscVMPolicy, nnVFunction, "VNNDiscDirectStateETracesDiscVMPolicy"));
}


// Registers the "adaptive learning rate" test suites for the neural-network
// V-function (nnVFunction).
//
// Nine V-function learners are created on nnVFunction: three residual kinds
// (discrete, continuous Euler, continuous Coulom) crossed with three gradient
// variants (direct gradient, constant beta, variable beta). Every learner gets
// the same adaptive "VLearningRate" parameter, driven by one shared
// CAdaptiveParameterFromAverageRewardCalculator.
//
// Each learner is then paired with five policies (discrete VM, continuous-time
// VM, sigmoid, bang-bang, continuous-action "add" policy), giving 45 suites.
// Every suite name ends in "AL" and every suite is registered with nnVFunction.
//
// Side effect: adds nnState to the agent's state modifiers.
// All objects allocated here are handed to the created suites/policies as raw
// pointers; nothing is freed in this function.
void CMyTestSuiteCollection::addVNNTestSuiteAdaptiveLearningRate()
{
	agent->addStateModifier(nnState);

	// Shared by all nine learners below as their "VLearningRate" source.
	// NOTE(review): the meaning of the numeric arguments is defined by the
	// calculator's constructor, which is not visible here - confirm before tuning.
	CAdaptiveParameterFromAverageRewardCalculator *learningRateCalculator = new CAdaptiveParameterFromAverageRewardCalculator(rewardFunction, 1, 0.1, -0.09, -2.0, -0.3, 0.99);

	// NN V Functions
	// Numeric input derivation of nnVFunction, used by the continuous-time policies.
	CVFunctionNumericInputDerivationCalculator  *vFunctionInputDerivation = new CVFunctionNumericInputDerivationCalculator(dynModel->getStateProperties(), nnVFunction, 0.025, agent->getStateModifiers());

	// Built from nnVFunction so that contAddPolicy acts on the same value
	// function that is passed to the learners and to the test suites.
	CAbstractQFunction *qFunctionFromTransitionFunction = new CQFunctionFromTransitionFunction(staticContActions, nnVFunction, dynModel, rewardFunction, agent->getStateModifiers());

	// --- Learners: direct gradient ---
	CVFunctionLearner *vNNLearnerDiscDirect = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CDiscreteResidual(0.95), new CDirectGradient());
	vNNLearnerDiscDirect->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerEulerDirect = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());
	vNNLearnerEulerDirect->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerCoulomDirect = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());
	vNNLearnerCoulomDirect->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	// --- Learners: residual beta function with a constant beta of 0.4 ---
	CVFunctionLearner *vNNLearnerDiscConstBeta = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CDiscreteResidual(0.95), new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CDiscreteResidual(0.95)));
	vNNLearnerDiscConstBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerEulerConstBeta = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0),new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0)));
	vNNLearnerEulerConstBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerCoulomConstBeta = new CVFunctionGradientLearner(rewardFunction, nnVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0)));
	vNNLearnerCoulomConstBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	// --- Learners: residual learner with a variable beta calculator ---
	CVFunctionLearner *vNNLearnerDiscVarBeta = new CVFunctionResidualLearner(rewardFunction, nnVFunction, new CDiscreteResidual(0.95), new CDiscreteResidual(0.95),  new CVariableBetaCalculator(0.01, 0.9));
	vNNLearnerDiscVarBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerEulerVarBeta = new CVFunctionResidualLearner(rewardFunction, nnVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CVariableBetaCalculator(0.01, 0.9));
	vNNLearnerEulerVarBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	CVFunctionLearner *vNNLearnerCoulomVarBeta = new CVFunctionResidualLearner(rewardFunction, nnVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CVariableBetaCalculator(0.01, 0.9));
	vNNLearnerCoulomVarBeta->addAdaptiveParameter("VLearningRate", learningRateCalculator);

	// --- Policies ---
	CAgentController *NNdiscVMPolicy = new CVMStochasticPolicy(staticContActions, new CSoftMaxDistribution(10.0), nnVFunction, dynModel, rewardFunction, agent->getStateModifiers());

	CAgentController *NNcontVMPolicy = new CContinuousTimeVMPolicy(staticContActions, new CSoftMaxDistribution(10.0), vFunctionInputDerivation, dynModel, rewardFunction);

	CContinuousTimeAndActionSigmoidVMPolicy *NNcontSigPolicy = new CContinuousTimeAndActionSigmoidVMPolicy(dynModel->getContinuousAction(), vFunctionInputDerivation, dynModel);
	NNcontSigPolicy->setParameter("SigmoidPolicyCFactor", 10.0);

	CContinuousTimeAndActionBangBangVMPolicy *NNcontBangBangPolicy = new CContinuousTimeAndActionBangBangVMPolicy(dynModel->getContinuousAction(), vFunctionInputDerivation, dynModel);

	// The three continuous-action policies share the same exploration controller.
	NNcontSigPolicy->setRandomController(contExploration);
	NNcontBangBangPolicy->setRandomController(contExploration);

	CContinuousActionPolicy *contAddPolicy = new CContinuousActionPolicy(dynModel->getContinuousAction(), new CSoftMaxDistribution(1000.0), qFunctionFromTransitionFunction, staticContActions, 5.5);
	contAddPolicy->setRandomController(contExploration);

	// --- Suites: discrete VM policy ---
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, NNdiscVMPolicy, nnVFunction, "VNNDiscDirectDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerDirect, NNdiscVMPolicy, nnVFunction, "VNNEulerDirectDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomDirect, NNdiscVMPolicy, nnVFunction, "VNNCoulomDirectDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscConstBeta, NNdiscVMPolicy, nnVFunction, "VNNDiscConstBetaDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomConstBeta, NNdiscVMPolicy, nnVFunction, "VNNCoulomConstBetaDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerConstBeta, NNdiscVMPolicy, nnVFunction, "VNNEulerConstBetaDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscVarBeta, NNdiscVMPolicy, nnVFunction, "VNNDiscVarBetaDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerVarBeta, NNdiscVMPolicy, nnVFunction, "VNNEulerVarBetaDiscVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomVarBeta, NNdiscVMPolicy, nnVFunction, "VNNCoulomVarBetaDiscVMPolicyAL"));

	// --- Suites: continuous-time VM policy ---
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, NNcontVMPolicy, nnVFunction, "VNNDiscDirectContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerDirect, NNcontVMPolicy, nnVFunction, "VNNEulerDirectContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomDirect, NNcontVMPolicy, nnVFunction, "VNNCoulomDirectContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscConstBeta, NNcontVMPolicy, nnVFunction, "VNNDiscConstBetaContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomConstBeta, NNcontVMPolicy, nnVFunction, "VNNCoulomConstBetaContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerConstBeta, NNcontVMPolicy, nnVFunction, "VNNEulerConstBetaContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscVarBeta, NNcontVMPolicy, nnVFunction, "VNNDiscVarBetaContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerVarBeta, NNcontVMPolicy, nnVFunction, "VNNEulerVarBetaContVMPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomVarBeta, NNcontVMPolicy, nnVFunction, "VNNCoulomVarBetaContVMPolicyAL"));

	// --- Suites: sigmoid policy ---
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, NNcontSigPolicy, nnVFunction, "VNNDiscDirectContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerDirect, NNcontSigPolicy, nnVFunction, "VNNEulerDirectContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomDirect, NNcontSigPolicy, nnVFunction, "VNNCoulomDirectContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscConstBeta, NNcontSigPolicy, nnVFunction, "VNNDiscConstBetaContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomConstBeta, NNcontSigPolicy, nnVFunction, "VNNCoulomConstBetaContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerConstBeta, NNcontSigPolicy, nnVFunction, "VNNEulerConstBetaContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscVarBeta, NNcontSigPolicy, nnVFunction, "VNNDiscVarBetaContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerVarBeta, NNcontSigPolicy, nnVFunction, "VNNEulerVarBetaContSigPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomVarBeta, NNcontSigPolicy, nnVFunction, "VNNCoulomVarBetaContSigPolicyAL"));

	// --- Suites: bang-bang policy ---
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, NNcontBangBangPolicy, nnVFunction, "VNNDiscDirectContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerDirect, NNcontBangBangPolicy, nnVFunction, "VNNEulerDirectContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomDirect, NNcontBangBangPolicy, nnVFunction, "VNNCoulomDirectContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscConstBeta, NNcontBangBangPolicy, nnVFunction, "VNNDiscConstBetaContBangBangPolicyAL"));
	// Uses nnVFunction and the "AL" suffix like every other suite in this
	// function; the learner itself was constructed on nnVFunction.
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomConstBeta, NNcontBangBangPolicy, nnVFunction, "VNNCoulomConstBetaContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerConstBeta, NNcontBangBangPolicy, nnVFunction, "VNNEulerConstBetaContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscVarBeta, NNcontBangBangPolicy, nnVFunction, "VNNDiscVarBetaContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerVarBeta, NNcontBangBangPolicy, nnVFunction, "VNNEulerVarBetaContBangBangPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomVarBeta, NNcontBangBangPolicy, nnVFunction, "VNNCoulomVarBetaContBangBangPolicyAL"));

	// --- Suites: continuous-action "add" policy ---
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscDirect, contAddPolicy, nnVFunction, "VNNDiscDirectContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerDirect, contAddPolicy, nnVFunction, "VNNEulerDirectContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomDirect, contAddPolicy, nnVFunction, "VNNCoulomDirectContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscConstBeta, contAddPolicy, nnVFunction, "VNNDiscConstBetaContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomConstBeta, contAddPolicy, nnVFunction, "VNNCoulomConstBetaContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerConstBeta, contAddPolicy, nnVFunction, "VNNEulerConstBetaContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerDiscVarBeta, contAddPolicy, nnVFunction, "VNNDiscVarBetaContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerEulerVarBeta, contAddPolicy, nnVFunction, "VNNEulerVarBetaContAddPolicyAL"));
	addTestSuite(new CListenerTestSuite(agent, vNNLearnerCoulomVarBeta, contAddPolicy, nnVFunction, "VNNCoulomVarBetaContAddPolicyAL"));

}

void CMyTestSuiteCollection::addVGaussianSigmoidNetworksTestSuites()
{
	agent->addStateModifier(this->gaussSigmoidState);

	// NN V Functions
	CVFunctionNumericInputDerivationCalculator  *vFunctionInputDerivation = new CVFunctionNumericInputDerivationCalculator(dynModel->getStateProperties(), gaussSigmoidVFunction, 0.025, agent->getStateModifiers());

	CAbstractQFunction *qFunctionFromTransitionFunction = new CQFunctionFromTransitionFunction(staticContActions, gaussSigmoidVFunction, dynModel, rewardFunction, agent->getStateModifiers());

	CVFunctionLearner *VGaussSig1LearnerDiscDirect = new CVFunctionGradientLearner(rewardFunction, gaussSigmoidVFunction, new CDiscreteResidual(0.95), new CDirectGradient());

	CVFunctionLearner *VGaussSig1LearnerEulerDirect = new CVFunctionGradientLearner(rewardFunction, gaussSigmoidVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());

	CVFunctionLearner *VGaussSig1LearnerCoulomDirect = new CVFunctionGradientLearner(rewardFunction, gaussSigmoidVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());

	CVFunctionLearner *VGaussSig1LearnerDiscC

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -