⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ctheoreticalmodel.cpp

📁 强化学习算法(R-Learning)难得的珍贵资料
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	loadASCII(stream);
}

// Constructs the model: forwards actions/numFeatures to the base class, then
// allocates the transition table with (getNumActions(), numFeatures) and puts a
// newly constructed CStateActionTransitions into every slot.
CFeatureStochasticModel::CFeatureStochasticModel(CActionSet *actions, int numFeatures)
	: CAbstractFeatureStochasticModel(actions, numFeatures)
{
	stateTransitions = new CMyArray2D<CStateActionTransitions *>(getNumActions(), numFeatures);

	// Walk the table through its flat (1D) view so no slot is left unset.
	int slot = 0;
	while (slot < stateTransitions->getSize())
	{
		stateTransitions->set1D(slot, new CStateActionTransitions());
		++slot;
	}
}

// Deletes every CStateActionTransitions held in the table (in flat index
// order) and finally the table object itself.
CFeatureStochasticModel::~CFeatureStochasticModel()
{
	for (int slot = 0; slot < stateTransitions->getSize(); ++slot)
	{
		delete stateTransitions->get1D(slot);
	}

	delete stateTransitions;
}

// Factory for transition objects. Returns a heap-allocated CSemiMDPTransition
// when the action reports the MULTISTEPACTION type, otherwise a plain
// CTransition. The caller receives the raw pointer from new.
CTransition *CFeatureStochasticModel::getNewTransition(int startState, int endState, CAction *action, rlt_real prop)
{
	if (!action->isType(MULTISTEPACTION))
	{
		return new CTransition(startState, endState, prop);
	}

	return new CSemiMDPTransition(startState, endState, prop);
}

// Sets the probability of the transition oldState --action--> newState.
// If the forward list of (action, oldState) already has an entry for newState
// its probability is overwritten; otherwise a new transition is created and the
// same object is added to the forward list of (action, oldState) and to the
// backward list of (action, newState).
void CFeatureStochasticModel::setPropability(rlt_real propability, int oldState, int action, int newState)
{
	CTransitionList *forward = stateTransitions->get(action, oldState)->getForwardTransitions();

	if (!forward->isMember(newState))
	{
		CTransition *created = getNewTransition(oldState, newState, actions->get(action), propability);

		forward->addTransition(created);
		stateTransitions->get(action, newState)->getBackwardTransitions()->addTransition(created);
		return;
	}

	forward->getTransition(newState)->setPropability(propability);
}

// Returns the stored probability of the transition oldState --action--> newState,
// or 0.0 when the forward list of (action, oldState) yields no transition for
// newState.
//
// action is an index into the action set. Index 0 is a valid action (the
// save/load routines of this class iterate action indices starting at 0), so
// the sanity check only rejects negative indices.
rlt_real CFeatureStochasticModel::getPropability(int oldState, int action, int newState)
{
	assert(action >= 0);

	CStateActionTransitions *saTrans = stateTransitions->get(action, oldState);

	CTransition *trans = saTrans->getForwardTransitions()->getTransition(newState);

	if (trans == NULL)
	{
		return 0.0;
	}

	return trans->getPropability();
}

// Duration-aware probability lookup.
// For actions that are not of type MULTISTEPACTION this simply forwards to the
// three-argument overload. For multi-step actions the stored transition is
// treated as a CSemiMDPTransition and the result is its probability multiplied
// by its duration factor for the given duration; 0.0 if no transition exists.
rlt_real CFeatureStochasticModel::getPropability(int oldFeature, int action, int duration, int newFeature)
{
	if (!actions->get(action)->isType(MULTISTEPACTION))
	{
		return getPropability(oldFeature, action, newFeature);
	}

	CTransitionList *forward = stateTransitions->get(action, oldFeature)->getForwardTransitions();
	CSemiMDPTransition *semi = (CSemiMDPTransition *) forward->getTransition(newFeature);

	if (semi == NULL)
	{
		return 0.0;
	}

	return semi->getPropability() * semi->getDurationFaktor(duration);
}

// Duration-aware probability update.
//
// For actions that are not of type MULTISTEPACTION this forwards to the
// four-argument overload. For multi-step actions:
//  - if a transition oldFeature -> newFeature already exists, the share of its
//    probability attributed to `duration` is replaced by `propability` and the
//    duration entry is adjusted accordingly;
//  - otherwise a new transition is created, registered in the forward list of
//    (action, oldFeature) and the backward list of (action, newFeature), and
//    `duration` is added with factor 1.0.
//
// The previous implementation created the new transition but never added it to
// any list, so it was unreachable afterwards and leaked.
void CFeatureStochasticModel::setPropability(rlt_real propability, int oldFeature, int action, int duration, int newFeature)
{
	if (!actions->get(action)->isType(MULTISTEPACTION))
	{
		setPropability(propability, oldFeature, action, newFeature);
		return;
	}

	CTransitionList *forward = stateTransitions->get(action, oldFeature)->getForwardTransitions();
	CSemiMDPTransition *trans = (CSemiMDPTransition *) forward->getTransition(newFeature);

	if (trans != NULL)
	{
		// Portion of the current probability that belongs to this duration.
		rlt_real durationProp = trans->getDurationFaktor(duration) * trans->getPropability();
		trans->setPropability(trans->getPropability() - durationProp + propability);
		// NOTE(review): divides by the updated probability; a resulting
		// probability of 0 is not guarded here (unchanged from the original).
		trans->addDuration(duration, (propability - durationProp) / trans->getPropability());
	}
	else
	{
		// Register the new transition the same way the four-argument overload
		// does, so lookups and backward traversal can find it.
		CTransition *created = getNewTransition(oldFeature, newFeature, actions->get(action), propability);

		forward->addTransition(created);
		stateTransitions->get(action, newFeature)->getBackwardTransitions()->addTransition(created);

		// getNewTransition returns a CSemiMDPTransition for multi-step actions.
		trans = (CSemiMDPTransition *) created;
		trans->addDuration(duration, 1.0);
	}
}

// Returns the forward transition list stored for the (action, oldState) cell.
CTransitionList *CFeatureStochasticModel::getForwardTransitions(int action, int oldState)
{
	CStateActionTransitions *cell = stateTransitions->get(action, oldState);
	return cell->getForwardTransitions();
}

// Returns the backward transition list stored for the (action, oldState) cell.
CTransitionList *CFeatureStochasticModel::getBackwardTransitions(int action, int oldState)
{
	CStateActionTransitions *cell = stateTransitions->get(action, oldState);
	return cell->getBackwardTransitions();
}

// Writes the model to `stream` as text.
// For every action index an "Action <i>" line is written, followed by one line
// per start state of the form "Startstate <s> [<count>]: " plus the serialized
// forward transitions of (action, start state).
//
// The values handed to "%d" are converted to int explicitly: the loop counters
// are unsigned and the list size is not an int, so passing them unconverted
// through the variadic call does not match the format specifier.
void CFeatureStochasticModel::saveASCII(FILE *stream)
{
	for (unsigned int i = 0; i < getNumActions(); i++)
	{
		fprintf(stream, "Action %d\n", static_cast<int>(i));

		for (unsigned int startState = 0; startState < numFeatures; startState++)
		{
			CTransitionList *transList = stateTransitions->get(i, startState)->getForwardTransitions();
			fprintf(stream, "Startstate %d [%d]: ", static_cast<int>(startState), static_cast<int>(transList->size()));

			for (CTransitionList::iterator it = transList->begin(); it != transList->end(); ++it)
			{
				(*it)->saveASCII(stream, true);
			}
			fprintf(stream, "\n");
		}
	}
}

// Reads the text format produced by saveASCII from `stream`.
// For each action and each start state the "Startstate <s> [<count>]: " header
// is parsed, then <count> transitions are loaded; every loaded transition is
// added to the forward list of (action, start state) and to the backward list
// of (action, its end state).
//
// The header is parsed outside of assert(): with NDEBUG defined an assert
// expression is not evaluated at all, which previously meant the header was
// never consumed in release builds. If the header cannot be parsed, loading
// stops instead of continuing with stale counts.
void CFeatureStochasticModel::loadASCII(FILE *stream)
{
	int action = 0;
	int startState = 0;
	int numTransitions = 0;

	for (unsigned int i = 0; i < getNumActions(); i++)
	{
		fscanf(stream, "Action %d\n", &action);

		for (unsigned int j = 0; j < numFeatures; j++)
		{
			int fieldsRead = fscanf(stream, "Startstate %d [%d]: ", &startState, &numTransitions);
			assert(fieldsRead == 2);
			if (fieldsRead != 2)
			{
				// Malformed or truncated input: nothing sensible left to read.
				return;
			}

			for (int k = 0; k < numTransitions; k++)
			{
				// Placeholder values; loadASCII on the transition fills in the real data.
				CTransition *newTrans = getNewTransition(0, 0, actions->get(i), 0.0);
				newTrans->loadASCII(stream, j, true);
				stateTransitions->get(i, j)->getForwardTransitions()->addTransition(newTrans);
				stateTransitions->get(i, newTrans->getEndState())->getBackwardTransitions()->addTransition(newTrans);
			}
			fscanf(stream, "\n");
		}
	}
}


// Initializes both bases (the stochastic model with actions/numFeatures, the
// state object with properties) and stores the visit-counter Q-function pointer.
CAbstractFeatureStochasticEstimatedModel::CAbstractFeatureStochasticEstimatedModel(
	CStateProperties *properties,
	CFeatureQFunction *stateActionVisits,
	CActionSet *actions,
	int numFeatures)
	: CFeatureStochasticModel(actions, numFeatures),
	  CStateObject(properties)
{
	// The parameter shadows the member, hence the explicit this->.
	this->stateActionVisits = stateActionVisits;
}


// Empty body: this destructor performs no work of its own.
CAbstractFeatureStochasticEstimatedModel::~CAbstractFeatureStochasticEstimatedModel() {}

void CAbstractFeatureStochasticEstimatedModel::saveData(FILE *stream)
{
	CFeatureStochasticModel::saveASCII(stream);
}

// Loads the model by delegating to CFeatureStochasticModel::loadASCII.
// (An unused local variable was removed; it had no effect on loading.)
void CAbstractFeatureStochasticEstimatedModel::loadData(FILE *stream)
{
	CFeatureStochasticModel::loadASCII(stream);
}

// Discards all learned transitions: every slot of the transition table is
// deleted and replaced by a newly constructed CStateActionTransitions, then the
// visit counters are reset via stateActionVisits->resetData().
void CAbstractFeatureStochasticEstimatedModel::resetData()
{
	for (int slot = 0; slot < stateTransitions->getSize(); ++slot)
	{
		delete stateTransitions->get1D(slot);
		stateTransitions->set1D(slot, new CStateActionTransitions());
	}

	stateActionVisits->resetData();
}

// Returns the value stored in the visit-counter Q-function for the given
// feature and the action object found at index `action` in the action set.
rlt_real CAbstractFeatureStochasticEstimatedModel::getStateActionVisits(int Feature, int action)
{
	return stateActionVisits->getValue(Feature, actions->get(action), NULL);
}

// Returns the total visit count of `Feature`, i.e. the sum of
// getStateActionVisits(Feature, a) over every action index a.
//
// getStateActionVisits takes (feature, action) in that order; the arguments
// were previously passed swapped, which looked up the wrong counters.
rlt_real CAbstractFeatureStochasticEstimatedModel::getStateVisits(int Feature)
{
	rlt_real sum = 0;

	for (unsigned int i = 0; i < getNumActions(); i++)
	{
		sum += getStateActionVisits(Feature, static_cast<int>(i));
	}
	return sum;
}

// Intermediate steps are handled exactly like regular steps: the call is
// forwarded unchanged to nextStep.
void CAbstractFeatureStochasticEstimatedModel::intermediateStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState)
{
	this->nextStep(oldState, action, nextState);
}

void CAbstractFeatureStochasticEstimatedModel::updateStep(int oldFeature, CAction *action, int newFeature, rlt_real factor)
{
	rlt_real propability = 0.0;
	rlt_real timeFactor = getParameter("EstimatedModelForgetFactor");

	bool found = false;

	int actionIndex = getActions()->getIndex(action);

	rlt_real newSAVisits = stateActionVisits->getValue(oldFeature, action, NULL);
	rlt_real oldSAVisits = newSAVisits - factor;

	if (newSAVisits < 0.0001)
	{
		return;
	}

	CTransitionList *transList = stateTransitions->get(actionIndex, oldFeature)->getForwardTransitions();
	
	CTransitionList::iterator trans = transList->begin();
	
	for (; trans != transList->end(); trans++)
	{
		propability = (*trans)->getPropability() * oldSAVisits;

		if ((*trans)->getEndState() == newFeature)
		{
			found = true;
			propability += factor;
			
			if (action->isType(MULTISTEPACTION))
			{
				int duration = dynamic_cast<CMultiStepAction *>(action)->getDuration();
				CSemiMDPTransition *semiTrans = (CSemiMDPTransition *) (*trans);
				semiTrans->addDuration(duration, factor / (propability));
			}
		}
		propability = propability / newSAVisits; 

		assert(propability >= 0);
		(*trans)->setPropability(propability);
	}
	
	if (! found)
	{
		setPropability(factor / newSAVisits, oldFeature, actionIndex, newFeature);
		if (action->isType(MULTISTEPACTION))
		{
			int duration = dynamic_cast<CMultiStepAction *>(action)->getDuration();
			CSemiMDPTransition *semiTrans = (CSemiMDPTransition *) transList->getTransition(newFeature);
			semiTrans->addDuration(duration, 1.0);			
			
		}
	}
}

// Forwards everything to the abstract estimated model; the number of features
// is taken from discState->getDiscreteStateSize().
CDiscreteStochasticEstimatedModel::CDiscreteStochasticEstimatedModel(
	CAbstractStateDiscretizer *discState,
	CFeatureQFunction *stateActionVisits,
	CActionSet *actions)
	: CAbstractFeatureStochasticEstimatedModel(discState, stateActionVisits, actions, discState->getDiscreteStateSize())
{
}

// Reads discrete state 0 of the old and new state (as seen through this
// object's properties) and updates the model with weight 1.0.
void CDiscreteStochasticEstimatedModel::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
	int fromState = oldState->getState(properties)->getDiscreteState(0);
	int toState = newState->getState(properties)->getDiscreteState(0);

	updateStep(fromState, action, toState, 1.0);
}

int CDiscreteStochasticEstimatedModel::getStateActionVisits(int state, int action)
{
	return (int)floor(CAbstractFeatureStochasticEstimatedModel::getStateActionVisits(state, action));
}

int CDiscreteStochasticEstimatedModel::getStateVisits(int state)
{
	int sum = 0;

	for (unsigned int i = 0; i < getNumActions();  i++)
	{
		sum += (int)floor(CAbstractFeatureStochasticEstimatedModel::getStateVisits(state));
	}
	return sum;
}

// Forwards to the abstract estimated model (feature count taken from
// featCalc->getNumFeatures()) and registers the
// "EstimatedModelMinimumUpdateFactor" parameter with the value 0.005.
CFeatureStochasticEstimatedModel::CFeatureStochasticEstimatedModel(
	CFeatureCalculator *featCalc,
	CFeatureQFunction *stateActionVisits,
	CActionSet *actions)
	: CAbstractFeatureStochasticEstimatedModel(featCalc, stateActionVisits, actions, featCalc->getNumFeatures())
{
	addParameter("EstimatedModelMinimumUpdateFactor", 0.005);
}

// Updates the model for every pair (i, j) of entries of the old and new
// feature state. The weight of a pair is the product of continuous value i of
// the old state and continuous value j of the new state; pairs whose weight
// does not exceed "EstimatedModelMinimumUpdateFactor" are skipped. The
// discrete values at i and j are passed to updateStep as the feature indices.
//
// The inner loop is bounded by the new state's own entry count; it was
// previously bounded by the old state's count while indexing the new state.
void CFeatureStochasticEstimatedModel::nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
	CState *oldS = oldState->getState(properties);
	CState *newS = newState->getState(properties);

	rlt_real minimumUpdate = getParameter("EstimatedModelMinimumUpdateFactor");

	for (unsigned int i = 0; i < oldS->getNumContinuousStates(); i++)
	{
		for (unsigned int j = 0; j < newS->getNumContinuousStates(); j++)
		{
			rlt_real factor = oldS->getContinuousState(i) * newS->getContinuousState(j);
			if (factor > minimumUpdate)
			{
				updateStep(oldS->getDiscreteState(i), action, newS->getDiscreteState(j), factor);
			}
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -