📄 crewardmodel.cpp
字号:
}
/// Creates a tabular reward model with one accumulator slot per discrete
/// state index reported by `discretizer->getDiscreteStateSize(0)`.
///
/// @param function     passed on to the CSemiMDPRewardListener base.
/// @param discretizer  passed on to the CFeatureRewardFunction base and
///                     queried here for the size of both tables.
///
/// Every slot of `rewards` and `visits`, as well as the global statistics
/// `rewardMean` and `numRewards`, starts out at zero.
CFeatureStateRewardModel::CFeatureStateRewardModel(CRewardFunction *function, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function)
{
    const unsigned int tableSize = discretizer->getDiscreteStateSize(0);

    rewards = new rlt_real[tableSize];
    visits = new rlt_real[tableSize];

    // Plain new[] leaves the elements uninitialised, so clear them by hand.
    for (unsigned int slot = 0; slot != tableSize; ++slot)
    {
        visits[slot] = 0.0;
        rewards[slot] = 0.0;
    }

    numRewards = 0;
    rewardMean = 0.0;
}
/// Frees the `rewards` and `visits` arrays, which the constructor
/// allocates with new[].
CFeatureStateRewardModel::~CFeatureStateRewardModel()
{
    delete [] visits;
    delete [] rewards;
}
/// Estimates the reward for arriving in `newState`.
///
/// - FEATURESTATE: returns the sum over all i below
///   `getNumContinuousStates()` of
///   `getContinuousState(i) * getReward(getDiscreteState(i))`.
/// - DISCRETESTATE: returns `getReward(getDiscreteState(0))`.
/// - any other state type: returns 0.0.
///
/// @param oldState  not used by this implementation.
/// @param action    not used by this implementation.
/// @param newState  state whose reward estimate is requested.
/// @return the estimated reward as described above.
rlt_real CFeatureStateRewardModel::getReward(CState *oldState, CAction *action, CState *newState)
{
    if (newState->getStateProperties()->isType(FEATURESTATE))
    {
        rlt_real weightedSum = 0.0;
        const unsigned int numFeatures = newState->getNumContinuousStates();

        for (unsigned int feature = 0; feature != numFeatures; ++feature)
        {
            const rlt_real estimate = getReward(newState->getDiscreteState(feature));
            weightedSum += newState->getContinuousState(feature) * estimate;
        }
        return weightedSum;
    }

    if (newState->getStateProperties()->isType(DISCRETESTATE))
    {
        return getReward(newState->getDiscreteState(0));
    }

    // Neither a feature state nor a discrete state: no estimate available.
    return 0.0;
}
/// Index-based overload: the estimate depends on the destination state
/// only, so this simply forwards to `getReward(newState)`.
///
/// @param oldState  not used by this implementation.
/// @param action    not used by this implementation.
/// @param newState  discrete index of the destination state.
/// @return the value of `getReward(newState)`.
rlt_real CFeatureStateRewardModel::getReward(int oldState, CAction *action, int newState)
{
    const rlt_real estimate = getReward(newState);
    return estimate;
}
/// Looks up the reward estimate for a single discrete state index.
///
/// @param newState  index into the `rewards` / `visits` tables; it is used
///                  directly, without a range check.
/// @return `rewards[newState] / visits[newState]` when the visit weight is
///         positive; otherwise `rewardMean / numRewards` when `numRewards`
///         is positive; otherwise 0.0.
rlt_real CFeatureStateRewardModel::getReward(int newState)
{
    const rlt_real visitWeight = visits[newState];

    if (visitWeight > 0)
    {
        return rewards[newState] / visitWeight;
    }

    // Nothing recorded for this state: fall back to the global statistics.
    if (numRewards > 0)
    {
        return rewardMean / numRewards;
    }

    return 0.0;
}
/// Listener callback: folds one observed reward into the model.
///
/// The destination state is fetched from `newStateCol` through
/// `discretizer`.
/// - FEATURESTATE: for every i below `getNumContinuousStates()`, the slot
///   `getDiscreteState(i)` receives `reward * getContinuousState(i)` and its
///   visit weight grows by `getContinuousState(i)`.
/// - DISCRETESTATE: the slot `getDiscreteState(0)` receives `reward` and its
///   visit weight grows by 1.
/// - any other state type: the sample is ignored.
///
/// The global statistics are updated alongside the tables. `getReward(int)`
/// falls back to `rewardMean / numRewards` for unvisited states and
/// `loadData()` rebuilds `rewardMean` as the sum of `rewards[]` and
/// `numRewards` as the floored sum of `visits[]`. Without the update here
/// the fallback stayed at zero during learning and only became active after
/// a save/load round trip.
///
/// @param oldStateCol  not used by this implementation.
/// @param action       not used by this implementation.
/// @param reward       reward observed for the transition.
/// @param newStateCol  collection the destination state is taken from.
void CFeatureStateRewardModel::nextStep(CStateCollection *oldStateCol, CAction *action, rlt_real reward, CStateCollection *newStateCol)
{
    CState *newState = newStateCol->getState(discretizer);
    bool sampleRecorded = false;

    if (newState->getStateProperties()->isType(FEATURESTATE))
    {
        const unsigned int numFeatures = newState->getNumContinuousStates();

        for (unsigned int i = 0; i < numFeatures; i++)
        {
            const int slot = newState->getDiscreteState(i);
            const rlt_real weight = newState->getContinuousState(i);
            const rlt_real weightedReward = reward * weight;

            rewards[slot] += weightedReward;
            visits[slot] += weight;

            // Mirror loadData(): rewardMean is the sum of all rewards[] entries.
            rewardMean += weightedReward;
            sampleRecorded = true;
        }
    }
    else if (newState->getStateProperties()->isType(DISCRETESTATE))
    {
        const int slot = newState->getDiscreteState(0);

        rewards[slot] += reward;
        visits[slot] += 1.0;

        rewardMean += reward;
        sampleRecorded = true;
    }

    if (sampleRecorded)
    {
        // One sample per step. This equals the growth of the visits[] total
        // for discrete states; for feature states it assumes the feature
        // weights of a state sum to about 1 -- TODO confirm against the
        // feature calculators in use.
        numRewards += 1;
    }
}
/// Writes both tables to `stream` as text.
///
/// Layout: a "State-Reward Table (N Features):" header line, the `rewards`
/// entries printed with "%f " on one line, then a
/// "State-Reward Visit Table (N Features):" header line followed by the
/// `visits` entries in the same format. N is
/// `discretizer->getDiscreteStateSize(0)`.
///
/// @param stream  open output stream; results of fprintf are not checked.
void CFeatureStateRewardModel::saveData(FILE *stream)
{
    const unsigned int tableSize = discretizer->getDiscreteStateSize(0);
    unsigned int slot;

    // Accumulated rewards.
    fprintf(stream, "State-Reward Table (%d Features):\n", discretizer->getDiscreteStateSize(0));
    slot = 0;
    while (slot < tableSize)
    {
        fprintf(stream, "%f ", rewards[slot]);
        ++slot;
    }
    fprintf(stream, "\n");

    // Accumulated visit weights.
    fprintf(stream, "State-Reward Visit Table (%d Features):\n", discretizer->getDiscreteStateSize(0));
    slot = 0;
    while (slot < tableSize)
    {
        fprintf(stream, "%f ", visits[slot]);
        ++slot;
    }
    fprintf(stream, "\n");
}
void CFeatureStateRewardModel::loadData(FILE *stream)
{
rlt_real bufNumRewards = 0.0;
rewardMean = 0.0;
int buffer;
fscanf(stream, "State-Reward Table (%d Features):\n", &buffer);
for (unsigned int i = 0; i < discretizer->getDiscreteStateSize(0); i++)
{
fscanf(stream, "%lf ", &rewards[i]);
rewardMean += rewards[i];
}
fscanf(stream, "\n");
fscanf(stream, "State-Reward Visit Table (%d Features):\n", &buffer);
for (unsigned int i = 0; i < discretizer->getDiscreteStateSize(0); i++)
{
fscanf(stream, "%lf ", &visits[i]);
bufNumRewards += visits[i];
}
fscanf(stream, "\n");
numRewards = (int) floor(bufNumRewards);
}
/// Discards everything learned so far: every slot of `rewards` and `visits`
/// is set to zero, and so are `rewardMean` and `numRewards`.
void CFeatureStateRewardModel::resetData()
{
    numRewards = 0;
    rewardMean = 0.0;

    // Walk the tables from the last slot down to the first.
    unsigned int slot = discretizer->getDiscreteStateSize(0);
    while (slot > 0)
    {
        --slot;
        visits[slot] = 0.0;
        rewards[slot] = 0.0;
    }
}
/*
CFeatureSemiMDPRewardModel::CFeatureSemiMDPRewardModel(CModelProperties *properties, CSemiMDPRewardFunction *function, CAbstractFeatureSimulatedSemiMDPModel *model, CFeatureCalculator *discretizer) : CFeatureSemiMDPRewardFunction(discretizer), CSemiMDPRewardListener(fuction), CModelObject(properties), CStateObject(properties)
{
this->rewardTable = new CMyArray2D<std::map<int,CFeatureMap *>*>(properties->getNumActions(), discretizer->getNumFeatures());
for (int i = 0; i < rewardTable->getSize(); i++)
{
rewardTable->set1D(i, new std::map<int,CFeatureMap *>());
}
this->model = model;
}
CFeatureSemiMDPRewardModel::~CFeatureSemiMDPRewardModel()
{
std::map<int, CFeatureMap *>::iterator it;
for (int i = 0; i < rewardTable->getSize(); i++)
{
for (it = rewardTable->get1D(i)->begin();it != rewardTable->get1D(i)->end(); it ++)
{
delete (*it).second;
}
delete rewardTable->get1D(i);
}
delete rewardTable;
}
rlt_real CFeatureSemiMDPRewardModel::getTransitionVisits(int oldState, int action, int duration, int newState)
{
rlt_real visits = 0.0;
CTransition *trans = model->getForwardTransitions(action, oldState)->getTransition(newState);
if (trans == NULL)
{
visits = 0;
}
else
{
visits = ((CSemiMDPTransition *)trans)->getDurationPropability(duration) * model->getStateActionVisits(oldState, action);
}
return visits;
}
rlt_real CFeatureSemiMDPRewardModel::getReward(int oldState, CAction *action, int duration, int newState)
{
int actionIndex = this->getModelProperties()->getActions()->index(action);
rlt_real transVisits = getTransitionVisits(oldState, actionIndex, duration, newState);
//assert(visitSparse->getFaktor(oldState, actionIndex, newState) > 0);
CFeatureMap *map = (*rewardTable->get(actionIndex, oldState))[newState];
if (transVisits > 0 && map != NULL)
{
return map->getValue(duration) / transVisits;
}
else
{
return 0.0;
}
}
void CFeatureSemiMDPRewardModel::nextStep(CState *oldState, CAction *action, int duration, rlt_real reward, CState *newState)
{
CFeatureList::iterator oldIt;
CFeatureList::iterator newIt;
CFeatureMap *featMap;
CFeature *feat = NULL;
discretizer->getFeatureList(oldState, tmpList1);
discretizer->getFeatureList(newState, tmpList2);
rlt_real oldreward = 0.0;
rlt_real visits = 0.0;
int actionIndex = this->getModelProperties()->getActions()->index(action);
oldIt = tmpList1->begin();
newIt = tmpList2->begin();
for (oldIt = tmpList1->begin(); oldIt != tmpList1->end(); oldIt++)
{
for (newIt = tmpList2->begin(); newIt != tmpList2->end(); newIt++)
{
featMap = (*rewardTable->get(actionIndex, (*oldIt)->featureIndex))[(*newIt)->featureIndex];
if (featMap != NULL)
{
oldreward = featMap->getValue((*newIt)->featureIndex);
int feata = (*newIt)->featureIndex;
int featb = (*oldIt)->featureIndex;
(*featMap)[duration] = oldreward + reward * (*oldIt)->factor * (*newIt)->factor;
}
}
}
tmpList1->clearAndDelete();
tmpList2->clearAndDelete();
}
void CFeatureSemiMDPRewardModel::saveASCII(FILE *stream)
{
CFeatureMap::iterator mapIt;
CFeatureMap *featMap;
fprintf(stream, "Reward Table\n");
for (int action = 0; action < getModelProperties()->getNumActions(); action ++)
{
fprintf(stream, "Action %d:\n", action);
for (int startState = 0; startState < discretizer->getNumFeatures(); startState ++)
{
std::map<int, CFeatureMap *>::iterator featIt = rewardTable->get(action, startState)->begin();
fprintf(stream, "Startstate %d [%d]: ", startState, featMap->size());
for (; featIt != rewardTable->get(action, startState)->end(); featIt ++)
{
fprintf(stream, "(%d [%d] (", (*featIt).first, (*featIt).second->size());
featMap = (*featIt).second;
for (mapIt = featMap->begin(); mapIt != featMap->end(); mapIt ++)
{
fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second);
}
fprintf(stream,"))");
}
fprintf(stream, "\n");
}
fprintf(stream, "\n");
}
}
void CFeatureSemiMDPRewardModel::loadASCII(FILE *stream)
{
CFeatureMap *featMap;
fscanf(stream, "Reward Table\n");
int buf, numVal = 0, endState;
rlt_real reward;
for (int action = 0; action < getModelProperties()->getNumActions(); action ++)
{
fscanf(stream, "Action %d:\n", &buf);
for (int startState = 0; startState < discretizer->getNumFeatures(); startState ++)
{
fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);
for (int i = 0; i < numVal; i ++)
{
int numDurations = 0, bufDuration = 0;
fscanf(stream, "(%d [%d] (", &endState, &numDurations);
if ((*rewardTable->get(action, startState))[endState] == NULL)
{
(*rewardTable->get(action, startState))[endState] = new CFeatureMap();
}
featMap = (*rewardTable->get(action, startState))[endState];
featMap->clear();
for (int dur = 0; dur < numDurations; dur ++)
{
fprintf(stream, "(%d %lf)", &bufDuration, &reward);
(*featMap)[bufDuration] = reward;
}
fscanf(stream,"))");
}
fscanf(stream, "\n");
}
fscanf(stream, "\n");
}
}
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -