📄 behav.imp
字号:
} else { return (*this)(action->BelongsTo()->GetPlayer()->GetNumber(), action->BelongsTo()->GetNumber(), m_support.Find(action)); }}template <class T>const T &BehavProfile<T>::RealizProb(const Node *node) const{ return m_realizProbs[node->number];}template <class T>T &BehavProfile<T>::RealizProb(const Node *node){ return m_realizProbs[node->number];}template <class T>const T &BehavProfile<T>::BeliefProb(const Node *node) const{ return m_beliefs[node->number];}template <class T>T &BehavProfile<T>::BeliefProb(const Node *node){ return m_beliefs[node->number];}template <class T>T BehavProfile<T>::IsetProb(const Infoset *iset) const{ T prob = (T) 0; for (int i = 1; i <= iset->NumMembers(); i++) { prob += RealizProb(iset->GetMember(i)); } return prob;}template <class T>const T &BehavProfile<T>::IsetValue(const Infoset *iset) const{ return m_infosetValues(iset->GetPlayer()->GetNumber(), iset->GetNumber());}template <class T>T &BehavProfile<T>::IsetValue(const Infoset *iset){ return m_infosetValues(iset->GetPlayer()->GetNumber(), iset->GetNumber());}template <class T> const T &BehavProfile<T>::Regret(const Action * act) const{ return m_gripe(act->BelongsTo()->GetPlayer()->GetNumber(), act->BelongsTo()->GetNumber(), act->number);} template <class T> T &BehavProfile<T>::Regret(const Action * act){ return m_gripe(act->BelongsTo()->GetPlayer()->GetNumber(), act->BelongsTo()->GetNumber(), act->number);}//-------------------------------------------------------------------------// BehavProfile<T>: General data access -- public functions//-------------------------------------------------------------------------template <class T>const T &BehavProfile<T>::GetRealizProb(const Node *node){ ComputeSolutionData(); return RealizProb(node);}template <class T>const T &BehavProfile<T>::GetBeliefProb(const Node *node){ ComputeSolutionData(); return BeliefProb(node);}template <class T>gVector<T> BehavProfile<T>::GetNodeValue(const Node *node){ ComputeSolutionData(); return 
NodeValues(node);}template <class T>T BehavProfile<T>::GetIsetProb(const Infoset *iset){ ComputeSolutionData(); return IsetProb(iset);}template <class T>const T &BehavProfile<T>::GetIsetValue(const Infoset *iset){ ComputeSolutionData(); return IsetValue(iset);}template <class T>T BehavProfile<T>::GetActionProb(const Action *act) const{ return ActionProb((Action *)act);}template <class T>const T &BehavProfile<T>::GetActionValue(const Action *act) const{ // Hack to cast away const; ComputeSolutionData() should be // const, but some stuff needs fixed before that can happen ((BehavProfile *) this)->ComputeSolutionData(); return ActionValue(act);}template <class T>const T &BehavProfile<T>::GetRegret(const Action * act){ ComputeSolutionData(); return Regret(act);}//-------------------------------------------------------------------------// BehavProfile<T>: Computation of interesting quantities//-------------------------------------------------------------------------//--------------// Payoff//--------------template <class T>void BehavProfile<T>::Payoff(Node *node, T prob, int player, T &value) const{ Infoset * iset = node->infoset; if (node->outcome) { value += prob * Payoff(node->outcome, player); } if (node->children.Length()) { const gArray<Action *> &acts = m_support.Actions(iset); for (int act = 1; act <= acts.Length(); act++) Payoff(node->GetChild(acts[act]), prob * ActionProb(acts[act]), player, value); }}template <class T> T BehavProfile<T>::Payoff(int player) const{ T value = (T) 0; Payoff(m_efg->RootNode(), (T) 1, player, value); return value;}//---------------// Derivatives//---------------//// The following routines compute the derivatives of quantities as// the probability of the action 'p_oppAction' is changed.// See Turocy (2001), "Computing the Quantal Response Equilibrium// Correspondence" for details.// These assume that the profile is interior (totally mixed),// and that the game is of perfect recall//template <class T>T 
// Derivative of the value of 'p_action' as the probability of
// 'p_oppAction' is perturbed (see the Turocy (2001) reference above).
// Applies the product rule to (realization probability) x (continuation
// value) over each member node of p_action's information set, then
// divides by the information-set probability.
// NOTE(review): the final division assumes IsetProb() is nonzero, which
// holds for the interior (totally mixed) profiles these routines assume.
BehavProfile<T>::DiffActionValue(const Action *p_action, const Action *p_oppAction) const
{
  // ComputeSolutionData() mutates the caches, hence the const cast.
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  T deriv = (T) 0;
  Infoset *infoset = p_action->BelongsTo();
  EFPlayer *player = p_action->BelongsTo()->GetPlayer();
  for (int i = 1; i <= infoset->NumMembers(); i++) {
    Node *member = infoset->GetMember(i);
    // gout << member->number << ' ' << player->GetNumber() << ' ' << p_action->BelongsTo()->GetNumber() << ' ' << p_action->number << ' ' << p_oppAction->BelongsTo()->GetPlayer()->GetNumber() << ' ' << p_oppAction->BelongsTo()->GetNumber() << ' ' << p_oppAction->number << ' ';
    // Product rule, first term: d(realization prob) * (value in excess
    // of the cached action value) ...
    deriv += DiffRealizProb(member, p_oppAction) * (NodeValue(member->GetChild(p_action->number), player->GetNumber()) - ActionValue(p_action));
    // gout << DiffRealizProb(member, p_oppAction) << ' ';
    // ... second term: (realization prob) * d(continuation value).
    deriv += RealizProb(member) * DiffNodeValue(member->GetChild(p_action->number), player, p_oppAction);
    // gout << DiffNodeValue(member->GetChild(p_action->number), player, p_oppAction);
  }
  // gout << ' ' << (deriv / IsetProb(infoset)) << '\n';
  return deriv / IsetProb(p_action->BelongsTo());
}

// Derivative of the realization probability of 'p_node' with respect to
// the probability of 'p_oppAction': the product of the behavior
// probabilities of every other action on the path from the root to
// p_node, or zero if p_oppAction does not lie on that path.
template <class T>
T BehavProfile<T>::DiffRealizProb(const Node *p_node, const Action *p_oppAction) const
{
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  T deriv = (T) 1;
  bool isPrec = false;  // set when p_oppAction is found on the path
  const Node *node = p_node;
  // Walk up from p_node to the root, multiplying in each action
  // probability on the path except that of p_oppAction itself.
  while (node->GetParent()) {
    Action *prevAction = node->GetAction();
    if (prevAction != p_oppAction) {
      deriv *= GetActionProb(prevAction);
    }
    else {
      isPrec = true;
    }
    node = node->GetParent();
  }
  return (isPrec) ? deriv : (T) 0.0;
}

// Derivative of the value to 'p_player' of the subtree rooted at
// 'p_node' with respect to the probability of 'p_oppAction'.
template <class T>
T BehavProfile<T>::DiffNodeValue(const Node *p_node, const EFPlayer *p_player, const Action *p_oppAction) const
{
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  if (m_efg->NumChildren(p_node) > 0) {
    Infoset *infoset = p_node->GetInfoset();
    if (infoset == p_oppAction->BelongsTo()) {
      // We've encountered the action; since we assume perfect recall,
      // we won't encounter it again, and the downtree value must
      // be the same.
      return m_nodeValues(p_node->GetChild(p_oppAction->GetNumber())->number, p_player->GetNumber());
    }
    else {
      // Chain rule: weight each child's derivative by the probability
      // of the action leading to it.
      T deriv = (T) 0;
      for (int act = 1; act <= infoset->NumActions(); act++) {
	deriv += (DiffNodeValue(p_node->GetChild(act), p_player, p_oppAction) * ActionProb(infoset->Actions()[act]));
      }
      return deriv;
    }
  }
  else {
    // If we reach a terminal node and haven't encountered p_oppAction,
    // derivative wrt this path is zero.
    return (T) 0;
  }
}

// 
// Computation of Cached solution data
// 

// Pass 2 of the cache rebuild (run after ComputeSolutionDataPass1 has
// filled m_realizProbs): computes beliefs, node values, and action
// values bottom-up over the tree.
template <class T>
void BehavProfile<T>::ComputeSolutionDataPass2(const Node *node)
{
  if (node->outcome) {
    // Outcomes may be attached to non-terminal nodes too; fold the
    // payoff into this node's value for every player.
    for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
      m_nodeValues(node->number, pl) += Payoff(node->outcome, pl);
    }
  }
  Infoset * iset = node->infoset;
  if(iset) {
    // "x != x * 0" is a type-generic nonzero test: it avoids naming a
    // literal zero of T and works for exact (rational) as well as
    // floating-point instantiations.
    if (IsetProb(iset) != IsetProb(iset) * (T) 0)
      BeliefProb(node) = RealizProb(node) / IsetProb(iset);
    const gArray<Node *> &children = m_efg->Children(node);
    // push down payoffs from outcomes attached to non-terminal nodes
    for (int child = 1; child <= children.Length(); child++) {
      m_nodeValues.SetRow(children[child]->number, m_nodeValues.Row(node->number));
    }
    // Reset this node's own row before re-accumulating from children.
    for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
      m_nodeValues(node->number, pl) = (T) 0;
    }
    for (int child = 1; child <= children.Length(); child++) {
      ComputeSolutionDataPass2(children[child]);
      // gVector<T> s = NodeValue(children[child]);
      const Action * act = children[child]->GetAction();
      // This node's value is the action-probability-weighted sum of its
      // children's values.
      for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
	m_nodeValues(node->number, pl) += ActionProb(act) * m_nodeValues(children[child]->number, pl);
      }
      if (!iset->IsChanceInfoset()) {
	// Accumulate the belief-weighted continuation value into the
	// cached value of the action leading to this child; zero it
	// when the information set is unreached.
	T &cpay = ActionValue(act);
	if (IsetProb(iset) != IsetProb(iset) * (T) 0) {
	  cpay += BeliefProb(node) * m_nodeValues(children[child]->number, iset->GetPlayer()->GetNumber());
	}
	else {
	  cpay = (T) 0;
	}
      }
    }
  }
}

// compute realization probabilities for nodes and isets. 
// Pass 1 of the cache rebuild: fill m_realizProbs top-down.  A node's
// realization probability is its parent's times the behavior
// probability of the action leading to it; the root has probability one.
template <class T>
void BehavProfile<T>::ComputeSolutionDataPass1(const Node *node)
{
  if (!node->GetParent()) {
    RealizProb(node) = (T) 1;
  }
  else {
    RealizProb(node) = RealizProb(node->GetParent()) * ActionProb(node->GetAction());
  }
  if (node->GetInfoset()) {
    const gArray<Node *> &kids(m_efg->Children(node));
    for (int k = 1; k <= kids.Length(); k++) {
      ComputeSolutionDataPass1(kids[k]);
    }
  }
}

// Lazily (re)build all cached quantities: realization probabilities,
// node and action values, information-set values, and regrets.
// A no-op when the cache is already valid.
template <class T>
void BehavProfile<T>::ComputeSolutionData(void)
{
  if (m_cached_data) {
    return;  // cache is current -- nothing to recompute
  }
  // Zero every cached table before the two passes refill them.
  m_actionValues = (T) 0;
  m_nodeValues = (T) 0;
  m_infosetValues = (T) 0;
  m_gripe = (T) 0;
  ComputeSolutionDataPass1(m_efg->RootNode());
  ComputeSolutionDataPass2(m_efg->RootNode());
  // Fold the per-action values into information-set values and regrets.
  for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
    for (int isetIdx = 1; isetIdx <= m_efg->NumInfosets()[pl]; isetIdx++) {
      Infoset *infoset = m_efg->Players()[pl]->Infosets()[isetIdx];
      // The information-set value is the probability-weighted sum of
      // its action values; it must be complete before regrets are set,
      // hence the two separate loops below.
      IsetValue(infoset) = (T) 0;
      for (int a = 1; a <= infoset->NumActions(); a++) {
	Action *action = infoset->Actions()[a];
	IsetValue(infoset) += ActionProb(action) * ActionValue(action);
      }
      for (int a = 1; a <= infoset->NumActions(); a++) {
	Action *action = infoset->Actions()[a];
	Regret(action) = (ActionValue(action) - IsetValue(infoset)) * IsetProb(infoset);
      }
    }
  }
  m_cached_data = true;
}

template <class T>
void BehavProfile<T>::BehaviorStrat(const efgGame &E, int pl, Node *n)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -