📄 behav.imp
字号:
} else { return (*this)(action->BelongsTo()->GetPlayer()->GetNumber(), action->BelongsTo()->GetNumber(), m_support.Find(action)); }}template <class T>const T &BehavProfile<T>::RealizProb(const Node *node) const{ return m_realizProbs[node->number];}template <class T>T &BehavProfile<T>::RealizProb(const Node *node){ return m_realizProbs[node->number];}template <class T>const T &BehavProfile<T>::BeliefProb(const Node *node) const{ return m_beliefs[node->number];}template <class T>T &BehavProfile<T>::BeliefProb(const Node *node){ return m_beliefs[node->number];}template <class T>T BehavProfile<T>::IsetProb(const Infoset *iset) const{ T prob = (T) 0; for (int i = 1; i <= iset->NumMembers(); i++) { prob += RealizProb(iset->GetMember(i)); } return prob;}template <class T>const T &BehavProfile<T>::IsetValue(const Infoset *iset) const{ return m_infosetValues(iset->GetPlayer()->GetNumber(), iset->GetNumber());}template <class T>T &BehavProfile<T>::IsetValue(const Infoset *iset){ return m_infosetValues(iset->GetPlayer()->GetNumber(), iset->GetNumber());}template <class T> const T &BehavProfile<T>::Regret(const Action * act) const{ return m_gripe(act->BelongsTo()->GetPlayer()->GetNumber(), act->BelongsTo()->GetNumber(), act->number);} template <class T> T &BehavProfile<T>::Regret(const Action * act){ return m_gripe(act->BelongsTo()->GetPlayer()->GetNumber(), act->BelongsTo()->GetNumber(), act->number);}//-------------------------------------------------------------------------// BehavProfile<T>: General data access -- public functions//-------------------------------------------------------------------------template <class T>const T &BehavProfile<T>::GetRealizProb(const Node *node){ ComputeSolutionData(); return RealizProb(node);}template <class T>const T &BehavProfile<T>::GetBeliefProb(const Node *node){ ComputeSolutionData(); return BeliefProb(node);}template <class T>gVector<T> BehavProfile<T>::GetNodeValue(const Node *node){ ComputeSolutionData(); return 
NodeValues(node);}template <class T>T BehavProfile<T>::GetIsetProb(const Infoset *iset){ ComputeSolutionData(); return IsetProb(iset);}template <class T>const T &BehavProfile<T>::GetIsetValue(const Infoset *iset){ ComputeSolutionData(); return IsetValue(iset);}template <class T>T BehavProfile<T>::GetActionProb(const Action *act) const{ return ActionProb((Action *)act);}template <class T>const T &BehavProfile<T>::GetActionValue(const Action *act) const{ // Hack to cast away const; ComputeSolutionData() should be // const, but some stuff needs fixed before that can happen ((BehavProfile *) this)->ComputeSolutionData(); return ActionValue(act);}template <class T>const T &BehavProfile<T>::GetRegret(const Action * act){ ComputeSolutionData(); return Regret(act);}//-------------------------------------------------------------------------// BehavProfile<T>: Computation of interesting quantities//-------------------------------------------------------------------------//--------------// Payoff//--------------template <class T>void BehavProfile<T>::Payoff(Node *node, T prob, int player, T &value) const{ Infoset * iset = node->infoset; if (node->outcome) { value += prob * Payoff(node->outcome, player); } if (node->children.Length()) { const gArray<Action *> &acts = m_support.Actions(iset); for (int act = 1; act <= acts.Length(); act++) Payoff(node->GetChild(acts[act]), prob * ActionProb(acts[act]), player, value); }}template <class T> T BehavProfile<T>::Payoff(int player) const{ T value = (T) 0; Payoff(m_efg->RootNode(), (T) 1, player, value); return value;}//---------------// Derivatives//---------------//// The following routines compute the derivatives of quantities as// the probability of the action 'p_oppAction' is changed.// See Turocy (2001), "Computing the Quantal Response Equilibrium// Correspondence" for details.// These assume that the profile is interior (totally mixed),// and that the game is of perfect recall//template <class T>T 
// Derivative of the value of 'p_action' as the probability of
// 'p_oppAction' is perturbed (see the Turocy (2001) reference above).
// Applies the product rule to (realization probability) x (continuation
// value) over each member node of p_action's information set, then
// divides by the information-set probability.
// NOTE(review): the final division assumes IsetProb() is nonzero, which
// holds for the interior (totally mixed) profiles these routines assume.
BehavProfile<T>::DiffActionValue(const Action *p_action, const Action *p_oppAction) const
{
  // ComputeSolutionData() mutates the caches, hence the const cast.
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  T deriv = (T) 0;
  Infoset *infoset = p_action->BelongsTo();
  EFPlayer *player = p_action->BelongsTo()->GetPlayer();
  for (int i = 1; i <= infoset->NumMembers(); i++) {
    Node *member = infoset->GetMember(i);
    // gout << member->number << ' ' << player->GetNumber() << ' ' << p_action->BelongsTo()->GetNumber() << ' ' << p_action->number << ' ' << p_oppAction->BelongsTo()->GetPlayer()->GetNumber() << ' ' << p_oppAction->BelongsTo()->GetNumber() << ' ' << p_oppAction->number << ' ';
    // Product rule, first term: d(realization prob) * (value in excess
    // of the cached action value) ...
    deriv += DiffRealizProb(member, p_oppAction) * (NodeValue(member->GetChild(p_action->number), player->GetNumber()) - ActionValue(p_action));
    // gout << DiffRealizProb(member, p_oppAction) << ' ';
    // ... second term: (realization prob) * d(continuation value).
    deriv += RealizProb(member) * DiffNodeValue(member->GetChild(p_action->number), player, p_oppAction);
    // gout << DiffNodeValue(member->GetChild(p_action->number), player, p_oppAction);
  }
  // gout << ' ' << (deriv / IsetProb(infoset)) << '\n';
  return deriv / IsetProb(p_action->BelongsTo());
}

// Derivative of the realization probability of 'p_node' with respect to
// the probability of 'p_oppAction': the product of the behavior
// probabilities of every other action on the path from the root to
// p_node, or zero if p_oppAction does not lie on that path.
template <class T>
T BehavProfile<T>::DiffRealizProb(const Node *p_node, const Action *p_oppAction) const
{
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  T deriv = (T) 1;
  bool isPrec = false;  // set when p_oppAction is found on the path
  const Node *node = p_node;
  // Walk up from p_node to the root, multiplying in each action
  // probability on the path except that of p_oppAction itself.
  while (node->GetParent()) {
    Action *prevAction = node->GetAction();
    if (prevAction != p_oppAction) {
      deriv *= GetActionProb(prevAction);
    }
    else {
      isPrec = true;
    }
    node = node->GetParent();
  }
  return (isPrec) ? deriv : (T) 0.0;
}

// Derivative of the value to 'p_player' of the subtree rooted at
// 'p_node' with respect to the probability of 'p_oppAction'.
template <class T>
T BehavProfile<T>::DiffNodeValue(const Node *p_node, const EFPlayer *p_player, const Action *p_oppAction) const
{
  ((BehavProfile<T> *) this)->ComputeSolutionData();
  if (m_efg->NumChildren(p_node) > 0) {
    Infoset *infoset = p_node->GetInfoset();
    if (infoset == p_oppAction->BelongsTo()) {
      // We've encountered the action; since we assume perfect recall,
      // we won't encounter it again, and the downtree value must
      // be the same.
      return m_nodeValues(p_node->GetChild(p_oppAction->GetNumber())->number, p_player->GetNumber());
    }
    else {
      // Chain rule: weight each child's derivative by the probability
      // of the action leading to it.
      T deriv = (T) 0;
      for (int act = 1; act <= infoset->NumActions(); act++) {
	deriv += (DiffNodeValue(p_node->GetChild(act), p_player, p_oppAction) * ActionProb(infoset->Actions()[act]));
      }
      return deriv;
    }
  }
  else {
    // If we reach a terminal node and haven't encountered p_oppAction,
    // derivative wrt this path is zero.
    return (T) 0;
  }
}

// 
// Computation of Cached solution data
// 

// Pass 2 of the cache rebuild (run after ComputeSolutionDataPass1 has
// filled m_realizProbs): computes beliefs, node values, and action
// values bottom-up over the tree.
template <class T>
void BehavProfile<T>::ComputeSolutionDataPass2(const Node *node)
{
  if (node->outcome) {
    // Outcomes may be attached to non-terminal nodes too; fold the
    // payoff into this node's value for every player.
    for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
      m_nodeValues(node->number, pl) += Payoff(node->outcome, pl);
    }
  }
  Infoset * iset = node->infoset;
  if(iset) {
    // "x != x * 0" is a type-generic nonzero test: it avoids naming a
    // literal zero of T and works for exact (rational) as well as
    // floating-point instantiations.
    if (IsetProb(iset) != IsetProb(iset) * (T) 0)
      BeliefProb(node) = RealizProb(node) / IsetProb(iset);
    const gArray<Node *> &children = m_efg->Children(node);
    // push down payoffs from outcomes attached to non-terminal nodes
    for (int child = 1; child <= children.Length(); child++) {
      m_nodeValues.SetRow(children[child]->number, m_nodeValues.Row(node->number));
    }
    // Reset this node's own row before re-accumulating from children.
    for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
      m_nodeValues(node->number, pl) = (T) 0;
    }
    for (int child = 1; child <= children.Length(); child++) {
      ComputeSolutionDataPass2(children[child]);
      // gVector<T> s = NodeValue(children[child]);
      const Action * act = children[child]->GetAction();
      // This node's value is the action-probability-weighted sum of its
      // children's values.
      for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
	m_nodeValues(node->number, pl) += ActionProb(act) * m_nodeValues(children[child]->number, pl);
      }
      if (!iset->IsChanceInfoset()) {
	// Accumulate the belief-weighted continuation value into the
	// cached value of the action leading to this child; zero it
	// when the information set is unreached.
	T &cpay = ActionValue(act);
	if (IsetProb(iset) != IsetProb(iset) * (T) 0) {
	  cpay += BeliefProb(node) * m_nodeValues(children[child]->number, iset->GetPlayer()->GetNumber());
	}
	else {
	  cpay = (T) 0;
	}
      }
    }
  }
}

// compute realization probabilities for nodes and isets. 
// Pass 1 of the cache rebuild: fill m_realizProbs top-down.  A node's
// realization probability is its parent's times the behavior
// probability of the action leading to it; the root has probability one.
template <class T>
void BehavProfile<T>::ComputeSolutionDataPass1(const Node *node)
{
  if (!node->GetParent()) {
    RealizProb(node) = (T) 1;
  }
  else {
    RealizProb(node) = RealizProb(node->GetParent()) * ActionProb(node->GetAction());
  }
  if (node->GetInfoset()) {
    const gArray<Node *> &kids(m_efg->Children(node));
    for (int k = 1; k <= kids.Length(); k++) {
      ComputeSolutionDataPass1(kids[k]);
    }
  }
}

// Lazily (re)build all cached quantities: realization probabilities,
// node and action values, information-set values, and regrets.
// A no-op when the cache is already valid.
template <class T>
void BehavProfile<T>::ComputeSolutionData(void)
{
  if (m_cached_data) {
    return;  // cache is current -- nothing to recompute
  }
  // Zero every cached table before the two passes refill them.
  m_actionValues = (T) 0;
  m_nodeValues = (T) 0;
  m_infosetValues = (T) 0;
  m_gripe = (T) 0;
  ComputeSolutionDataPass1(m_efg->RootNode());
  ComputeSolutionDataPass2(m_efg->RootNode());
  // Fold the per-action values into information-set values and regrets.
  for (int pl = 1; pl <= m_efg->NumPlayers(); pl++) {
    for (int isetIdx = 1; isetIdx <= m_efg->NumInfosets()[pl]; isetIdx++) {
      Infoset *infoset = m_efg->Players()[pl]->Infosets()[isetIdx];
      // The information-set value is the probability-weighted sum of
      // its action values; it must be complete before regrets are set,
      // hence the two separate loops below.
      IsetValue(infoset) = (T) 0;
      for (int a = 1; a <= infoset->NumActions(); a++) {
	Action *action = infoset->Actions()[a];
	IsetValue(infoset) += ActionProb(action) * ActionValue(action);
      }
      for (int a = 1; a <= infoset->NumActions(); a++) {
	Action *action = infoset->Actions()[a];
	Regret(action) = (ActionValue(action) - IsetValue(infoset)) * IsetProb(infoset);
      }
    }
  }
  m_cached_data = true;
}

template <class T>
void BehavProfile<T>::BehaviorStrat(const efgGame &E, int pl, Node *n)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -