📄 acoins.cc

📁 基于学习的深度优先搜索算法
💻 CC
📖 第 1 页 / 共 3 页
字号:
inline graph_t::entry_t*graph_t::add_tip( entry_t *parent, const action_t &a, int obs, const state_t &child ){  std::map<state_t,entry_t*>::iterator ni = nodes_.find( child );  if( ni != nodes_.end() )    {      (*ni).second->add_parent( parent );      return( (*ni).second );    }  else    {      entry_t *e = new entry_t( child );      if( parent ) e->add_parent( parent );      nodes_.insert( std::make_pair( child, e ) );      tips_.insert( e );      return( e );    }}inline boolgraph_t::expand( uchar_t n, entry_t &node, std::multiset<entry_t*,entry_t> &S ){  bool found = false;  for( int m = n>>1; m > 0; --m )    for( int lhs1 = m; lhs1 >= 0; --lhs1 )    for( int lhs2 = m; lhs2 >= 0; --lhs2 )      if( lhs1 + lhs2 <= node.s().lhs() ) {        for( int ls1 = m-lhs1; ls1 >= 0; --ls1 )        for( int ls2 = m-lhs2; ls2 >= 0; --ls2 )          if( ls1 + ls2 <= node.s().ls() ) {            for( int hs1 = m-lhs1-ls1; hs1 >= 0; --hs1 )            for( int hs2 = m-lhs2-ls2; hs2 >= 0; --hs2 )              if( (hs1+hs2 <= node.s().hs()) && (m+m-lhs1-lhs2-ls1-ls2-hs1-hs2 <= node.s().s()) )                {                  assert( m - lhs1 - ls1 - hs1 + m - lhs2 - ls2 - hs2 <= node.s().s() );                  action_t a( lhs1, ls1, hs1, m-lhs1-ls1-hs1, lhs2, ls2, hs2, m-lhs2-ls2-hs2 );                  if( !node.s().useful(a) ) continue;                  for( int obs = -1; obs < 2; ++obs )                    {                      state_t s_obs = node.s().next( a, obs );                      add_tip( &node, a, obs, s_obs );                    }                  found = true;                }          }      }  return( found );}voidgraph_t::expand_and_update( uchar_t n, entry_t &node, hash_t &hash, qvalue_func_t qvalue ){  std::multiset<entry_t*,entry_t> S;  // expand node  assert( node.fringe() && !hash.solved( node.s() ) );  assert( nodes_.find( node.s() ) != nodes_.end() );  node.set_fringe( false );  if( node.s().terminal(n) )    {      hash.update( node.s(), node.s().terminal_value() );      hash.mark( node.s() );    }  else    {      ++expansions;      if( !expand( n, node, S ) )        {          hash.update( node.s(), node.s().terminal_value() );          hash.mark( node.s() );        }    }  S.insert( &node );  node.visit();  // iterate over S: Step 10 from Nilsson's  while( !S.empty() )    {      entry_t *e = *S.begin();      S.erase( e );      e->unvisit();      assert( e->index() == 0 );      // backprop: Step 11 from Nilsson's      bool change = e->revise( n, hash, qvalue );      if( change )        {          // update parents: Step 12 from Nilsson's          for( std::set<entry_t*>::const_iterator pi = e->parents().begin(); pi != e->parents().end(); ++pi )            {              bool found = false;              size_t index = 0;              action_t a = (*pi)->marked();              assert( (*pi)->s().useful( a ) );              for( int obs = -1; obs < 2; ++obs )                {                  state_t s_next = (*pi)->s().next( a, obs );                  if( s_next == e->s() ) found = true;                  std::map<state_t,entry_t*>::const_iterator it = nodes_.find( s_next );                  if( (it != nodes_.end()) && (*it).second->visited() ) ++index;                }              if( found && (*pi)->visited() )                {                  S.erase( *pi );                  (*pi)->dec_index();                  S.insert( *pi );                }              else if( found )                {                  (*pi)->set_index( index );                  S.insert( *pi );                }            }        }    }  assert( S.empty() );  // compute best partial solution graph: Step 4 from Nilsson's  tips_.clear();  if( !hash.solved( root_->s() ) )    {      std::list<entry_t*> stack;      stack.push_front( root_ );      while( !stack.empty() )        {          entry_t *e = stack.front();          stack.pop_front();          e->visit();          assert( !hash.solved( e->s() ) );          if( e->fringe() )            tips_.insert( e );          else if( !(e->marked() == action_t()) )            {              bool solved = true;              action_t a = e->marked();              assert( e->s().useful( a ) );              for( int obs = -1; obs < 2; ++obs )                {                  state_t s_next = e->s().next( a, obs );                  if( !hash.solved( s_next ) )                    {                      solved = false;                      std::map<state_t,entry_t*>::iterator ni = nodes_.find( s_next );                      assert( ni != nodes_.end() );                      if( !(*ni).second->visited() ) stack.push_front( (*ni).second );                    }                }              if( solved ) { e->set_fringe( true ); tips_.insert( e ); }            }          else            {              e->set_fringe( true );              tips_.insert( e );            }          if( tips_.size() > 0 ) break;        }    }  assert( hash.solved( root_->s() ) || (tips_.size() > 0) );  // clean visited flags  for( std::map<state_t,entry_t*>::iterator ni = nodes_.begin(); ni != nodes_.end(); ++ni )    (*ni).second->unvisit();}boolldfsBound( int depth, uchar_t n, hash_t &hash, state_t s, unsigned bound, qvalue_func_t qvalue ){  if( verbose > 0 ) std::cout << s << ", bound=" << bound << std::endl;  if( s.terminal(n) || (hash.upper(s) <= bound) )    {      if( (verbose ) && !hash.solved(s) )        {          indent( std::cout, depth );          std::cout << "terminal=" << s.terminal_value() << std::endl;        }      if( s.terminal(n) ) hash.update( s, s.terminal_value() );      return( true );    }  ++expansions;  bool flag = false;  assert( hash.value( s ) <= bound );  for( int m = n>>1; m > 0; --m )  for( int lhs1 = m; lhs1 >= 0; --lhs1 )  for( int lhs2 = m; lhs2 >= 0; --lhs2 )    if( lhs1 + lhs2 <= s.lhs() ) {      for( int ls1 = m-lhs1; ls1 >= 0; --ls1 )      for( int ls2 = m-lhs2; ls2 >= 0; --ls2 )        if( ls1 + ls2 <= s.ls() ) {          for( int hs1 = m-lhs1-ls1; hs1 >= 0; --hs1 )          for( int hs2 = m-lhs2-ls2; hs2 >= 0; --hs2 )            if( (hs1+hs2 <= s.hs()) && (m+m-lhs1-lhs2-ls1-ls2-hs1-hs2 <= s.s()) )              {                assert( m - lhs1 - ls1 - hs1 + m - lhs2 - ls2 - hs2 <= s.s() );                action_t a( lhs1, ls1, hs1, m-lhs1-ls1-hs1, lhs2, ls2, hs2, m-lhs2-ls2-hs2 );                unsigned qv = (hash.*qvalue)( s, a );                if( !s.useful(a) ) continue;                if( qv > bound ) continue;                flag = true;                for( int obs = -1; obs < 2; ++obs )                  {                     state_t s_obs = s.next( a, obs );                    unsigned nbound = bound - 1;                    if( qvalue == &hash_t::QValueAdd )                      {                        for( int o = -1; o < 2; ++o )                          if( obs != o ) nbound -= hash.value( s.next( a, o ) );                      }                    flag = ldfsBound( 1+depth, n, hash, s_obs, nbound, qvalue );                    if( flag )                      {                        unsigned qv = (hash.*qvalue)( s, a );                        flag = (qv <= bound);                      }                    if( !flag ) break;                  }                if( flag ) goto end_A_loop;              }          }      }  end_A_loop:  if( flag )    {      hash.update_upper( s, bound );      if( verbose )        {          indent( std::cout, depth );          std::cout << "upper=" << bound << std::endl;        }    }  else    {      unsigned bqv = hash.bestQValue( n, s, qvalue );      assert( bqv > hash.value( s ) );      hash.update( s, bqv );      if( verbose > 0 )        {          indent( std::cout, depth );          std::cout << s << ", update=" << bqv << std::endl;        }    }  return( flag );}size_tldfsBoundDriver( uchar_t n, hash_t &hash, state_t s, qvalue_func_t qvalue ){  expansions = 0;  unsigned iterations = 0;  while( hash.value( s ) < hash.upper( s ) )    {      ++iterations;      ldfsBound( 0, n, hash, s, hash.value( s ), qvalue );    }  return( iterations );}boolldfs( size_t depth, uchar_t n, hash_t &hash, const state_t s, qvalue_func_t qvalue ){  if( verbose ) { indent( std::cout, depth ); std::cout << s << std::endl; }  if( s.terminal(n) || hash.solved(s) )    {      if( (verbose ) && !hash.solved(s) )        {          indent( std::cout, depth );          std::cout << "terminal=" << s.terminal_value() << std::endl;        }      if( s.terminal(n) ) hash.update( s, s.terminal_value() );      hash.mark( s );      return( true );    }  ++expansions;  bool flag = false;  unsigned bound = hash.value( s );  for( int m = n>>1; m > 0; --m )  for( int lhs1 = m; lhs1 >= 0; --lhs1 )  for( int lhs2 = m; lhs2 >= 0; --lhs2 )    if( lhs1 + lhs2 <= s.lhs() ) {      for( int ls1 = m-lhs1; ls1 >= 0; --ls1 )      for( int ls2 = m-lhs2; ls2 >= 0; --ls2 )        if( ls1 + ls2 <= s.ls() ) {          for( int hs1 = m-lhs1-ls1; hs1 >= 0; --hs1 )          for( int hs2 = m-lhs2-ls2; hs2 >= 0; --hs2 )            if( (hs1+hs2 <= s.hs()) && (m+m-lhs1-lhs2-ls1-ls2-hs1-hs2 <= s.s()) )              {                assert( m - lhs1 - ls1 - hs1 + m - lhs2 - ls2 - hs2 <= s.s() );                action_t a( lhs1, ls1, hs1, m-lhs1-ls1-hs1, lhs2, ls2, hs2, m-lhs2-ls2-hs2 );                if( !s.useful(a) ) continue;                unsigned qv = (hash.*qvalue)( s, a );                if( qv > bound ) continue;                flag = true;                for( int obs = -1; obs < 2; ++obs )                  {                     state_t s_obs = s.next( a, obs );                    flag = ldfs( 1+depth, n, hash, s_obs, qvalue );                    if( flag )                      {                        unsigned qv = (hash.*qvalue)( s, a );                        flag = (qv <= bound);                      }                    if( !flag ) break;                  }                if( flag ) goto end_A_loop;              }          }      }  end_A_loop:  if( flag )    {      hash.mark( s );      if( verbose )        {          indent( std::cout, depth );          std::cout << "mark " << s << " with " << hash.value(s) << std::endl;        }    }  else    {      unsigned bqv = hash.bestQValue( n, s, qvalue );      assert( (qvalue != &hash_t::QValueAdd) || (bqv > hash.value(s)) );      hash.update( s, bqv );      if( verbose )        {          indent( std::cout, depth );          std::cout << s << ", update=" << bqv << std::endl;        }    }  return( flag );}size_tldfsDriver( uchar_t n, hash_t &hash, const state_t s, qvalue_func_t qvalue ){  expansions = 0;  size_t iterations = 0;  while( !hash.solved( s ) )    {      ++iterations;      ldfs( 0, n, hash, s, qvalue );      if( verbose ) std::cout << "V(s0)=" << hash.value(s) << std::endl;    }  return( iterations );}voidgenerateStateSpace( uchar_t n, const state_t &s0, std::set<state_t> &space ){  std::list<state_t> queue;  queue.push_back( s0 );  space.insert( s0 );  while( !queue.empty() )    {      state_t s = queue.front();      queue.pop_front();      if( s.terminal(n) ) continue;      for( int m = n>>1; m > 0; --m )      for( int lhs1 = m; lhs1 >= 0; --lhs1 )      for( int lhs2 = m; lhs2 >= 0; --lhs2 )        if( lhs1 + lhs2 <= s.lhs() ) {          for( int ls1 = m-lhs1; ls1 >= 0; --ls1 )          for( int ls2 = m-lhs2; ls2 >= 0; --ls2 )            if( ls1 + ls2 <= s.ls() ) {              for( int hs1 = m-lhs1-ls1; hs1 >= 0; --hs1 )              for( int hs2 = m-lhs2-ls2; hs2 >= 0; --hs2 )                if( (hs1+hs2 <= s.hs()) && (m+m-lhs1-lhs2-ls1-ls2-hs1-hs2 <= s.s()) )                  {                    assert( m - lhs1 - ls1 - hs1 + m - lhs2 - ls2 - hs2 <= s.s() );                    action_t a( lhs1, ls1, hs1, m-lhs1-ls1-hs1, lhs2, ls2, hs2, m-lhs2-ls2-hs2 );                    if( !s.useful(a) ) continue;                    for( int obs = -1; obs < 2; ++obs )                      {                        state_t s_obs = s.next( a, obs );                        if( space.find( s_obs ) == space.end() )                          {                            queue.push_back( s_obs );                            space.insert( s_obs );                          }                      }                  }              }          }    }}size_tvalueIteration( uchar_t n, hash_t &hash, const state_t s0, qvalue_func_t qvalue ){  std::set<state_t> space;  generateStateSpace( n, s0, space );  unsigned residual = 1;  size_t iterations = 0;  while( residual > 0 )    {      residual = 0;      for( std::set<state_t>::const_iterator si = space.begin(); si != space.end(); ++si )        if( !(*si).terminal(n) )          {            unsigned value = hash.value( *si );            unsigned bqv = hash.bestQValue( n, *si, qvalue );            residual = MAX(residual,bqv-value);            hash.update( *si, bqv );          }        else          hash.update( *si, (*si).terminal_value() );      ++iterations;    }  return( iterations );}size_taostar( uchar_t n, hash_t &hash, const state_t s, qvalue_func_t qvalue ){  graph_t graph;  expansions = 0;  if( s.terminal(n) )    {      hash.update( s, s.terminal_value() );      hash.mark( s );    }  graph_t::entry_t *root = graph.add_tip( 0, action_t(), 0, s );  graph.set_root( root );  size_t iterations = 0;  while( !hash.solved( s ) )    {      assert( graph.tips().size() != 0 );      graph_t::entry_t &node = graph.choose_tip();      graph.expand_and_update( n, node, hash, qvalue );      ++iterations;    }  return( iterations );}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -