📄 neuro_intercept_bms.c
      state_memory[state_memory_ctr].state.ball_vel = ball_vel;
      if(prob_easy >= drand48()){
        //TRAIN_PROT("generated easy state: "<<state_memory[state_memory_ctr].state<<endl);
        state_memory_ctr ++;
      }
      state_memory[state_memory_ctr].state.my_pos = Vector(x,y);
      state_memory[state_memory_ctr].state.my_vel = my_vel;
      state_memory[state_memory_ctr].state.my_angle = ANGLE(a);
      state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
      state_memory[state_memory_ctr].state.ball_vel = ball_vel;
      //TRAIN_PROT("generated state: "<<state_memory[state_memory_ctr].state<<endl);
      state_memory_ctr ++;
      // else: do not consider !
    } // while state_memory_ctr < 10000
#if 0
    state_memory_ctr = 0; // simply overwrite memory
    for(Value x = -regular::x_range; x <= regular::x_range; x += regular::dx){
      for(Value y = -regular::x_range; y <= regular::x_range; y += regular::dx){
        for(Value vx = -regular::v_range; vx <= regular::v_range; vx += regular::dv){
          for(Value vy = -regular::v_range; vy <= regular::v_range; vy += regular::dv){
#if 0
            for(Value vx_ball = -regular::vball_range; vx_ball <= regular::vball_range; vx_ball += regular::dvball){
              for(Value vy_ball = -regular::vball_range; vy_ball <= regular::vball_range; vy_ball += regular::dvball){
#endif
            for(Value vx_ball = 0; vx_ball <= regular::vball_range; vx_ball += regular::dvball){
              // for(Value vy_ball = 0; vy_ball <= regular::vball_range; vy_ball += regular::dvball){
              for(Value vy_ball = 0; vy_ball <= 0; vy_ball += regular::dvball){
                for(int asteps = 0; asteps <= regular::angle_steps; asteps ++){
                  if(regular::prob < drand48())
                    continue;
                  a = asteps * PI/(float)(regular::angle_steps);
                  if(asteps == regular::angle_steps)
                    a = PI;
                  ball_vel = Vector(vx_ball, vy_ball);
                  if(SQUARE(ball_vel.x) + SQUARE(ball_vel.y) >= SQUARE(ServerOptions::ball_speed_max)){
                    ball_vel.normalize(ServerOptions::ball_speed_max);
                  }
                  my_vel = Vector(vx, vy);
                  if(SQUARE(my_vel.x) + SQUARE(my_vel.y) >= SQUARE(MY_MAX_SPEED * ServerOptions::player_decay)){
                    my_vel.normalize(MY_MAX_SPEED * ServerOptions::player_decay);
                  }
                  if((my_vel.norm() > 0.16) && // 1.0 * 0.4 * 0.4
                     (Tools::get_abs_angle(my_vel.arg() - a) > 5/180. * PI)){
#if 0
                    TRAIN_PROT("max speed "<<my_vel.norm()<<" and vel angle "
                               <<RAD2DEG(my_vel.arg())<<" differs from my angle "
                               <<RAD2DEG(a)<<endl);
#endif
                    continue;
                  }
#if 0
                  if(Vector(x,y).norm() > train_loops_ctr)
                    continue;
#endif
#if 0
                  if(x == 0){
                    state_memory[state_memory_ctr].state.my_pos = Vector(1.2, 0);
                    state_memory[state_memory_ctr].state.my_vel = my_vel;
                    state_memory[state_memory_ctr].state.my_angle = a;
                    state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                    state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                    state_memory_ctr ++; // else: do not consider !
                  }
                  if(y == 0){
                    state_memory[state_memory_ctr].state.my_pos = Vector(0, 1.2);
                    state_memory[state_memory_ctr].state.my_vel = my_vel;
                    state_memory[state_memory_ctr].state.my_angle = a;
                    state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                    state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                    state_memory_ctr ++; // else: do not consider !
                  }
#endif
                  Vector mypos = Vector(x,y);
                  mypos.normalize(1.2);
                  state_memory[state_memory_ctr].state.my_pos = mypos;
                  state_memory[state_memory_ctr].state.my_vel = my_vel;
                  state_memory[state_memory_ctr].state.my_angle = a;
                  state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                  state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                  if(0.1 >= drand48())
                    state_memory_ctr ++;
                  state_memory[state_memory_ctr].state.my_pos = Vector(x,y);
                  state_memory[state_memory_ctr].state.my_vel = my_vel;
                  state_memory[state_memory_ctr].state.my_angle = a;
                  state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                  state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                  state_memory_ctr ++; // else: do not consider !
                }
              }
            }
          }
        }
      }
    }
#endif
  } // if regular states

  for(int i = 0; i < state_memory_ctr; i++){
    if(is_success(state_memory[i].state)){
      target_val = costs_success;
    }
    else{
      MyState successor_state;
      target_val = costs_per_action
                   + get_value_of_best_successor(state_memory[i].state, successor_state);
#if 0 // Residual Gradient
      get_features(successor_state, training_set.input[training_set.ctr]);
      float tmp_target = evaluate(state_memory[i].state) - costs_per_action;
      training_set.target[training_set.ctr] = tmp_target;
      training_set.ctr ++;
#endif
    }
    get_features(state_memory[i].state, training_set.input[training_set.ctr]);
    if((adjust_targets == true) && (target_val > (train_loops_ctr+1) * costs_per_action))
      target_val = (train_loops_ctr+1) * costs_per_action;
    if(target_val > 0.9)
      target_val = 0.9;
    training_set.target[training_set.ctr] = target_val;
    // if(target_val >= .99 * costs_per_action)
    training_set.ctr ++;
  }
  TRAIN_PROT("Generated n training pats: "<<training_set.ctr
             <<" No. of regular states "<<state_memory_ctr<<endl);
}
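/* For reference: the loop above performs one sweep of value iteration over
   the sampled states. Below is a minimal sketch of the backup it implements,
   using this file's is_success()/get_value_of_best_successor() and its cost
   constants; the helper name value_iteration_target is hypothetical and not
   part of the class. */
#if 0
// hypothetical helper: distilled form of the target computation above
Value NeuroIntercept::value_iteration_target(const MyState &state) {
  if(is_success(state))            // absorbing success state
    return costs_success;
  MyState successor;               // greedy one-step lookahead over the action set
  Value target = costs_per_action + get_value_of_best_successor(state, successor);
  if(target > 0.9)                 // clip targets to the net's output range, as above
    target = 0.9;
  return target;
}
#endif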
#if 0
void NeuroIntercept::Tools::get_successor_state(MyState const &state, Cmd_Main const &cmd,
                                                MyState &next_state) {
  Angle na;
  Angle a = state.my_angle.get_value_0_p2PI();
  Tools::model_cmd_main(state.my_pos, state.my_vel, a, state.ball_pos, state.ball_vel,
                        cmd,
                        next_state.my_pos, next_state.my_vel, na,
                        next_state.ball_pos, next_state.ball_vel);
  next_state.my_angle = na;
  if((next_state.my_vel.norm()/ServerOptions::player_decay) > 1.08 * MY_MAX_SPEED){
    LOG_ERR(0,<<"Max speed too high "<<next_state.my_vel.norm()/ServerOptions::player_decay);
    LOG_ERR(0,<<"Position change: "<<(next_state.my_pos - state.my_pos).norm());
  }
}
#endif

Value NeuroIntercept::get_value_of_best_successor(MyState const &state,
                                                  MyState &successor_state) {
  MyState next_state;
  Value best_val;
  bool best_val_ok = false;
  if(learn12step == 1){
    Cmd dummy;
    int steps;
    if(onetwostep_intercept->get_cmd(dummy, state, steps)){
      // I can do it in 1 or 2 steps!
      //TRAIN_PROT("Get Value: I can do it in "<<steps<<" from here "<<state<<endl);
      return (costs_success + costs_per_action * (steps-1));
    }
  }
  if(do_pi){ // policy iteration: just take neuro intercept
    Cmd cmd;
    intercept->get_cmd(cmd, state.my_pos, state.my_vel,
                       state.my_angle, state.ball_pos, state.ball_vel);
    Tools::get_successor_state(state, cmd.cmd_main, next_state);
    successor_state = next_state;
    return (evaluate(next_state));
  }
  itr_actions.reset();
  Value val;
  while(Cmd_Main const* action = itr_actions.next()){
    Tools::get_successor_state(state, *action, next_state);
    val = evaluate(next_state);
    if(!best_val_ok || val < best_val){
      best_val = val;
      successor_state = next_state;
      best_val_ok = true;
    }
  }
  if(best_val_ok)
    return best_val;
  LOG_ERR(0,<<"NeuroIntercept: Error: did not find best successor state");
  LOG_MOV(0,<<"NeuroIntercept: Error: did not find best successor state");
  return -1; // error
}
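/* Note on the search above: values are costs-to-go, so the best successor is
   the MINIMUM of evaluate() over the discrete action set. The 1/2-step
   shortcut returns costs_success plus (steps-1) further action costs directly,
   without consulting the net, and the do_pi branch instead evaluates the
   single successor chosen by the current intercept policy. */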
//TRAIN_PROT("Get Value: I can do it in "<<steps<<" from here "<<state<<endl); return(costs_success + costs_per_action * (steps-1)); } } if(do_pi){ // policy iteration: just take neuro intercept Cmd cmd; intercept->get_cmd(cmd,state.my_pos,state.my_vel, state.my_angle,state.ball_pos,state.ball_vel); Tools::get_successor_state(state,cmd.cmd_main ,next_state); successor_state = next_state; return (evaluate(next_state)); } itr_actions.reset(); Value val; while ( Cmd_Main const* action = itr_actions.next() ) { Tools::get_successor_state(state,*action ,next_state); val= evaluate( next_state ); if ( !best_val_ok || val < best_val ) { best_val= val; successor_state = next_state; best_val_ok= true; } } if ( best_val_ok ) return best_val; LOG_ERR(0,<<"NeuroIntercept: Error: did not find best successor state"); LOG_MOV(0,<<"NeuroIntercept: Error: did not find best successor state"); return -1; // error}void NeuroIntercept::train_nn(){ float error,tss; float pattern_stress; int step1_ctr = 0; int step2_ctr = 0; if(init_mode == 0){ net->init_weights(0,init_param); net->set_update_f(1,learn_params); // 1 is Rprop } else if(init_mode == 1){ net->init_weights(2,init_param); } else if(init_mode == 2){ // net->init_weights(2,init_param); net->init_weights(2,init_param); learn_params[0]= learn_param; net->set_update_f(1,learn_params); // 1 is Rprop } net->save_net("init.net"); for(int n=0;n<num_epochs; n++){ tss = 0.0; step1_ctr = 0; step2_ctr = 0; for(int p=0;p<training_set.ctr;p++){ net->forward_pass(training_set.input[p],net->out_vec); pattern_stress = 1.0; for(int i=0;i<net->topo_data.out_count;i++){ if(training_set.target[p] == costs_success){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif } if(fabs(training_set.target[p]-costs_per_action)<0.0001){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif step1_ctr ++; pattern_stress = stress; } if(fabs(training_set.target[p]-2*costs_per_action)<0.0001){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif step2_ctr ++; pattern_stress = stress; }#if 0 if(n==num_epochs-1){ TRAIN_PROT("pattern: "<<p<<" " <<training_set.input[p][0]<<" " <<training_set.input[p][1]<<" " <<training_set.input[p][2]<<" " <<training_set.input[p][3]<<" " <<training_set.input[p][4]<<" " <<training_set.input[p][5]<<" " <<" output "<<net->out_vec[0] <<" target "<<training_set.target[p]<<endl); }#endif error = net->out_vec[i] = net->out_vec[i] - training_set.target[p]; net->out_vec[i] *= pattern_stress; /* out_vec := dE/do = (o-t) */ tss += error * error; } net->backward_pass(net->out_vec,net->in_vec); } // TRAIN_PROT(endl); net->update_weights(); /* learn by epoch */ LOG_MOV(0,<<"NeuroIntercept: epoch "<<n<<" TSS: "<<tss); LOG_ERR(0,<<"NeuroIntercept: epoch "<<n<<" TSS: "<<tss<<" per pattern "<<tss/float(training_set.ctr)); if(n==0 || (n%20) == 0 || n==num_epochs -1) TRAIN_PROT("Train: Epoch "<<n<<" TSS: "<<tss<<" per pattern "<<tss/float(training_set.ctr)<<endl); } TRAIN_PROT("Counted "<<step1_ctr<<" 1 step patterns and "<<step2_ctr<<" two step pats"<<endl);}void NeuroIntercept::do_sequence(const MyState & initial_state, Value *result, const int N){ MyState cur_state, next_state; Cmd cmd; int n;#define SEQ_VERBOSE 0 int imax; if(do_reference) imax = 2; else imax = 1; for(int i=0;i<imax;i++){ result[i] = 0; for(int k=0;k<num_testrepeats;k++){ 
void NeuroIntercept::do_sequence(const MyState &initial_state, Value *result, const int N){
  MyState cur_state, next_state;
  Cmd cmd;
  int n;
#define SEQ_VERBOSE 0
  int imax;
  if(do_reference)
    imax = 2;
  else
    imax = 1;
  for(int i = 0; i < imax; i++){
    result[i] = 0;
    for(int k = 0; k < num_testrepeats; k++){
      n = 0;
      cur_state = initial_state;
      while(n < N && (cur_state.my_pos.sqr_distance(cur_state.ball_pos)
                      > SQUARE(ServerOptions::kickable_area))){
        cmd.cmd_main.unset_lock();
        if(i == 0)
          neuro_decide(cur_state, cmd);
        else{
          if(cur_state.my_pos.distance(cur_state.ball_pos) > test::x_range){
            // stop sequence
            //TRAIN_PROT("Do Sequence: ball too far: "<<cur_state<<endl);
            result[i] = N;
            return;
          }
          intercept->get_cmd(cmd, cur_state.my_pos, cur_state.my_vel,
                             cur_state.my_angle, cur_state.ball_pos, cur_state.ball_vel);
        }
        Tools::get_successor_state(cur_state, cmd.cmd_main, next_state, do_stochastic);
#if SEQ_VERBOSE
        TRAIN_PROT("t:"<<n<<" controller "<<i<<" state(t) "<<cur_state<<" cmd: "<<cmd.cmd_main<<endl);
        //<<" state(t+1) "<<next_state<<endl);
#endif
        cur_state = next_state;
        n++;
      }
#if SEQ_VERBOSE
      TRAIN_PROT(endl);
#endif
      result[i] += n;
    } // for k
    result[i] /= (float)num_testrepeats;
  }
}

void NeuroIntercept::do_test(){
  Value average[2];
  int won[2];
  TRAIN_PROT("\nDo Test:"<<endl);
#if 0
  for(int i = 0; i < test_memory_ctr; i++){
    Value J = evaluate(test_memory[i].state);
    TRAIN_PROT("Eval: "<<test_memory[i].state.my_pos<<test_memory[i].state.my_vel
               <<test_memory[i].state.ball_pos
               <<test_memory[i].state.ball_vel<<RAD2DEG(test_memory[i].state.my_angle.get_value())
               <<" J: "<<J<<" estimated cycles2go "<<J/costs_per_action<<endl);
  }
#endif
  for(int i = 0; i < test_memory_ctr; i++){
    //TRAIN_PROT("do sequence "<<i<<" "<<test_memory[i].state<<endl);
    if(test_memory_result[i][1] < max_sequence_len)
      do_sequence(test_memory[i].state, test_memory_result[i], max_sequence_len);
    else
      // TRAIN_PROT("Test Seq "<<i<<" not valid, Sung's intercept needs max. "<<endl);
      ;
  }
  do_reference = false; // do it only once
  average[0] = average[1] = 0.0;
  won[0] = 0;
  won[1] = 0;
  int i;
  for(i = 0; i < test_memory_ctr; i++){
    if(test_memory_result[i][1] == max_sequence_len)
      continue;
    //TRAIN_PROT("i: "<<i<<endl);
    average[0] += test_memory_result[i][0];
    average[1] += test_memory_result[i][1];
    Value J = evaluate(test_memory[i].state);
    if(test_memory_result[i][0] < test_memory_result[i][1]){
      won[0] ++;
      //cout<<"\nWon 0 incremented: "<<won[0]<<endl;
      TRAIN_PROT("+ Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
    else if(test_memory_result[i][0] > test_memory_result[i][1]){
      won[1] ++;
      TRAIN_PROT("- Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
    else{
      TRAIN_PROT("  Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
  }
  TRAIN_PROT(endl);
  if(average[0] == average[1]){
    TRAIN_PROT("WOOHOO, Neuro draws level!"<<endl);
    statistics::total_draws ++;
  }
  else if(average[0] < average[1]){
    TRAIN_PROT("WOOHOO, GREAT, Neuro wins!"<<endl);
    statistics::total_wins ++;
  }
  TRAIN_PROT("Average: neuro: "<<average[0]<<" won "<<won[0]
             <<" classic Sung "<<average[1]<<" won "<<won[1]
             <<" total wins "<<statistics::total_wins
             <<" total draws "<<statistics::total_draws
             <<" total sum "<<statistics::total_draws + statistics::total_wins
             <<endl);
  TRAIN_RESULT(train_loops_ctr<<" "<<average[0]<<" "<<won[0]
               <<" "<<average[1]<<" "<<won[1]
               <<" "<<statistics::total_wins
               <<" "<<statistics::total_draws
               <<" "<<statistics::total_draws + statistics::total_wins
               <<endl);
  resultfile.flush();
}
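/* Note on do_test above: each stored test state is rolled out with both the
   neural controller (result[0]) and the classic "Sung" intercept (result[1]).
   Each result is the number of cycles to intercept, averaged over
   num_testrepeats runs, and states the reference controller cannot solve
   within max_sequence_len are excluded from the comparison. */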
void NeuroIntercept::generate_test_state(const Vector mypos, const Vector myvel,
                                         const Vector ballpos, const Vector ballvel,
                                         const Angle myangle){
  if(test_memory_ctr >= TEST_MEMORY_SIZE_ICPT){
    LOG_MOV(0,<<"NeuroIntercept: Warning Test memory full");
    return;
  }
  test_memory[test_memory_ctr].state.my_pos = mypos;
  test_memory[test_memory_ctr].state.my_vel = myvel;
  test_memory[test_memory_ctr].state.ball_pos = ballpos;
  test_memory[test_memory_ctr].state.ball_vel = ballvel;
  test_memory[test_memory_ctr].state.my_angle = ANGLE(myangle);
  test_memory_ctr ++;
}
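/* Example of seeding the test memory (all values hypothetical, chosen only
   for illustration): a stationary player 5 m behind a ball rolling away
   along the x axis. */
#if 0
generate_test_state(Vector(-5.0, 0.0),  // player position (hypothetical)
                    Vector(0.0, 0.0),   // player velocity (hypothetical)
                    Vector(0.0, 0.0),   // ball position (hypothetical)
                    Vector(1.0, 0.0),   // ball velocity (hypothetical)
                    0.0);               // body angle in rad, wrapped via ANGLE() inside
#endif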