📄 neuro_intercept_bms.c
  MyState state;
  get_cur_state( state );

  if(op_mode == 3){
    Intercept2 inter;
    int time;
    // inter.intercept( WSinfo::ball->pos, WSinfo::ball->vel, WSinfo::me, time, cmd.cmd_main);
    inter.intercept( state.ball_pos, state.ball_vel, WSinfo::me, time, cmd.cmd_main);
    LOG_MOV(0,<<"NeuroIntercept: Using ANALYTICAL intercept. estimated steps 2 go "<<time);
    return true;
  }

  int num_cycles;
  Cmd intercept_cmd;
  intercept->get_cmd(intercept_cmd, state.my_pos, state.my_vel,
                     state.my_angle, state.ball_pos, state.ball_vel, num_cycles);
  //intercept->get_cmd(intercept_cmd, num_cycles); // determine the number of cycles2go

  if((op_mode == 2) || (num_cycles > neuro_maxcycles)){ // Sung's intercept
    int steps;
    if(onetwostep_intercept->get_cmd(cmd, state, steps)){ // check onetwostep first
      //TRAIN_PROT("NIcpt: onetwostep takes "<<steps<<" steps"<<endl);
      LOG_MOV(0,<<"NeuroIntercept: Onetwostep takes control");
      return true;
    }
    LOG_MOV(0,<<"NeuroIntercept: op mode 2 or estimated cycles "<<num_cycles<<" > neuro_maxcycles: "
            <<neuro_maxcycles<<" : Calling Sung's intercept");
    cmd.cmd_main.clone(intercept_cmd.cmd_main);
    /* intercept->get_cmd(cmd, state.my_pos, state.my_vel,
                          state.my_angle, state.ball_pos, state.ball_vel); */
    return true;
  }

  if(op_mode == 1){ // store and learn
    if(use_regular_states){
      learn(cmd);
    }
    else{ // use the store-states mechanism
      if(num_stored <= store_per_cycle){
        store_state();
        num_stored ++;
      }
      if(num_stored == store_per_cycle){
        learn(cmd);
        num_stored = 0;
      }
    } // use store states
  } // op_mode == learn

  if(WSinfo::is_ball_kickable()){
    LOG_MOV(0,<<"NeuroIntercept: ball reached");
    LOG_ERR(0,<<"Yeah I reached the ball!!");
    cmd.cmd_main.set_turn(0); // Ok, I'm fine, do nothing
    return false;
  }

  neuro_decide(state, cmd);
  if(op_mode == 1){
    check_cmd(cmd);
  }
  LOG_MOV(0,<<"NeuroIntercept: Selected CMD: "<<cmd.cmd_main);
  return true;
}

void NeuroIntercept::set_virtual_state(Vector const ballpos, Vector const ballvel){
  virtual_state.ballpos = ballpos;
  virtual_state.ballvel = ballvel;
  virtual_state.my_pos.x = 60.0; // marker value: my_pos not set, take it from the world state
  virtual_state.valid_at = WSinfo::ws->time;
}

void NeuroIntercept::set_virtual_state(Vector const mypos, Vector const myvel, ANGLE myang,
                                       Vector const ballpos, Vector const ballvel){
  virtual_state.my_pos = mypos;
  virtual_state.my_vel = myvel;
  virtual_state.my_angle = myang;
  virtual_state.ballpos = ballpos;
  virtual_state.ballvel = ballvel;
  virtual_state.valid_at = WSinfo::ws->time;
}

void NeuroIntercept::get_cur_state( MyState & state) {
  if(virtual_state.valid_at == WSinfo::ws->time){ // currently valid
    state.ball_pos = virtual_state.ballpos;
    state.ball_vel = virtual_state.ballvel;
    if(virtual_state.my_pos.x != 60.0){
      state.my_pos = virtual_state.my_pos;
      state.my_vel = virtual_state.my_vel;
      state.my_angle = virtual_state.my_angle;
    }
    else{
      state.my_pos = WSinfo::me->pos;
      state.my_vel = WSinfo::me->vel;
      state.my_angle = WSinfo::me->ang;
    }
    LOG_MOV(0,<<"Neuro Intercept: Virtual ballpos is set: pos "<<state.ball_pos
            <<" vel "<<state.ball_vel);
  }
  else{ // take actual values
    state.my_pos = WSinfo::me->pos;
    state.my_vel = WSinfo::me->vel;
    state.my_angle = WSinfo::me->ang;
    state.ball_pos = WSinfo::ball->pos;
    state.ball_vel = WSinfo::ball->vel;
  }
}

void NeuroIntercept::get_features(MyState const& state, float * net_in) {
  /** features of neuro intercept ball:
      (0) distance between player and ball
      (1) angle between player's view direction and ball (-PI ~ PI)
      (2) player velocity x-direction (rotated)
      (3) player velocity y-direction (rotated)
      (4) ball velocity x-direction (rotated)
      (5) ball velocity y-direction (rotated)
  */
  Vector ball_vel = state.ball_vel;
  Vector my_pos = state.my_pos - state.ball_pos;
  Vector my_vel = state.my_vel;
  Value my_angle = state.my_angle.get_value();
  float angle = my_pos.arg();

  /* rotate the whole system (pos.y should be zero) */
  my_pos.rotate(2*PI - angle);
  my_vel.rotate(2*PI - angle);
  ball_vel.rotate(2*PI - angle);
  my_angle = Tools::get_angle_between_mPI_pPI((my_angle - angle) - PI);
  if(my_angle < 0){ // mirror at the x-axis to exploit symmetry
    my_vel.y = -my_vel.y;
    ball_vel.y = -ball_vel.y;
  }
#if 1
  net_in[0] = my_pos.norm();  // distance to target
  net_in[1] = fabs(my_angle); // abs. relative view angle to target
  net_in[2] = my_vel.x;       // velocity in x direction
  net_in[3] = my_vel.y;       // velocity in y direction
  net_in[4] = 1.0*ball_vel.x;
  net_in[5] = 1.0*ball_vel.y;
#endif
}
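/* A compilation-excluded sketch (following this file's #if 0 convention) of the
   coordinate normalization performed by get_features() above, using only the
   standard library; Vec2, rot and ball_frame_features are illustrative names,
   not part of this codebase. The state is translated so the ball sits at the
   origin, rotated so the player lies on the positive x-axis, and mirrored at
   the x-axis when the relative body angle is negative, so the net only ever
   sees one half of the symmetric state space. */
#if 0
#include <cmath>

struct Vec2 { double x, y; };

// counter-clockwise rotation by ang radians
static Vec2 rot(Vec2 v, double ang) {
  return Vec2{ v.x*std::cos(ang) - v.y*std::sin(ang),
               v.x*std::sin(ang) + v.y*std::cos(ang) };
}

// produces the six net inputs from absolute positions/velocities
static void ball_frame_features(Vec2 my_pos, Vec2 my_vel, double my_ang,
                                Vec2 ball_pos, Vec2 ball_vel, float net_in[6]) {
  Vec2 rel{ my_pos.x - ball_pos.x, my_pos.y - ball_pos.y };
  double a = std::atan2(rel.y, rel.x);  // angle from ball to player
  rel      = rot(rel, -a);              // player now on positive x-axis, rel.y == 0
  my_vel   = rot(my_vel, -a);
  ball_vel = rot(ball_vel, -a);
  double rel_ang = std::remainder(my_ang - a - M_PI, 2.0*M_PI); // normalized to [-PI, PI]
  if (rel_ang < 0) {                    // mirror at the x-axis: exploit symmetry
    my_vel.y   = -my_vel.y;
    ball_vel.y = -ball_vel.y;
  }
  net_in[0] = (float)std::hypot(rel.x, rel.y); // distance to ball
  net_in[1] = (float)std::fabs(rel_ang);       // absolute relative body angle
  net_in[2] = (float)my_vel.x;
  net_in[3] = (float)my_vel.y;
  net_in[4] = (float)ball_vel.x;
  net_in[5] = (float)ball_vel.y;
}
#endif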
bool NeuroIntercept::check_onestep(Cmd & cmd, const MyState &state){
#define onestep_safetymargin .1
  Vector my_pos = state.my_pos;
  Vector my_vel = state.my_vel;
  Vector ball_pos = state.ball_pos;
  Vector ball_vel = state.ball_vel;
  ANGLE my_ang = state.my_angle;
  int bestdash = 200; // sentinel: no suitable dash found yet
  Value balldist, closest = 200;
  int dash;
  Vector dummy1, dummy2;
  Vector my_new_pos;
  Vector ball_new_pos;
  ANGLE my_new_angle;
  Cmd_Main command;

  for(int dashabs = 0; dashabs <= 100; dashabs += 50){
    for(int sign = 1; sign >= -1; sign -= 2){
      dash = sign*dashabs;
      command.unset_lock();
      command.unset_cmd();
      command.set_dash(dash);
      Tools::model_cmd_main(my_pos, my_vel, my_ang, ball_pos, ball_vel, command,
                            my_new_pos, dummy1, my_new_angle, ball_new_pos, dummy2);
      balldist = (my_new_pos - ball_new_pos).norm();
      if(balldist <= ServerOptions::kickable_area - onestep_safetymargin){
        if((closest > 0.7) && (balldist < closest)){
          bestdash = dash;
          closest = balldist;
        }
      }
    }
  }
  if(bestdash < 200){
    if(bestdash == 0){
      // Hey, I can get the ball without dashing, so let's turn toward the opponent goal
      Angle p_moment = ((Tools::opponent_goalpos() - my_pos).ARG() - my_ang).get_value_mPI_pPI();
      p_moment = p_moment*(1.0 + (ServerOptions::inertia_moment*(my_vel.norm())));
      if (p_moment > 3.14) p_moment = 3.14;
      if (p_moment < -3.14) p_moment = -3.14;
      p_moment = Tools::get_angle_between_null_2PI(p_moment);
      cmd.cmd_main.set_turn(p_moment);
      return true;
    }
    cmd.cmd_main.set_dash(bestdash);
    return true;
  }
  return false;
}

bool NeuroIntercept::neuro_decide(const MyState &state, Cmd & cmd) {
  //cmd.cmd_main.set_dash(20); return true;
  MyState next_state;
  Cmd_Main best_action;
  Value best_val;
  bool best_val_ok = false;

  /* if(check_onestep(cmd, state)) return true; */
  int steps;
  if(onetwostep_intercept->get_cmd(cmd, state, steps)){
    //TRAIN_PROT("NIcpt: onetwostep takes "<<steps<<" steps"<<endl);
    LOG_MOV(0,<<"NeuroIntercept: Onetwostep takes control");
    return true;
  }

  //if(op_mode == 1){
#if 1
  Value jnn = evaluate(state);
  jnn -= costs_success;
  LOG_MOV(0,<<"NeuroIntercept: Estimated cycles2intercept: "<<(jnn/costs_per_action)
          <<" Jnn: "<<jnn
          <<" dist2ball "<<state.my_pos.distance(state.ball_pos));
#endif
  //}

  itr_actions.reset();
  while ( Cmd_Main const* action = itr_actions.next() ) {
    Tools::get_successor_state(state, *action, next_state);
    Value val = evaluate( next_state );
    //LOG_MOV(1,<<"NeuroIntercept: Test Action "<<*action<<" J: "<<val);
    //TRAIN_PROT("Select action "<<info<<" successor "<<next_state<<" J: "<<val<<endl);
    if ( !best_val_ok || val < best_val ) {
      best_val = val;
      best_action = *action;
      best_val_ok = true;
    }
  }
  // TRAIN_PROT(endl);
  if ( best_val_ok )
    return cmd.cmd_main.clone( best_action );
  return false;
}

bool NeuroIntercept::is_failure( MyState const& state) {
  return false;
}

bool NeuroIntercept::is_success( MyState const& state) {
  if(state.my_pos.sqr_distance(state.ball_pos) <= SQUARE(ServerOptions::kickable_area - safety_margin))
    return true;
  return false;
}
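/* A compilation-excluded sketch of the greedy one-step lookahead that
   neuro_decide() implements above, assuming a generic forward model and value
   function; greedy_one_step, Model and EvalFn are illustrative placeholders,
   not this codebase's types. The learned net approximates J(s), the expected
   cost-to-intercept, so the policy simulates each candidate command for one
   cycle and commits to the action whose successor state J rates cheapest. */
#if 0
#include <vector>
#include <limits>

template <class State, class Action, class Model, class EvalFn>
static const Action* greedy_one_step(const State& s,
                                     const std::vector<Action>& actions,
                                     Model simulate, EvalFn J) {
  const Action* best = nullptr;
  double best_val = std::numeric_limits<double>::infinity();
  for (const Action& a : actions) {
    State next = simulate(s, a);   // one-cycle forward model (cf. Tools::get_successor_state)
    double val = J(next);          // estimated cost-to-intercept of the successor
    if (val < best_val) { best_val = val; best = &a; }
  }
  return best;                     // nullptr only if the action set is empty
}
#endif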
Value NeuroIntercept::evaluate( MyState const& state ) {
  if(is_failure(state))
    return costs_failure;
  if(is_success(state))
    return costs_success;
  if(learn12step == 2){
    Cmd dummy;
    int steps;
    if(onetwostep_intercept->get_cmd(dummy, state, steps)){ // I can do it in one or two steps!
      //TRAIN_PROT("Evaluate: I can do it in "<<steps<<" from state "<<state<<endl);
      return (costs_success + costs_per_action * steps);
    }
  }
  get_features(state, net->in_vec);
  net->forward_pass(net->in_vec, net->out_vec);
  return(net->out_vec[0]);
}

/***********************************************************************************/
/* LEARNING STUFF                                                                  */
/***********************************************************************************/

bool NeuroIntercept::learn(Cmd &cmd){
  char savename[50];
  TRAIN_PROT("\nStart Training "<<endl);
  LOG_ERR(0,<<"NeuroIntercept: It's time 2 learn");
  do_test();
  for(int i = 0; i < repeat_mainlearningloop; i++){
    TRAIN_PROT("Starting Training loop "<<i+1<<endl);
    generate_training_patterns();
    //print_memory();
    train_nn();
    train_loops_ctr ++;
    sprintf(savename, "%strained.%d.net", save_name_suffix, train_loops_ctr);
    net->save_net(savename);
    sprintf(savename, "%strained.current.net", save_name_suffix);
    net->save_net(savename); // save additionally
    do_test();
  }
  LOG_ERR(0,<<"NeuroIntercept: Finished training cycle");
  return false; // learning sets no cmd
}
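/* A compilation-excluded sketch of how a one-step Bellman target for a stored
   state could be formed, consistent with the cost convention visible in
   evaluate() above (costs_success once the ball is kickable, plus
   costs_per_action for every elapsed cycle); bellman_target and its parameters
   are illustrative and do not reflect the actual internals of train_nn() or
   generate_training_patterns(). */
#if 0
#include <vector>
#include <limits>
#include <algorithm>

// one-step Bellman backup: pay one cycle, then follow the current estimate J
template <class State, class Action, class Model, class EvalFn, class SuccessFn>
static double bellman_target(const State& s, const std::vector<Action>& actions,
                             Model simulate, EvalFn J, SuccessFn is_success,
                             double costs_success, double costs_per_action) {
  if (is_success(s))
    return costs_success;          // terminal: ball is kickable
  double best = std::numeric_limits<double>::infinity();
  for (const Action& a : actions)
    best = std::min(best, J(simulate(s, a)));
  return costs_per_action + best;
}
#endif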
void NeuroIntercept::check_cmd(Cmd &cmd) {
  MyState next_state, state;
  get_cur_state(state);
  Angle a = state.my_angle.get_value_0_p2PI();
  Angle na;
  Vector tmp = Vector(60, 60);
  Tools::model_cmd_main( state.my_pos, state.my_vel, a, tmp, tmp,
                         cmd.cmd_main, next_state.my_pos,
                         next_state.my_vel, na, tmp, tmp );
  if((next_state.my_pos.x >=   ServerOptions::pitch_length/2.) ||
     (next_state.my_pos.x <= - ServerOptions::pitch_length/2.) ||
     (next_state.my_pos.y <= - ServerOptions::pitch_width/2.) ||
     (next_state.my_pos.y >=   ServerOptions::pitch_width/2.)){
    if(cmd.cmd_main.get_type() == Cmd_Main::TYPE_TURN){
      LOG_MOV(0,<<"check turn cmd ");
    }
    LOG_MOV(0,<<"NeuroIntercept: Cmd would bring me out of pitch; Reset"
            <<" my next pos "<<next_state.my_pos );
    cmd.cmd_main.unset_lock();
    cmd.cmd_main.unset_cmd();
    cmd.cmd_main.set_turn(PI/2.);
  }
}

void NeuroIntercept::store_state(){
  //MyState state;
  if(state_memory_ctr >= STATE_MEMORY_SIZE_ICPT){
    LOG_MOV(0,<<"NeuroIntercept: Warning, state memory full");
    return;
  }
  // get_cur_state( state );
  // state_memory[state_memory_ctr].state.my_pos = state.my_pos;
  get_cur_state(state_memory[state_memory_ctr].state);
  state_memory_ctr ++;
  LOG_MOV(0,<<"NeuroIntercept: Store state "<<state_memory_ctr);
}

void NeuroIntercept::print_memory(){
#if 1
  for(int i = 0; i < state_memory_ctr; i++){
    TRAIN_PROT("NeuroIntercept: Stored state "<<i<<" : "
               << state_memory[i].state.my_pos
               << state_memory[i].state.my_vel
               << RAD2DEG(state_memory[i].state.my_angle.get_value())
               << state_memory[i].state.ball_pos
               << state_memory[i].state.ball_vel
               << RAD2DEG(state_memory[i].state.ball_vel.arg())
               <<endl );
  }
#endif
#if 0
  for(int i = 0; i < training_set.ctr; i++){
    LOG_MOV(1,<<"NeuroIntercept: Training pattern "<<i<<" : "
            <<training_set.input[i][0]<<" "
            <<training_set.input[i][1]<<" "
            <<training_set.input[i][2]<<" "
            <<training_set.input[i][3]<<" "
            <<training_set.input[i][4]<<" "
            <<training_set.input[i][5]<<" "
            <<" ---> "<<training_set.target[i]<<" ");
  }
#endif
}

void NeuroIntercept::generate_training_patterns(){
  float target_val;
  training_set.ctr = 0;
  Vector my_vel;
  Vector ball_vel;
  // Value y= 0;
  Value a;
  Angle ballvel_angle;
  //srand48(16514); // always draw the same samples

  if(use_regular_states){
    state_memory_ctr = 0;
    while(state_memory_ctr < num_trainstates){
      Value x = GET_RANDOM(-regular::x_range, regular::x_range);
      Value y = GET_RANDOM(-regular::x_range, regular::x_range);
      a = GET_RANDOM(0, PI);
      my_vel.x = GET_RANDOM(-regular::v_range, regular::v_range);
      my_vel.y = GET_RANDOM(-regular::v_range, regular::v_range);
      if(vball_angle_min == 0 && vball_angle_max == 0){ // standard case
        ball_vel.x = GET_RANDOM(0, regular::vball_range);
        ball_vel.y = 0;
      }
      else{
        do{
          ball_vel.x = GET_RANDOM(-regular::vball_range, regular::vball_range);
          ball_vel.y = GET_RANDOM(-regular::vball_range, regular::vball_range);
          ballvel_angle = Tools::get_angle_between_null_2PI(ball_vel.arg());
        } while(ballvel_angle < vball_angle_min || ballvel_angle > vball_angle_max);
      }
      if(SQUARE(ball_vel.x) + SQUARE(ball_vel.y) >= SQUARE(ServerOptions::ball_speed_max)){
        ball_vel.normalize(ServerOptions::ball_speed_max);
      }
      if(SQUARE(my_vel.x) + SQUARE(my_vel.y) >= SQUARE(MY_MAX_SPEED * ServerOptions::player_decay)){
        my_vel.normalize(MY_MAX_SPEED * ServerOptions::player_decay);
      }
      if((my_vel.norm() > 0.16) && // 1.0 * 0.4 * 0.4
         (Tools::get_abs_angle(my_vel.arg() - a) > 2/180. * PI)){
        continue;
      }
      Vector mypos = Vector(x, y);
      mypos.normalize(1.2);
      state_memory[state_memory_ctr].state.my_pos = mypos;
      state_memory[state_memory_ctr].state.my_vel = my_vel;
      state_memory[state_memory_ctr].state.my_angle = ANGLE(a);
      state_memory[state_memory_ctr].state.ball_pos = Vector(0, 0);