📄 neuro_intercept_bms.c
      state_memory[state_memory_ctr].state.ball_vel = ball_vel;
      if(prob_easy >= drand48()){
        //TRAIN_PROT("generated easy state: "<<state_memory[state_memory_ctr].state<<endl);
        state_memory_ctr ++;
      }
      state_memory[state_memory_ctr].state.my_pos = Vector(x,y);
      state_memory[state_memory_ctr].state.my_vel = my_vel;
      state_memory[state_memory_ctr].state.my_angle = ANGLE(a);
      state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
      state_memory[state_memory_ctr].state.ball_vel = ball_vel;
      //TRAIN_PROT("generated state: "<<state_memory[state_memory_ctr].state<<endl);
      state_memory_ctr ++;
      // else: do not consider !
    } // while state_memory_ctr < 10000
#if 0
    state_memory_ctr = 0; // simply overwrite memory
    for(Value x = -regular::x_range; x <= regular::x_range; x += regular::dx){
      for(Value y = -regular::x_range; y <= regular::x_range; y += regular::dx){
        for(Value vx = -regular::v_range; vx <= regular::v_range; vx += regular::dv){
          for(Value vy = -regular::v_range; vy <= regular::v_range; vy += regular::dv){
#if 0
            for(Value vx_ball = -regular::vball_range; vx_ball <= regular::vball_range; vx_ball += regular::dvball){
              for(Value vy_ball = -regular::vball_range; vy_ball <= regular::vball_range; vy_ball += regular::dvball){
#endif
            for(Value vx_ball = 0; vx_ball <= regular::vball_range; vx_ball += regular::dvball){
              // for(Value vy_ball = 0; vy_ball <= regular::vball_range; vy_ball += regular::dvball){
              for(Value vy_ball = 0; vy_ball <= 0; vy_ball += regular::dvball){
                for(int asteps = 0; asteps <= regular::angle_steps; asteps ++){
                  if(regular::prob < drand48())
                    continue;
                  a = asteps * PI/(float)(regular::angle_steps);
                  if(asteps == regular::angle_steps)
                    a = PI;
                  ball_vel = Vector(vx_ball, vy_ball);
                  if(SQUARE(ball_vel.x) + SQUARE(ball_vel.y) >= SQUARE(ServerOptions::ball_speed_max)){
                    ball_vel.normalize(ServerOptions::ball_speed_max);
                  }
                  my_vel = Vector(vx, vy);
                  if(SQUARE(my_vel.x) + SQUARE(my_vel.y) >= SQUARE(MY_MAX_SPEED * ServerOptions::player_decay)){
                    my_vel.normalize(MY_MAX_SPEED * ServerOptions::player_decay);
                  }
                  if((my_vel.norm() > 0.16) && // 1.0 * 0.4 * 0.4
                     (Tools::get_abs_angle(my_vel.arg() - a) > 5/180. * PI)){
#if 0
                    TRAIN_PROT("max speed "<<my_vel.norm()<<" and vel angle "
                               <<RAD2DEG(my_vel.arg())<<" differs from my angle "
                               <<RAD2DEG(a)<<endl);
#endif
                    continue;
                  }
#if 0
                  if(Vector(x,y).norm() > train_loops_ctr)
                    continue;
#endif
#if 0
                  if(x == 0){
                    state_memory[state_memory_ctr].state.my_pos = Vector(1.2, 0);
                    state_memory[state_memory_ctr].state.my_vel = my_vel;
                    state_memory[state_memory_ctr].state.my_angle = a;
                    state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                    state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                    state_memory_ctr ++; // else: do not consider !
                  }
                  if(y == 0){
                    state_memory[state_memory_ctr].state.my_pos = Vector(0, 1.2);
                    state_memory[state_memory_ctr].state.my_vel = my_vel;
                    state_memory[state_memory_ctr].state.my_angle = a;
                    state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                    state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                    state_memory_ctr ++; // else: do not consider !
                  }
#endif
                  Vector mypos = Vector(x,y);
                  mypos.normalize(1.2);
                  state_memory[state_memory_ctr].state.my_pos = mypos;
                  state_memory[state_memory_ctr].state.my_vel = my_vel;
                  state_memory[state_memory_ctr].state.my_angle = a;
                  state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                  state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                  if(0.1 >= drand48())
                    state_memory_ctr ++;
                  state_memory[state_memory_ctr].state.my_pos = Vector(x,y);
                  state_memory[state_memory_ctr].state.my_vel = my_vel;
                  state_memory[state_memory_ctr].state.my_angle = a;
                  state_memory[state_memory_ctr].state.ball_pos = Vector(0,0);
                  state_memory[state_memory_ctr].state.ball_vel = ball_vel;
                  state_memory_ctr ++; // else: do not consider !
                }
              }
            }
          }
        }
      }
    }
#endif
  } // if regular states

  for(int i = 0; i < state_memory_ctr; i++){
    if(is_success(state_memory[i].state)){
      target_val = costs_success;
    }
    else{
      MyState successor_state;
      target_val = costs_per_action
                   + get_value_of_best_successor(state_memory[i].state, successor_state);
#if 0 // Residual Gradient
      get_features(successor_state, training_set.input[training_set.ctr]);
      float tmp_target = evaluate(state_memory[i].state) - costs_per_action;
      training_set.target[training_set.ctr] = tmp_target;
      training_set.ctr ++;
#endif
    }
    get_features(state_memory[i].state, training_set.input[training_set.ctr]);
    if((adjust_targets == true) && (target_val > (train_loops_ctr+1) * costs_per_action))
      target_val = (train_loops_ctr+1) * costs_per_action;
    if(target_val > 0.9)
      target_val = 0.9;
    training_set.target[training_set.ctr] = target_val;
    // if(target_val >= .99 * costs_per_action)
    training_set.ctr ++;
  }
  TRAIN_PROT("Generated n training pats: "<<training_set.ctr
             <<" No. of regular states "<<state_memory_ctr<<endl);
}
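/* For reference: the loop above performs one sweep of value iteration over
   the sampled states. Below is a minimal sketch of the backup it implements,
   using this file's is_success()/get_value_of_best_successor() and its cost
   constants; the helper name value_iteration_target is hypothetical and not
   part of the class. */
#if 0
// hypothetical helper: distilled form of the target computation above
Value NeuroIntercept::value_iteration_target(const MyState &state) {
  if(is_success(state))            // absorbing success state
    return costs_success;
  MyState successor;               // greedy one-step lookahead over the action set
  Value target = costs_per_action + get_value_of_best_successor(state, successor);
  if(target > 0.9)                 // clip targets to the net's output range, as above
    target = 0.9;
  return target;
}
#endif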
#if 0
void NeuroIntercept::Tools::get_successor_state(MyState const &state, Cmd_Main const &cmd,
                                                MyState &next_state) {
  Angle na;
  Angle a = state.my_angle.get_value_0_p2PI();
  Tools::model_cmd_main(state.my_pos, state.my_vel, a, state.ball_pos, state.ball_vel,
                        cmd,
                        next_state.my_pos, next_state.my_vel, na,
                        next_state.ball_pos, next_state.ball_vel);
  next_state.my_angle = na;
  if((next_state.my_vel.norm()/ServerOptions::player_decay) > 1.08 * MY_MAX_SPEED){
    LOG_ERR(0,<<"Max speed too high "<<next_state.my_vel.norm()/ServerOptions::player_decay);
    LOG_ERR(0,<<"Position change: "<<(next_state.my_pos - state.my_pos).norm());
  }
}
#endif

Value NeuroIntercept::get_value_of_best_successor(MyState const &state,
                                                  MyState &successor_state) {
  MyState next_state;
  Value best_val;
  bool best_val_ok = false;
  if(learn12step == 1){
    Cmd dummy;
    int steps;
    if(onetwostep_intercept->get_cmd(dummy, state, steps)){
      // I can do it in 1 or 2 steps!
      //TRAIN_PROT("Get Value: I can do it in "<<steps<<" from here "<<state<<endl);
      return (costs_success + costs_per_action * (steps-1));
    }
  }
  if(do_pi){ // policy iteration: just take neuro intercept
    Cmd cmd;
    intercept->get_cmd(cmd, state.my_pos, state.my_vel,
                       state.my_angle, state.ball_pos, state.ball_vel);
    Tools::get_successor_state(state, cmd.cmd_main, next_state);
    successor_state = next_state;
    return (evaluate(next_state));
  }
  itr_actions.reset();
  Value val;
  while(Cmd_Main const* action = itr_actions.next()){
    Tools::get_successor_state(state, *action, next_state);
    val = evaluate(next_state);
    if(!best_val_ok || val < best_val){
      best_val = val;
      successor_state = next_state;
      best_val_ok = true;
    }
  }
  if(best_val_ok)
    return best_val;
  LOG_ERR(0,<<"NeuroIntercept: Error: did not find best successor state");
  LOG_MOV(0,<<"NeuroIntercept: Error: did not find best successor state");
  return -1; // error
}
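/* Note on the search above: values are costs-to-go, so the best successor is
   the MINIMUM of evaluate() over the discrete action set. The 1/2-step
   shortcut returns costs_success plus (steps-1) further action costs directly,
   without consulting the net, and the do_pi branch instead evaluates the
   single successor chosen by the current intercept policy. */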
//TRAIN_PROT("Get Value: I can do it in "<<steps<<" from here "<<state<<endl); return(costs_success + costs_per_action * (steps-1)); } } if(do_pi){ // policy iteration: just take neuro intercept Cmd cmd; intercept->get_cmd(cmd,state.my_pos,state.my_vel, state.my_angle,state.ball_pos,state.ball_vel); Tools::get_successor_state(state,cmd.cmd_main ,next_state); successor_state = next_state; return (evaluate(next_state)); } itr_actions.reset(); Value val; while ( Cmd_Main const* action = itr_actions.next() ) { Tools::get_successor_state(state,*action ,next_state); val= evaluate( next_state ); if ( !best_val_ok || val < best_val ) { best_val= val; successor_state = next_state; best_val_ok= true; } } if ( best_val_ok ) return best_val; LOG_ERR(0,<<"NeuroIntercept: Error: did not find best successor state"); LOG_MOV(0,<<"NeuroIntercept: Error: did not find best successor state"); return -1; // error}void NeuroIntercept::train_nn(){ float error,tss; float pattern_stress; int step1_ctr = 0; int step2_ctr = 0; if(init_mode == 0){ net->init_weights(0,init_param); net->set_update_f(1,learn_params); // 1 is Rprop } else if(init_mode == 1){ net->init_weights(2,init_param); } else if(init_mode == 2){ // net->init_weights(2,init_param); net->init_weights(2,init_param); learn_params[0]= learn_param; net->set_update_f(1,learn_params); // 1 is Rprop } net->save_net("init.net"); for(int n=0;n<num_epochs; n++){ tss = 0.0; step1_ctr = 0; step2_ctr = 0; for(int p=0;p<training_set.ctr;p++){ net->forward_pass(training_set.input[p],net->out_vec); pattern_stress = 1.0; for(int i=0;i<net->topo_data.out_count;i++){ if(training_set.target[p] == costs_success){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif } if(fabs(training_set.target[p]-costs_per_action)<0.0001){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif step1_ctr ++; pattern_stress = stress; } if(fabs(training_set.target[p]-2*costs_per_action)<0.0001){#if 0 TRAIN_PROT("Special pattern: "<<p<<" feature 0: " <<training_set.input[p][0]<<" target " <<training_set.target[p]<<endl);#endif step2_ctr ++; pattern_stress = stress; }#if 0 if(n==num_epochs-1){ TRAIN_PROT("pattern: "<<p<<" " <<training_set.input[p][0]<<" " <<training_set.input[p][1]<<" " <<training_set.input[p][2]<<" " <<training_set.input[p][3]<<" " <<training_set.input[p][4]<<" " <<training_set.input[p][5]<<" " <<" output "<<net->out_vec[0] <<" target "<<training_set.target[p]<<endl); }#endif error = net->out_vec[i] = net->out_vec[i] - training_set.target[p]; net->out_vec[i] *= pattern_stress; /* out_vec := dE/do = (o-t) */ tss += error * error; } net->backward_pass(net->out_vec,net->in_vec); } // TRAIN_PROT(endl); net->update_weights(); /* learn by epoch */ LOG_MOV(0,<<"NeuroIntercept: epoch "<<n<<" TSS: "<<tss); LOG_ERR(0,<<"NeuroIntercept: epoch "<<n<<" TSS: "<<tss<<" per pattern "<<tss/float(training_set.ctr)); if(n==0 || (n%20) == 0 || n==num_epochs -1) TRAIN_PROT("Train: Epoch "<<n<<" TSS: "<<tss<<" per pattern "<<tss/float(training_set.ctr)<<endl); } TRAIN_PROT("Counted "<<step1_ctr<<" 1 step patterns and "<<step2_ctr<<" two step pats"<<endl);}void NeuroIntercept::do_sequence(const MyState & initial_state, Value *result, const int N){ MyState cur_state, next_state; Cmd cmd; int n;#define SEQ_VERBOSE 0 int imax; if(do_reference) imax = 2; else imax = 1; for(int i=0;i<imax;i++){ result[i] = 0; for(int k=0;k<num_testrepeats;k++){ 
void NeuroIntercept::do_sequence(const MyState &initial_state, Value *result, const int N){
  MyState cur_state, next_state;
  Cmd cmd;
  int n;
#define SEQ_VERBOSE 0
  int imax;
  if(do_reference)
    imax = 2;
  else
    imax = 1;
  for(int i = 0; i < imax; i++){
    result[i] = 0;
    for(int k = 0; k < num_testrepeats; k++){
      n = 0;
      cur_state = initial_state;
      while(n < N && (cur_state.my_pos.sqr_distance(cur_state.ball_pos)
                      > SQUARE(ServerOptions::kickable_area))){
        cmd.cmd_main.unset_lock();
        if(i == 0)
          neuro_decide(cur_state, cmd);
        else{
          if(cur_state.my_pos.distance(cur_state.ball_pos) > test::x_range){
            // stop sequence
            //TRAIN_PROT("Do Sequence: ball too far: "<<cur_state<<endl);
            result[i] = N;
            return;
          }
          intercept->get_cmd(cmd, cur_state.my_pos, cur_state.my_vel,
                             cur_state.my_angle, cur_state.ball_pos, cur_state.ball_vel);
        }
        Tools::get_successor_state(cur_state, cmd.cmd_main, next_state, do_stochastic);
#if SEQ_VERBOSE
        TRAIN_PROT("t:"<<n<<" controller "<<i<<" state(t) "<<cur_state<<" cmd: "<<cmd.cmd_main<<endl);
        //<<" state(t+1) "<<next_state<<endl);
#endif
        cur_state = next_state;
        n++;
      }
#if SEQ_VERBOSE
      TRAIN_PROT(endl);
#endif
      result[i] += n;
    } // for k
    result[i] /= (float)num_testrepeats;
  }
}

void NeuroIntercept::do_test(){
  Value average[2];
  int won[2];
  TRAIN_PROT("\nDo Test:"<<endl);
#if 0
  for(int i = 0; i < test_memory_ctr; i++){
    Value J = evaluate(test_memory[i].state);
    TRAIN_PROT("Eval: "<<test_memory[i].state.my_pos<<test_memory[i].state.my_vel
               <<test_memory[i].state.ball_pos
               <<test_memory[i].state.ball_vel<<RAD2DEG(test_memory[i].state.my_angle.get_value())
               <<" J: "<<J<<" estimated cycles2go "<<J/costs_per_action<<endl);
  }
#endif
  for(int i = 0; i < test_memory_ctr; i++){
    //TRAIN_PROT("do sequence "<<i<<" "<<test_memory[i].state<<endl);
    if(test_memory_result[i][1] < max_sequence_len)
      do_sequence(test_memory[i].state, test_memory_result[i], max_sequence_len);
    else
      // TRAIN_PROT("Test Seq "<<i<<" not valid, Sung's intercept needs max. "<<endl);
      ;
  }
  do_reference = false; // do it only once
  average[0] = average[1] = 0.0;
  won[0] = 0;
  won[1] = 0;
  int i;
  for(i = 0; i < test_memory_ctr; i++){
    if(test_memory_result[i][1] == max_sequence_len)
      continue;
    //TRAIN_PROT("i: "<<i<<endl);
    average[0] += test_memory_result[i][0];
    average[1] += test_memory_result[i][1];
    Value J = evaluate(test_memory[i].state);
    if(test_memory_result[i][0] < test_memory_result[i][1]){
      won[0] ++;
      //cout<<"\nWon 0 incremented: "<<won[0]<<endl;
      TRAIN_PROT("+ Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
    else if(test_memory_result[i][0] > test_memory_result[i][1]){
      won[1] ++;
      TRAIN_PROT("- Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
    else{
      TRAIN_PROT("  Sequence "<<i<<" "<<test_memory[i].state
                 <<" J: "<<J<<" ("<<(J/costs_per_action)<<") "
                 <<" neuro: "<<test_memory_result[i][0]
                 <<" classic Sung "<<test_memory_result[i][1]<<endl);
    }
  }
  TRAIN_PROT(endl);
  if(average[0] == average[1]){
    TRAIN_PROT("WOOHOO, Neuro draws level!"<<endl);
    statistics::total_draws ++;
  }
  else if(average[0] < average[1]){
    TRAIN_PROT("WOOHOO, GREAT, Neuro wins!"<<endl);
    statistics::total_wins ++;
  }
  TRAIN_PROT("Average: neuro: "<<average[0]<<" won "<<won[0]
             <<" classic Sung "<<average[1]<<" won "<<won[1]
             <<" total wins "<<statistics::total_wins
             <<" total draws "<<statistics::total_draws
             <<" total sum "<<statistics::total_draws + statistics::total_wins
             <<endl);
  TRAIN_RESULT(train_loops_ctr<<" "<<average[0]<<" "<<won[0]
               <<" "<<average[1]<<" "<<won[1]
               <<" "<<statistics::total_wins
               <<" "<<statistics::total_draws
               <<" "<<statistics::total_draws + statistics::total_wins
               <<endl);
  resultfile.flush();
}
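/* Note on do_test above: each stored test state is rolled out with both the
   neural controller (result[0]) and the classic "Sung" intercept (result[1]).
   Each result is the number of cycles to intercept, averaged over
   num_testrepeats runs, and states the reference controller cannot solve
   within max_sequence_len are excluded from the comparison. */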
void NeuroIntercept::generate_test_state(const Vector mypos, const Vector myvel,
                                         const Vector ballpos, const Vector ballvel,
                                         const Angle myangle){
  if(test_memory_ctr >= TEST_MEMORY_SIZE_ICPT){
    LOG_MOV(0,<<"NeuroIntercept: Warning Test memory full");
    return;
  }
  test_memory[test_memory_ctr].state.my_pos = mypos;
  test_memory[test_memory_ctr].state.my_vel = myvel;
  test_memory[test_memory_ctr].state.ball_pos = ballpos;
  test_memory[test_memory_ctr].state.ball_vel = ballvel;
  test_memory[test_memory_ctr].state.my_angle = ANGLE(myangle);
  test_memory_ctr ++;
}
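/* Example of seeding the test memory (all values hypothetical, chosen only
   for illustration): a stationary player 5 m behind a ball rolling away
   along the x axis. */
#if 0
generate_test_state(Vector(-5.0, 0.0),  // player position (hypothetical)
                    Vector(0.0, 0.0),   // player velocity (hypothetical)
                    Vector(0.0, 0.0),   // ball position (hypothetical)
                    Vector(1.0, 0.0),   // ball velocity (hypothetical)
                    0.0);               // body angle in rad, wrapped via ANGLE() inside
#endif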