📄 neuro_intercept_bms.c
字号:
/*
Brainstormers 2D (Soccer Simulation League 2D)
PUBLIC SOURCE CODE RELEASE 2005
Copyright (C) 1998-2005 Neuroinformatics Group,
                        University of Osnabrueck, Germany

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include "neuro_intercept_bms.h"
#include "log_macros.h"
#include "ws_info.h"
#include "tools.h"
#include "valueparser.h"
#include "intercept2.h" // new go2pos

/* ------------------------------------------------------------------ */
/* Definitions of the static members of NeuroIntercept.               */
/* Members without an initializer are zero-initialized.               */
/* ------------------------------------------------------------------ */
Net * NeuroIntercept::net;
bool NeuroIntercept::initialized= false;
int NeuroIntercept::op_mode;
bool NeuroIntercept::do_stochastic;
int NeuroIntercept::learn12step;
int NeuroIntercept::init_mode;
int NeuroIntercept::max_sequence_len;
long NeuroIntercept::num_trainstates;
long NeuroIntercept::num_teststates;
Value NeuroIntercept::init_param;
Value NeuroIntercept::learn_param;
Value NeuroIntercept::prob_easy;
Value NeuroIntercept::vball_angle_min;
Value NeuroIntercept::vball_angle_max;
int NeuroIntercept::num_stored;
int NeuroIntercept::num_epochs;
int NeuroIntercept::store_per_cycle;
int NeuroIntercept::repeat_mainlearningloop;
int NeuroIntercept::state_memory_ctr;
int NeuroIntercept::test_memory_ctr;
int NeuroIntercept::num_testrepeats;
int NeuroIntercept::neuro_maxcycles;
Value NeuroIntercept::safety_margin;
Value NeuroIntercept::costs_per_action;
Value NeuroIntercept::stress;
bool NeuroIntercept::use_regular_states;
bool NeuroIntercept::do_net_init;
bool NeuroIntercept::do_pi;
bool NeuroIntercept::do_reference;
bool NeuroIntercept::adjust_targets;

/* Write XXX to the training protocol (stdout). */
#define TRAIN_PROT(XXX)(cout<<XXX)
/* Write XXX to the result file opened by the constructor. */
#define TRAIN_RESULT(XXX)(resultfile<<XXX)
/* Draw a value between X and Y using drand48(). */
#define GET_RANDOM(X,Y)((X)+drand48() *((Y)-(X)))

#define MY_MAX_SPEED 1.0 // this is the maximum speed for a homogeneous player!

#if 0
#define RANGE 10
#define DX .5
#define VRANGE .4
#define DV .2
#define VRANGE_ball 1.0
#define DV_ball .5
#define DA PI/8.
#endif

/* Sampling grid for the regular training states (defaults set in the constructor). */
namespace regular{
  float x_range, dx, v_range, dv, vball_range, dvball;
  int angle_steps;
  float prob;
}

/* Sampling ranges / grid for the test states (defaults set in the constructor). */
namespace test{
  float x_range, dx, v_range, dv, vball_range, dvball;
  int angle_steps;
  float prob;
}

/* Counters reset to zero by the constructor. */
namespace statistics{
  long total_wins, total_draws;
}

/**
 * Destructor.
 *
 * In learning mode (op_mode == 1) releases the buffers that the constructor
 * allocated: state memory, test memory and the training set. Always prints a
 * de-initialization message to stdout.
 *
 * NOTE(review): `intercept` and `onetwostep_intercept` are allocated in the
 * constructor and are not released here -- confirm whether they are freed
 * elsewhere.
 */
NeuroIntercept::~NeuroIntercept(){
  if(op_mode == 1){ // do learning
    delete [] state_memory;
    delete [] test_memory;
    for(int i= 0;i<STATE_MEMORY_SIZE_ICPT;i++){
      delete [] training_set.input[i];
    }
    delete [] training_set.input;
    delete [] training_set.target;
  }
  cout<<"NeuroIntercept: DeInit "<<endl;
}

/**
 * Constructor.
 *
 * 1. Creates the two helper behaviours (InterceptBall, OneTwoStep_Intercept).
 * 2. Sets the built-in defaults of all parameters.
 * 3. Overrides them with the values found in the "NeuroIntercept_bms" block
 *    of the policy configuration (CommandLineOptions::policy_conf).
 * 4. In learning mode (op_mode == 1): configures the net's update function,
 *    allocates state/test memory and the training set, and generates the
 *    test states.
 * 5. Prints the parameters that were read and opens the result file
 *    "<save_name_suffix>train.res".
 */
NeuroIntercept::NeuroIntercept(){
  intercept = new InterceptBall;
  onetwostep_intercept = new OneTwoStep_Intercept;

  /* ---- built-in defaults ---- */
  op_mode = 0;
  init_mode = 0;
  num_stored = 0;
  state_memory_ctr = 0;
  test_memory_ctr = 0;
  do_net_init = false;
  stress = 1.0;
  do_pi = false;
  adjust_targets = true;
  srand48(16514); // always draw the same samples
  statistics::total_wins = 0;
  statistics::total_draws = 0;
  sprintf(save_name_suffix,"%s","");

  regular::x_range = 10;
  regular::dx = .5;
  regular::v_range = .4;
  regular::dv = .2;
  regular::vball_range = 0.;
  regular::dvball = .5;
  regular::angle_steps= 8;
  regular::prob=1.0;

  test::x_range = 8;
  test::dx = 4;
  test::v_range = .4;
  test::dv = .4;
  test::vball_range = .5;
  test::dvball = .5;
  test::angle_steps= 3;
  test::prob=1.0;

  num_epochs= 50;
  store_per_cycle = 100;
  repeat_mainlearningloop = 10000;
  safety_margin = 0.15;
  costs_per_action = 0.02;
  use_regular_states = true;
  learn_params[0] = .1;
  learn_params[1] = 1.0;
  learn_params[2] = 0.0;
  learn_params[3] = 0.0;
  init_param = 0.5;
  learn_param = 0.0001;
  train_loops_ctr = 0;
  num_trainstates = 10000;
  num_teststates = 30;
  prob_easy = 0.1;
  learn12step = 0;
  vball_angle_min = 0.0;
  vball_angle_max = 0.0;
  do_stochastic = false;
  num_testrepeats = 1;
  do_reference = true;
  neuro_maxcycles = 100;

  /* ---- overrides from the configuration ---- */
  ValueParser vp(CommandLineOptions::policy_conf,"NeuroIntercept_bms");
  //vp.set_verbose(true);
  vp.get("do_pi", do_pi);
  vp.get("adjust_targets", adjust_targets);
  vp.get("learn12step", learn12step);
  vp.get("num_trainstates", num_trainstates);
  vp.get("num_teststates", num_teststates);
  vp.get("neuro_maxcycles", neuro_maxcycles);
  vp.get("num_testrepeats", num_testrepeats);
  vp.get("max_sequence_len", max_sequence_len);
  vp.get("prob_easy", prob_easy);
  vp.get("vball_angle_min", vball_angle_min);
  vp.get("vball_angle_max", vball_angle_max);
  vp.get("init_mode", init_mode);
  vp.get("init_param", init_param);
  vp.get("learn_param", learn_param);
  vp.get("stress", stress);
  vp.get("op_mode", op_mode);
  vp.get("do_stochastic", do_stochastic);
  vp.get("do_net_init", do_net_init);
  vp.get("num_epochs", num_epochs);
  vp.get("safety_margin", safety_margin);
  vp.get("costs_per_action", costs_per_action);
  vp.get("repeat_mainlearningloop", repeat_mainlearningloop);
  vp.get("train_loops_ctr", train_loops_ctr);
  vp.get("xrange", regular::x_range);
  vp.get("dx", regular::dx);
  vp.get("vrange", regular::v_range);
  vp.get("dv", regular::dv);
  vp.get("vballrange", regular::vball_range);
  vp.get("dvball", regular::dvball);
  vp.get("angle_steps", regular::angle_steps);
  vp.get("test::xrange", test::x_range);
  vp.get("test::dx", test::dx);
  vp.get("test::vrange", test::v_range);
  vp.get("test::dv", test::dv);
  vp.get("test::vballrange", test::vball_range);
  vp.get("test::dvball", test::dvball);
  vp.get("test::angle_steps", test::angle_steps);
  vp.get("prob", regular::prob);
  vp.get("test::prob", test::prob);
  vp.get("save_name_suffix", save_name_suffix,500);

  if(op_mode == 1){ // do learning
    if(init_mode == 1)
      learn_params[0] = .0001;
    if(initialized){ // the net only exists after a successful init()
      if(do_net_init)
        net->init_weights(0,.5);
      net->set_update_f(1,learn_params); // 1 is Rprop
    }
    state_memory = new MyStateMemoryEntry[STATE_MEMORY_SIZE_ICPT];
    test_memory = new MyStateMemoryEntry[TEST_MEMORY_SIZE_ICPT];
    // Array of row pointers. The former spelling `new (float*)[N]` is not an
    // array allocation in standard C++ (it parses as `(new (float*))[N]`) and
    // is rejected by current compilers.
    training_set.input = new float*[STATE_MEMORY_SIZE_ICPT];
    training_set.target = new float[STATE_MEMORY_SIZE_ICPT];
    for(int i= 0;i<STATE_MEMORY_SIZE_ICPT;i++){
      training_set.input[i] = new float[NUM_ICPT_FEATURES];
    }

    // generate test memory
#if 0
    generate_test_state(Vector(3,0), Vector(0,0), Vector(0,0), Vector(0,0), 0.8 *PI);
    generate_test_state(Vector(5,0), Vector(0,0), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(5,0), Vector(.4,0), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(5,0), Vector(-.4,0), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(5,0), Vector(.2,2), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(5,0), Vector(-.2,2), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(5,0), Vector(.2,-2), Vector(0,0), Vector(0,0), 0.7 *PI);
    generate_test_state(Vector(10,0), Vector(0,0), Vector(0,0), Vector(0,0), 0.6 *PI);
#endif
    // generate_test_state(Vector(5,0), Vector(-.4,0), Vector(0,0), Vector(0,0), 0.);
    //generate_test_state(Vector(5,0), Vector(.4,0), Vector(0,0), Vector(0,0), 0.);
    //generate_test_state(Vector(0,0), Vector(0,0), Vector(15.,4.), Vector(-2.7,0), 0.25 *PI);
    //generate_test_state(Vector(0,0), Vector(0,0), Vector(12.,-1.), Vector(-1.9,1.1), 0.25 *PI);
    //generate_test_state(Vector(0,0), Vector(0,0), Vector(18.,0.), Vector(0.263312,0.3761128), 0.125 *PI);
    generate_test();
  }

#if 1
  TRAIN_PROT("NeuroIntercept_bms: Read Parameters. "<<endl
             <<"op_mode "<<op_mode
             <<" do_net_init "<<do_net_init
             <<" num_epochs "<<num_epochs
             <<" safety_margin "<<safety_margin
             <<" costs_per_action "<<costs_per_action<<endl
             <<"save name suffix "<<save_name_suffix<<endl
             <<"x_range "<<regular::x_range
             <<" dx "<<regular::dx
             <<" v_range "<<regular::v_range
             <<" dv "<<regular::dv
             <<endl
             <<"vball_range "<<regular::vball_range
             <<" dvball "<<regular::dvball
             <<" angle_steps "<<regular::angle_steps<<endl
             <<endl);
#endif

  // The suffix is read with a limit of 500 characters above, so the
  // 600-byte buffer is large enough for suffix + "train.res".
  char resfilename[600];
  sprintf(resfilename,"%strain.res",save_name_suffix);
  resultfile.open(resfilename);
  training_set.ctr = 0;
}

/**
 * Fills the test memory by calling generate_test_state() repeatedly.
 *
 * With test_mode == 1 (the only mode currently compiled in as active) random
 * states are drawn until test_memory_ctr reaches num_teststates. The grid
 * enumeration after the early return belongs to the earlier test_mode 0 and
 * is never executed while test_mode is the constant 1.
 *
 * The order of the GET_RANDOM / drand48() calls is significant: the generator
 * is seeded with a fixed value in the constructor so that the same samples
 * are drawn on every run.
 */
void NeuroIntercept::generate_test(){
  Vector my_vel;
  Vector ball_vel;
  // Value y= 0;
  Value a;
  Angle ballvel_angle;
  const int test_mode = 1; // earlier version was 0
  //const int num_teststates = 30;

  if(test_mode == 1){
    while(test_memory_ctr < num_teststates){
      Value x=GET_RANDOM(-test::x_range,test::x_range);
      Value y=GET_RANDOM(-test::x_range,test::x_range);
      a = GET_RANDOM(0,PI);
      my_vel.x =GET_RANDOM(-test::v_range,test::v_range);
      my_vel.y =GET_RANDOM(-test::v_range,test::v_range);

      if(vball_angle_min == 0 && vball_angle_max == 0){ // standard case
        ball_vel.x =GET_RANDOM(0,test::vball_range);
        ball_vel.y =0;
      }
      else{
        // Rejection sampling: redraw until the direction of the ball velocity
        // lies within [vball_angle_min, vball_angle_max].
        do{
          ball_vel.x =GET_RANDOM(-test::vball_range,test::vball_range);
          ball_vel.y =GET_RANDOM(-test::vball_range,test::vball_range);
          ballvel_angle = Tools::get_angle_between_null_2PI(ball_vel.arg());
        } while(ballvel_angle<vball_angle_min || ballvel_angle > vball_angle_max);
      }

      // Limit both velocities to their maximum magnitude.
      if(SQUARE(ball_vel.x) + SQUARE(ball_vel.y) >= SQUARE(ServerOptions::ball_speed_max)){
        ball_vel.normalize(ServerOptions::ball_speed_max);
      }
      if(SQUARE(my_vel.x)+SQUARE(my_vel.y)>=SQUARE(MY_MAX_SPEED *ServerOptions::player_decay)){
        my_vel.normalize(MY_MAX_SPEED *ServerOptions::player_decay);
      }

      // Reject samples whose player speed exceeds 0.16 while the velocity
      // direction differs from the body angle `a` by more than 2 degrees.
      if((my_vel.norm() > 0.16) && // 1.0 * 0.4 * 0.4
         (Tools::get_abs_angle(my_vel.arg() - a) > 2/180. *PI)){
        continue;
      }

      // Positions closer than 1.2 to the origin are moved out to distance 1.2.
      Vector mypos = Vector(x,y);
      if(mypos.norm() < 1.2)
        mypos.normalize(1.2);
      generate_test_state(mypos,my_vel, Vector(0,0),ball_vel,a);
    }
    return;
  } //test_mode == 1

  /* ---- test_mode 0: enumerate a regular grid of states ---- */
  for(Value x = -test::x_range; x<= test::x_range; x += test::dx){
    for(Value y = -test::x_range; y<= test::x_range; y += test::dx){
      for(Value vx = -test::v_range; vx<= test::v_range; vx += test::dv){
        for(Value vy = -test::v_range; vy<= test::v_range; vy += test::dv){
#if 0
          for(Value vx_ball = -test::vball_range; vx_ball<= test::vball_range; vx_ball += test::dvball){
            for(Value vy_ball = -test::vball_range; vy_ball<= test::vball_range; vy_ball += test::dvball){
#endif
          for(Value vx_ball = 0; vx_ball<= test::vball_range; vx_ball += test::dvball){
            //for(Value vy_ball =0; vy_ball<= test::vball_range; vy_ball += test::dvball){
            for(Value vy_ball =0; vy_ball<=0; vy_ball += test::dvball){
              for(int asteps = 0; asteps<= test::angle_steps; asteps ++ ){
                a = asteps * PI/(float)(test::angle_steps);
                if(asteps ==test::angle_steps)
                  a = PI;

                ball_vel = Vector(vx_ball,vy_ball);
                if(SQUARE(ball_vel.x) + SQUARE(ball_vel.y) >= SQUARE(ServerOptions::ball_speed_max)){
                  ball_vel.normalize(ServerOptions::ball_speed_max);
                }
                my_vel = Vector(vx,vy);
                if(SQUARE(my_vel.x)+SQUARE(my_vel.y)>=SQUARE(MY_MAX_SPEED *ServerOptions::player_decay)){
                  my_vel.normalize(MY_MAX_SPEED *ServerOptions::player_decay);
                }

                if((my_vel.norm() >= 0.15) &&
                   (Tools::get_abs_angle(my_vel.arg() - a) > 2/180. *PI)){
#if 0
                  TRAIN_PROT("max speed "<<my_vel.norm()<<" and vel angle "
                             <<RAD2DEG(my_vel.arg())<<" differs from my angle "
                             <<RAD2DEG(a)<<endl);
#endif
                  continue;
                }

                if(test::prob >= drand48()){
                  // x == 0 is replaced by x = 1.2.
                  if(x==0)
                    generate_test_state(Vector(1.2,y),my_vel, Vector(0,0),ball_vel,a);
                  else
                    generate_test_state(Vector(x,y),my_vel, Vector(0,0),ball_vel,a);
                } // take test state
              }
            }
          }
        }
      }
    }
  }
}

/**
 * One-time static initialization: initializes InterceptBall and loads the
 * neural network.
 *
 * The net file name defaults to "train.net" and can be overridden with the
 * "load_net" entry of the "NeuroIntercept_bms" configuration block.
 *
 * @param conf_file  forwarded to InterceptBall::init
 * @param argc       forwarded to InterceptBall::init
 * @param argv       forwarded to InterceptBall::init
 * @return true if already initialized or if the net was loaded. If the net
 *         file cannot be loaded the process is terminated via exit().
 *
 * NOTE(review): the failure path calls exit(0), i.e. reports success to the
 * calling process, and the `return false` after it is unreachable -- confirm
 * whether a non-zero exit status is wanted.
 */
bool NeuroIntercept::init(char const * conf_file, int argc, char const* const* argv) {
  if(initialized)
    return true; // only initialize once...
  initialized= true;

  InterceptBall::init(conf_file,argc,argv);

  net= new Net();

  /* load neural network */
  // char netname[] = "./data/nets_neuro_intercept/intercept_5_10.net";
  char netname[500];
  sprintf(netname,"train.net");
  ValueParser vp(CommandLineOptions::policy_conf,"NeuroIntercept_bms");
  //vp.set_verbose(true);
  vp.get("load_net", netname,500);

  if(net->load_net(netname) == FILE_ERROR){
    ERROR_OUT << "NeuroIntercept_bms: No net-file found "<<netname<<" - stop loading\n";
    initialized = false;
    exit(0);
    return false;
  }
  cout<<"\nNeuroIntercept_bms successfully initialized. Net used: "<<netname<<endl;
  return true;
}

/**
 * Returns the cycle count reported by the interception behaviour that
 * belongs to the current op_mode.
 *
 * - op_mode == 2: the value from OneTwoStep_Intercept if its get_cmd()
 *   succeeds, otherwise the value from InterceptBall.
 * - op_mode == 3: the value from Intercept2::intercept for the current ball
 *   and own player.
 * - any other op_mode: -1.
 */
int NeuroIntercept::get_steps2intercept(){
  int num_cycles;
  Cmd intercept_cmd, cmd;

  if(op_mode == 2){
    if(onetwostep_intercept->get_cmd(cmd,num_cycles) == true)
      return num_cycles;
    intercept->get_cmd(intercept_cmd, num_cycles); // determine the number of cycles2go
    return num_cycles;
  }

  if(op_mode == 3){
    Intercept2 inter;
    inter.intercept( WSinfo::ball->pos, WSinfo::ball->vel, WSinfo::me, num_cycles, cmd.cmd_main);
    return num_cycles;
  }

  return -1;
}

bool NeuroIntercept::get_cmd(Cmd & cmd) {
  if ( ! initialized ) {
    ERROR_OUT << "NeuroIntercept not intialized";
    return false;
  }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -