📄 p4_sock_cr.c
字号:
#include "p4.h"
#include "p4_sys.h"

#ifdef SCYLD_BEOWULF
#include <sys/bproc.h>
#endif

/*
 * Bring up the remote processes described by entries 1 .. num_entries-1 of
 * the procgroup and exchange startup information with each of them through
 * net_slave_info().
 *
 * Two modes, selected by the global execer_starting_remotes:
 *   - set:   the remotes are started elsewhere; we publish our listener port
 *            with put_execer_port(), accept one connection per remote, and
 *            use the rm_num each remote reports in its handshake to decide
 *            which procgroup entry the connection belongs to.
 *   - unset: we start each remote ourselves with net_create_slave().
 *
 * Returns 0 in the normal case.  When built with SCYLD_BEOWULF, returns -2
 * if net_create_slave() returned -2 (the original comment describes this as
 * "we are an rforked child"); in that case serv_fd is not closed here.
 */
int create_remote_processes(struct p4_procgroup *pg)
{
    struct p4_procgroup_entry *pe;
    struct net_initial_handshake hs;
    int i, serv_port, serv_fd, rm_num;
    int rm_fds[P4_MAXPROCS];
    /* Initialised so the SCYLD_BEOWULF test below never reads an
       indeterminate value when there are no remote entries. */
    int rm_fd = -1;

    net_setup_anon_listener(MAX_P4_CONN_BACKLOG, &serv_port, &serv_fd);

    if (execer_starting_remotes) {
        if (pg->num_entries > 1)
            put_execer_port(serv_port);

        /* -1 marks "no connection received for this entry yet". */
        for (i = 0; i < P4_MAXPROCS; i++)
            rm_fds[i] = -1;

        /* Connections arrive in arbitrary order; file each one under the
           rm_num that the remote reports. */
        for (i = 1; i < pg->num_entries; i++) {
            rm_fd = net_accept(serv_fd);
            hs.pid = (int) htonl(getpid());
            net_send(rm_fd, &hs, sizeof(hs), P4_FALSE);
            net_recv(rm_fd, &hs, sizeof(hs));
            rm_num = (int) ntohl(hs.rm_num);

            /* rm_num comes off the wire: never use it as an index
               without checking it first. */
            if (rm_num < 1 || rm_num >= pg->num_entries ||
                rm_num >= P4_MAXPROCS) {
                p4_error("create_remote_processes: invalid rm_num in handshake",
                         rm_num);
                /* Only reached if p4_error returns. */
                close(rm_fd);
                continue;
            }
            rm_fds[rm_num] = rm_fd;
        }

        for (i = 1; i < pg->num_entries; i++) {
            if (i >= P4_MAXPROCS || rm_fds[i] < 0) {
                p4_error("create_remote_processes: no connection for remote master",
                         i);
                /* Only reached if p4_error returns. */
                continue;
            }
            net_slave_info(pg->entries + i, rm_outfile_head, rm_fds[i], i);
        }
    }
    else {
        for (i = 1, pe = pg->entries + 1; i < pg->num_entries; i++, pe++) {
            rm_fd = net_create_slave(serv_port, serv_fd,
                                     pe->host_name,
                                     pe->slave_full_pathname,
                                     pe->username,
                                     pe->rm_rank);
#ifdef SCYLD_BEOWULF
            if (rm_fd < 0)
                break;
#endif
            net_slave_info(pe, rm_outfile_head, rm_fd, i);
        }
    }

#ifdef SCYLD_BEOWULF
    if (rm_fd == -2)  /* We are an rforked child */
        return (-2);
#endif

    close(serv_fd);
    return (0);
}

/*
 * Send the INITIAL_INFO message to one remote master over rm_fd, then read
 * its replies until REMOTE_SLAVE_INFO_END, installing every process it
 * reports into the process table and recording the connection in
 * p4_local->conntab.
 *
 *   pe      - procgroup entry describing the remote group
 *   outfile - output file name forwarded to the remote; must be shorter
 *             than P4_MAX_PGM_LEN
 *   rm_fd   - connected socket to the remote master
 *   rm_num  - index of the remote master, forwarded in the message and
 *             passed to install_in_proctable()
 *
 * Returns early (after a diagnostic) if the connection reports EOF.
 *
 * The definition uses a prototype, like the other definitions in this file;
 * the parameter types are the same as in the former K&R-style definition.
 */
P4VOID net_slave_info(struct p4_procgroup_entry *pe, char *outfile,
                      int rm_fd, int rm_num)
{
    struct bm_rm_msg msg;
    P4BOOL done;
    int type, status, port, remote_switch_port;
    int slave_idx, slave_pid, pidx;

    /* The whole struct goes over the wire; clear it so that padding and
       unused fields do not carry stale stack contents. */
    memset(&msg, 0, sizeof(msg));

    msg.type = p4_i_to_n(INITIAL_INFO);
    msg.numinproctab = p4_i_to_n(p4_global->num_in_proctable);
    msg.rm_num = p4_i_to_n(rm_num);
    msg.numslaves = p4_i_to_n(pe->numslaves_in_group);
    if (strlen(outfile) >= P4_MAX_PGM_LEN) {
        p4_error("Output filename must be less than ", P4_MAX_PGM_LEN);
    }
    strncpy(msg.outfile, outfile, P4_MAX_PGM_LEN);
    msg.debug_level = p4_i_to_n(p4_remote_debug_level);
    msg.memsize = p4_i_to_n(globmemsize);
    msg.logging_flag = p4_i_to_n(logging_flag);
    strcpy(msg.application_id, p4_global->application_id);
    strcpy(msg.version, P4_PATCHLEVEL);
    if (strlen(pe->slave_full_pathname) >= P4_MAX_PGM_LEN) {
        p4_error("Program names must be less than ", P4_MAX_PGM_LEN);
    }
    strncpy(msg.pgm, pe->slave_full_pathname, P4_MAX_PGM_LEN);
    strncpy(msg.wdir, p4_wd, P4_MAX_PGM_LEN);
    /* Unlike outfile and pgm, the length of p4_wd is not checked above,
       so strncpy may have filled the field without a terminator. */
    msg.wdir[P4_MAX_PGM_LEN - 1] = '\0';

    net_send(rm_fd, &msg, sizeof(msg), P4_FALSE);

    port = -1;
    pidx = -1;
    for (done = P4_FALSE; !done;) {
        status = net_recv(rm_fd, &msg, sizeof(msg));
        if (status == PRECV_EOF) {
            p4_dprintf("OOPS! got EOF in net_slave_info\n");
            return;
        }

        type = p4_n_to_i(msg.type);
        switch (type) {
        case REMOTE_LISTENER_INFO:
            /* Remember the listener port; it is used for every process
               reported afterwards. */
            port = p4_n_to_i(msg.port);
            break;

        case REMOTE_MASTER_INFO:
        case REMOTE_SLAVE_INFO:
            slave_idx = p4_n_to_i(msg.slave_idx);
            slave_pid = p4_n_to_i(msg.slave_pid);
            remote_switch_port = p4_n_to_i(msg.switch_port);

            if (port == -1)
                p4_dprintf("OOPS! got slave_info w/o getting port first\n");

            /* big master installing remote processes */
            pidx = install_in_proctable(rm_num, port, slave_pid,
                                        pe->host_name,
                                        pe->host_name,
                                        slave_idx, msg.machine_type,
                                        remote_switch_port);
            p4_dprintfl(90, "net_slave_info: adding connection to %d (%d) \n",
                        pidx, rm_num);

            if (p4_local->conntab[pidx].type == CONN_REMOTE_SWITCH) {
                p4_local->conntab[pidx].switch_port = remote_switch_port;
                p4_local->conntab[pidx].port = rm_fd;
            }
            else if (p4_local->conntab[pidx].type == CONN_REMOTE_NON_EST) {
                /* Only the remote master's entry is marked established
                   on rm_fd; slave entries stay CONN_REMOTE_NON_EST. */
                if (type == REMOTE_MASTER_INFO) {
                    p4_local->conntab[pidx].type = CONN_REMOTE_EST;
                    p4_local->conntab[pidx].port = rm_fd;
                    p4_local->conntab[pidx].same_data_rep =
                        same_data_representation(p4_local->my_id, pidx);
                }
            }
            else {
                p4_error("net_slave_info: invalid conn type in conntab\n",
                         p4_local->conntab[pidx].type);
            }
            break;

        case REMOTE_SLAVE_INFO_END:
            done = P4_TRUE;
            break;

        default:
            /* Other message types are ignored, as before. */
            break;
        }
    }
}

/* This routine is called if the net_accept fails to complete quickly */
#include <sys/time.h>

#ifndef TIMEOUT_VALUE
#define TIMEOUT_VALUE 300
#endif

static char *curhostname = 0;
static char errbuf[512];
static int child_pid = 0;
/* active_fd is the fd that we're waiting on when the timeout happened */
static int active_fd = -1;

P4VOID p4_accept_timeout ( int );

/*
 * Signal handler for a connection timeout.  Sends SIGQUIT to child_pid (if
 * set), reports the timeout through p4_error (naming curhostname when it is
 * set), closes active_fd if it is open, and exits with status 1.
 *
 * The host name is limited to 400 characters in the message so that the
 * formatted text always fits in errbuf (512 bytes).
 */
P4VOID p4_accept_timeout( int sigval )
{
    /* NOTE: ideally we would first check that the timeout has actually
       been reached and that this isn't some other alarm; no such check
       is made here. */
    if (child_pid) {
        kill( child_pid, SIGQUIT );
    }
    if (curhostname) {
        sprintf( errbuf,
                 "Timeout in making connection to remote process on %.400s",
                 curhostname );
        p4_error( errbuf, 0 );
    }
    else {
        p4_error( "Timeout in making connection to remote process", 0 );
    }
    if (active_fd >= 0)
        close( active_fd );
    exit(1);
}

#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif

P4VOID p4_accept_sigchild ( int );

/*
 * Signal handler for a child status change while a connection is being
 * made.  Does nothing when no child is being tracked (child_pid == 0) or
 * when waitpid() reports that the tracked child has not exited.  Otherwise
 * reports the failure through p4_error (naming curhostname when it is set),
 * closes active_fd if it is open, and exits with status 1.
 */
P4VOID p4_accept_sigchild( int sigval )
{
    int status;

    /* See if this is a child that we're waiting on */
    if (!child_pid)
        return;

    /* If we did not find sys/wait.h, WNOHANG won't be defined.  What
       can we do? */
    if (waitpid( child_pid, &status, WNOHANG ) == 0) {
        /* waitpid returns 0 if the child hasn't exited, so this signal
           was not about the child we are waiting on. */
        return;
    }

    if (curhostname) {
        /* Host name limited to 400 characters so the message fits errbuf. */
        sprintf( errbuf,
                 "Child process exited while making connection to remote process on %.400s",
                 curhostname );
        p4_error( errbuf, 0 );
    }
    else {
        p4_error( "Child process exited while making connection to remote process", 0 );
    }
    if (active_fd >= 0)
        close( active_fd );
    exit(1);
}

/*
 * Run the slave pgm on host; returns the file descriptor of the
 * connection to the slave.  This creates the remote slave, which
 * in turn is responsible for creating the slaves.
*/int net_create_slave( int serv_port, int serv_fd, char *host, char *pgm, char *username, int rm_rank ){ struct net_initial_handshake hs; char myhostname[100]; char remote_shell[P4_MAX_PGM_LEN]; char serv_port_c[64]; int rc; char rm_rank_str[12];#ifdef USE_OLD_SERVER struct net_message_t msg; int success, connection_fd;#endif int slave_fd; int fcntl_flags; char *am_slave_c = "-p4amslave";# if defined(SYMMETRY) || defined(SUN) || \ defined(DEC5000) || defined(SGI) || \ defined(RS6000) || defined(HP) || \ defined(NEXT) || defined(CRAY) || \ defined(CONVEX) || defined(KSR) || \ defined(FX2800) || defined(FX2800_SWITCH) || \ defined(SP1)/* char *getpw_ss (char *); */# endif sprintf( rm_rank_str, "%d", rm_rank );# if defined(SP1) strcpy(myhostname,p4_global->proctable[0].host_name); p4_dprintfl(80,"net_create_slave: myhost=%s\n",myhostname);# else myhostname[0] = '\0'; get_qualified_hostname(myhostname,sizeof(myhostname));# endif if (hand_start_remotes) { printf("waiting for process on host %s:\n%s %s %d %s\n", host, pgm, myhostname, serv_port, am_slave_c); rc = 0; } else { /* try to connect to (secure) server */# if !defined(P4_DO_NOT_USE_SERVER) /* Do not try the secure server by default. The attempt to contact the default secure server port can cause the startup step to hang, due to IP security settings that cause some connections to go unacknowledged (not even refused). Currently, the test for this is on the sserver_port, which is initialized to -1 (rather than the old default of 753). 
*/ /***** secure server stuff *******/ p4_dprintfl(20, "trying to create remote slave on %s via server\n",host); rc = start_slave(host, username, pgm, serv_port, am_slave_c, getpw_ss); if (rc < -1) { extern char *start_prog_error; p4_dprintfl(20,"Warning from secure server: %s\n", start_prog_error); } else if (rc == 0) p4_dprintfl(10, "created remote slave on %s via server\n",host); /*****************************************/ else { /* A -1 is failure, not warning */ extern char *start_prog_error; p4_dprintfl( 20, "Failed to connect to secure server: %s\n", start_prog_error ); }# else rc = -1;# endif } if (rc <= -1) {#ifdef USE_OLD_SERVER /* try to connect to (old) server */ connection_fd = net_conn_to_listener(host, UNRESERVED_PORT, 1); if (connection_fd >= 0) { p4_dprintfl(20, "creating remote slave on %s via old server\n",host); msg.type = p4_i_to_n(NET_EXEC); strcpy(msg.pgm, pgm); strcpy(msg.host, myhostname); strcpy(msg.am_slave, am_slave_c); msg.port = p4_i_to_n(serv_port); net_send(connection_fd, &msg, sizeof(msg), P4_FALSE); net_recv(connection_fd, &msg, sizeof(msg)); success = p4_n_to_i(msg.success); if (!success) { p4_dprintf("create failed: %s\n", msg.message); return (-1); } close(connection_fd); p4_dprintfl(10, "created remote slave on %s via old server\n",host); } else#endif /* USE_OLD_SERVER */ {#ifdef SCYLD_BEOWULF int node_num; int curr_node; p4_dprintfl(20, "trying to create remote slave on %s\n",host); sprintf(serv_port_c,"%d",serv_port); node_num=bproc_getnodebyname(host); if(node_num==BPROC_NODE_NONE) p4_error("net_create_slave: host not a bproc node",node_num); curr_node=bproc_currnode(); if(curr_node==node_num) { p4_dprintfl(20, "spawning slave via regular fork\n"); rc=child_pid=fork(); } else { p4_dprintfl(20, "spawning slave via bproc\n"); rc=child_pid=fork(); if(!child_pid) { rc=bproc_move(node_num); if(rc==-1) { p4_error("net_create_slave: bproc_move",rc); }} } if(!rc) { reset_fork_p4(); /* reset some global crap */ curhostname = 0; /* 
global crap */ child_pid=0; /* global crap */ active_fd=-1; /* global crap */ close(serv_fd); /* this helps p4_printf routines */ sprintf(whoami_p4, "p%d_%d", p4_get_my_id(), getpid()); p4_dprintfl(20, "bproc: (pid=%d)\n",getpid()); p4_local = NULL; p4_global = NULL; SIGNAL_P4(SIGALRM,SIG_DFL); SIGNAL_P4(LISTENER_ATTN_SIGNAL,SIG_DFL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -