📄 p4_utils.c
字号:
/* free assorted data structures */ if ( !p4_global->local_communication_only ) p4_free(listener_info); /* only allocated in this case */ if (p4_local->procgroup) p4_free(p4_local->procgroup); p4_free(p4_local->conntab); p4_shfree((P4VOID *)(p4_local->queued_messages->m.qs)); p4_free(p4_local->queued_messages);#ifdef CAN_DO_XDR p4_free(p4_local->xdr_buff);#endif p4_free(p4_local); free_avail_quels(); /* (in p4_global) */ for (i = 0; i < P4_MAX_MSG_QUEUES; i++) p4_shfree((P4VOID *)(p4_global->shmem_msg_queues[i].m.qs)); p4_shfree((P4VOID *)(p4_global->cluster_barrier.m.qs)); p4_shfree((P4VOID *)(p4_global));# if defined(SYSV_IPC) p4_dprintfl(90, "removing SYS V IPCs\n"); remove_sysv_ipc();# endif# if defined(SGI) && defined(VENDOR_IPC) unlink(p4_sgi_shared_arena_filename);# endif return (0);}/* static variables private to fork_p4 and zap_p4_processes */static int n_pids = 0;static int pid_list[P4_MAXPROCS];#ifdef SCYLD_BEOWULFint reset_fork_p4( void ){ n_pids = 0; return 0;}#endifint fork_p4( void )/* Wrapper round fork for sole purpose of keeping track of pids so that can signal error conditions. See zap_p4_processes.*/{ int pid;# if defined(IPSC860) || defined(CM5) || defined(NCUBE) || defined(SP1_EUI) || defined(SP1_EIUH) p4_error("fork_p4: nodes cannot fork processes",0); # else if (p4_global->n_forked_pids >= P4_MAXPROCS) p4_error("forking too many local processes; max = ", P4_MAXPROCS); p4_global->n_forked_pids++; fflush(stdout); pid = fork(); if (pid > 0) { /* Parent process */ pid_list[n_pids++] = pid;#if defined(SUN_SOLARIS)/***** { processorid_t proc = 0; if(p_online(proc,P_STATUS) != P_ONLINE) printf("Could not bind parent to processor 0\n"); else { processor_bind(P_PID,P_MYID,proc, &proc); printf("Bound parent to processor 0 , previous binding was %d\n", proc); } }*****/#endif } else if (pid == 0) { /* Child process */ pid_list[n_pids++] = getppid(); } else p4_error("fork_p4: fork failed", pid);# endif return pid;}P4VOID zap_p4_processes( void ){ int n; if (p4_global == NULL) return; n = p4_global->n_forked_pids; p4_dprintfl(99,"DOING ZAP of %d local processes\n",n); while (n--) { if (pid_list[n] > 0) { kill(pid_list[n], SIGINT); } }}P4VOID zap_remote_p4_processes( void ){ int i; int my_id; struct proc_info *dest_pi; char *dest_host; int dest_id, dest_listener, dest_listener_con_fd, dest_pid; struct slave_listener_msg msg; int prev_port; char prev_hostname[HOSTNAME_LEN]; p4_dprintfl(70,"killing remote processes\n"); my_id = p4_get_my_id(); dest_pi = get_proc_info(0); strcpy(prev_hostname,dest_pi->host_name); prev_port = dest_pi->port; for (i = 0; i < p4_global->num_in_proctable; i++) { dest_id = i; if (dest_id != my_id) { dest_pi = get_proc_info(i); dest_host = dest_pi->host_name; dest_listener = dest_pi->port; dest_pid = dest_pi->unix_id; p4_dprintfl(40, "zap: my_id=%d dest_id=%d dest_host=%s dest_pid=%d " "dest_listener=%d\n", my_id, i, dest_host, dest_pid, dest_listener); p4_dprintfl(40, "zap: enter loop to connect to dest listener %s\n",dest_host); if (dest_listener < 0) continue; /* try 2 times (~4 seconds with sleeps in net_conn_to_listener) */ dest_listener_con_fd = net_conn_to_listener(dest_host,dest_listener,2); if (dest_listener_con_fd == -1) continue; /********** RMB: old scheme waits a long time num_tries = 1; p4_has_timedout( 0 ); while((dest_listener_con_fd = net_conn_to_listener(dest_host,dest_listener,1)) == -1) { num_tries++; if (p4_has_timedout( 1 )) { p4_error( "Timeout in establishing connection to remote process", 0 ); } } **********/ p4_dprintfl(40, "zap_remote_p4_processes: dest_listener_con_fd=%d\n", dest_listener_con_fd); /* send it kill-clients-and-die message */ msg.type = p4_i_to_n(KILL_SLAVE); msg.from = p4_i_to_n(my_id); msg.to_pid = p4_i_to_n(dest_pid); p4_dprintfl(40, "zap_remote_p4_processes: sending KILL_SLAVE to %d on fd=%d size=%d\n", dest_id,dest_listener_con_fd,sizeof(msg)); net_send(dest_listener_con_fd, &msg, sizeof(msg), P4_FALSE); p4_dprintfl(40, "zap_remote_p4_processes: sent KILL_SLAVE to dest_listener\n"); /* Construct a die message for remote listener */ if (strcmp(prev_hostname,dest_pi->host_name) != 0 || prev_port != dest_pi->port) { /* The listener closes the connection after receiving a message, so we need to get a new connection in order to send it another message. Thanks to Vincent Newsum <Newsum@fel.tno.nl> for this fix */ dest_listener_con_fd = net_conn_to_listener(dest_host,dest_listener,2); if (dest_listener_con_fd == -1) continue; msg.type = p4_i_to_n(DIE); msg.from = p4_i_to_n(my_id); p4_dprintfl(40, "zap_remote_p4_processes: sending DIE to %d on fd=%d size=%d\n", dest_id,dest_listener_con_fd,sizeof(msg)); net_send(dest_listener_con_fd, &msg, sizeof(msg), P4_FALSE); p4_dprintfl(40, "zap_remote_p4_processes: sent DIE to dest_listener\n"); strcpy(prev_hostname,dest_pi->host_name); prev_port = dest_pi->port; } } } /* kill own listener */ if (p4_local->listener_fd > 0) { p4_dprintfl(40, "zap_remote_p4_processes: sending DIE to my listener\n"); msg.type = p4_i_to_n(DIE); msg.from = p4_i_to_n(p4_get_my_id()); net_send(p4_local->listener_fd, &msg, sizeof(msg), P4_FALSE); close( p4_local->listener_fd ); p4_local->listener_fd = -1; } p4_dprintfl(40, "zap_remote_p4_processes: done\n");}P4VOID get_qualified_hostname(char *str, int maxlen){ str[maxlen-1] = 0;# if (defined(IPSC860) && !defined(IPSC860_SOCKETS)) || \ (defined(CM5) && !defined(CM5_SOCKETS)) || \ (defined(NCUBE) && !defined(NCUBE_SOCKETS)) || \ (defined(SP1_EUI)) || \ (defined(SP1_EUIH)) strncpy(str,"cube_node",maxlen-1);# else# if defined(SUN_SOLARIS) || defined(MEIKO_CS2) if (*str == '\0') { if (p4_global) strncpy(str,p4_global->my_host_name,maxlen-1); else if (sysinfo(SI_HOSTNAME, str, maxlen-1) == -1) p4_error("could not get qualified hostname", getpid()); }# else if (*str == '\0') { if (p4_global) strncpy(str,p4_global->my_host_name,maxlen-1); else gethostname_p4(str, maxlen); }# endif if (*local_domain != '\0' && !index(str,'.')) { strncat(str,".",maxlen-1); strncat(str,local_domain,maxlen-1); }#endif}#ifdef CAN_DO_SWITCH_MSGSint getswport(char *hostname){ char local_host[MAXHOSTNAMELEN]; if (strcmp(hostname, "local") == 0) { local_host[0] = '\0'; get_qualified_hostname(local_host,sizeof(local_host)); return getswport(local_host); } if (strcmp(hostname, "hurley") == 0) return 1; if (strcmp(hostname, "hurley.tcg.anl.gov") == 0) return 1; if (strcmp(hostname, "hurley.mcs.anl.gov") == 0) return 1; if (strcmp(hostname, "campus.mcs.anl.gov") == 0) return 2; if (strcmp(hostname,"mpp1") == 0) return 3; if (strcmp(hostname,"mpp2") == 0) return 28; if (strcmp(hostname,"mpp3") == 0) return 6; if (strcmp(hostname,"mpp4") == 0) return 7; if (strcmp(hostname,"mpp7") == 0) return 14; if (strcmp(hostname,"mpp8") == 0) return 25; if (strcmp(hostname,"mpp9") == 0) return 20; if (strcmp(hostname,"mpp10") == 0) return 11; return -1;}#endifP4BOOL same_data_representation( int id1, int id2 ){ struct proc_info *p1 = &(p4_global->proctable[id1]); struct proc_info *p2 = &(p4_global->proctable[id2]); return (data_representation(p1->machine_type) == data_representation(p2->machine_type));}/* Given rank and places to put the hostname and image names, returns * the pid, and fills in the host and image names of the process with * the given rank. Returns 0 if the rank is invalid. */int p4_proc_info(int i, char **hostname, char **exename){ if (((unsigned) i) >= p4_global->num_in_proctable) { *hostname = 0; return(0); } else { struct proc_info *p1 = &(p4_global->proctable[i]);#ifdef SCYLD_BEOWULF /* Allow kludgy forcing of all processes to appear on * the master node for Scyld testing. */ if (getenv ("USE_BTRACE")) *hostname = "-1"; else#endif *hostname = p1->host_name; /* Get the executable name from the procgroup */ *exename = p4_local->procgroup->entries[i].slave_full_pathname; return (p1->unix_id); }} #ifdef OLD_EXECERP4VOID put_execer_port( int port ){ int fd; char job_filename[64]; char port_c[16]; sprintf(port_c,"%d",port); strncpy(job_filename,"/tmp/p4_",64); strncat(job_filename,execer_jobname,64); if ((fd = open(job_filename, O_WRONLY | O_CREAT | O_TRUNC, 0600)) < 0) { p4_error("put_execer_port: open failed ",fd); } if ((write(fd,port_c,strlen(port_c)+1)) != strlen(port_c)+1) { p4_error("put_execer_port: write failed ",(-1)); } close(fd);}int get_execer_port(char *master_hostname){ int port, num_read, sleep_time, status; FILE *fp; char cmd[P4_MAX_PGM_LEN]; sprintf(cmd,"rsh %s cat /tmp/p4_%s",master_hostname,execer_jobname); num_read = 0; sleep_time = 4; while (num_read != 1 && sleep_time < 128) { if ((fp = (FILE *) popen(cmd,"r")) == NULL) { wait(&status); /* for the rsh started by popen */ sleep(sleep_time); sleep_time *= 2; } else { num_read = fscanf(fp,"%d",&port); pclose(fp); } } if (num_read != 1) { p4_error("get_execer_port: never got good port",(-1)); } return(port);}void clean_execer_port( void ){ char job_filename[64]; if (execer_starting_remotes && execer_mynodenum == 0) { strncpy(job_filename,"/tmp/p4_",64); strncat(job_filename,execer_jobname,64); unlink(job_filename); }}#elseP4VOID put_execer_port(int port){ struct sockaddr_in s_in; int len = sizeof(s_in); int fd, cc; /* send my local listening number to execer_mastport */ fd = socket(PF_INET, SOCK_DGRAM, 0); if (fd < 0) p4_error("put_execer_port: socket", errno); s_in.sin_family = AF_INET; s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); s_in.sin_port = htons(execer_mastport); cc = sendto(fd, &port, sizeof(port), 0, (struct sockaddr *)&s_in, len); if (cc < 0) p4_error("put_execer_port: sendto", errno); if (cc != sizeof(port)) p4_error("put_execer_port: partial write", 0); if (close(fd) < 0) p4_error("put_execer_port: close", errno);}void clean_execer_port(void){}#endif/* high-resolution clock, made out of p4_clock and p4_ustimer */static int clock_start_ms;static usc_time_t ustimer_start;static usc_time_t usrollover;P4VOID init_usclock( void ){ clock_start_ms = p4_clock(); ustimer_start = p4_ustimer(); usrollover = usc_rollover_val();}double p4_usclock( void ){ int elapsed_ms, q; usc_time_t ustimer_end; double rc, roll, beginning, end; if (usrollover == 0) return( .001*p4_clock() ); elapsed_ms = p4_clock() - clock_start_ms; /* milliseconds */ ustimer_end = p4_ustimer(); /* terminal segment */ q = elapsed_ms / (int)(usrollover/1000);/* num rollover-sized intervals*/ /* q+1 is the maximum number of rollovers that could have occurred */ if (ustimer_start <= ustimer_end) q = q - 1; /* now q+1 is the number of rollovers that did occur */ beginning = (double)(usrollover - ustimer_start); /* initial segment */ end = ustimer_end; /* terminal segment */ roll = (double)(usrollover * 0.000001); /* rollover in seconds */ rc = (double) (((beginning + end ) * 0.000001) + (q * roll)); return(rc);}#ifndef p4_CheckSighandlerP4VOID p4_CheckSighandler( sigf )int (*sigf)();{ if (sigf != SIG_IGN && sigf != SIG_DFL && sigf != SIG_ERR) { printf( "Replaced a non-default signal in P4\n" ); }}#endif#ifdef SCYLD_BEOWULFextern int beowulf_sched_shim(char *type, int **map) __attribute__ ((weak));static voidbeowulf_init(void){ int node; int *map=NULL; int count=0; struct p4_procgroup_entry *pe; struct passwd *pwent = getpwuid( getuid() ); /* If execer is already used don't overwrite it. */ if (execer_pg) return; /* If procgroup_file is set externally, don't use scheduler */#if 1 if (strncmp (procgroup_file, "procgroup", 8) != 0) return;#endif /* Call the Schedule Shim (if available) */ if (beowulf_sched_shim) count = beowulf_sched_shim ("mpich-p4", &map); else return; /* Allocate a process group and copy the map into it. */ execer_pg = p4_alloc_procgroup(); pe = execer_pg->entries; execer_pg->num_entries = count; for (node = 0; node < count; node++) { int cnt; snprintf (pe->host_name, HOSTNAME_LEN, "%d", map[node]); pe->numslaves_in_group = (node != 0); cnt = readlink ("/proc/self/exe", pe->slave_full_pathname, sizeof (pe->slave_full_pathname)); if ((cnt == -1) || (cnt == sizeof (pe->slave_full_pathname))) strncpy(pe->slave_full_pathname, "self", 4); else pe->slave_full_pathname[cnt] = 0; strncpy(pe->username, pwent->pw_name, 10); pe++; } /* Need to move to rank 0 node. */ if (map[0] != -1) bproc_move (map[0]); free (map); dump_procgroup(execer_pg, 50); return;}#endif /* SCYLD_BEOWULF */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -