📄 mom_start.c
字号:
kill_task(ptask, SIGKILL); ptask->ti_qs.ti_exitstat = exiteval; ptask->ti_qs.ti_status = TI_STATE_EXITED; task_save(ptask); sprintf(log_buffer, "task %d terminated", ptask->ti_qs.ti_task); LOG_EVENT(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); exiting_tasks = 1; }}/* * creat the master pty, this particular * piece of code depends on multiplexor /dev/ptc */int open_master(rtn_name) char **rtn_name; /* RETURN name of slave pts */{ int ptc; if ((ptc = open("/dev/ptc", O_RDWR | O_NOCTTY, 0)) < 0) { return (-1); } *rtn_name = ttyname(ptc); /* name of slave side */ return (ptc);}/* * struct sig_tbl = map of signal names to numbers, * see req_signal() in ../requests.c */struct sig_tbl sig_tbl[] = { { "NULL", 0 }, { "HUP", SIGHUP }, { "INT", SIGINT }, { "QUIT", SIGQUIT }, { "ILL", SIGILL }, { "TRAP", SIGTRAP }, { "ABRT", SIGABRT }, { "EMT", SIGEMT }, { "FPE", SIGFPE }, { "KILL", SIGKILL }, { "BUS", SIGBUS }, { "SEGV", SIGSEGV }, { "SYS", SIGSYS }, { "PIPE", SIGPIPE }, { "ALRM", SIGALRM }, { "TERM", SIGTERM }, { "URG", SIGURG }, { "STOP", SIGSTOP }, { "TSTP", SIGTSTP }, { "CONT", SIGCONT }, { "CHLD", SIGCHLD }, { "TTIN", SIGTTIN }, { "TTOU", SIGTTOU }, { "IO", SIGIO }, { "XCPU", SIGXCPU }, { "XFSZ", SIGXFSZ }, { "MSG", SIGMSG }, { "WINCH", SIGWINCH }, { "PWR", SIGPWR }, { "USR1", SIGUSR1 }, { "USR2", SIGUSR2 }, { "PROF", SIGPROF }, { "DANGER", SIGDANGER }, { "VTALRM", SIGVTALRM }, { "MIGRATE", SIGMIGRATE }, { "PRE", SIGPRE }, { "VIRT", SIGVIRT }, { "ALRM1", SIGALRM1 }, { "WAITING", SIGWAITING }, { "KAP", SIGKAP }, { "GRANT", SIGGRANT }, { "RETRACT", SIGRETRACT }, { "SOUND", SIGSOUND }, { "SAK", SIGSAK }, { (char *)0, -1 }};#if IBM_SP2==2 /* IBM SP2 with PSSP 3.1 *//* * The following routines are used to load and unload the routing information * for the IBM high speed switch on the SP. Each node must specify the same * connection information. At the end of the job the information is unloaded. */static int name_to_sw_num(nn) char *nn;{ int i; char *pa; char *pb; extern struct swtbl_num swtbl_num[]; extern int ibm_sp2_num_nodes; for (i=0; i<ibm_sp2_num_nodes; i++) { pa = nn; pb = swtbl_num[i].sw_name; while (*pa && *pb && (*pa == *pb)) { pa++; pb++; } if ( ((*pa == '\0') && (*pb == '\0')) || ((*pa == '\0') && (*pb == '.')) || ((*pa == '.') && (*pb == '\0')) ) return swtbl_num[i].sw_num; } return -1;} static int build_swtbl_array(pjob, psa) job *pjob; struct ST_NODE_INFO **psa;{ int i; int j; struct ST_NODE_INFO *swtbl; swtbl = (struct ST_NODE_INFO *)calloc(pjob->ji_numvnod, sizeof(struct ST_NODE_INFO)); if (swtbl == NULL) return PBSE_SYSTEM; for (i=0; i<pjob->ji_numvnod; i++) { /* pull node name out of vnodent/hnodent struct */ (void)strcpy((swtbl+i)->st_node_name, pjob->ji_vnods[i].vn_host->hn_host); /* now find switch node number that matches name */ if ((j = name_to_sw_num((swtbl+i)->st_node_name)) == -1) { (void)free(swtbl); return PBSE_UNKNODE; } (swtbl+i)->st_virtual_task_id = i; (swtbl+i)->st_switch_node_num = j; (swtbl+i)->st_window_id = pjob->ji_vnods[i].vn_index; } *psa = swtbl; return PBSE_NONE;}/* * load_sp_switch - load the IBM SP switch table with the nodes/window_id * to be used by this job. Also write a file into the "aux" directory * with the array of window_ids in task order. This file is used by * pbspd.c to place the correct window_id into the environment based on * the value of MP_CHILD passed by IBM's poe. */int load_sp_switch(pjob) job *pjob;{ int i; int rc = 0; struct ST_NODE_INFO *pswa; static char *id = "load_sp_switch"; char buf[MAXPATHLEN+1]; FILE *fwin; extern int internal_state; sscanf(pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str,"%x",&job_key); if (job_key < 0) job_key = -job_key; if ((rc = build_swtbl_array(pjob, &pswa)) != PBSE_NONE) { sprintf(log_buffer, "build swtbl node array failed %d", rc); log_record(PBSEVENT_DEBUG, 0, id, log_buffer); return -1; } rc = swtbl_load_table(ST_VERSION, pjob->ji_qs.ji_un.ji_momt.ji_exuid, getpid(), job_key, mom_host, pjob->ji_numvnod, pjob->ji_qs.ji_jobid, pswa); if (rc != ST_SUCCESS) { sprintf(log_buffer, "swtbl_load_table failed with %d", rc); log_record(PBSEVENT_SYSTEM|PBSEVENT_ADMIN|PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,log_buffer); internal_state |= INUSE_DOWN; state_to_server(1); /* tell server we are down */ } else { /* success */ (void)sprintf(buf, "%s/aux/%s.SW", path_home, pjob->ji_qs.ji_jobid); if ((fwin = fopen(buf, "w")) == NULL) { sprintf(log_buffer, "cannot open %s", buf); log_err(errno, id, log_buffer); return -1; } (void)fchmod(fileno(fwin), 0644); for (i=0; i<pjob->ji_numvnod; i++) fprintf(fwin, "%d\n", pjob->ji_vnods[i].vn_index); (void)fclose(fwin); } (void)free(pswa); return rc;}/* * unload_sp_switch - unload the job/node information from the switch * Also remove the aux *.SW file created when the switch is loaded above. */void unload_sp_switch(pjob) job *pjob;{ int i; int rc = 0; char buf[MAXPATHLEN+1]; struct vnodent *pvp; extern int internal_state; for (i=0; i<pjob->ji_numvnod; ++i) { pvp = &pjob->ji_vnods[i]; if (pvp->vn_host->hn_node == pjob->ji_nodeid) { rc = swtbl_unload_table(ST_VERSION, "css0", pjob->ji_qs.ji_un.ji_momt.ji_exuid, pvp->vn_index); DBPRT(("Unloading switch window %d\n", pvp->vn_index)) if (rc != ST_SUCCESS) { sprintf(log_buffer,"error %d unloading switch table window %d for job %s", rc, pvp->vn_index, pjob->ji_qs.ji_jobid); log_err(PBSE_SYSTEM, "unload_sp_switch", log_buffer); if (rc != ST_SWITCH_NOT_LOADED) { rc = swtbl_clean_table(ST_VERSION, "css0", ST_ALWAYS_KILL,pvp->vn_index); } else if (rc != ST_SUCCESS) { sprintf(log_buffer,"error %d cleaning switch table window %d for job %s", rc, pvp->vn_index, pjob->ji_qs.ji_jobid); log_err(PBSE_SYSTEM, "unload_sp_switch", log_buffer); internal_state |= INUSE_DOWN; state_to_server(1); /* tell server we are down */ } } } } (void)sprintf(buf, "%s/aux/%s.SW", path_home, pjob->ji_qs.ji_jobid); (void)unlink(buf); return;} /* * query_adp - query the SP switch adaptor, are we on line */void query_adp(){ int rc; enum ST_ADAPTER_STATUS st; extern int internal_state; static char *id = "query_adp"; if ((rc = swtbl_query_adapter(ST_VERSION, "css0", &st)) != ST_SUCCESS) { if ((internal_state & INUSE_DOWN) == 0) { log_record(PBSEVENT_SYSTEM, rc, id, "cannot query adaptor"); internal_state |= INUSE_DOWN|UPDATE_MOM_STATE; } return; } switch (st) { case ADAPTER_READY: if (internal_state & INUSE_DOWN) { internal_state &= ~INUSE_DOWN; /* we are not down */ internal_state |= UPDATE_MOM_STATE; log_record(PBSEVENT_SYSTEM, 0, id, "adaptor up"); } break; case ADAPTER_NOTREADY: if ((internal_state & INUSE_DOWN) == 0) { /* mark that we are down */ internal_state |= INUSE_DOWN|UPDATE_MOM_STATE; log_record(PBSEVENT_SYSTEM, 0, id, "adaptor down"); } break; }} #endif /* IBM SP */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -