📄 mom_mach.c
字号:
* Return TRUE if any process in the job is over limit for memory usage. */static int overmem_proc(pjob, limit) job *pjob; unsigned long limit;{ int i; for (i=0; i<nproc; i++) { register struct procsinfo *pp = &proc_tbl[i]; if (pp->pi_state == SNONE) continue; if (!injob(pjob, pp->pi_sid)) continue; if (ctob(pp->pi_size) > limit) return (TRUE); } return (FALSE);}extern char *msg_momsetlim;/* * Internal error routine */int error(string, value) char *string; int value;{ int i = 0; char *message; assert(string != NULL); assert(*string != '\0'); assert(value > PBSE_); /* minimum PBS error number */ assert(value <= PBSE_NOSYNCMSTR); /* maximum PBS error number */ assert(pbs_err_to_txt[i].err_no != 0); do { if (pbs_err_to_txt[i].err_no == value) break; } while (pbs_err_to_txt[++i].err_no != 0); assert(pbs_err_to_txt[i].err_txt != NULL); message = *pbs_err_to_txt[i].err_txt; assert(message != NULL); assert(*message != '\0'); (void)fprintf(stderr, msg_momsetlim, string, message); (void)fflush(stderr); return (value);}/* * Establish system-enforced limits for the job. * * Run through the resource list, checking the values for all items * we recognize. * * If set_mode is SET_LIMIT_SET, then also set hard limits for the * system enforced limits (not-polled). * If anything goes wrong with the process, return a PBS error code * and print a message on standard error. A zero-length resource list * is not an error. * * If set_mode is SET_LIMIT_SET the entry conditions are: * 1. MOM has already forked, and we are called from the child. * 2. The child is still running as root. * 3. Standard error is open to the user's file. * * If set_mode is SET_LIMIT_ALTER, we are beening called to modify * existing limits. Cannot alter those set by setrlimit (kernel) * because we are the wrong process. 
*/
/*
 * mom_set_limits
 *
 *	pjob	 - job whose JOB_ATR_resource list is examined.
 *	set_mode - SET_LIMIT_SET or SET_LIMIT_ALTER.
 *
 *	Returns PBSE_NONE on success.  On failure the PBS error code is
 *	reported through error() and returned.
 *
 *	Every setrlimit() and nice() call is made only when set_mode is
 *	SET_LIMIT_SET.  In SET_LIMIT_ALTER mode the values of cput, pcput,
 *	vmem and walltime are still parsed so that a malformed value is
 *	reported, but no kernel limit of the calling process is touched.
 */
int mom_set_limits(pjob, set_mode)
    job		*pjob;
    int		set_mode;	/* SET_LIMIT_SET or SET_LIMIT_ALTER */
{
	char		*id = "mom_set_limits";
	char		*pname;
	int		retval;
	unsigned long	value;	/* place in which to build resource value */
	resource	*pres;
	struct rlimit	reslim;
	unsigned long	mem_limit = 0;	/* lesser of vmem/pvmem, 0 = none */

	DBPRT(("%s: entered\n", id))
	assert(pjob != NULL);
	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);
	pres = (resource *)
	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);

/*
 * Cycle through all the resource specifications,
 * setting limits appropriately.
 */
	while (pres != NULL) {
		assert(pres->rs_defin != NULL);
		pname = pres->rs_defin->rs_name;
		assert(pname != NULL);
		assert(*pname != '\0');

		if (strcmp(pname, "cput") == 0) {
			/* cpu time - check only, enforced by polling */
			retval = gettime(pres, &value);
			if (retval != PBSE_NONE)
				return (error(pname, retval));
		} else if (strcmp(pname, "pcput") == 0) {
			/* process cpu time - always check the value */
			retval = gettime(pres, &value);
			if (retval != PBSE_NONE)
				return (error(pname, retval));
			/*
			 * Only the forked child may install the kernel
			 * limit; when altering we would otherwise put
			 * the limit on the calling process itself.
			 */
			if (set_mode == SET_LIMIT_SET) {
				reslim.rlim_cur = reslim.rlim_max =
				    (unsigned long)((double)value / cputfactor);
				if (setrlimit(RLIMIT_CPU, &reslim) < 0)
					return (error("RLIMIT_CPU",
						PBSE_SYSTEM));
			}
		} else if (strcmp(pname, "file") == 0) {	/* set */
			if (set_mode == SET_LIMIT_SET) {
				retval = getsize(pres, &value);
				if (retval != PBSE_NONE)
					return (error(pname, retval));
				if (value > INT_MAX)
					return (error(pname, PBSE_BADATVAL));
				reslim.rlim_cur = reslim.rlim_max = value;
				if (setrlimit(RLIMIT_FSIZE, &reslim) < 0)
					return (error(pname, PBSE_SYSTEM));
			}
		} else if (strcmp(pname, "vmem") == 0) {	/* check */
			retval = getsize(pres, &value);
			if (retval != PBSE_NONE)
				return (error(pname, retval));
			if ((mem_limit == 0) || (value < mem_limit))
				mem_limit = value;
		} else if (strcmp(pname, "pvmem") == 0) {	/* set */
			if (set_mode == SET_LIMIT_SET) {
				retval = getsize(pres, &value);
				if (retval != PBSE_NONE)
					return (error(pname, retval));
				if (value > INT_MAX)
					return (error(pname, PBSE_BADATVAL));
				if ((mem_limit == 0) || (value < mem_limit))
					mem_limit = value;
			}
		} else if (strcmp(pname, "pmem") == 0) {	/* set */
			if (set_mode == SET_LIMIT_SET) {
				retval = getsize(pres, &value);
				if (retval != PBSE_NONE)
					return (error(pname, retval));
				reslim.rlim_cur = reslim.rlim_max = value;
				if (setrlimit(RLIMIT_RSS, &reslim) < 0)
					return (error("RLIMIT_RSS",
						PBSE_SYSTEM));
			}
		} else if (strcmp(pname, "walltime") == 0) {	/* Check */
			retval = gettime(pres, &value);
			if (retval != PBSE_NONE)
				return (error(pname, retval));
		} else if (strcmp(pname, "nice") == 0) {	/* set nice */
			if (set_mode == SET_LIMIT_SET) {
				/* nice() may legitimately return -1 */
				errno = 0;
				if ((nice((int)pres->rs_value.at_val.at_long)
						== -1) && (errno != 0))
					return (error(pname, PBSE_BADATVAL));
			}
		} else if ((pres->rs_defin->rs_flags & ATR_DFLAG_RMOMIG) == 0)
			/* don't recognize and not marked as ignore by mom */
			return (error(pname, PBSE_UNKRESC));

		pres = (resource *)GET_NEXT(pres->rs_link);
	}

	if (set_mode == SET_LIMIT_SET) {
		/* if either of vmem or pvmem was given, set sys limit to lesser */
		if (mem_limit != 0) {
			reslim.rlim_cur = reslim.rlim_max = mem_limit;
			if (setrlimit(RLIMIT_DATA, &reslim) < 0)
				return (error("RLIMIT_DATA", PBSE_SYSTEM));
			if (setrlimit(RLIMIT_STACK, &reslim) < 0)
				return (error("RLIMIT_STACK", PBSE_SYSTEM));
		}
	}

	return (PBSE_NONE);
}

/*
 * State whether MOM main loop has to poll this job to determine if some
 * limits are being exceeded.
 *
 *	Sets flag TRUE if polling is necessary, FALSE otherwise.  Actual
 *	polling is done using the mom_over_limit machine-dependent function.
*/int mom_do_poll(pjob) job *pjob;{ char *id = "mom_do_poll"; char *pname; resource *pres; DBPRT(("%s: entered\n", id)) assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "walltime") == 0 || strcmp(pname, "cput") == 0 || strcmp(pname, "pvmem") == 0 || strcmp(pname, "vmem") == 0) return (TRUE); pres = (resource *)GET_NEXT(pres->rs_link); } return (FALSE);}/* * Setup for polling. * */int mom_open_poll(){ char *id = "mom_open_poll"; DBPRT(("%s: entered\n", id)) proc_tbl = malloc(ASIZE*sizeof(struct procsinfo)); proctot = ASIZE; return (PBSE_NONE);}#if IBM_SP2==1#define JMRETRY 2/*** Get job manager info.** Return the number of jobs or -1 on error;*/intgetjobstat(){ char *id = "getjobstat"; int cnt; int sock; if (job_tbl) jmq_jobs_free(&job_tbl, njob); njob = 0; for (cnt=0; cnt < JMRETRY; cnt++) { if ((sock = jm_connect_ub(NULL)) >= 0) break; switch (errno) { case EINTR: break; default: log_err(errno, id, "jm_connect_ub failed"); return -1; } } if (cnt == JMRETRY) { log_err(errno, id, "jm_connect_ub retry exhausted"); return -1; } for (cnt=0; cnt < JMRETRY; cnt++) { DBPRT(("%s: jm socket %d cnt %d\n", id, sock, cnt)) if ((njob = jmq_jobs_status(sock, &job_tbl)) >= 0) break; sprintf(log_buffer, "jmq_jobs_status failed %d", njob); log_err(errno, id, log_buffer); log_buffer[0] = '\0'; } jm_disconnect(sock);/* SDRCloseSession(); */#ifdef DEBUG { int i, j, k; for (i=0; i<njob; i++) { struct JM_JOB_STATUS *jp = &job_tbl[i]; printf("-------------------------------\n"); printf("name: %s\n", jp->jm_user_name); printf("desc: %s\n", jp->jm_job_description); printf("%s pid=%d id=%d type=%d ncpus=%d\n", jp->jm_time_allocated, jp->jm_client_pid, jp->jm_job_id, jp->jm_adapter_type, jp->jm_num_nodes); for 
(j=0; j<jp->jm_num_nodes; j++) { struct JM_NODE_IN_JOB *np = &jp->jm_node_in_job[j]; printf("\t+++++++++++++++++++\n"); printf("\tnode %s\n", np->jm_node_name); printf("\tusage=(%d/%d) tasks=%d\n\t", np->jm_cpu_usage, np->jm_adapter_usage, np->jm_num_virtual_tasks); for (k=0; k<np->jm_num_virtual_tasks; k++) { int vid = np->jm_virtual_task_ids[k]; printf("(%d) ", vid); } printf("\n"); } } }#endif /* DEBUG */ return njob;}/* * Internal session number of cpu decoding routine. * * Accepts a job pointer. Returns TRUE if the nodes used * by any session fall outside those allowed. */static unsigned long nodes_ses(pjob) job *pjob;{ char *id = "nodes_ses"; int i, j, k; resource *pres; char *nodes; char *badnodes = ""; DBPRT(("%s: entered\n", id)) /* ** The variable "nodes" is the string of plus sign separated ** node names specified by the server. ** We want to check the nodes used by any process in the job ** to these and return TRUE if they are not a subset. */ nodes = pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str; for (i=0; i<nproc; i++) { register struct procsinfo *pp = &proc_tbl[i]; if (pp->pi_state == SNONE) continue; if (!injob(pjob, pp->pi_sid)) continue; /* ** found a process in the job ** loop to see if any JM job shows this ** proc as its client pid */ DBPRT(("%s: pid=%d\n", id, pp->pi_pid)) for (j=0; j<njob; j++) { struct JM_JOB_STATUS *jp = &job_tbl[j]; int len, hit; char *end; if (jp->jm_client_pid != pp->pi_pid) continue; /* ** the plot thickens ** a JM job has been found to be part of this session ** check to see if the nodes associated with ** the job are shown in "nodes" */ DBPRT(("%s: job pid %d nodes %d\n", id, pp->pi_pid, jp->jm_num_nodes)) hit = 0; for (k=0; k<jp->jm_num_nodes; k++) { struct JM_NODE_IN_JOB *np = &jp->jm_node_in_job[k]; /* ** if we find a match, everything is okay, ** the job is staying within its limits */ if (match(np->jm_node_name, nodes)) continue; /* ** if we get here without a match ** a node being used in the actual job 
** was compared to every node this ** job is allowed to use (nodes) ** and it was not in the set */ hit = 1; /* ** see if this node is already listed ** in the "badnodes" list */ if (match(np->jm_node_name, badnodes)) continue; /* ** not in the "badnodes" list so we need ** to add it */ if ((len = strlen(badnodes)) == 0) { badnodes = strdup(np->jm_node_name); continue; } badnodes = realloc(badnodes, len + strlen(np->jm_node_name) + 4); strcat(badnodes, "+"); strcat(badnodes, np->jm_node_name); } if (hit == 0) continue; sprintf(log_buffer, "rouge pid %d using node(s) %s", jp->jm_client_pid, jp->jm_node_in_job[0].jm_node_name); for (k=1; k<jp->jm_num_nodes; k++) { struct JM_NODE_IN_JOB *np = &jp->jm_node_in_job[k]; int n = strlen(log_buffer); end = &log_buffer[n]; sprintf(end, "+%s", np->jm_node_name); } log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } } if (strlen(badnodes)) { sprintf(log_buffer, "node(s) %.450s outside allowed list %.450s", badnodes, nodes); DBPRT(("%s: %s\n", id, log_buffer)) free(badnodes); return(TRUE); } else return (FALSE);}#endif /* IBM_SP2 *//* * Declare start of polling loop. * * Until the next call to mom_get_sample, all mom_over_limit calls will * use the same data. Returns a PBS error code. 
*/int mom_get_sample(){ char *id = "mom_get_sample"; struct procsinfo *pp; int num, addnum; pid_t pid; DBPRT(("%s: entered\n", id)) addnum = proctot; nproc = 0; pid = 0; pp = proc_tbl; while ((num = getprocs(pp, sizeof(struct procsinfo), NULL, sizeof(struct fdsinfo), &pid, addnum)) > 0) { DBPRT(("%s: loop start: got %d\n", id, num)) nproc += num; if (num < addnum) break; proctot += ASIZE; addnum = ASIZE; proc_tbl = realloc(proc_tbl, proctot*sizeof(struct procsinfo)); pp = &proc_tbl[nproc]; } if (num == -1) { log_err(errno, id, "getprocs"); return PBSE_SYSTEM; } DBPRT(("%s: nproc = %d\n", id, nproc))#if IBM_SP2==1 if (getjobstat() == -1) return PBSE_SYSTEM;#endif /* IBM_SP2 */ return (PBSE_NONE);}/* * Measure job resource usage and compare with its limits. * * If it has exceeded any well-formed polled limit return TRUE. * Otherwise, return FALSE. */int mom_over_limit(pjob) job *pjob;{ char *id = "mom_over_limit"; char *pname; int retval; unsigned long value, num; resource *pres; assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); DBPRT(("%s: entered\n", id))#if IBM_SP2==1 if (nodes_ses(pjob)) return (TRUE);#endif /* IBM_SP2 */ pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "cput") == 0) { retval = gettime(pres, &value); if (retval != PBSE_NONE) continue; if ((num = cput_sum(pjob)) > value) { sprintf(log_buffer, "cput %lu exceeded limit %lu", num, value); return (TRUE); } } else if (strcmp(pname, "vmem") == 0) { retval = getsize(pres, &value); if (retval != PBSE_NONE) continue; if ((num = mem_sum(pjob)) > value) { sprintf(log_buffer, "vmem %lu exceeded limit %lu", num, value); return (TRUE); } } else if (strcmp(pname, "pvmem") == 0) { retval = getsize(pres, &value); if (retval != PBSE_NONE) 
continue; if (overmem_proc(pjob, value)) { sprintf(log_buffer, "pvmem exceeded limit %lu", value); return (TRUE); } } else if (strcmp(pname, "walltime") == 0) { retval = gettime(pres, &value); if (retval != PBSE_NONE) continue; num = (unsigned long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor); if (num > value) { sprintf(log_buffer, "walltime %d exceeded limit %d", num, value); return (TRUE); } } } return (FALSE);}/* * Update the job attribute for resources used. * * The first time this is called for a job, set up resource entries for * each resource that can be reported for this machine. Fill in the * correct values. Return an error code. * * Assumes that the session ID attribute has already been set. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -