📄 mom_mach.c
字号:
if (lastseg != seg2) numseg++; lastseg = seg2; numseg = numseg*pagesize/cnt; numseg += mp->pr_wsize*pagesize/MA_WSIZE_FRAC/cnt; resisub += numseg; DBPRT(("%s: %d\t%luk\t%luk\n", id, i, numseg/1024, resisub/1024)) } resisize += resisub; DBPRT(("%s: %s subtotal rss %lu\n", id, pi.pr_fname, resisub)) } DBPRT(("%s: total rss %lu\n\n", id, resisize)) return (resisize);}/* * Return TRUE if any process in the session is over limit for memory usage. */static int overmem_proc(phead, limit) list_head *phead; unsigned long limit;{ char *id = "overmem_proc"; int fd; char procname[100]; struct dirent *dent; prpsinfo_t pi; rewinddir(pdir); for (fd = -1; (dent = readdir(pdir)) != NULL; close(fd)) { if (!isdigit(dent->d_name[0])) continue; sprintf(procname, procfmts, dent->d_name); if ((fd = open(procname, O_RDONLY)) == -1) continue; if (ioctl(fd, PIOCPSINFO, &pi) == -1) { if (errno != ENOENT) { sprintf(log_buffer, "%s: ioctl(PIOCPSINFO)", procname); log_err(errno, id, log_buffer); } continue; } if (!injob(phead, &pi)) continue; if (pagesize * pi.pr_size > limit) return (TRUE); } return (FALSE);}extern char *msg_momsetlim;/* * Internal error routine */int error(string, value) char *string; int value;{ int i = 0; char *message; assert(string != NULL); assert(*string != '\0'); assert(value > PBSE_); /* minimum PBS error number */ assert(value <= PBSE_NOSYNCMSTR); /* maximum PBS error number */ assert(pbs_err_to_txt[i].err_no != 0); do { if (pbs_err_to_txt[i].err_no == value) break; } while (pbs_err_to_txt[++i].err_no != 0); assert(pbs_err_to_txt[i].err_txt != NULL); message = *pbs_err_to_txt[i].err_txt; assert(message != NULL); assert(*message != '\0'); (void)fprintf(stderr, msg_momsetlim, string, message); (void)fflush(stderr); return (value);}/* * Establish system-enforced limits for the job. * * Run through the resource list, checking the values for all items * we recognize. * * If set_mode is SET_LIMIT_SET, then also set hard limits for the * system enforced limits (not-polled). * If anything goes wrong with the process, return a PBS error code * and print a message on standard error. A zero-length resource list * is not an error. * * If set_mode is SET_LIMIT_SET the entry conditions are: * 1. MOM has already forked, and we are called from the child. * 2. The child is still running as root. * 3. Standard error is open to the user's file. * * If set_mode is SET_LIMIT_ALTER, we are beening called to modify * existing limits. Cannot alter those set by setrlimit (kernel) * because we are the wrong process. */int mom_set_limits(pjob, set_mode) job *pjob; int set_mode; /* SET_LIMIT_SET or SET_LIMIT_ALTER */{ char *id = "mom_set_limits"; char *pname; int retval; unsigned long value; /* place in which to build resource value */ resource *pres; struct rlimit reslim; unsigned long mem_limit = 0; DBPRT(("%s: entered\n", id)) assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);/* * Cycle through all the resource specifications, * setting limits appropriately. */ while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "cput") == 0) { /* cpu time - check, if less than pcput use it */ retval = gettime(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); } else if (strcmp(pname, "pcput") == 0) { /* process cpu time - set */ retval = gettime(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); reslim.rlim_cur = reslim.rlim_max = (unsigned long)((double)value / cputfactor); if (setrlimit(RLIMIT_CPU, &reslim) < 0) return (error("RLIMIT_CPU", PBSE_SYSTEM)); } else if (strcmp(pname, "file") == 0) { /* set */ if (set_mode == SET_LIMIT_SET) { retval = getsize(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); if (value > INT_MAX) return (error(pname, PBSE_BADATVAL)); reslim.rlim_cur = reslim.rlim_max = value; if (setrlimit(RLIMIT_FSIZE, &reslim) < 0) return (error(pname, PBSE_SYSTEM)); } } else if (strcmp(pname, "vmem") == 0) { /* check */ retval = getsize(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); if ((mem_limit == 0) || (value < mem_limit)) mem_limit = value; } else if (strcmp(pname, "pvmem") == 0) { /* set */ retval = getsize(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); if (value > INT_MAX) return (error(pname, PBSE_BADATVAL)); if ((mem_limit == 0) || (value < mem_limit)) mem_limit = value; } else if (strcmp(pname, "mem") == 0) { /* ignore */ } else if (strcmp(pname, "pmem") == 0) { /* set */ if (set_mode == SET_LIMIT_SET) { retval = getsize(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); reslim.rlim_cur = reslim.rlim_max = value; if (setrlimit(RLIMIT_RSS, &reslim) < 0) return (error("RLIMIT_RSS", PBSE_SYSTEM)); } } else if (strcmp(pname, "walltime") == 0) { /* Check */ retval = gettime(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); } else if (strcmp(pname, "nice") == 0) { /* set nice */ if (set_mode == SET_LIMIT_SET) { errno = 0; if ((nice((int)pres->rs_value.at_val.at_long) == -1) && (errno != 0)) return (error(pname, PBSE_BADATVAL)); } } else if ((pres->rs_defin->rs_flags & ATR_DFLAG_RMOMIG) == 0) /* don't recognize and not marked as ignore by mom */ return (error(pname, PBSE_UNKRESC)); pres = (resource *)GET_NEXT(pres->rs_link); } if (set_mode == SET_LIMIT_SET) { /* if either of vmem or pvmem was given, set sys limit to lesser */ if (mem_limit != 0) { reslim.rlim_cur = reslim.rlim_max = mem_limit; if (setrlimit(RLIMIT_DATA, &reslim) < 0) return (error("RLIMIT_DATA", PBSE_SYSTEM)); if (setrlimit(RLIMIT_STACK, &reslim) < 0) return (error("RLIMIT_STACK", PBSE_SYSTEM)); } } return (PBSE_NONE);}/* * State whether MOM main loop has to poll this job to determine if some * limits are being exceeded. * * Sets flag TRUE if polling is necessary, FALSE otherwise. Actual * polling is done using the mom_over_limit machine-dependent function. */int mom_do_poll(pjob) job *pjob;{ char *id = "mom_do_poll"; char *pname; resource *pres; DBPRT(("%s: entered\n", id)) assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "walltime") == 0 || strcmp(pname, "cput") == 0 || strcmp(pname, "pcput") == 0 || strcmp(pname, "pvmem") == 0 || strcmp(pname, "vmem") == 0) return (TRUE); pres = (resource *)GET_NEXT(pres->rs_link); } return (FALSE);}/* * Setup for polling. * * Open kernel device and get namelist info. */int mom_open_poll(){ char *id = "mom_open_poll"; DBPRT(("%s: entered\n", id)) pagesize = getpagesize(); return (PBSE_NONE);}/* * Declare start of polling loop. * * A no-op on the SGI. */int mom_get_sample(){ return (PBSE_NONE);}/* * Measure job resource usage and compare with its limits. * * If it has exceeded any well-formed polled limit return TRUE. * Otherwise, return FALSE. */int mom_over_limit(pjob) job *pjob;{ char *id = "mom_over_limit"; char *pname; int retval; unsigned long value, num; resource *pres; assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "cput") == 0) { retval = gettime(pres, &value); if (retval != PBSE_NONE) continue; if ((num = cput_sum(&pjob->ji_tasks)) > value) { sprintf(log_buffer, "cput %lu exceeded limit %lu", num, value); return (TRUE); } } else if (strcmp(pname, "pcput") == 0) { retval = gettime(pres, &value); if (retval != PBSE_NONE) continue; if (overcpu_proc(&pjob->ji_tasks, value)) { sprintf(log_buffer, "pcput exceeded limit %lu", value); return (TRUE); } } else if (strcmp(pname, "vmem") == 0) { retval = getsize(pres, &value); if (retval != PBSE_NONE) continue; if ((num = mem_sum(&pjob->ji_tasks)) > value) { sprintf(log_buffer, "vmem %lu exceeded limit %lu", num, value); return (TRUE); } } else if (strcmp(pname, "pvmem") == 0) { retval = getsize(pres, &value); if (retval != PBSE_NONE) continue; if (overmem_proc(&pjob->ji_tasks, value)) { sprintf(log_buffer, "pvmem exceeded limit %lu", value); return (TRUE); } } else if (strcmp(pname, "walltime") == 0) { retval = gettime(pres, &value); if (retval != PBSE_NONE) continue; num = (unsigned long)((double)value * wallfactor); if (num > value) { sprintf(log_buffer, "walltime %d exceeded limit %d", num, value); return (TRUE); } } } return (FALSE);}/* * Update the job attribute for resources used. * * The first time this is called for a job, set up resource entries for * each resource that can be reported for this machine. Fill in the * correct values. Return an error code. */int mom_set_use(pjob) job *pjob;{ char *id = "mom_set_use"; resource *pres; attribute *at; resource_def *rd; unsigned long *lp, lnum; assert(pjob != NULL); at = &pjob->ji_wattr[(int)JOB_ATR_resc_used]; assert(at->at_type == ATR_TYPE_RESC); at->at_flags |= ATR_VFLAG_MODIFY; if ((at->at_flags & ATR_VFLAG_SET) == 0) { at->at_flags |= ATR_VFLAG_SET; rd = find_resc_def(svr_resc_def, "cput", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_LONG; rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_SIZE; pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */ pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ; rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_LONG; rd = find_resc_def(svr_resc_def, "mem", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_SIZE; pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */ pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ; } rd = find_resc_def(svr_resc_def, "cput", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = (unsigned long *)&pres->rs_value.at_val.at_long; lnum = cput_sum(&pjob->ji_tasks); *lp = MAX(*lp, lnum); rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = &pres->rs_value.at_val.at_size.atsv_num; lnum = (mem_sum(&pjob->ji_tasks) + 1023) >> 10; /* in KB */ *lp = MAX(*lp, lnum); rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); pres->rs_value.at_val.at_long = (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor); rd = find_resc_def(svr_resc_def, "mem", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = &pres->rs_value.at_val.at_size.atsv_num; lnum = (resi_sum(&pjob->ji_tasks) + 1023) >> 10; /* in KB */ *lp = MAX(*lp, lnum); return (PBSE_NONE);}/* * Kill a task session. * Call with the task pointer and a signal number. */int kill_task(ptask, sig) task *ptask; int sig;{ char *id = "kill_task"; int ct = 0; int fd; char procname[100]; struct dirent *dent; prpsinfo_t pi; int sesid; sesid = ptask->ti_qs.ti_sid; if (sesid <= 1) return 0; rewinddir(pdir); for (fd = -1; (dent = readdir(pdir)) != NULL; close(fd)) { if (!isdigit(dent->d_name[0])) continue; sprintf(procname, procfmts, dent->d_name); if ((fd = open(procname, O_RDONLY)) == -1) continue; if (ioctl(fd, PIOCPSINFO, &pi) == -1) { if (errno != ENOENT) { sprintf(log_buffer, "%s: ioctl(PIOCPSINFO)", procname); log_err(errno, id, log_buffer); } continue; } if (sesid == pi.pr_sid) { (void)kill(pi.pr_pid, sig); ++ct; } } return ct;}/* * Clean up everything related to polling. */int mom_close_poll(){ char *id = "mom_close_poll"; DBPRT(("%s: entered\n", id)) if (pdir) { if (closedir(pdir) != 0) { log_err(errno, id, "closedir"); return (PBSE_SYSTEM); } } return (PBSE_NONE);}/* * mom_does_chkpnt - return 1 if mom supports checkpoint * 0 if not */int mom_does_chkpnt(){ return (0);}/* * Checkpoint the job. * * If abort is true, kill it too. */int mach_checkpoint(ptask, file, abort) task *ptask; char *file; int abort;{ return (-1);}/* * Restart the job from the checkpoint file. * * Return a -1 on error or sid. */long mach_restart(ptask, file)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -