📄 mom_mach.c
字号:
rlim64_t resisize, resisub; int num, i, j; int inproc = 0; prpsinfo_t *pi; prmap_sgi_t *mp; u_long lastseg, nbps; DBPRT(("%s: entered pagesize %d\n", id, pagesize)) resisize = 0; if (pjob->ji_globid == NULL) return (resisize); lastseg = 99999; nbps = (pagesize / sizeof(uint_t)) * pagesize; /* sysmacros.h says "4Meg" ...hmmm */ for (i=0; i<nproc; i++) { pi = &proc_array[i].info; if (!injob(pjob, &proc_array[i].procash)) if (!inproc) continue; else break; DBPRT(("%s: %s(%d:%d) rss %llu (%lu pages)\n", id, pi->pr_fname, pi->pr_sid, pi->pr_pid, (rlim64_t)((rlim64_t)pi->pr_rssize*(rlim64_t)pagesize), pi->pr_rssize)) resisub = 0; num = proc_array[i].map_num; mp = proc_array[i].map; for (j=0; j<num; j++, mp++) { u_long cnt = mp->pr_mflags >> MA_REFCNT_SHIFT; u_long end = (u_long)mp->pr_vaddr + mp->pr_size - 1; u_long seg1 = (u_long)mp->pr_vaddr / nbps; u_long seg2 = end / nbps; rlim64_t numseg = seg2 - seg1; if (lastseg != seg2) numseg++; lastseg = seg2; numseg = numseg*pagesize/cnt; numseg += mp->pr_wsize*pagesize/MA_WSIZE_FRAC/cnt; resisub += numseg; DBPRT(("%s: %d\t%lluk\t%lluk\n", id, j, numseg/1024, resisub/1024)) } resisize += resisub; DBPRT(("%s: %s subtotal rss %llu\n", id, pi->pr_fname, resisub)) } DBPRT(("%s: total rss %llu\n\n", id, resisize)) return (resisize);}#else /* COMPLEX_MEM_CALC==0 *//* * Internal session mem (workingset) size function. SIMPLE CALC VERSION * * Returns in a 64 bit intege the number of bytes used by session */static rlim64_t resi_sum(pjob) job *pjob;{ static char id[] = "resi_sum"; int i; int inproc = 0; rlim64_t resisize, resisub; prpsinfo_t *pi; DBPRT(("%s: entered pagesize %d\n", id, pagesize)) resisize = 0; for (i=0; i<nproc; i++) { pi = &proc_array[i].info; if (!injob(pjob, &proc_array[i].procash)) if (!inproc) continue; else break; DBPRT(("%s: %s(%d:%d) rss %llu (%lu pages)\n", id, pi->pr_fname, pi->pr_sid, pi->pr_pid, (rlim64_t)((rlim64_t)pi->pr_rssize*(rlim64_t)pagesize), pi->pr_rssize)) resisize += (rlim64_t)((rlim64_t)pagesize * pi->pr_rssize); } DBPRT(("%s: total rss %llu\n\n", id, resisize)) return (resisize);}#endif /* COMPLEX_MEM_CALC *//* * Return TRUE if any process in the session is over limit for memory usage. */static int overmem_proc(pjob, limit) job *pjob; rlim64_t limit;{ int i; int inproc = 0; prpsinfo_t *pi; if (pjob->ji_globid == NULL) return (FALSE); for (i=0; i<nproc; i++) { pi = &proc_array[i].info; if (!injob(pjob, &proc_array[i].procash)) if (!inproc) continue; else break; if ((rlim64_t)((rlim64_t)pagesize * pi->pr_size) > limit) return (TRUE); } return (FALSE);}extern char *msg_momsetlim;/* * Internal error routine */int error(string, value) char *string; int value;{ int i = 0; char *message; assert(string != NULL); assert(*string != '\0'); assert(value > PBSE_); /* minimum PBS error number */ assert(value <= PBSE_NOSYNCMSTR); /* maximum PBS error number */ assert(pbs_err_to_txt[i].err_no != 0); do { if (pbs_err_to_txt[i].err_no == value) break; } while (pbs_err_to_txt[++i].err_no != 0); assert(pbs_err_to_txt[i].err_txt != NULL); message = *pbs_err_to_txt[i].err_txt; assert(message != NULL); assert(*message != '\0'); (void)fprintf(stderr, msg_momsetlim, string, message); (void)fflush(stderr); return (value);}/* * Establish system-enforced limits for the job. * * Run through the resource list, checking the values for all items * we recognize. * * If set_mode is SET_LIMIT_SET, then also set hard limits for the * system enforced limits (not-polled). * If anything goes wrong with the process, return a PBS error code * and print a message on standard error. A zero-length resource list * is not an error. * * If set_mode is SET_LIMIT_SET the entry conditions are: * 1. MOM has already forked, and we are called from the child. * 2. The child is still running as root. * 3. Standard error is open to the user's file. * * If set_mode is SET_LIMIT_ALTER, we are beening called to modify * existing limits. Cannot alter those set by setrlimit (kernel) * because we are the wrong process. */int mom_set_limits(pjob, set_mode) job *pjob; int set_mode; /* SET_LIMIT_SET or SET_LIMIT_ALTER */{ char *id = "mom_set_limits"; char *pname; int retval; rlim64_t sizeval; /* place to build 64 bit value */ unsigned long value; /* place in which to build resource value */ resource *pres; struct rlimit64 res64lim; rlim64_t mem_limit = 0;#if NODEMASK != 0 __uint64_t rvalue; __uint64_t nodemask;#endif /* NODEMASK */ DBPRT(("%s: entered\n", id)) assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);/* * Cycle through all the resource specifications, * setting limits appropriately. */ while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "ncpus") == 0) { char hold[16]; extern struct var_table vtable; retval = getlong(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); sprintf(hold, "%d", (int)pres->rs_value.at_val.at_long); bld_env_variables(&vtable, "NCPUS", hold); } else if (strcmp(pname, "cput") == 0) { /* check */ /* cpu time - check, if less than pcput use it */ retval = getlong(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); } else if (strcmp(pname, "pcput") == 0) { /* process cpu time - set */ retval = getlong(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); res64lim.rlim_cur = res64lim.rlim_max = (unsigned long)((double)value / cputfactor); if (setrlimit64(RLIMIT_CPU, &res64lim) < 0) return (error("RLIMIT_CPU", PBSE_SYSTEM)); } else if (strcmp(pname, "file") == 0) { /* set */ if (set_mode == SET_LIMIT_SET) { retval = getsize(pres, &sizeval); if (retval != PBSE_NONE) return (error(pname, retval)); res64lim.rlim_cur = res64lim.rlim_max = sizeval; if (setrlimit64(RLIMIT_FSIZE, &res64lim) < 0) return (error(pname, PBSE_SYSTEM)); } } else if (strcmp(pname, "vmem") == 0) { /* check */ retval = getsize(pres, &sizeval); if (retval != PBSE_NONE) return (error(pname, retval)); if ((mem_limit == 0) || (sizeval < mem_limit)) mem_limit = sizeval; } else if (strcmp(pname, "pvmem") == 0) { /* set */ retval = getsize(pres, &sizeval); if (retval != PBSE_NONE) return (error(pname, retval)); if ((mem_limit == 0) || (sizeval < mem_limit)) mem_limit = sizeval; } else if (strcmp(pname, "mem") == 0) { /* ignore */ } else if (strcmp(pname, "pmem") == 0) { /* set */ if (set_mode == SET_LIMIT_SET) { retval = getsize(pres, &sizeval); if (retval != PBSE_NONE) return (error(pname, retval)); res64lim.rlim_cur = res64lim.rlim_max = sizeval; if (setrlimit64(RLIMIT_RSS, &res64lim) < 0) return (error("RLIMIT_RSS", PBSE_SYSTEM)); } } else if (strcmp(pname, "walltime") == 0) { /* Check */ retval = getlong(pres, &value); if (retval != PBSE_NONE) return (error(pname, retval)); } else if (strcmp(pname, "nice") == 0) { /* set nice */ if (set_mode == SET_LIMIT_SET) { errno = 0; if ((nice((int)pres->rs_value.at_val.at_long) == -1) && (errno != 0)) return (error(pname, PBSE_BADATVAL)); }#if NODEMASK != 0 } else if (strcmp(pname, "nodemask") == 0) { /* set nodemask */ /* call special node mask function */ nodemask = pres->rs_value.at_val.at_ll; rvalue = (__uint64_t)pmoctl(61, nodemask,0); if (rvalue != nodemask) { (void)sprintf(log_buffer, "Tried to set node mask to 0x%0llx, was set to 0x%0llx", nodemask, rvalue); LOG_EVENT(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); }#endif /* NODEMASK */ } else if ((pres->rs_defin->rs_flags & ATR_DFLAG_RMOMIG) == 0) /* don't recognize and not marked as ignore by mom */ return (error(pname, PBSE_UNKRESC)); pres = (resource *)GET_NEXT(pres->rs_link); } if (set_mode == SET_LIMIT_SET) { /* if either of vmem or pvmem was given, set sys limit to lesser */ if (mem_limit != 0) { res64lim.rlim_cur = res64lim.rlim_max= mem_limit; if (setrlimit64(RLIMIT_VMEM, &res64lim) < 0) return (error("RLIMIT_VMEM", PBSE_SYSTEM)); } } return (PBSE_NONE);}/* * State whether MOM main loop has to poll this job to determine if some * limits are being exceeded. * * Sets flag TRUE if polling is necessary, FALSE otherwise. Actual * polling is done using the mom_over_limit machine-dependent function. */int mom_do_poll(pjob) job *pjob;{ char *id = "mom_do_poll"; char *pname; resource *pres; DBPRT(("%s: entered\n", id)) assert(pjob != NULL); assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC); pres = (resource *) GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "walltime") == 0 || strcmp(pname, "mem") == 0 || strcmp(pname, "ncpus") == 0 || strcmp(pname, "cput") == 0 || strcmp(pname, "pcput") == 0 || strcmp(pname, "pvmem") == 0 || strcmp(pname, "vmem") == 0) return (TRUE); pres = (resource *)GET_NEXT(pres->rs_link); } return (FALSE);}/* * Setup for polling. * * Open kernel device and get namelist info. * Also open sgi project files. */int mom_open_poll(){ char *id = "mom_open_poll"; extern int open_sgi_proj(); DBPRT(("%s: entered\n", id)) pagesize = getpagesize(); proc_array = (struct proc_info *)calloc(TBL_INC, sizeof(struct proc_info)); if (proc_array == NULL) { log_err(errno, id, "malloc"); return (PBSE_SYSTEM); } max_proc = TBL_INC; return (open_sgi_proj());}/* * Declare start of polling loop. * * for each job, obtain ASH from task tables * then obtain list of pids in each ASH in turn * open and process /proc/(pid) */int mom_get_sample(){ static char id[] = "mom_get_sample"; int fd; struct dirent *dent; char procname[100]; int np; int num; int mapsize; time_t currtime; prmap_sgi_arg_t maparg; struct proc_info *pi; prmap_sgi_t map[MAPNUM]; job *pjob; aspidlist_t *taskpids = 0; ash_t ash; extern aserror_t aserrorcode; extern list_head svr_alljobs; DBPRT(("%s: entered pagesize %d\n", id, pagesize)) rewinddir(pdir); nproc = 0; pi = proc_array; mapsize = sizeof(prmap_sgi_t) * MAPNUM; maparg.pr_size = mapsize; currtime = time(0); for (pjob = (job *)GET_NEXT(svr_alljobs); pjob; pjob = (job *)GET_NEXT(pjob->ji_alljobs)) { if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) continue; if (pjob->ji_globid == NULL) continue; sscanf(pjob->ji_globid, "%llx", &ash); DBPRT(("%s: looking at job %s ASH %llx\n", id, pjob->ji_qs.ji_jobid, ash)) taskpids = aspidsinash_local(ash); if (taskpids == NULL) { sprintf(log_buffer, "no pids in ash %lld for job %s", ash, pjob->ji_qs.ji_jobid); log_err(aserrorcode, id, log_buffer); continue; } for (np=0; np < taskpids->numpids; ++np, (void)close(fd)) { DBPRT(("%s:\t\t pid %d\n", id, taskpids->pids[np])) sprintf(procname, "%s/%d", procfs, taskpids->pids[np]); if ((fd = open(procname, O_RDONLY)) == -1) continue; if (ioctl(fd, PIOCPSINFO, &pi->info) == -1) { if (errno != ENOENT) { sprintf(log_buffer, "%s: ioctl(PIOCPSINFO)", procname); log_err(errno, id, log_buffer); } continue; }#if COMPLEX_MEM_CALC==1 if (pi->map) { free(pi->map); /* free any old space */ pi->map = NULL; } pi->map_num = 0; maparg.pr_vaddr = (caddr_t)map; if ((num = ioctl(fd, PIOCMAP_SGI, &maparg)) == -1) { if (errno != ENOENT) log_err(errno, id, "ioctl(PIOCMAP_SGI)"); free(map); continue; } if (num > 0) { size_t nb = sizeof(prmap_sgi_t) * num; assert(num < MAPNUM); pi->map = (prmap_sgi_t *) malloc(nb); memcpy(pi->map, map, nb); pi->map_num = num; }#endif /* COMPLEX_MEM_CALC */ /* save the ASH to which the proc belongs */ pi->procash = ash; if (++nproc == max_proc) { struct proc_info *hold; DBPRT(("%s: alloc more table space %d\n", id, nproc)) max_proc *= 2; hold = (struct proc_info *)realloc(proc_array, max_proc*sizeof(struct proc_info)); assert(hold != NULL); proc_array = hold; memset(&proc_array[nproc], '\0', sizeof(struct proc_info) * (max_proc >> 1)); } pi = &proc_array[nproc]; } if (taskpids != NULL) asfreepidlist(taskpids, 0); } sampletime = time(0); if ((sampletime - currtime) > 5) { sprintf(log_buffer, "time lag %d secs", sampletime-currtime); log_err(-1, id, log_buffer); return PBSE_SYSTEM; } return (PBSE_NONE);}/* * Measure job resource usage and compare with its limits.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -