📄 mom_mach.c
字号:
GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list); while (pres != NULL) { assert(pres->rs_defin != NULL); pname = pres->rs_defin->rs_name; assert(pname != NULL); assert(*pname != '\0'); if (strcmp(pname, "walltime") == 0 || strcmp(pname, "cput") == 0 || strcmp(pname, "pcput") == 0 || strcmp(pname, "vmem") == 0) return (TRUE); pres = (resource *)GET_NEXT(pres->rs_link); } return (FALSE);}/* * Setup for polling. * * Open kernel device and get namelist info. */int mom_open_poll(){ char *id = "mom_open_poll"; DBPRT(("%s: entered\n", id)) proc_info = (psinfo_t *)malloc(sizeof(psinfo_t) * TBL_INC); if (proc_info == NULL) { log_err(errno, id, "malloc"); return (PBSE_SYSTEM); } max_proc = TBL_INC; return (PBSE_NONE);}/* * Declare start of polling loop. */int mom_get_sample(){ static char id[] = "mom_get_sample"; int fd; struct dirent *dent; char procname[256]; psinfo_t *pi; DBPRT(("%s: entered\n", id)) rewinddir(pdir); pi = proc_info; nproc = 0; for (fd = -1; (dent = readdir(pdir)) != NULL; close(fd)) { if (!isdigit(dent->d_name[0])) continue; sprintf(procname, procfmts, dent->d_name); if ((fd = open(procname, O_RDONLY)) == -1) continue; if (read(fd, pi, sizeof(psinfo_t)) != sizeof(psinfo_t)) { sprintf(log_buffer, "read of %s failed", procname); log_err(errno, id, log_buffer); continue; } if (++nproc == max_proc) { void *hold; DBPRT(("%s: alloc more proc table space %d\n",id,nproc)) max_proc += TBL_INC; hold = realloc((void *)proc_info, max_proc*sizeof(psinfo_t)); assert(hold != NULL); proc_info = (psinfo_t *)hold; } pi = &proc_info[nproc]; } return (PBSE_NONE);}/* * Measure job resource usage and compare with its limits. * * If it has exceeded any well-formed polled limit return TRUE. * Otherwise, return FALSE. 
*/
/*
 * mom_over_limit walks the resource list held in the job's
 * JOB_ATR_resource attribute and checks four limits by name:
 *
 *	"cput"		cput_sum(pjob) against the decoded time limit
 *	"pcput"		overcput_proc(pjob, limit)
 *	"vmem"		mem_sum(pjob) against the decoded size limit
 *	"walltime"	(time_now - job start time) * wallfactor
 *
 * A limit whose value cannot be decoded (local_gettime/local_getsize
 * returns something other than PBSE_NONE) is skipped.  Resources with any
 * other name are ignored.
 *
 * pjob:   the job to check; must not be NULL.
 * return: TRUE as soon as one limit is exceeded, with a description of
 *	   the violation left in log_buffer; FALSE otherwise.
 */
int
mom_over_limit(pjob)
    job			*pjob;
{
	char		*id = "mom_over_limit";
	char		*pname;
	int		retval;
	unsigned long	value, num;
	resource	*pres;

	assert(pjob != NULL);
	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);
	pres = (resource *)
	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);

	DBPRT(("%s: entered\n", id))

	for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) {
		assert(pres->rs_defin != NULL);
		pname = pres->rs_defin->rs_name;
		assert(pname != NULL);
		assert(*pname != '\0');

		if (strcmp(pname, "cput") == 0) {
			retval = local_gettime(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			if ((num = cput_sum(pjob)) > value) {
				sprintf(log_buffer,
					"cput %lu exceeded limit %lu",
					num, value);
				return (TRUE);
			}
		}
		else if (strcmp(pname, "pcput") == 0) {
			retval = local_gettime(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			if (overcput_proc(pjob, value)) {
				sprintf(log_buffer,
					"pcput exceeded limit %lu",
					value);
				return (TRUE);
			}
		}
		else if (strcmp(pname, "vmem") == 0) {
			retval = local_getsize(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			if ((num = mem_sum(pjob)) > value) {
				sprintf(log_buffer,
					"vmem %lu exceeded limit %lu",
					num, value);
				return (TRUE);
			}
		}
		else if (strcmp(pname, "walltime") == 0) {
			retval = local_gettime(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			num = (unsigned long)((double)(time_now -
				pjob->ji_qs.ji_stime) * wallfactor);
			if (num > value) {
				/* both operands are unsigned long: %lu, not %d */
				sprintf(log_buffer,
					"walltime %lu exceeded limit %lu",
					num, value);
				return (TRUE);
			}
		}
	}

	return (FALSE);
}

/*
 * Update the job attribute for resources used.
 *
 * The first time this is called for a job, set up resource entries for
 * each resource that can be reported for this machine.  Fill in the
 * correct values.  Return an error code.
 *
 * Assumes that the session ID attribute has already been set.
*/int mom_set_use(pjob) job *pjob;{ char *id = "mom_set_use"; resource *pres; attribute *at; resource_def *rd; unsigned long *lp, lnum; DBPRT(("%s for job %s\n",id, pjob->ji_qs.ji_jobid)) assert(pjob != NULL); at = &pjob->ji_wattr[(int)JOB_ATR_resc_used]; assert(at->at_type == ATR_TYPE_RESC); at->at_flags |= ATR_VFLAG_MODIFY; if ((at->at_flags & ATR_VFLAG_SET) == 0) { at->at_flags |= ATR_VFLAG_SET; rd = find_resc_def(svr_resc_def, "cput", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_LONG; rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_SIZE; pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */ pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ; rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_LONG; rd = find_resc_def(svr_resc_def, "mem", svr_resc_size); assert(rd != NULL); pres = add_resource_entry(at, rd); pres->rs_value.at_flags |= ATR_VFLAG_SET; pres->rs_value.at_type = ATR_TYPE_SIZE; pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */ pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ; } rd = find_resc_def(svr_resc_def, "cput", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = (unsigned long *)&pres->rs_value.at_val.at_long; lnum = cput_sum(pjob); *lp = MAX(*lp, lnum); rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = &pres->rs_value.at_val.at_size.atsv_num; lnum = (mem_sum(pjob) + 1023) >> 10; /* KB */ *lp = MAX(*lp, lnum); rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, 
rd); assert(pres != NULL); pres->rs_value.at_val.at_long = (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor); rd = find_resc_def(svr_resc_def, "mem", svr_resc_size); assert(rd != NULL); pres = find_resc_entry(at, rd); assert(pres != NULL); lp = &pres->rs_value.at_val.at_size.atsv_num; lnum = (resi_sum(pjob) + 1023) >> 10; /* in KB */ *lp = MAX(*lp, lnum); return (PBSE_NONE);}/* * bld_ptree - establish links (parent, child, and sibling) for processes * in a given session. * * The PBS_PROC_* macros are defined in resmom/.../mom_mach.h * to refer to the correct machine dependent table. */static int bld_ptree(sid) pid_t sid;{ int i, j; if (Proc_lnks == NULL) { Proc_lnks = (pbs_plinks *)malloc(TBL_INC * sizeof(pbs_plinks)); assert(Proc_lnks != NULL); myproc_max = TBL_INC; } /* * Build links for processes in the session in question. * First, load with the processes in the session. */ myproc_ct = 0; for (i = 0; i < nproc; i++) { if ((int)PBS_PROC_SID(i) == sid) { Proc_lnks[myproc_ct].pl_pid = PBS_PROC_PID(i); Proc_lnks[myproc_ct].pl_ppid = PBS_PROC_PPID(i); Proc_lnks[myproc_ct].pl_parent = -1; Proc_lnks[myproc_ct].pl_sib = -1; Proc_lnks[myproc_ct].pl_child = -1; if (++myproc_ct == myproc_max) { void * hold; myproc_max += TBL_INC; hold = realloc((void *)Proc_lnks, myproc_max*sizeof(pbs_plinks)); assert(hold != NULL); Proc_lnks = (pbs_plinks *)hold; } } } /* Now build the tree for those processes */ for (i = 0; i < myproc_ct; i++) { /* * Find all the children for this process, establish links. 
*/ for (j = 0; j < myproc_ct; j++) { if (j == i) continue; if (Proc_lnks[j].pl_ppid == Proc_lnks[i].pl_pid) { Proc_lnks[j].pl_parent = i; Proc_lnks[j].pl_sib = Proc_lnks[i].pl_child; Proc_lnks[i].pl_child = j; } } } return (myproc_ct); /* number of processes in session */}/* * pid_to_idx - find index into the Proc_lnks table for a given pid * pid: pid * return: index to the table for pid */static int pid_to_idx(pid) pid_t pid;{ int i; for (i = 0; i < myproc_ct; i++) { if (Proc_lnks[i].pl_pid == pid) return (i); } return (-1);}/* * kill_ptree - traverse the process tree, killing the processes as we go * idx: current pid index * flag: traverse order, top down (1) or bottom up (0) * sig: the signal to send */static void kill_ptree(int idx, int flag, int sig){ int child, sib; if (flag) { /* top down */ (void)kill(Proc_lnks[idx].pl_pid, sig); } child = Proc_lnks[idx].pl_child; while (child != -1) { kill_ptree(child, flag, sig); child = Proc_lnks[child].pl_sib; } if (!flag) { /* bottom up */ (void)kill(Proc_lnks[idx].pl_pid, sig); }}/* * Kill a task session. * Call with the task pointer and a signal number. */int kill_task(ptask, sig) task *ptask; int sig;{ char *id = "kill_task"; int ct = 0; int i, sesid; psinfo_t *pi; sesid = ptask->ti_qs.ti_sid; if (sesid <= 1) return 0; (void)mom_get_sample(); for (i=0; i<nproc; i++) { pi = &proc_info[i]; if (sesid == pi->pr_sid) { (void)kill(pi->pr_pid, sig); ++ct; } } return ct;}/* * Clean up everything related to polling. * * In the case of the sun, close the kernal if it is open. */int mom_close_poll(){ char *id = "mom_close_poll"; DBPRT(("%s: entered\n", id)) if (proc_info) free(proc_info); if (pdir) { if (closedir(pdir) != 0) { log_err(errno, id, "closedir"); return (PBSE_SYSTEM); } } return (PBSE_NONE);}/* * mom_does_chkpnt - return 1 if mom supports checkpoint * 0 if not */int mom_does_chkpnt(){ return (0);}/* * Checkpoint the job. * * If abort is true, kill it too. 
*/int mach_checkpoint(ptask, file, abort) task *ptask; char *file; int abort;{ return (-1);}/* * Restart the job from the checkpoint file. * * Return -1 on error or sid if okay. */long mach_restart(ptask,file) task *ptask; char *file;{ return (-1);}/*** Return 1 if proc table can be read, 0 otherwise.*/intgetprocs(){ static unsigned int lastproc = 0; static char id[] = "getprocs"; if (lastproc == reqnum) /* don't need new proc table */ return 1; if (mom_get_sample() != PBSE_NONE) return 0; lastproc = reqnum; return 1;}char *cput_job(jobid)pid_t jobid;{ char *id = "cput_job"; int found = 0; int i; double cputime, addtime; psinfo_t *pi; if (getprocs() == 0) { rm_errno = RM_ERR_SYSTEM; return NULL; } cputime = 0.0; for (i=0; i<nproc; i++) { pi = &proc_info[i]; if (jobid != pi->pr_sid) continue; found = 1; addtime = (double)(tv(pi->pr_time) + tv(pi->pr_ctime)); cputime += addtime; DBPRT(("%s: total %.2f pid %d %.2f\n", id, cputime, pi->pr_pid, addtime)) } if (found) { sprintf(ret_string, "%.2f", cputime * cputfactor); return ret_string; } rm_errno = RM_ERR_EXIST; return NULL;}char *cput_proc(pid)pid_t pid;{ char *id = "cput_pid"; double cputime; int i; psinfo_t *pi; if (getprocs() == 0) { rm_errno = RM_ERR_SYSTEM; return NULL; } for (i=0; i<nproc; i++) { pi = &proc_info[i]; if (pi->pr_pid == pid) break; } if (i == nproc) { rm_errno = RM_ERR_EXIST; return NULL; } cputime = (double)(tv(pi->pr_time) + tv(pi->pr_ctime)); sprintf(ret_string, "%.2f", cputime * cputfactor); return ret_string;}char *cput(attrib)struct rm_attribute *attrib;{ char *id = "cput"; int value; if (attrib == NULL) { log_err(-1, id, no_parm); rm_errno = RM_ERR_NOPARAM; return NULL; } if ((value = atoi(attrib->a_value)) == 0) { sprintf(log_buffer, "bad param: %s", attrib->a_value); log_err(-1, id, log_buffer); rm_errno = RM_ERR_BADPARAM; return NULL; } if (momgetattr(NULL)) { log_err(-1, id, extra_parm); rm_errno = RM_ERR_BADPARAM; return NULL; } if (strcmp(attrib->a_qualifier, "session") == 0) return 
(cput_job((pid_t)value)); else if (strcmp(attrib->a_qualifier, "proc") == 0) return (cput_proc((pid_t)value)); else { rm_errno = RM_ERR_BADPARAM; return NULL; }}char *mem_job(jobid)pid_t jobid;{ char *id = "mem_job"; size_t memsize; int i; int found = 0; psinfo_t *pi; if (getprocs() == 0) { rm_errno = RM_ERR_SYSTEM; return NULL; } memsize = 0; for (i=0; i<nproc; i++) { pi = &proc_info[i]; if (jobid != pi->pr_sid) continue; found = 1; memsize += pi->pr_size; DBPRT(("%s: total %dkb pid %d %dkb\n", id, memsize, pi->pr_pid, pi->pr_size)) } if (found) { sprintf(ret_string, "%lukb",memsize); /* KB */ return ret_string; } rm_errno = RM_ERR_EXIST; return NULL;}char *mem_proc(pid)pid_t pid;{ char *id = "mem_proc"; psinfo_t *pi; int i; if (getprocs() == 0) { rm_errno = RM_ERR_SYSTEM; return NULL; } for (i=0; i<nproc; i++) { pi = &proc_info[i]; if (pi->pr_pid == pid) break; } if (i == nproc) { rm_errno = RM_ERR_EXIST; return NULL; } sprintf(ret_string, "%ukb", pi->pr_size); /* KB */ return ret_string;}char *mem(attrib)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -