⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mom_mach.c

📁 openPBS的开放源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
 *	    2.	The child is still running as root. *	    3.  Standard error is open to the user's file. * *	If set_mode is SET_LIMIT_ALTER, we are beening called to modify *	existing limits.  Cannot alter those set by setrlimit (kernel) *	because we are the wrong process.   */int mom_set_limits(pjob, set_mode)    job			*pjob;    int			 set_mode;	/* SET_LIMIT_SET or SET_LIMIT_ALTER */{	char		*id = "mom_set_limits";	char		*pname;	int		retval;	unsigned long	value;	/* place in which to build resource value */	resource	*pres;       	struct rlimit	reslim;	DBPRT(("%s: entered\n", id))	assert(pjob != NULL);	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);	pres = (resource *)	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);/* * Cycle through all the resource specifications, * setting limits appropriately (SET_LIMIT_SET). */	while (pres != NULL) {		assert(pres->rs_defin != NULL);		pname = pres->rs_defin->rs_name;		assert(pname != NULL);		assert(*pname != '\0');		if (strcmp(pname, "cput") == 0) {	/* cpu time - check */			retval = gettime(pres, &value);			if (retval != PBSE_NONE)			        return (error(pname, retval));		} else if (strcmp(pname, "pcput") == 0) {			/* process cpu time - set */			if (set_mode == SET_LIMIT_SET)  {			    retval = gettime(pres, &value);			    if (retval != PBSE_NONE)			        return (error(pname, retval));			    assert(value <= INT_MAX);			    reslim.rlim_cur = reslim.rlim_max = 				(unsigned long)((double)value / cputfactor);			    if (setrlimit(RLIMIT_CPU, &reslim) < 0)	        		return (error("RLIMIT_CPU", PBSE_SYSTEM));			}		} else if (strcmp(pname, "file") == 0) {	/* set */			if (set_mode == SET_LIMIT_SET)  {			    retval = getsize(pres, &value);			    if (retval != PBSE_NONE)			        return (error(pname, retval));			    if (value > INT_MAX)			        return (error(pname, PBSE_BADATVAL));			    reslim.rlim_cur = reslim.rlim_max = value;			    if (setrlimit(RLIMIT_FSIZE, &reslim) < 0)			        return 
(error(pname, PBSE_SYSTEM));			}		} else if (strcmp(pname, "vmem") == 0) {	/* check */			retval = getsize(pres, &value);			if (retval != PBSE_NONE)			        return (error(pname, retval));		} else if (strcmp(pname, "pvmem") == 0) {	/* set */			if (set_mode == SET_LIMIT_SET)  {			    retval = getsize(pres, &value);			    if (retval != PBSE_NONE)			        return (error(pname, retval));			    if (value > INT_MAX)			        return (error(pname, PBSE_BADATVAL));			    reslim.rlim_cur = reslim.rlim_max = value;			    if (setrlimit(RLIMIT_DATA, &reslim) < 0)	        		return (error("RLIMIT_DATA", PBSE_SYSTEM));			    if (setrlimit(RLIMIT_STACK, &reslim) < 0)	        		return (error("RLIMIT_STACK", PBSE_SYSTEM));			}		} else if (strcmp(pname, "mem") == 0) {		/* ignore */		} else if (strcmp(pname, "pmem") == 0) {	/* set */			if (set_mode == SET_LIMIT_SET)  {			    retval = getsize(pres, &value);			    if (retval != PBSE_NONE)			        return (error(pname, retval));			    reslim.rlim_cur = reslim.rlim_max = value;			    if (setrlimit(RLIMIT_RSS, &reslim) < 0)	        		return (error("RLIMIT_RSS", PBSE_SYSTEM));			}		} else if (strcmp(pname, "walltime") == 0) {	/* Check */			retval = gettime(pres, &value);			if (retval != PBSE_NONE)			        return (error(pname, retval));		} else if (strcmp(pname, "nice") == 0) {	/* set nice */			if (set_mode == SET_LIMIT_SET)  {			    errno = 0;			    if ((nice((int)pres->rs_value.at_val.at_long) == -1)				&& (errno != 0))				return (error(pname, PBSE_BADATVAL));			}		} else if ((pres->rs_defin->rs_flags & ATR_DFLAG_RMOMIG) == 0)			/* don't recognize and not marked as ignore by mom */			return (error(pname, PBSE_UNKRESC));		pres = (resource *)GET_NEXT(pres->rs_link);	}	return (PBSE_NONE);}/* * State whether MOM main loop has to poll this job to determine if some * limits are being exceeded. * *	Sets flag TRUE if polling is necessary, FALSE otherwise.  Actual *	polling is done using the mom_over_limit machine-dependent function. 
*/int mom_do_poll(pjob)    job			*pjob;{	char		*id = "mom_do_poll";	char		*pname;	resource	*pres;	DBPRT(("%s: entered\n", id))	assert(pjob != NULL);	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);	pres = (resource *)	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);	while (pres != NULL) {		assert(pres->rs_defin != NULL);		pname = pres->rs_defin->rs_name;		assert(pname != NULL);		assert(*pname != '\0');		if (strcmp(pname, "walltime") == 0 ||		    strcmp(pname, "cput") == 0 ||		    strcmp(pname, "pvmem") == 0 ||		    strcmp(pname, "vmem") == 0)			return (TRUE);		pres = (resource *)GET_NEXT(pres->rs_link);	}	return (FALSE);}/* * Setup for polling. * *	Open kernel device and get namelist info. */int mom_open_poll(){	char		*id = "mom_open_poll";	DBPRT(("%s: entered\n", id))	if (kd == NULL) {		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, "mom");		if (kd == NULL) {			log_err(errno, id, "kvm_open");			return (PBSE_SYSTEM);		}	}	if (kvm_nlist(kd, nl) == -1) {		log_err(errno, id, "kvm_nlist");		return (PBSE_SYSTEM);	}	return (PBSE_NONE);}/* * Declare start of polling loop. * *	Until the next call to mom_get_sample, all mom_over_limit calls will *	use the same data.  Returns a PBS error code. 
*/int mom_get_sample() {	char		*id = "mom_get_sample";	caddr_t		*kernel_proc;	int		i, len;	DBPRT(("%s: entered\n", id))	if (proc_tbl)		free(proc_tbl);	if (sess_tbl)		free(sess_tbl);	if (kd == NULL)		return (PBSE_INTERNAL);	if (nl[KSYM_NPROC].n_type == 0) {		log_err(-1, id, "number of process not found");		return (PBSE_SYSTEM);	}	if (kvm_read(kd, nl[KSYM_NPROC].n_value, (char *)&nproc,		     sizeof(nproc)) != sizeof(nproc)) {		log_err(errno, id, "kvm_read(NPROC)");		return (PBSE_SYSTEM);	}	if (nproc < MINPROC || nproc > MAXPROC) {		sprintf(log_buffer,		    "strange number of procs (%d)", nproc);		log_err(-1, id, log_buffer);		return (PBSE_SYSTEM);	}	if (nl[KSYM_PROC].n_type == 0) {		log_err(-1, id, "process table not found");		return (PBSE_SYSTEM);	}	if (kvm_read(kd, nl[KSYM_PROC].n_value, (char *)&kernel_proc,		     sizeof(kernel_proc)) != sizeof(kernel_proc)) {		log_err(errno, id, "kvm_read(PROC)");		return (PBSE_SYSTEM);	}	proc_tbl = (struct proc *)calloc(nproc, sizeof(struct proc));	if (proc_tbl == NULL) {		sprintf(log_buffer,		    "can't allocate memory for proc table");		log_err(errno, id, log_buffer);		return (PBSE_SYSTEM);	}	len = nproc * sizeof(struct proc);	if (kvm_read(kd, (unsigned long)kernel_proc,		     (char *)proc_tbl, len) != len) {		log_err(errno, id, "kvm_read(proc_tbl)");		return (PBSE_SYSTEM);	}/* * Read session info for each process. */	sess_tbl = (pid_t *)calloc(nproc, sizeof(pid_t));	if (sess_tbl == NULL) {		sprintf(log_buffer,		    "can't allocate memory for session table");		log_err(errno, id, log_buffer);		return (PBSE_SYSTEM);	}	for (i=0; i<nproc; i++) {		struct	sess	s;		struct	proc	*pp = &proc_tbl[i];		if (pp->p_stat==0)			continue;		if (pp->p_sessp == NULL)			continue;		if (kvm_read(kd, (unsigned long)pp->p_sessp,			     (char *)&s, sizeof(s)) != sizeof(s)) {			log_err(errno, id, "kvm_read(session)");			continue;	/* session gone? 
*/		}		sess_tbl[i] = (pid_t)s.s_sid;	}	return (PBSE_NONE);}/* * Measure job resource usage and compare with its limits. * *	If it has exceeded any well-formed polled limit return TRUE. *	Otherwise, return FALSE. */int mom_over_limit(pjob)    job			*pjob;{	char		*id = "mom_over_limit";	char		*pname;	int		retval;	unsigned long	value, num;	resource	*pres;	assert(pjob != NULL);	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);	pres = (resource *)	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);	DBPRT(("%s: entered\n", id))	for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) {		assert(pres->rs_defin != NULL);		pname = pres->rs_defin->rs_name;		assert(pname != NULL);		assert(*pname != '\0');		if (strcmp(pname, "cput") == 0) {			retval = gettime(pres, &value);			if (retval != PBSE_NONE)				continue;			if ((num = cput_sum(pjob)) > value) {				sprintf(log_buffer,					"cput %lu exceeded limit %lu",					(float)num, 					(float)value);				return (TRUE);			}		} else if (strcmp(pname, "vmem") == 0) {			retval = getsize(pres, &value);			if (retval != PBSE_NONE)				continue;			if ((num = mem_sum(pjob)) > value) {				sprintf(log_buffer,					"vmem %lu exceeded limit %lu",					num, value);				return (TRUE);			}		} else if (strcmp(pname, "pvmem") == 0) {			retval = getsize(pres, &value);			if (retval != PBSE_NONE)				continue;			if (overmem_proc(pjob, value)) {				sprintf(log_buffer, "pvmem exceeded limit %lu",					value);				return (TRUE);			}		} else if (strcmp(pname, "walltime") == 0) {			retval = gettime(pres, &value);			if (retval != PBSE_NONE)				continue;			num = (unsigned long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);			if (num > value) {				sprintf(log_buffer,					"walltime %d exceeded limit %d",					num, value);				return (TRUE);			}		}	}	return (FALSE);}/* * Update the job attribute for resources used. 
* *	The first time this is called for a job, set up resource entries for *	each resource that can be reported for this machine.  Fill in the *	correct values.  Return an error code. * *	Assumes that the session ID attribute has already been set. */int mom_set_use(pjob)    job		*pjob;{	char		*id = "mom_set_use";	resource	*pres;	attribute	*at;	resource_def	*rd;	unsigned long	*lp, lnum;	assert(pjob != NULL);	at = &pjob->ji_wattr[(int)JOB_ATR_resc_used];	assert(at->at_type == ATR_TYPE_RESC);	at->at_flags |= ATR_VFLAG_MODIFY;	if ((at->at_flags & ATR_VFLAG_SET) == 0) {		at->at_flags |= ATR_VFLAG_SET;		rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		assert(pres != NULL);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		pres->rs_value.at_val.at_long = 0;		rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		assert(pres != NULL);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		pres->rs_value.at_val.at_size.atsv_num = 0;		rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		assert(pres != NULL);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		pres->rs_value.at_val.at_long = 0;		rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		assert(pres != NULL);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		pres->rs_value.at_val.at_size.atsv_num = 0;	}	rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, 
rd);	assert(pres != NULL);	lp = (unsigned long *)&pres->rs_value.at_val.at_long;	lnum = cput_sum(pjob);	*lp = MAX(*lp, lnum);	rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_size.atsv_num;	lnum = (mem_sum(pjob) + 1023) >> 10; /* KB */	*lp = MAX(*lp, lnum);	rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	pres->rs_value.at_val.at_long = (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);	rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_size.atsv_num;	lnum = (resi_sum(pjob) + 1023) >> 10; /* KB */	*lp = MAX(*lp, lnum);	return (PBSE_NONE);}/* *	Kill a job task. *	Call with the job and a signal number. */int kill_task(ptask, sig)    task	*ptask;    int		sig;{	char	*id = "kill_task";	int	ct = 0;	int	i, err;	int	sesid;	DBPRT(("%s entered\n", id))	sesid = ptask->ti_qs.ti_sid;	if (sesid <= 1)		return 0;	if ((err = mom_get_sample()) != PBSE_NONE)		return 0;	for (i=0; i<nproc; i++) {		struct	proc	*pp = &proc_tbl[i];		if (pp->p_stat == 0)			continue;		if (sesid != sess_tbl[i])			continue;		DBPRT(("%s: send signal %d to pid %d\n", id, sig, pp->p_pid))		(void)kill(pp->p_pid, sig);		++ct;	}	return ct;}/* * Clean up everything related to polling. * *	In the case of the sun, close the kernal if it is open. */int mom_close_poll(){	DBPRT(("mom_close_poll entered\n"))	if (kd) {		if (kvm_close(kd) != 0) {			log_err(errno, "mom_close_poll", "kvm_close");			return (PBSE_SYSTEM);		}		kd = NULL;	}	return (PBSE_NONE);}/* * mom_does_chkpnt - return 1 if mom supports checkpoint *			    0 if not */int mom_does_chkpnt(){	return (0);}/* * Checkpoint the job. * *	If abort is true, kill it too. 
*/
int mach_checkpoint(ptask, file, abort)
    task	*ptask;
    char	*file;
    int		abort;
{
	/* stub: none of the arguments is examined; always reports failure */
	return -1;
}

/*
 * Restart the job from the checkpoint file.
 *
 *	Return -1 on error or sid if okay.  This implementation is a stub
 *	that ignores its arguments and always returns -1.
 */
long mach_restart(ptask, file)
    task	*ptask;
    char	*file;
{
	return -1;
}

/*
**	Return 1 if proc table can be read, 0 otherwise.
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -