⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mom_mach.c

📁 openPBS的开放源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
		assert(*pname != '\0');
		if (strcmp(pname, "ncpus") == 0)
			return (TRUE);
		if (strcmp(pname, "walltime") == 0)
			return (TRUE);
		pres = (resource *)GET_NEXT(pres->rs_link);
	}
	return (FALSE);
}

/*
 * Setup for polling.
 *
 *	When built with SRFS, look up the directories named by TMPDIR, BIGDIR
 *	and FASTDIR through var_value().  Each directory that is found is
 *	passed to chk_file_sec() and then queried with quotactl(SRFS_INFO);
 *	on success the returned index is stored in srfs_tmp_dev, srfs_big_dev
 *	or srfs_fast_dev respectively, on failure the error is logged and the
 *	variable is left untouched.
 *
 *	Without SRFS only TMP_DIR is passed to chk_file_sec().
 *
 *	Returns PBSE_PERM if chk_file_sec() returned non-zero for any
 *	directory, PBSE_NONE otherwise.  The permission result is ignored
 *	when DEBUG or NO_SECURITY_CHECK is defined.
 */
int mom_open_poll()
{
	static char	*id = "mom_open_poll";

	DBPRT(("%s: entered\n", id))

#if	SRFS
/*
 * SETDEV(name, var) expects "i", "dir" and "srfsinfo" to be declared in the
 * enclosing block: "i" accumulates the chk_file_sec() results and "var"
 * receives srfsinfo.index when quotactl() succeeds.
 */
#define SETDEV(name, var) \
	if ((dir = var_value(name)) != NULL) { \
		i |= chk_file_sec(dir, 1, 1, S_IWGRP|S_IWOTH, 1); \
		if (quotactl(dir, SRFS_INFO, (caddr_t)&srfsinfo) == -1) \
			log_err(errno, id, dir); \
		else { \
			var = srfsinfo.index; \
			DBPRT(("%s: got %d for %s %s\n", id, var, name, dir)) \
		} \
	}
{
	int		i;
	struct		fsres_s		srfsinfo;
	char		*dir, *var_value();

	var_init();
	i = 0;		/* no security problem seen yet */
	SETDEV("TMPDIR", srfs_tmp_dev)
	SETDEV("BIGDIR", srfs_big_dev)
	SETDEV("FASTDIR", srfs_fast_dev)
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
	if (i) return (PBSE_PERM);
#endif	/* NO_SECURITY_CHECK */
}
#else
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
	if (chk_file_sec(TMP_DIR, 1, 1, S_IWGRP|S_IWOTH, 1)) return (PBSE_PERM);
#endif	/* NO_SECURITY_CHECK */
#endif	/* SRFS */
	return (PBSE_NONE);
}

/*
 * Declare start of polling loop.
 *
 *	Until the next call to mom_get_sample, all mom_over_limit calls will
 *	use the same data.  Returns a PBS error code.
*/
/*
 *	mom_get_sample replaces the cached session table and process table
 *	with fresh copies obtained through tabinfo()/tabread().
 *
 *	Returns PBSE_NONE when both tables were refreshed and PBSE_SYSTEM
 *	when a table could not be sized, allocated or read.  A table whose
 *	refresh failed is left as NULL with a size of zero, so a failed
 *	sample never leaves behind a freed pointer, a half-read buffer or a
 *	byte count where an entry count is expected.  If the session table
 *	fails, the process table from the previous sample is left untouched.
 */
int mom_get_sample()
{
	static char	*id = "mom_get_sample";
	struct tbs	info;
	struct	proc	*pp;
	struct	pcomm	*pc;
	int		i, pbase;
	int		nbytes;		/* size of the table being read */

	DBPRT(("%s: entered\n", id))

	/*
	 * Session table.  Clear the pointer as soon as the old copy is
	 * freed; otherwise an early return would leave it dangling and the
	 * next call would free it a second time.
	 */
	if (session_table != NULL) {
		free(session_table);
		session_table = NULL;
	}
	session_table_size = 0;

	if (tabinfo(SESS, &info) == -1)
		return (PBSE_SYSTEM);
	nbytes = info.ent * info.len;
	session_table = (struct sess *)malloc(nbytes);
	if (session_table == NULL)
		return (PBSE_SYSTEM);
	if (tabread(SESS, (char *)session_table, nbytes, info.head) == -1) {
		/* contents are undefined, do not keep them around */
		free(session_table);
		session_table = NULL;
		return (PBSE_SYSTEM);
	}
	session_table_size = info.ent;	/* entries, not bytes */

	/* Process table, handled the same way. */
	if (process_table != NULL) {
		free(process_table);
		process_table = NULL;
	}
	process_table_size = 0;

	if (tabinfo(PROCTAB, &info) == -1)
		return (PBSE_SYSTEM);
	nbytes = info.ent * info.len;
	process_table = (struct proc *)malloc(nbytes);
	if (process_table == NULL)
		return (PBSE_SYSTEM);
	if (tabread(PROCTAB, (char *)process_table, nbytes, info.head) == -1) {
		free(process_table);
		process_table = NULL;
		return (PBSE_SYSTEM);
	}
	process_table_size = info.ent;	/* entries, not bytes */

	/*
	 * Each p_pc read from the table is expressed relative to info.addr.
	 * Rebase it so that it points into our private copy instead.
	 * NOTE(review): the arithmetic goes through int, which assumes a
	 * pointer fits in an int on this machine -- confirm before porting.
	 */
	pbase = (int)info.addr;
	for (pp = process_table, i = 0; i < process_table_size; pp++, i++) {
		if (pp->p_stat == 0)
			continue;	/* slot has no state, skip it */
		if ((pc = pp->p_pc) != NULL) {
			pp->p_pc = (struct pcomm *)((int)pc - pbase +
					(int)process_table);
		}
	}

	return (PBSE_NONE);
}

/*
 * Measure job resource usage and compare with its limits.
 *
 *	If it has exceeded any well-formed polled limit return TRUE.
 *	Otherwise, return FALSE.
*/
/*
 *	pjob	job whose JOB_ATR_resource limits are checked; must not be
 *		NULL and the attribute must be of type ATR_TYPE_RESC.
 *
 *	Only the "ncpus" and "walltime" limits are examined.  A limit whose
 *	value cannot be decoded (getlong/gettime do not return PBSE_NONE) is
 *	skipped.  When TRUE is returned, log_buffer holds a message naming
 *	the limit that was exceeded.
 */
int mom_over_limit(pjob)
    job			*pjob;
{
	static char	*id = "mom_over_limit";
	char		*pname;
	int		retval;
	unsigned long	value;		/* the limit being tested */
	unsigned long	elapsed;	/* scaled wall clock time used */
	resource	*pres;
	int		num;

	assert(pjob != NULL);
	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);
	pres = (resource *)
	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);

	DBPRT(("%s: entered %s\n", id, pjob->ji_qs.ji_jobid))

	for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) {
		assert(pres->rs_defin != NULL);
		pname = pres->rs_defin->rs_name;
		assert(pname != NULL);
		assert(*pname != '\0');

		if (strcmp(pname, "ncpus") == 0) {
			retval = getlong(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			num = cpus_sum(pjob);
			/*
			 * Same comparison the implicit conversion used to
			 * perform; the cast only makes it explicit.
			 */
			if ((unsigned long)num > value) {
				/* value is unsigned long: needs %lu, not %d */
				sprintf(log_buffer,
					"ncpus %d exceeded limit %lu",
					num, value);
				return (TRUE);
			}
		}
		else if (strcmp(pname, "walltime") == 0) {
			retval = gettime(pres, &value);
			if (retval != PBSE_NONE)
				continue;
			/*
			 * Keep the elapsed time in an unsigned long.  Storing
			 * it in an int could truncate it before the
			 * comparison and print it wrongly.
			 */
			elapsed = (unsigned long)((double)(time_now -
					pjob->ji_qs.ji_stime) * wallfactor);
			if (elapsed > value) {
				sprintf(log_buffer,
					"walltime %lu exceeded limit %lu",
					elapsed, value);
				return (TRUE);
			}
		}
	}

	return (FALSE);
}

/*
 * Update the job attribute for resources used.
 *
 *	The first time this is called for a job, set up resource entries for
 *	each resource that can be reported for this machine.  Fill in the
 *	correct values.  Return an error code.
*/int mom_set_use(pjob)    job			*pjob;{	static char		*id = "mom_set_use";	resource		*pres;	attribute		*at;	resource_def		*rd;	unsigned long		*ulp, unum;	long			*lp, num;	assert(pjob != NULL);	at = &pjob->ji_wattr[(int)JOB_ATR_resc_used];	assert(at->at_type == ATR_TYPE_RESC);	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) != 0)		return (PBSE_NONE);	/* job suspended, don't track it */	DBPRT(("%s: entered %s\n", id, pjob->ji_qs.ji_jobid))	at->at_flags |= ATR_VFLAG_MODIFY;	if ((at->at_flags & ATR_VFLAG_SET) == 0) {		at->at_flags |= ATR_VFLAG_SET;#if	SRFS		rd = find_resc_def(svr_resc_def, "srfs_tmp", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		rd = find_resc_def(svr_resc_def, "srfs_big", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		rd = find_resc_def(svr_resc_def, "srfs_fast", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;#endif	/* SRFS */		rd = find_resc_def(svr_resc_def, "ncpus", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);		
assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		rd = find_resc_def(svr_resc_def, "pf", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		rd = find_resc_def(svr_resc_def, "sds", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_SIZE;		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;		rd = find_resc_def(svr_resc_def, "procs", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		rd = find_resc_def(svr_resc_def, "mppt", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;		rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);		assert(rd != NULL);		pres = add_resource_entry(at, rd);		pres->rs_value.at_flags |= ATR_VFLAG_SET;		pres->rs_value.at_type = ATR_TYPE_LONG;	}	rd = find_resc_def(svr_resc_def, "ncpus", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_long;	num = cpus_sum(pjob);	*lp = max(*lp, num);	rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_long;	num = cput_sum(pjob);	*lp = max(*lp, num);	rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);	assert(rd != NULL);	pres = 
find_resc_entry(at, rd);	assert(pres != NULL);	ulp = &pres->rs_value.at_val.at_size.atsv_num;	unum = (mem_sum(pjob) + 1023) >> 10;	/* KB */	*ulp = max(*ulp, unum);	rd = find_resc_def(svr_resc_def, "pf", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = (long *)&pres->rs_value.at_val.at_size.atsv_num;	num = (pf_sum(pjob) + 1023) >> 10;	/* KB */	*lp = max(*lp, num);	rd = find_resc_def(svr_resc_def, "sds", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	ulp = &pres->rs_value.at_val.at_size.atsv_num;	unum = (sds_sum(pjob) + 1023)>> 10;	/* KB */	*ulp = max(*ulp, unum);#if	SRFS	rd = find_resc_def(svr_resc_def, "srfs_tmp", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	ulp = &pres->rs_value.at_val.at_size.atsv_num;	unum = (srfs_sum(pjob, srfs_tmp_dev) + 1023) >> 10;	/* KB */	*ulp = max(*ulp, unum);	rd = find_resc_def(svr_resc_def, "srfs_big", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	ulp = &pres->rs_value.at_val.at_size.atsv_num;	unum = (srfs_sum(pjob, srfs_big_dev) + 1023) >> 10;	/* KB */	*ulp = max(*ulp, unum);	rd = find_resc_def(svr_resc_def, "srfs_fast", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	ulp = &pres->rs_value.at_val.at_size.atsv_num;	unum = (srfs_sum(pjob, srfs_fast_dev) + 1023) >> 10;	/* KB */	*ulp = max(*ulp, unum);#endif	/* SRFS */	rd = find_resc_def(svr_resc_def, "procs", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_long;	num = proc_cnt(pjob);	*lp = max(*lp, num);	rd = find_resc_def(svr_resc_def, "mppt", svr_resc_size);	assert(rd != NULL);	pres = find_resc_entry(at, rd);	assert(pres != NULL);	lp = &pres->rs_value.at_val.at_long;	num = mppt_sum(pjob);	*lp = max(*lp, num);	rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);	assert(rd != NULL);	pres = 
find_resc_entry(at, rd);	assert(pres != NULL);	pres->rs_value.at_val.at_long = (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);	return (PBSE_NONE);}/* *	Kill a task session. *	Call with the task pointer and a signal number. */int kill_task(ptask, sig)    task	*ptask;    int		sig;{	static char	*id = "kill_task";	int		ct = 1;        int		sesid;	sesid = ptask->ti_qs.ti_sid;	if (sesid > 1) {		if (killm(C_JOB, sesid, sig) == -1) {		    if (errno != ESRCH) {			sprintf(log_buffer, "killm: sid=%d sig=%d", sesid, sig);			log_err(errno, id, log_buffer);		    } else {			ct = 0;			sprintf(log_buffer, "killm: sid=%d sig=%d", sesid, sig);			log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,				   ptask->ti_job->ji_qs.ji_jobid, log_buffer);		    }		}	}	return ct;}/* * Clean up everything related to polling. * *	In the case of the sun, close the kernal if it is open. */int mom_close_poll(){	static char	*id = "mom_close_poll";	DBPRT(("%s: entered\n", id))	return (PBSE_NONE);}/* * mom_does_chkpnt - return 1 if mom supports checkpoint *			    0 if not */int mom_does_chkpnt(){	return (1);}/* * Checkpoint the job. * *	If abort is TRUE, kill it too. */int mach_checkpoint(ptask, path, abort)    task	*ptask;    char	*path;    int		abort;{	int	cprtn;	long	flags = 0;	if (abort)		flags = CHKPNT_KILL;	cprtn = chkpnt( C_SESS, ptask->ti_qs.ti_sid, path, flags );	return cprtn;}/* * Restart the job from the checkpoint file. 
 *
 *	Return the session/job id
 *
 *	The value returned is whatever restart(path, 0) returns; ptask is
 *	not used by this implementation.
 */
long mach_restart(ptask, path)
    task	*ptask;
    char	*path;
{
	int	sid;

	sid = restart(path, 0);
	return sid;
}

/*
 * getprocs - make sure the process table is current for this request.
 *
 *	A new sample is taken with mom_get_sample() only when reqnum differs
 *	from the request number remembered from the last successful sample.
 *	Returns process_table_size, or 0 if the sample could not be taken.
 */
int
getprocs()
{
	static	unsigned	int	lastproc = 0;	/* reqnum of last good sample */

	if (lastproc == reqnum) /* don't need new proc table */
		return process_table_size;

	if (mom_get_sample() != PBSE_NONE)
		return 0;

	/* remembered only on success, so a failed sample is retried */
	lastproc = reqnum;
	return(process_table_size);
}

char	*
cput_job(jobid)
pid_t	jobid;
{
	char		*id = "cput_job";
	int		i, nproc;
	int		found = 0;
	time_t		addtime;
	double		cputime;

	if ((nproc = getprocs()) == 0) {
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}
	cputime = 0.0;
	for (i=0; i<nproc; i++) {
		register struct proc    *pp = &process_table[i];
		register struct	pcomm	*pc;

		if (pp->p_stat==0)
			continue;
		if ((pc = pp->p_pc) == NULL)
			continue;
		if (jobid != pc->pc_sid)
			continue;
		found = 1;
		addtime = pp->p_utime + pp->p_stime + pp->p_sctime +
				pc->pc_cutime + pc->pc_cstime;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -