⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mom_mach.c

📁 OpenPBS
💻 C
📖 第 1 页 / 共 4 页
字号:
 * *	If it has exceeded any well-formed polled limit return TRUE. *	Otherwise, return FALSE. */int mom_over_limit(pjob)    job			*pjob;{	char		*pname;	int		retval;	rlim64_t	sizeval;	unsigned long	value, num;	rlim64_t	num64;	resource	*pres;	assert(pjob != NULL);	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);	pres = (resource *)	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);	for ( ; pres != NULL; pres = (resource *)GET_NEXT(pres->rs_link)) {		assert(pres->rs_defin != NULL);		pname = pres->rs_defin->rs_name;		assert(pname != NULL);		assert(*pname != '\0');		if (strcmp(pname, "ncpus") == 0) {			attribute		*at;			resource_def		*rd;			resource		*prescpup;			retval = getlong(pres, &value);			if (retval != PBSE_NONE)				continue;			at = &pjob->ji_wattr[(int)JOB_ATR_resc_used];			assert(at->at_type == ATR_TYPE_RESC);			rd = find_resc_def(svr_resc_def, "cpupercent",				svr_resc_size);			assert(rd != NULL);			prescpup = find_resc_entry(at, rd);			assert(prescpup != NULL);			num = prescpup->rs_value.at_val.at_long;			if (num > (value*100+10)) {				sprintf(log_buffer,					"ncpus %.2f exceeded limit %lu",					(float)num/100.0, value);#if !defined(SGI_ZOMBIE_WRONG)				return (TRUE);#else				LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,					  pjob->ji_qs.ji_jobid, log_buffer);#endif /* SGI_ZOMBIE_WRONG */			}		} else if (strcmp(pname, "cput") == 0) {			retval = getlong(pres, &value);			if (retval != PBSE_NONE)				continue;			if ((num = cput_sum(pjob)) > value) {				sprintf(log_buffer,					"cput %lu exceeded limit %lu",					num, value);				return (TRUE);			}		} else if (strcmp(pname, "pcput") == 0) {			retval = getlong(pres, &value);			if (retval != PBSE_NONE)				continue;			if (overcpu_proc(pjob, value)) {				sprintf(log_buffer,					"pcput exceeded limit %lu",					value);				return (TRUE);			}		} else if (strcmp(pname, "vmem") == 0) {			retval = getsize(pres, &sizeval);			if (retval != PBSE_NONE)				continue;			if ((num64 = 
mem_sum(pjob)) > sizeval) {				sprintf(log_buffer,					"vmem %llu exceeded limit %llu",					num64, sizeval);				return (TRUE);			}		} else if (strcmp(pname, "pvmem") == 0) {			retval = getsize(pres, &sizeval);			if (retval != PBSE_NONE)				continue;			if (overmem_proc(pjob, sizeval)) {				sprintf(log_buffer,					"pvmem exceeded limit %llukb", sizeval);				return (TRUE);			}		} else if (strcmp(pname, "mem") == 0) {			retval = getsize(pres, &sizeval);			if (retval != PBSE_NONE)				continue;			if ((num64 = resi_sum(pjob)) > sizeval) {				sprintf(log_buffer,					"mem %llu exceeded limit %llu",					num64, sizeval);				return (TRUE);			}		} else if (strcmp(pname, "walltime") == 0) {			retval = getlong(pres, &value);			if (retval != PBSE_NONE)				continue;			num = (unsigned long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);			if (num > value) {				sprintf(log_buffer,					"walltime %lu exceeded limit %lu",					num, value);				return (TRUE);			}		}	}	return (FALSE);}/* * Update the job attribute for resources used. * *	The first time this is called for a job, set up resource entries for *	each resource that can be reported for this machine.  Fill in the *	correct values.  Return an error code. 
*/
/*
 * mom_set_use refreshes the JOB_ATR_resc_used attribute of pjob and marks
 * it modified.  The entries maintained are cput, cpupercent, vmem,
 * walltime and mem.
 *
 *	- On the first call (ATR_VFLAG_SET not yet set on the attribute) all
 *	  five entries are created and given an explicit starting value.
 *	- cput, cpupercent, vmem and mem never decrease: the stored value is
 *	  only replaced by a larger sample.
 *	- cpupercent is recomputed only when cput has grown and more than
 *	  10 units of sampletime have passed since pjob->ji_sampletim.
 *	- vmem and mem are stored in KB (shift 10), rounded up.
 *	- walltime is overwritten on every call.
 *
 * Every path through this function returns PBSE_NONE; lookup failures
 * are caught by assert() only.
 */
int mom_set_use(pjob)
    job			*pjob;
{
	resource		*pres;
	attribute		*at;
	resource_def		*rd;
	unsigned long		*lp, lnum, newcpu, oldcpu;
	long			 dur;
	unsigned long		 percent;

	assert(pjob != NULL);
	at = &pjob->ji_wattr[(int)JOB_ATR_resc_used];
	assert(at->at_type == ATR_TYPE_RESC);

	at->at_flags |= ATR_VFLAG_MODIFY;
	if ((at->at_flags & ATR_VFLAG_SET) == 0) {
		/* first call for this job: create every reported entry */
		at->at_flags |= ATR_VFLAG_SET;

		rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);
		assert(rd != NULL);
		pres = add_resource_entry(at, rd);
		assert(pres != NULL);
		pres->rs_value.at_flags |= ATR_VFLAG_SET;
		pres->rs_value.at_type = ATR_TYPE_LONG;
		pres->rs_value.at_val.at_long = 0;

		rd = find_resc_def(svr_resc_def, "cpupercent", svr_resc_size);
		assert(rd != NULL);
		pres = add_resource_entry(at, rd);
		assert(pres != NULL);
		pres->rs_value.at_flags |= ATR_VFLAG_SET;
		pres->rs_value.at_type = ATR_TYPE_LONG;
		pres->rs_value.at_val.at_long = 0;

		rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);
		assert(rd != NULL);
		pres = add_resource_entry(at, rd);
		assert(pres != NULL);
		pres->rs_value.at_flags |= ATR_VFLAG_SET;
		pres->rs_value.at_type = ATR_TYPE_SIZE;
		pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */
		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
		/* read back through MAX() below, so give it a defined start */
		pres->rs_value.at_val.at_size.atsv_num = 0;

		rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);
		assert(rd != NULL);
		pres = add_resource_entry(at, rd);
		assert(pres != NULL);
		pres->rs_value.at_flags |= ATR_VFLAG_SET;
		pres->rs_value.at_type = ATR_TYPE_LONG;
		pres->rs_value.at_val.at_long = 0;

		rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);
		assert(rd != NULL);
		pres = add_resource_entry(at, rd);
		assert(pres != NULL);
		pres->rs_value.at_flags |= ATR_VFLAG_SET;
		pres->rs_value.at_type = ATR_TYPE_SIZE;
		pres->rs_value.at_val.at_size.atsv_shift = 10; /* in KB */
		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
		/* read back through MAX() below, so give it a defined start */
		pres->rs_value.at_val.at_size.atsv_num = 0;
	}

	/* cput: keep the largest value seen */
	rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);
	assert(rd != NULL);
	pres = find_resc_entry(at, rd);
	assert(pres != NULL);
	lp = (unsigned long *)&pres->rs_value.at_val.at_long;
	oldcpu = *lp;
	lnum = cput_sum(pjob);
	if (lnum > *lp) {
		*lp = lnum;
		if ( (dur = sampletime - pjob->ji_sampletim) > 10) {
			newcpu = *lp;	/* save new cput */

			/* cpupercent: cput growth per unit of sample time */
			rd = find_resc_def(svr_resc_def, "cpupercent", svr_resc_size);
			assert(rd != NULL);
			pres = find_resc_entry(at, rd);
			assert(pres != NULL);
			lp = (unsigned long *)&pres->rs_value.at_val.at_long;
			percent = (newcpu - oldcpu)*100 / dur;
			*lp = MAX(*lp, percent);
			DBPRT(("cpu %%  : ses %ld (new %lu - old %lu)/delta %ld = %lu%%\n", pjob->ji_wattr[(int)JOB_ATR_session_id].at_val.at_long, newcpu, oldcpu, dur, percent))
		}
		pjob->ji_sampletim = sampletime;
	}

	/* vmem: keep the largest value seen */
	rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);
	assert(rd != NULL);
	pres = find_resc_entry(at, rd);
	assert(pres != NULL);
	lp = &pres->rs_value.at_val.at_size.atsv_num;
	lnum = (mem_sum(pjob) + 1023) >> 10;	/* as KB */
	*lp = MAX(*lp, lnum);

	/* walltime: always the current scaled elapsed time */
	rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);
	assert(rd != NULL);
	pres = find_resc_entry(at, rd);
	assert(pres != NULL);
	pres->rs_value.at_val.at_long = (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);

	/* mem: keep the largest value seen */
	rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);
	assert(rd != NULL);
	pres = find_resc_entry(at, rd);
	assert(pres != NULL);
	lp = &pres->rs_value.at_val.at_size.atsv_num;
	lnum = (resi_sum(pjob) + 1023) >> 10;	/* in KB */
	*lp = MAX(*lp, lnum);

	return (PBSE_NONE);
}

/*
 *	Kill a task session.
 *	Call with the task pointer and a signal number.
*/int kill_task(ptask, sig)    task	*ptask;    int		sig;{	char		*id = "kill_task";	ash_t		ash;	int		ct = 0;	int		np;	struct startjob_rtn sgid;	aspidlist_t	*taskpids = 0;	extern aserror_t aserrorcode;	if (ptask->ti_job->ji_globid != NULL) {		sscanf(ptask->ti_job->ji_globid, "%llx", &ash);	} else {		ash = asashofpid(ptask->ti_qs.ti_sid);		sgid.sj_ash = ash;		set_globid(ptask->ti_job, &sgid);	}			if ((ash != 0LL) && (ash != -1LL)) {		taskpids = aspidsinash_local(ash);		if (taskpids) {			for (np=0; np<taskpids->numpids; ++np) {				(void)kill(taskpids->pids[np], sig);				++ct;			}		} else {			sprintf(log_buffer, "no pids in ash %lld in %s",ash,id);			log_err(aserrorcode, id, log_buffer);		}	}	return ct;}/* * Clean up everything related to polling. */int mom_close_poll(){	char	*id = "mom_close_poll";	int	i;	DBPRT(("%s: entered\n", id))	if (proc_array) {#if COMPLEX_MEM_CALC==1		for(i=0; i<max_proc; i++) {			struct	proc_info	*pi = &proc_array[i];			if (pi->map)				free(pi->map);		}#endif /* COMPLEX_MEM_CALC */		free(proc_array);	}	if (pdir) {		if (closedir(pdir) != 0) {			log_err(errno, id, "closedir");			return (PBSE_SYSTEM);		}	}	return (PBSE_NONE);}/* * mom_does_chkpnt - return 1 if mom supports checkpoint *			    0 if not */int mom_does_chkpnt(){#if MOM_CHECKPOINT == 1	return (1);#else /* MOM_CHECKPOINT */	return (0);#endif	/* MOM_CHECKPOINT */}/* * Checkpoint the task. * *	If abort is true, kill it too. */int mach_checkpoint(ptask, file, abort)    task	*ptask;    char	*file;    int		abort;{#if MOM_CHECKPOINT == 1	ash_t	ash;	sscanf(ptask->ti_job->ji_globid, "%llx", &ash);	/* ckpt_setup(0, 0);  Does nothing so why have it */	if (abort) 		cpr_flags = CKPT_CHECKPOINT_KILL | CKPT_NQE;	else		cpr_flags = CKPT_CHECKPOINT_CONT | CKPT_NQE;	return ( ckpt_create(file, ash, P_ASH, 0, 0) );     /*	return ( ckpt_create(file, ptask->ti_qs.ti_sid, P_SID, 0, 0) ); */#else /* MOM_CHECKPOINT */	return (-1);#endif /* MOM_CHECKPOINT */}/* * Restart the task from the checkpoint file. 
* *	Return -1 on error or sid if okay. */long mach_restart(ptask, file)    task	*ptask;    char	*file;{#if MOM_CHECKPOINT == 1	ckpt_id_t rc;	ash_t	 momash;	ash_t	 oldash = 0;	char	 cvtbuf[20];	cpr_flags = CKPT_NQE;	/* KLUDGE to work-around SGI problem, for some reason ckpt_restart() */	/* passes open file descriptor to /proc to restarted process	     */	if (pdir)		closedir(pdir);	/* To restart the job with its old ASH, Mom must be in that ASH	    */	/* When she does the restart.   However, before changing to that    */	/* ASH, Mom must put herself in a new ASH all by herself, otherwise */	/* she will take other system daemons with her into the job's ASH   */	momash = getash();	newarraysess();		/* isolate Mom in a ASH by herself  */	if (ptask->ti_job->ji_globid != NULL) {					/* now get job's old ASH and set it */		sscanf(ptask->ti_job->ji_globid, "%llx", &oldash);		if (setash(oldash) == -1) {			DBPRT(("setash failed before restart, errno = %d", errno))		}	}	rc =  ckpt_restart(file, (struct ckpt_args **)0, 0);	if ((ptask->ti_job->ji_globid == NULL) && (rc > 0)) {		(void)sprintf(cvtbuf, "%llx", rc);		ptask->ti_job->ji_globid = strdup(cvtbuf);	}	newarraysess();		/* again, isolate Mom into ASH by herself */	if (setash(momash) == -1) {	/* put Mom back to her old ASH */		DBPRT(("setash failed after restart, errno = %d", errno))	}		/* KLUDGE TO work-around SGI problem, ckpt_restart sets the uid of */	/* the calling process (me) to that of the restarted process       */	(void)setuid(0);	if ((pdir = opendir(procfs)) == NULL) {		log_err(errno, "mach_restart", "opendir");	}	return ((int)rc);#else	/* MOM_CHECKPOINT */	return (-1);#endif	/* MOM_CHECKPOINT */}/***	Return 1 if proc table can be read, 0 otherwise.*/intgetprocs(){	static	unsigned	int	lastproc = 0;	if (lastproc == reqnum)         /* don't need new proc table */		return 1;	if (mom_get_sample() != PBSE_NONE)		return 0;	lastproc = reqnum;	return 1;}char	*cput(attrib)struct	rm_attribute	*attrib;{	rm_errno = 
RM_ERR_UNKNOWN;	return NULL;}char	*mem(attrib)struct	rm_attribute	*attrib;{	rm_errno = RM_ERR_UNKNOWN;	return NULL;}char	*sessions(attrib)struct	rm_attribute	*attrib;{	rm_errno = RM_ERR_UNKNOWN;	return NULL;}char	*pids(attrib)struct	rm_attribute	*attrib;{	rm_errno = RM_ERR_UNKNOWN;	return NULL;}char	*nsessions(attrib)struct	rm_attribute	*attrib;{	rm_errno = RM_ERR_UNKNOWN;	return NULL;}char	*nusers(attrib)struct	rm_attribute	*attrib;{	rm_errno = RM_ERR_UNKNOWN;	return NULL;}static char	*totmem(attrib)struct	rm_attribute	*attrib;{	static	char	id[] = "totmem";	struct	statfs	fsbuf;	if (attrib) {		log_err(-1, id, extra_parm);		rm_errno = RM_ERR_BADPARAM;		return NULL;	}	if (statfs(procfs, &fsbuf, sizeof(struct statfs), 0) == -1) {		log_err(errno, id, "statfs");		rm_errno = RM_ERR_SYSTEM;		return NULL;	}	DBPRT(("%s: bsize=%ld blocks=%lld\n", id,		fsbuf.f_bsize, fsbuf.f_blocks))	sprintf(ret_string, "%llukb", 		((rlim64_t)fsbuf.f_bsize * (rlim64_t)fsbuf.f_blocks) >> 10);	return ret_string;}/* * availmem() - return amount of available memory in system in KB as string */static char	*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -