mom_mach.c

来自「OpenPBS」· C语言 代码 · 共 2,037 行 · 第 1/3 页

C
2,037
字号
/**         OpenPBS (Portable Batch System) v2.3 Software License* * Copyright (c) 1999-2002 Veridian Information Solutions, Inc.* All rights reserved.* * ---------------------------------------------------------------------------* For a license to use or redistribute the OpenPBS software under conditions* other than those described below, or to purchase support for this software,* please contact Veridian Systems, PBS Products Department ("Licensor") at:* *    www.OpenPBS.org  +1 650 967-4675                  sales@OpenPBS.org*                        877 902-4PBS (US toll-free)* ---------------------------------------------------------------------------* * This license covers use of the OpenPBS v2.3 software (the "Software") at* your site or location, and, for certain users, redistribution of the* Software to other sites and locations.  Use and redistribution of* OpenPBS v2.3 in source and binary forms, with or without modification,* are permitted provided that all of the following conditions are met.* After December 31, 2003, only conditions 3-6 must be met:* * 1. Commercial and/or non-commercial use of the Software is permitted*    provided a current software registration is on file at www.OpenPBS.org.*    If use of this software contributes to a publication, product, or*    service, proper attribution must be given; see www.OpenPBS.org/credit.html* * 2. Redistribution in any form is only permitted for non-commercial,*    non-profit purposes.  There can be no charge for the Software or any*    software incorporating the Software.  Further, there can be no*    expectation of revenue generated as a consequence of redistributing*    the Software.* * 3. Any Redistribution of source code must retain the above copyright notice*    and the acknowledgment contained in paragraph 6, this list of conditions*    and the disclaimer contained in paragraph 7.* * 4. Any Redistribution in binary form must reproduce the above copyright*    notice and the acknowledgment contained in paragraph 6, this list of*    conditions and the disclaimer contained in paragraph 7 in the*    documentation and/or other materials provided with the distribution.* * 5. Redistributions in any form must be accompanied by information on how to*    obtain complete source code for the OpenPBS software and any*    modifications and/or additions to the OpenPBS software.  The source code*    must either be included in the distribution or be available for no more*    than the cost of distribution plus a nominal fee, and all modifications*    and additions to the Software must be freely redistributable by any party*    (including Licensor) without restriction.* * 6. All advertising materials mentioning features or use of the Software must*    display the following acknowledgment:* *     "This product includes software developed by NASA Ames Research Center,*     Lawrence Livermore National Laboratory, and Veridian Information *     Solutions, Inc.*     Visit www.OpenPBS.org for OpenPBS software support,*     products, and information."* * 7. DISCLAIMER OF WARRANTY* * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT* ARE EXPRESSLY DISCLAIMED.* * IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE* U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT,* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.* * This license will be governed by the laws of the Commonwealth of Virginia,* without reference to its choice of law rules.*/#include <pbs_config.h>   /* the master config generated by configure */#include <assert.h>#include <limits.h>#include <stdio.h>#include <stdlib.h>#include <stddef.h>#include <unistd.h>#include <ctype.h>#include <procinfo.h>#include <dirent.h>#include <errno.h>#include <fcntl.h>#include <strings.h>#include <sys/time.h>#include <sys/param.h>#include <sys/resource.h>#include <sys/types.h>#include <sys/uio.h>#include <pwd.h>#include <sys/file.h>#include <sys/stat.h>#include <sys/statfs.h>#include <sys/vminfo.h>#include <sys/stream.h>#include <sys/stropts.h>#include <nlist.h>#if defined ( _AIX43 )#include <cf.h>#include <sys/cfgodm.h>#endif	/* AIX43 */#include "pbs_error.h"#include "portability.h"#include "list_link.h"#include "server_limits.h"#include "attribute.h"#include "resource.h"#include "job.h"#include "log.h"#include "resmon.h"#include "../rm_dep.h"#include "mom_mach.h"/***	System dependent code to gather information for the resource**	monitor for an IBM 590 running AIX4.****	Resources known by this code:**		cput		cpu time for a pid or session**		mem		memory size for a pid or session in KB**		ncpus		number of cpus **		resi		resident memory size for a pid or session in KB**		sessions	list of sessions in the system**		pids		list of pids in a session**		nsessions	number of sessions in the system**		nusers		number of users in the system**		totmem		total memory size in KB**		availmem	available memory size in KB**		size		size of a file or filesystem in KB**		idletime	seconds of idle time**		walltime	wall clock time for a pid**		loadave		current load average***/static char ident[] = "@(#) aix4/$RCSfile: mom_mach.c,v $ $Revision: 2.11.2.1.2.10 $";#ifndef TRUE#define FALSE	0#define TRUE	1#endif	/* TRUE *//*** external functions and data*/extern	struct	config		*search A_((struct config *, char *));extern	struct	rm_attribute	*momgetattr A_((char *));extern	int			rm_errno;extern	unsigned	int	reqnum;extern	char	*ret_string;extern 	double	 cputfactor;extern	double	 wallfactor;/*** local functions*/static char	*resi		A_((struct rm_attribute *attrib));static char	*totmem		A_((struct rm_attribute *attrib));static char	*physmem	A_((struct rm_attribute *attrib));static char	*availmem	A_((struct rm_attribute *attrib));static char	*walltime	A_((struct rm_attribute *attrib));static char	*ncpus		A_((struct rm_attribute *attrib));extern char	*loadave	A_((struct rm_attribute *attrib));extern char	*nullproc	A_((struct rm_attribute *attrib));/*** local resource list storage*/struct	config	dependent_config[] = {	{ "resi",	resi },	{ "totmem",	totmem },	{ "physmem",	physmem },	{ "availmem",	availmem },	{ "loadave",	loadave },	{ "ncpus",	ncpus },	{ "walltime",	walltime },	{ NULL,		nullproc },};	struct nlist nl[] = {	{ "avenrun" }};#define	KSYM_LOAD	0#define	ASIZE		10time_t			wait_time = 10;int			kd = -1;int			proctot = 0;struct	procsinfo	*proc_tbl = NULL;char			**swap_dev = NULL;int			nproc = 0;static int		nncpus;static uint	 	realmem;static long		page_size;extern	struct	pbs_err_to_txt	pbs_err_to_txt[];extern	time_t			time_now;extern	char	extra_parm[];extern	char	no_parm[];char		nokernel[] = "kernel not available";char		noproc[] = "process %d does not exist";voiddep_initialize(){	char	*id = "dep_initialize";	int	i, rc, len;	char	line[200], *dev;	char	*swapfil = "/etc/swapspaces";	FILE	*fil;#if defined( _AIX43 )	struct CuAt * obj;	int	      qty;#else	/* AIX43 */	struct	nlist ndata;#endif	/* AIX43 */	if (swap_dev == NULL)		swap_dev = (char **)calloc(10, sizeof(char *));	page_size = sysconf(_SC_PAGESIZE);	if ((fil = fopen(swapfil, "r")) == NULL)		log_err(errno, id, swapfil);	else {		for (i=0; fgets(line, sizeof(line), fil);) {			if (line[0] == '*')				continue;			len = strlen(line);			if (line[len-1] == '\n')				line[--len] = '\0';			if ((dev = strstr(line, "/dev/")) == NULL)				continue;			DBPRT(("%s: swapdev(%d) %s\n", id, i, dev))			swap_dev = realloc(swap_dev, (i+2)*sizeof(char *));			swap_dev[i++] = strdup(dev);		}		swap_dev[i] = NULL;		fclose(fil);	}	if ((kd = open("/dev/kmem", O_RDONLY)) == -1) {		log_err(errno, id, "open");		return;	}	/* insure /dev/kmem closed on exec */	if ((i = fcntl(kd,  F_GETFD)) == -1) {		log_err(errno, id, "F_GETFD");	}	i |= FD_CLOEXEC;	if (fcntl(kd, F_SETFD, i) == -1) {		log_err(errno, id, "F_SETFD");	}	proc_tbl = malloc(ASIZE*sizeof(struct procsinfo));	proctot = ASIZE;	rc = knlist(nl, sizeof(nl)/sizeof(struct nlist), sizeof(struct nlist));	if (rc == -1) {		log_err(errno, id, "knlist");		return;	}#ifdef	DEBUG	for (i=0; i<sizeof(nl)/sizeof(struct nlist); i++)		printf("%s: %s @ %x\n", id, nl[i].n_name, nl[i].n_value);#endif#if defined( _AIX43 )/* The following code works for AIX 4.3 * Supplied by Lloyd Caldwell, U of Utah */	if ( odm_initialize() == 0 ) {		obj = getattr( "sys0", "realmem", 0, &qty ) ;		if ( obj == NULL )			log_err(odmerrno, id, "odm_initialize");		else			realmem = atoi(obj->value);	/* in KB */		if ( odm_terminate() )			log_err(odmerrno, id, "odm_terminate for realmem");	} else {		log_err(odmerrno, id, "odm_initialize for realmem");	}#else	/* ! AIX43 *//* * This code works for AIX 4.1 and 4.2 * The size of real memory is stored in the 10th location (uint big each) * of undocumented kernel structure "vmker". * * Author: Wendy Lin, PUCC, May 1998 */	ndata.n_name = "vmker";	if ( (knlist(&ndata, 1, sizeof(struct nlist)) == -1) ||	     (kvm_read(kd, ndata.n_value + sizeof(uint) * 9, &realmem, sizeof (realmem)) != sizeof(realmem)) ) {		log_err(errno, id, "kvm_read of realmem");		realmem = 0;	} else {	 	realmem = realmem * 4;	/* now in KB */	}#endif	/* AIX43 */	nncpus = sysconf(_SC_NPROCESSORS_ONLN);	return;}voiddep_cleanup(){	char	*id = "dep_cleanup";	int	i;	log_record(PBSEVENT_SYSTEM, 0, id, "dependent cleanup");	close(kd);	kd = -1;	if (proc_tbl) {		free(proc_tbl);		proc_tbl = NULL;	}	if (swap_dev) {		for (i=0; swap_dev[i]; i++) {			free(swap_dev[i]);			swap_dev[i] = NULL;		}	}}voidend_proc(){	return;}/* * Time decoding macro.  Accepts a timeval structure.  Returns unsigned long * time in seconds. */#define tv(val) (ulong)((val).tv_sec)/* * Internal size decoding routine. * *	Accepts a resource pointer and a pointer to the unsigned long integer *	to receive the decoded value.  It returns a PBS error code, and the *	decoded value in the unsigned long integer. * *	For AIX, * *		sizeof(word) = sizeof(int) */static int getsize(pres, ret)    resource		*pres;    unsigned long	*ret;{	unsigned long	value;	if (pres->rs_value.at_type != ATR_TYPE_SIZE)		return (PBSE_ATTRTYPE);	value = pres->rs_value.at_val.at_size.atsv_num;	if (pres->rs_value.at_val.at_size.atsv_units ==	    ATR_SV_WORDSZ) {		if (value > ULONG_MAX / sizeof(int))			return (PBSE_BADATVAL);		value *= sizeof(int);	}	if (value > ULONG_MAX >>	    pres->rs_value.at_val.at_size.atsv_shift)	        return (PBSE_BADATVAL);	*ret = value << pres->rs_value.at_val.at_size.atsv_shift;	return (PBSE_NONE);}/* * Internal time decoding routine. * *	Accepts a resource pointer and a pointer to the unsigned long integer *	to receive the decoded value.  It returns a PBS error code, and the *	decoded value of time in seconds in the unsigned long integer. */static int gettime(pres, ret)    resource		*pres;    unsigned long	*ret;{	if (pres->rs_value.at_type != ATR_TYPE_LONG)		return (PBSE_ATTRTYPE);	if (pres->rs_value.at_val.at_long < 0)	        return (PBSE_BADATVAL);	*ret = pres->rs_value.at_val.at_long;	return (PBSE_NONE);}staticintinjob(pjob, sesid)    job			*pjob;    pid_t		sesid;{	task		*ptask;	for (ptask = (task *)GET_NEXT(pjob->ji_tasks);			ptask;			ptask = (task *)GET_NEXT(ptask->ti_jobtask)) {		if (ptask->ti_qs.ti_sid <= 1)			continue;		if (ptask->ti_qs.ti_sid == sesid)			return TRUE;	}	return FALSE;}/* * Internal session cpu time decoding routine. * *	Accepts a session id.  Returns the sum of all cpu time consumed for all *	tasks executed by the job, in seconds, adjusted by cputfactor. */static unsigned long cput_sum(pjob)    job		*pjob;{	char			*id = "cput_ses";	int			i;	unsigned long		cputime;	int			nps = 0;	cputime = 0;	for (i=0; i<nproc; i++) {		register struct procsinfo	*pp = &proc_tbl[i];		if (pp->pi_state == SNONE)			continue;		if (!injob(pjob, pp->pi_sid))			continue;		nps++;		DBPRT(("%s: pid=%d", id, pp->pi_pid))		if (pp->pi_state == SZOMB) {			DBPRT((" (zombie)"))			cputime +=  (pp->pi_utime + pp->pi_stime);		}		else {			DBPRT((" (active)"))			cputime += tv(pp->pi_ru.ru_utime) +				tv(pp->pi_ru.ru_stime) +				tv(pp->pi_cru.ru_utime) +				tv(pp->pi_cru.ru_stime);		}		DBPRT((" total=%lu\n", cputime))	}	if (nps == 0)		pjob->ji_flags |= MOM_NO_PROC;	return ((unsigned long)((double)cputime * cputfactor));}/* * Internal session memory usage function. * *	Accepts a session ID.  Returns the total number of bytes of address *	space consumed by all current tasks within the job. */static unsigned long mem_sum(pjob)    job			*pjob;{	char		*id="mem_ses";	int		i;	unsigned long	memsize;	memsize = 0;	for (i=0; i<nproc; i++) {		register struct procsinfo	*pp = &proc_tbl[i];		if (pp->pi_state == SNONE)			continue;		if (!injob(pjob, pp->pi_sid))			continue;		memsize += ctob(pp->pi_size);		DBPRT(("%s: pid=%d size=%lu\n", id, pp->pi_pid, memsize))	}	return (memsize);}/* * Internal session mem (workingset) size function. */static unsigned long resi_sum(pjob)    job			*pjob;{	char		*id="resi_ses";	int		i;	unsigned long	memsize;	memsize = 0;	for (i=0; i<nproc; i++) {		struct	procsinfo	*pp = &proc_tbl[i];		if (pp->pi_state == SNONE)			continue;		if (!injob(pjob, pp->pi_sid))			continue;		if (pp->pi_state == SZOMB)			continue;		memsize += (pp->pi_drss == -1) ? 0 : ctob(pp->pi_drss);		memsize += (pp->pi_trss == -1) ? 0 : ctob(pp->pi_trss);		DBPRT(("%s: pid=%d size=%lu\n", id, pp->pi_pid, memsize))	}	return (memsize);}/* * Return TRUE if any process in the job is over limit for memory usage. */static int overmem_proc(pjob, limit)    job			*pjob;    unsigned long	limit;{	int		i;	for (i=0; i<nproc; i++) {		register struct procsinfo	*pp = &proc_tbl[i];		if (pp->pi_state == SNONE)			continue;		if (!injob(pjob, pp->pi_sid))			continue;		if (ctob(pp->pi_size) > limit)			return (TRUE);	}	return (FALSE);}extern char *msg_momsetlim;/* * Internal error routine */int error(string, value)    char	*string;    int		value;{	int		i = 0;	char		*message;	assert(string != NULL);	assert(*string != '\0');	assert(value > PBSE_);			/* minimum PBS error number */	assert(value <= PBSE_NOSYNCMSTR);	/* maximum PBS error number */	assert(pbs_err_to_txt[i].err_no != 0);	do {		if (pbs_err_to_txt[i].err_no == value)			break;	} while (pbs_err_to_txt[++i].err_no != 0);	assert(pbs_err_to_txt[i].err_txt != NULL);	message = *pbs_err_to_txt[i].err_txt;	assert(message != NULL);	assert(*message != '\0');	(void)fprintf(stderr, msg_momsetlim, string, message);	(void)fflush(stderr);	return (value);}/* * Establish system-enforced limits for the job. * *	Run through the resource list, checking the values for all items *	we recognize. * *	If set_mode is SET_LIMIT_SET, then also set hard limits for the *	system enforced limits (not-polled). *	If anything goes wrong with the process, return a PBS error code *	and print a message on standard error.  A zero-length resource list *	is not an error. * *	If set_mode is SET_LIMIT_SET the entry conditions are: *	    1.	MOM has already forked, and we are called from the child. *	    2.	The child is still running as root. *	    3.  Standard error is open to the user's file. * *	If set_mode is SET_LIMIT_ALTER, we are beening called to modify *	existing limits.  Cannot alter those set by setrlimit (kernel) *	because we are the wrong process.   */int mom_set_limits(pjob, set_mode)    job			*pjob;    int			 set_mode;	/* SET_LIMIT_SET or SET_LIMIT_ALTER */{	char		*id = "mom_set_limits";	char		*pname;	int		retval;	unsigned long	value;	/* place in which to build resource value */	resource	*pres;       	struct rlimit	reslim;	unsigned long	mem_limit  = 0;	DBPRT(("%s: entered\n", id))	assert(pjob != NULL);	assert(pjob->ji_wattr[(int)JOB_ATR_resource].at_type == ATR_TYPE_RESC);	pres = (resource *)	    GET_NEXT(pjob->ji_wattr[(int)JOB_ATR_resource].at_val.at_list);/* * Cycle through all the resource specifications, * setting limits appropriately. */	while (pres != NULL) {		assert(pres->rs_defin != NULL);		pname = pres->rs_defin->rs_name;		assert(pname != NULL);		assert(*pname != '\0');		if (strcmp(pname, "cput") == 0) {			/* cpu time - check, if less than pcput use it */			retval = gettime(pres, &value);			if (retval != PBSE_NONE)			        return (error(pname, retval));		} else if (strcmp(pname, "pcput") == 0) {			/* process cpu time - set */			retval = gettime(pres, &value);			if (retval != PBSE_NONE)			        return (error(pname, retval));			reslim.rlim_cur = reslim.rlim_max = 				(unsigned long)((double)value / cputfactor);			if (setrlimit(RLIMIT_CPU, &reslim) < 0)	        		return (error("RLIMIT_CPU", PBSE_SYSTEM));		} else if (strcmp(pname, "file") == 0) {	/* set */			if (set_mode == SET_LIMIT_SET)  {			    retval = getsize(pres, &value);			    if (retval != PBSE_NONE)			        return (error(pname, retval));			    if (value > INT_MAX)			        return (error(pname, PBSE_BADATVAL));			    reslim.rlim_cur = reslim.rlim_max = value;			    if (setrlimit(RLIMIT_FSIZE, &reslim) < 0)			        return (error(pname, PBSE_SYSTEM));			}		} else if (strcmp(pname, "vmem") == 0) {	/* check */			retval = getsize(pres, &value);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?