⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 schedule.c

📁 openPBS的开放源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
	;    }     /*      * Get the queue limits and utilization for each queue about which the     * scheduler knows.  Any jobs on schd_AllJobs (set by get_and_sort_jobs()     * above) that belong to the queue will be placed on the queue->jobs     * list.     *     * If PBS fails to provide us any information about a queue, treat it     * as a fatal error.     */    error = get_all_queue_info(3 /* Number of queue lists */,	schd_SubmitQueue, 	schd_BatchQueues, 	schd_DedQueues);    if (error < 0) {	DBPRT(("get_all_queue_info() failed\n"));	return (1);	/* Bogus queue - don't recycle. */    } else if (error > 0) {	DBPRT(("queue failed sanity check - wait and recycle.\n"));	sleep(WAIT_FOR_QUEUE_SANITY);	return (0);	/* Attempt to recycle scheduler. */    }    /* Fix added by jjones per Dr. Hook to change behavior of jobs being     * moved from submit queue to exec queue before run, at request of     * ERDC.     */    fix_jim(schd_SubmitQueue->queue,schd_BatchQueues->queue);    /*      * At this point, schd_AllJobs should hold only orphan jobs (i.e. only      * jobs that belong to queues about which the scheduler does not care).     * Note it and go on scheduling -- unless nothing is being scheduled,     * this is more-or-less meaningless.     */    if (schd_AllJobs) {	(void)sprintf(log_buffer, "Some jobs not claimed by queues.");	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);	DBPRT(("%s: %s\n%s: Unclaimed jobs: ", id, log_buffer, id));#ifdef DEBUG	for (this = schd_AllJobs; this != NULL; this = this->next) {	    DBPRT(("%s%s", this->jobid, this->next ? ", " : ""));	}	DBPRT(("\n"));#endif /* DEBUG */    }    /* Dump the list of jobs being scheduled from submit queue. */    if (schd_JOB_DUMPFILE) {	(void)sprintf(log_buffer, "Dumping sorted job information to %s", 	    schd_JOB_DUMPFILE);	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);	make_job_dump(schd_JOB_DUMPFILE);    }    /*     * Allocation and usage information are updated at [roughly] 2:00 AM      * (Eastern time).  Since they may have been updated, attempt to fetch      * them again in the middle of the night.     */    if (schd_NeedToGetDecayInfo)	    schd_decay_info("r");    /* get users' recent past usage */    if (schd_ENFORCE_ALLOCATION && schd_TimeNow >= schd_ENFORCE_ALLOCATION) {	/* 	 * If the allocations file has already been loaded, consult the file	 * timestamp to determine if it has changed.  If so, flag that it	 * needs to be reloaded.	 */	if (!schd_NeedToGetAllocInfo && schd_AllocFilename)	    schd_NeedToGetAllocInfo = 		schd_file_has_changed(schd_AllocFilename, 1);	if (!schd_NeedToGetYTDInfo && schd_CurrentFilename)	    schd_NeedToGetYTDInfo = 		schd_file_has_changed(schd_CurrentFilename, 1);	/* If either file needs to be [re]loaded, do so. */	if (schd_NeedToGetAllocInfo || schd_NeedToGetYTDInfo)	    schd_alloc_info();    }    /*      * We need to save the past usage data periodically, so that a restart      * of pbs_sched doesn't lose it ...     */    if (schd_save_decay())	/* is it time yet ? */	schd_decay_info("w");	/* yep, so do it */    if (schd_SubmitQueue->queue->jobs && 	!(schd_SubmitQueue->queue->flags & (QFLAGS_DISABLED | QFLAGS_STOPPED)))    {	/*	 * Test each job against the set of execution queues.  If it can	 * never be run in any queue, reject it immediately.  This saves	 * the user having to wait for the scheduler to get around to being	 * able to run it.	 */	jobs = reject_unrunnables(schd_SubmitQueue->queue->jobs);	/*	 * Look for queues whose execution hosts are in dedicated time.  If	 * any are found, note that fact and continue.  Otherwise, add them	 * to the normalQs list, which will be scheduled normally.  If the	 * flag is set indicating that one or more hosts is in dedtime, they	 * will be scheduled after everything else is done.	 */	for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) {	    if (schd_ENFORCE_DEDTIME && schd_TimeNow >= schd_ENFORCE_DEDTIME)		outages = schd_host_outage(qptr->queue->exechost, 0);	    else		outages = NULL;	    /* 	     * Is there a scheduled outage right now for this host?  If so,	     * note that fact and continue to the next queue.  All of this	     * information is cached, so this isn't as expensive as it seems.	     */	    if (outages != NULL) {		if ((outages->beg_time <= schd_TimeNow) &&		    (outages->end_time > schd_TimeNow))		{		    DBPRT(("%s: Host %s is in dedtime (from %s:%s to %s:%s)\n",			id, outages->exechost, 			outages->beg_datestr, outages->beg_timestr,			outages->end_datestr, outages->end_timestr));		    DBPRT(("%s: Queue %s@%s will not be scheduled.\n", id,			qptr->queue->qname, qptr->queue->exechost));		    /* This exechost is in dedicated time, ignore the queue. */		    hosts_in_dedtime ++;		    continue;		} else if (outages->beg_time > schd_TimeNow) {		    /* Upcoming dedtime, but not yet.  Schedule the queue. */		    DBPRT(("%s: Host %s upcoming dedtime (at %s:%s to %s:%s)\n",			id, outages->exechost, 			outages->beg_datestr, outages->beg_timestr,			outages->end_datestr, outages->end_timestr));		}	    }	    /* 	     * This host is not currently in dedicated time.  Add it to the 	     * tail of the list of queues to be scheduled.	     */	    newqlp = (QueueList *)malloc(sizeof(QueueList));	    if (newqlp == NULL) {		(void)sprintf(log_buffer, "malloc(QueueList) for %s@%s failed",		    qptr->queue->qname, qptr->queue->exechost);		log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, 			log_buffer);		DBPRT(("%s: %s\n", id, log_buffer));		if (normalQs)		    schd_free_qlist(normalQs);		return (1);	    }	    newqlp->queue = qptr->queue;	    if (normalQtail)		normalQtail->next = newqlp;	    else		normalQs = newqlp;	    normalQtail = newqlp;	    newqlp->next = NULL;	}	DBPRT(("%s: calling schedule_jobs(", id));	if (normalQs) {	    for (qptr = normalQs; qptr != NULL; qptr = qptr->next)		DBPRT(("%s@%s%s", qptr->queue->qname, qptr->queue->exechost,		    qptr->next ? ", " : ""));	} else {	    DBPRT(("<no batch queues>"));	}	DBPRT((")\n"));	/* Now make the call to actually run some jobs */	total_ran += ran = schedule_jobs(normalQs, jobs, reason);	if (ran < 0) {	    DBPRT(("Could not run any jobs!\n"));	} else {	    DBPRT(("RAN %d jobs.\n", ran));	}	if (normalQs)	    schd_free_qlist(normalQs);	normalQs = normalQtail = NULL;    }    /*     * Now check the dedtime queues with queued jobs for hosts that are      * in dedicated time.  If any are found, comment the jobs appropriately     * and/or schedule them.     */    for (qptr = schd_DedQueues; qptr != NULL; qptr = qptr->next) {	if (qptr->queue->queued == 0)	    continue;	DBPRT(("%s: schd_handle_dedicated_time(%s)\n", id, qptr->queue->qname));	/* 	 * Keep track of the next pointer, and zero the queue's next ptr so	 * it looks like a single queue.	 */	next = qptr->next; 	qptr->next = NULL;	ran = schd_handle_dedicated_time(qptr->queue);	if (ran < 0) {	    (void)sprintf(log_buffer, 		"schd_handle_dedicated_time(%s@%s) failed!", 		qptr->queue->qname, qptr->queue->exechost);	    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, 		log_buffer);	    DBPRT(("%s: %s\n", id, log_buffer));	} else {	    DBPRT(("RAN %d jobs on %s@%s.\n", ran, qptr->queue->qname,		qptr->queue->exechost));	    total_ran += ran;	}	/* Replace the zero'd next pointer to rechain the list. */	qptr->next = next;    }    if (total_ran > 0) {	(void)sprintf(log_buffer, "System resources after scheduling:");	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);	schd_dump_rsrclist();    }    (void)sprintf(log_buffer, ">>>  End Scheduling Cycle (ran %d jobs)  <<<",	total_ran);    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);    DBPRT(("%s\n", log_buffer));    return (1);}static intschedule_jobs(QueueList *queues, Job *jobs, char *reason){    char *id = "schedule_jobs";    int    numran;    Job   *job;    Queue *shortest;    int    priority_to_1st = 1;    /*     * Since the sorting code has provided an order in which the jobs should     * be run, attempt to honor that order by treating the first job on the     * list as our first priority.  This amounts to draining the queue in     * order to run that job, if necessary.     *      * If the job has been waiting too long, find the smallest queue in which     * the job will fit, and consider its expected run time.  If the waiting     * job cannot run when the queue has emptied, then go on to the next.       * However, if there are jobs running on the queue, it is possible that     * this queue could support the waiting job if it were started draining     * now.  When enough jobs had exited, the waiting job would be runnable.     * In order to determine if this is true, walk through the list of jobs,     * which are sorted in order of completion (from soonest to last), and     * find how many resources would be available after that job finished.     * If there is space, calculate what time it will be when that many jobs     * have completed, and see if the primetime limits apply at that time.     * If the job fits in the primetime limits at that time, then start the     * queue draining.  If it will not fit after all jobs have been tested,     * then give up on this queue and go on to the next.     *      * If a queue was found that requires draining, mark it for draining.     *      * After the waiting job handling has completed, collect a list of     * all the available execution queues, and place it into the pointer     * given to this function by the caller.     */    for (job = jobs; job != NULL; job = job->next) {	if (job->state != 'Q')	    continue;	if (!priority_to_1st && !(job->flags & JFLAGS_WAITING))	    continue;	DBPRT(("%s: job %s is %s (eligible for %s, needs %d nodes)\n", id,	    job->jobid, 	    priority_to_1st ? "FIRSTJOB" : 		(job->flags & JFLAGS_PRIORITY) ? "SPECIAL" : "WAITING",	    schd_sec2val(job->eligible), job->nodes));	/*	 * Find the smallest, shortest-wait queue in which this job will	 * fit.  If it is empty, great.  If not, mark it to be drained,	 * in anticipation of the job being run soon.  Note that the queue	 * drain_by time should only be shortened - it doesn't make sense	 * to push it out.	 */	shortest = schd_find_drain(queues, job);	if (shortest) {	    /*	     * If there are no jobs running in the queue, then unset the	     * draining flag (if present), so that the queue will be	     * available for this job.	     *	     * If there are running jobs, set the draining flag, and	     * adjust the empty_by value to be the expected time when	     * the job will first become runnable.	     */	    if (shortest->running == 0) {		shortest->flags &= ~QFLAGS_DRAINING;	    } else {		/* If running jobs, empty_by should be non-zero. */		if (shortest->drain_by <= shortest->empty_by) {		    shortest->flags |= QFLAGS_DRAINING; 		    DBPRT(("%s:	shortest queue %s now draining, drain_by %s",			id, shortest->qname, ctime(&shortest->drain_by)));		}	    }	}	/*	 * We have looked at (and possibly arranged for special treatment	 * of) the first job on the list.  Now only look for special or	 * waiting jobs.	 */	priority_to_1st = 0;    }    numran = schd_pack_queues(jobs, queues, reason);    if (numran < 0) {	(void)sprintf(log_buffer, 	    "sched_pack_queues() failed!");	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);	DBPRT(("%s: %s\n", id, log_buffer));    }    return (numran);}/* * Get information about each of the queues in the list of lists.  If * schd_get_queue_limits() fails, return the error condition.  It may * be a transient or a hard failure, which the caller may want to deal * with.  If all queues are successful, return '0'. */static intget_all_queue_info(int numqlists, QueueList *list, ...){    va_list ap;    int    count = 0, ret;    QueueList *qptr;    char *id = "get_all_queue_info";        va_start(ap, numqlists);    while (count < numqlists) {	list = va_arg(ap, QueueList *);	for (qptr = list; qptr != NULL; qptr = qptr->next) {	    /*	     * Get the limits, current resources, and any jobs for this	     * queue.	     */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -