📄 schedule.c
字号:
/** OpenPBS (Portable Batch System) v2.3 Software License* * Copyright (c) 1999-2000 Veridian Information Solutions, Inc.* All rights reserved.* * ---------------------------------------------------------------------------* For a license to use or redistribute the OpenPBS software under conditions* other than those described below, or to purchase support for this software,* please contact Veridian Systems, PBS Products Department ("Licensor") at:* * www.OpenPBS.org +1 650 967-4675 sales@OpenPBS.org* 877 902-4PBS (US toll-free)* ---------------------------------------------------------------------------* * This license covers use of the OpenPBS v2.3 software (the "Software") at* your site or location, and, for certain users, redistribution of the* Software to other sites and locations. Use and redistribution of* OpenPBS v2.3 in source and binary forms, with or without modification,* are permitted provided that all of the following conditions are met.* After December 31, 2001, only conditions 3-6 must be met:* * 1. Commercial and/or non-commercial use of the Software is permitted* provided a current software registration is on file at www.OpenPBS.org.* If use of this software contributes to a publication, product, or* service, proper attribution must be given; see www.OpenPBS.org/credit.html* * 2. Redistribution in any form is only permitted for non-commercial,* non-profit purposes. There can be no charge for the Software or any* software incorporating the Software. Further, there can be no* expectation of revenue generated as a consequence of redistributing* the Software.* * 3. Any Redistribution of source code must retain the above copyright notice* and the acknowledgment contained in paragraph 6, this list of conditions* and the disclaimer contained in paragraph 7.* * 4. Any Redistribution in binary form must reproduce the above copyright* notice and the acknowledgment contained in paragraph 6, this list of* conditions and the disclaimer contained in paragraph 7 in the* documentation and/or other materials provided with the distribution.* * 5. Redistributions in any form must be accompanied by information on how to* obtain complete source code for the OpenPBS software and any* modifications and/or additions to the OpenPBS software. The source code* must either be included in the distribution or be available for no more* than the cost of distribution plus a nominal fee, and all modifications* and additions to the Software must be freely redistributable by any party* (including Licensor) without restriction.* * 6. All advertising materials mentioning features or use of the Software must* display the following acknowledgment:* * "This product includes software developed by NASA Ames Research Center,* Lawrence Livermore National Laboratory, and Veridian Information * Solutions, Inc.* Visit www.OpenPBS.org for OpenPBS software support,* products, and information."* * 7. DISCLAIMER OF WARRANTY* * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT* ARE EXPRESSLY DISCLAIMED.* * IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE* U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT,* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.* * This license will be governed by the laws of the Commonwealth of Virginia,* without reference to its choice of law rules.*//* $Id: schedule.c,v 1.1.6.2 2000/08/09 00:18:46 hender Exp $ *//* * This is the main loop for the scheduler. The scheduler receives a * command and then calls the routine schedule(). Appropriate actions * are taken based on the command specified. All actions taken by the * scheduler must be initiated from here. */#include <sys/stat.h>#include <errno.h>#include <stdarg.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <time.h>#include <unistd.h>/* PBS header files */#include "pbs_error.h"#include "pbs_ifl.h"#include "log.h"#include "sched_cmds.h"/* Scheduler header files */#include "toolkit.h"#include "gblxvars.h"#include "msgs.h"extern int connector;extern char *schd_CmdStr[16];/* * How many seconds to wait before attempting to re-request the job and queue * information from the server. */#define WAIT_FOR_QUEUE_SANITY 10Resources schd_Rsrcs; /* system resources */time_t schd_TimeNow;time_t schd_TimeLast = 0;struct tm schd_TmNow;int last_run_in_pt = 0;Job *schd_AllJobs = NULL;static int schd_req (int cmd);static int get_all_queue_info (int numqlists, QueueList *list, ...);static Job *reject_unrunnables (Job *jobs);static int schedule_jobs (QueueList *queues, Job *jobs, char *reason);static int schedule_restart (Job *joblist);static int make_job_dump (char *dumpfile);static int dump_sorted_jobs (FILE *file, Job *joblist);void fix_jim (Queue *submit, Queue *jim);/* * This routine gets the scheduling command and takes appropriate action. * C.f. PBS Administrator's Guide, Section 4.1. * * Valid commands are: * SCH_ERROR ERROR. * SCH_SCHEDULE_NULL Place holder. * SCH_SCHEDULE_NEW New job arrived or a non-running job changed state. * SCH_SCHEDULE_TERM A running job terminated. * SCH_SCHEDULE_TIME The schedulers time interval expired. * SCH_SCHEDULE_RECYC One job was started in the last scheduling cycle. * SCH_SCHEDULE_CMD The server attribute "scheduling" was set to true. * SCH_CONFIGURE Perform any scheduler [re]configuration. * SCH_QUIT QUIT * SCH_RULESET Reread the scheduler rules. * SCH_SCHEDULE_FIRST First schedule run after server starts. */int schedule(int cmd){ char *id = "schedule"; char *cmdstr = "Error"; /* * Try to find a text string that describes the command request. */ if (cmd >= 0) cmdstr = (schd_CmdStr[cmd] != NULL) ? schd_CmdStr[cmd] : "Unknown"; (void)sprintf(log_buffer, "Schedule: received cmd %d (%s)", cmd, cmdstr); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("--------\n%s\n", log_buffer)); /* * See if a reconfiguration is pending. If so, do it before anything * starts to look at the structures. */ if (schd_sigflags & SCHD_SIGFLAGS_RECONFIG) { (void)sprintf(log_buffer, "Reconfiguring due to delivery of SIGHUP."); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); if (schedinit(0, NULL)) { (void)sprintf(log_buffer, "%s: FATAL ERROR!!! Configuration failed!!!", id); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); exit (1); } /* Turn off the pending reconfiguration flag. */ schd_sigflags &= ~SCHD_SIGFLAGS_RECONFIG; } /* * Assume the scheduler is about to use the current configuration. * Set the BUSY flag so that reconfiguration will be postponed until * the scheduler is idle if a SIGHUP is delivered. */ schd_sigflags |= SCHD_SIGFLAGS_BUSY; switch (cmd) { /* * "Null" commands. These cases are quietly ignored. */ case SCH_SCHEDULE_NULL: /* Placeholder only. */ case SCH_ERROR: /* Error. */ case SCH_RULESET: /* Re-read scheduler rules. */ (void)sprintf(log_buffer, "command %d ignored.", cmd); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); break; /* * These commands cause the scheduler to perform its scheduling cycle. */ case SCH_SCHEDULE_FIRST: /* 1st schedule run after server starts. */ case SCH_SCHEDULE_CMD: /* Perform a schedule run now, on command. */ case SCH_SCHEDULE_TERM: /* A running job terminated. */ case SCH_SCHEDULE_NEW: /* A new job has arrived. */ case SCH_SCHEDULE_TIME: /* Scheduler sleep interval reached. */ case SCH_SCHEDULE_RECYC: /* Recycle scheduler after 1 run. */ if (!schd_req(cmd)) { schd_cleanup(); DBPRT(("schd_req(%d) returned 0. Calling it again.\n", cmd)); schd_req(cmd); } schd_cleanup(); break; case SCH_CONFIGURE: /* Re-initialize the scheduler. */ (void)sprintf(log_buffer, "[re]configure scheduler"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); schedinit(0, NULL); /* XXX args not used... for now! */ break; case SCH_QUIT: /* Exit gracefully. */ (void)sprintf(log_buffer, "Scheduler was asked to quit. Exit."); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); exit(0); default: (void)sprintf(log_buffer, "Schedule command %d unrecognized.", cmd); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); } /* * The scheduling part of the scheduler is now idle. Unset the BUSY * flag. */ schd_sigflags &= ~SCHD_SIGFLAGS_BUSY; schd_FirstRun = 0; return (0);}/* * Service a request to schedule a job. *//* ARGSUSED */int schd_req(int cmd){ char *id = "schd_req"; Job *this, *jobs = NULL; QueueList *qptr, *next; QueueList *normalQs = NULL, *normalQtail = NULL, *newqlp; Outage *outages; int ran, error, total_ran = 0; int hosts_in_dedtime = 0; struct tm *tm_ptr; char reason[MAX_TXT + 1]; /* Save "last" run time (in global 'schd_TimeNow') for later use. */ schd_TimeLast = schd_TimeNow; /* * Get the number of seconds since the Epoch, and break it down into * the various day, month, year, fields in a struct tm. */ time(&schd_TimeNow); if (tm_ptr = localtime(&schd_TimeNow)) memcpy((void *)&schd_TmNow, (void *)tm_ptr, sizeof (struct tm)); else memset((void *)&schd_TmNow, 0, sizeof (struct tm)); DBPRT(("[time_t %d] %s", schd_TimeNow, ctime(&schd_TimeNow))); /* * If the configuration file has been changed since the last time the * scheduler was run, than note that in the logs. Don't re-read it * automatically, just note the fact. Don't reset the timestamp - it * will be done when someone finally HUP's the scheduler. */ if (schd_CfgFilename && schd_file_has_changed(schd_CfgFilename, 0)) { (void)sprintf(log_buffer, "WARNING!!! Scheduler config file %s has changed!", schd_CfgFilename); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); (void)sprintf(log_buffer, "Run 'kill -HUP %ld' to reconfigure.", (long)getpid()); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } /* * See if the holidays file has changed. If it's re-read successfully, * update the last changed timestamp. Otherwise, keep it around and * keep trying to re-read it until someone fixes the problem. "This * shouldn't happen." */ if (schd_file_has_changed(HOLIDAYS_FILE, 0) > 0) { (void)sprintf(log_buffer, "Attempting to update holidays/primetime from %s.", HOLIDAYS_FILE); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); if (schd_read_holidays() < 0) { (void)sprintf(log_buffer, "Failed to read holidays file."); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s\n", log_buffer)); } else { /* Reset the "last changed time", since it was re-read okay. */ (void)schd_file_has_changed(HOLIDAYS_FILE, 1); } } /* * If this is the first run during non-primetime, set all the execution * queues' observed primetime back to 'on'. If it's primetime now, set * the "last run in primetime" global. */ if (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) { if (schd_prime_time(0)) { last_run_in_pt = 1; } else if (last_run_in_pt) { DBPRT(("%s: First non-pt run, reset queue observed times.\n", id)); if (schd_BatchQueues) schd_reset_observed_pt(schd_BatchQueues); /* Last run was not in prime time. */ last_run_in_pt = 0; } } /* Get the current list of all jobs known to our server. * Set the job->priority field so that each jobs is assign an * inherent priority, using several criteria including recent * past usage, nodes requsted, time on queue, originating queue * and then populate the schd_AllJobs list with these jobs */ jobs = schd_get_jobs(NULL, NULL); /* * Check for queued jobs on any of the run queues. This may happen if * there is some glitch and the POSIX jobs are checkpointed. * schedule_restart() will return non-zero if it finds and restarts * any jobs. Recycle if this is the case. */ if (schd_SCHED_RESTART_ACTION != SCHD_RESTART_NONE) { if (schedule_restart(jobs)) { schd_free_jobs(jobs); return (0); } } /* * Sort jobs by the priority field set above. Note that the jobs * are reordered "in situ". The sorting routine returns a pointer to * the new head of the list created by relinking the elements of the * linked list, or NULL if an error occurs. Zero the original list * pointer to reduce confusion - the same list, in different order, now * lives on schd_AllJobs. */ schd_AllJobs = schd_sort_jobs(jobs); jobs = NULL; /* now query the GRM on the T3e to get the node map information; * and then correlatate in-use PEs to running jobs, filling the global * schd_PEMAP in the process. */ if (load_pe_map(schd_AllJobs)) { /* unable to get PE map ... do something... */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -