📄 usersort.c
字号:
if (ndedtimeQ) { if ((dedtimeQ = (Job **)malloc(ndedtimeQ * sizeof (Job *))) == NULL) { DBPRT(("%s: malloc failed for %d Job *'s (%s)\n", id, ndedtimeQ, "dedtimeQ")); goto malloc_failed; } } if (nspecialQ) { if ((specialQ = (Job **)malloc(nspecialQ * sizeof (Job *))) == NULL) { DBPRT(("%s: malloc failed for %d Job *'s (%s)\n", id, nspecialQ, "specialQ")); goto malloc_failed; } } if (nwaitingQ) { if ((waitingQ = (Job **)malloc(nwaitingQ * sizeof (Job *))) == NULL) { DBPRT(("%s: malloc failed for %d Job *'s (%s)\n", id, nwaitingQ, "waitingQ")); goto malloc_failed; } } if (nnormalQ) { if ((normalQ = (Job **)malloc(nnormalQ * sizeof (Job *))) == NULL) { DBPRT(("%s: malloc failed for %d Job *'s (%s)\n", id, nnormalQ, "normalQ")); goto malloc_failed; } } if (notherQ) { if ((otherQ = (Job **)malloc(notherQ * sizeof (Job *))) == NULL) { DBPRT(("%s: malloc failed for %d Job *'s (%s)\n", id, notherQ, "otherQ")); goto malloc_failed; } } /* * Populate the arrays of pointers with */ dedtimeI = specialI = waitingI = normalI = otherI = runningI = 0; for (this = jobs; this != NULL; this = this->next) { if (this->state != 'R' && this->state != 'Q') { otherQ[otherI++] = this; continue; } if (this->state == 'R') { runningJobs[runningI++] = this; continue; } /* Does this job belong on the dedicated list? */ if (this->flags & JFLAGS_DEDICATED) { dedtimeQ[dedtimeI++] = this; continue; } /* Does this job belong on the special queue list? */ if ((schd_SpecialQueue != NULL) && (!strcmp(this->qname, schd_SpecialQueue->queue->qname))) { specialQ[specialI++] = this; continue; } /* Is the job in an outstanding condition? */ if (this->flags & JFLAGS_WAITING) { waitingQ[waitingI++] = this; continue; } /* Just a boring old everyday job. */ normalQ[normalI++] = this; } (void)sprintf(log_buffer, "running:%d queued:%d waiting:%d special:%d ded:%d other: %d total:%d", runningI, normalI, waitingI, specialI, dedtimeI, otherI, runningI + normalI + waitingI + specialI + dedtimeI + otherI); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (0);malloc_failed: if (dedtimeQ) { free(dedtimeQ); dedtimeQ = NULL; } if (specialQ) { free(specialQ); specialQ = NULL; } if (waitingQ) { free(waitingQ); waitingQ = NULL; } if (normalQ) { free(normalQ); normalQ = NULL; } if (otherQ) { free(otherQ); otherQ = NULL; } nJRs = nJQs = 0; log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "malloc failed"); return (-1);}/* * Determine whether a queued job has waited so long that extra emphasis * should be placed on running this job. */static intis_outstanding(Job *job){ /* * If it is currently primetime, consider an interactive job "waiting * for a long time" if it has waited for more time than some constant * plus the time it requested. * * A batch job can wait for much longer than an interactive job. Given * the ability to submit an interactive job, assume that if the user was * actually waiting for the job to start, they would submit it as inter- * active. */ if (schd_INTERACTIVE_LONG_WAIT && job->flags & JFLAGS_INTERACTIVE) { if (job->eligible > (job->walltime + schd_INTERACTIVE_LONG_WAIT)) if (schd_prime_time(0)) return (1); } else { if (schd_MAX_QUEUED_TIME && (job->eligible > schd_MAX_QUEUED_TIME)) return 1; }#ifdef JAMES_TEST if (job->eligible > Min_Queued_Time) return job->walltime < schd_SMALL_QUEUED_TIME;#endif /* JAMES_TEST */ return 0;}/* * Construct a list of users who own one or more "normal" jobs, and count * the number of jobs they own. */static voidget_users(void){ char *id = "get_users"; Job *job_ptr; char *uname; char *name; int which; int i; int j; /* * Destroy any previously created list. */ if (Users) free(Users); Users = NULL; nUsers = 0; /* * Walk the list of "normal" jobs, creating group/owner tuples for each * new user. */ for (i = 0; i < nnormalQ; ++i) { job_ptr = normalQ[i]; name = make_grp_usr_tuple(job_ptr); /* Is this a new entry in the list? */ if (is_new_user(name, Users, nUsers, &which)) { ++nUsers; Users = realloc(Users, nUsers * sizeof *Users); if (!Users) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "realloc(Users)"); return; } make_uinfo(name, &(Users[nUsers - 1])); } else { Users[which].jobcount++; } } /* Now, walk the list of running jobs and record each user's count. */ for (i = 0; i < nUsers; ++i) { uname = Users[i].name; for (j = 0; j < nJRs; ++j) { job_ptr = runningJobs[j]; /* Create the name tuple for this user. */ name = make_grp_usr_tuple(job_ptr); if (!strcmp(name, uname)) Users[i].running_jobs++; ++job_ptr; } } return;}/* Search the list of users, looking for 'user'. */static intis_new_user(char *user, struct Uinfo *list, int len, int *which){ struct Uinfo *list_ptr = list; int i; for (i = 0; i < len; ++i) { if (!strcmp(list_ptr->name, user)) { *which = i; return 0; } ++list_ptr; } return 1;}/* Build a record of the pertinent data about a job owner. */static intmake_uinfo(char *user, struct Uinfo *uinfo){ strncpy(uinfo->name, user, sizeof(uinfo->name) - 1); uinfo->jobcount = 1; uinfo->running_jobs = 0; uinfo->nodehours = get_resource_usage(user); return 0;}/* * Retrieve this user's info from the past usage database. If 'user' is * not found, create and install an appropriate entry in the usage database. */static doubleget_resource_usage(char *user){ char *id = "get_resource_usage"; struct past_usage *pusage_ptr; int i; /* Search for 'user' in the existing database. */ pusage_ptr = Resource_usage; for (i = 0; i < n_Resource_usage; ++i) { if (!strcmp(pusage_ptr->user, user)) return (pusage_ptr->usage); ++pusage_ptr; } /* No pre-existing entry. Create a new one. */ ++n_Resource_usage; pusage_ptr = realloc(Resource_usage, n_Resource_usage * sizeof *pusage_ptr); if (!pusage_ptr) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "realloc(Resource_usage)"); return -1; } Resource_usage = pusage_ptr; pusage_ptr += n_Resource_usage - 1; strcpy(pusage_ptr->user, user); pusage_ptr->usage = 0.0; return 0;}/* * Update past-usage database to account for a new job being run. */voidschd_update_resource_usage(Job *job){ char *id = "schd_update_resource_usage"; int i; char *name; double node_hours; /* create the name tuple for this guy */ name = make_grp_usr_tuple(job); /* We want to evaluate a new past-usage charge rate. * instead of use this old equation: * node_hours = (job->walltime / 3600.0) * job->nodes; * * lets just have a flat charge per job. but lets not * charge anything for very short jobs. */ if (job->walltime > (20*60)) /* greater than 20 minutes */ node_hours = 10.0; else node_hours = 1.0; /* * First, update the recent past-usage database. Find the appropriate * user entry, and add the job's expected usage to that user's total. */ for (i = 0; i < n_Resource_usage; ++i) { if (strcmp(Resource_usage[i].user, name)) continue; Resource_usage[i].usage += node_hours; if (debug) { sprintf(log_buffer, "%s has %f node-hour recent usage", Resource_usage[i].user, Resource_usage[i].usage); log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, id, log_buffer); } break; } /* * Next, update the FY-to-date usage database. Same as above, but * operate on the per-group database. */ for (i = 0; i < schd_NumAllocation; i++) { if (strcmp(schd_GroupTable[i].gname, job->group)) continue; schd_GroupTable[i].total_usage += node_hours; break; } /* * Finally, update the user's running-job count. */ for (i = 0; i < nUsers; ++i) { if (strcmp(Users[i].name, name)) continue; Users[i].running_jobs++; if (debug) { sprintf(log_buffer, "%s has %d running jobs", Users[i].name, Users[i].running_jobs); log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, id, log_buffer); } break; } return;}/* * Sort the list of users with jobs in the "normal" list in ascending order * by number of jobs queued. */static voidsort_users(void){ qsort(Users, nUsers, sizeof *Users, compare_users); return;}/* * qsort() comparison function. Sort Uinfo records by increasing value of * the 'jobcount' fields. */static intcompare_users(const void *e1, const void *e2){ struct Uinfo *u1 = (struct Uinfo *)e1; struct Uinfo *u2 = (struct Uinfo *)e2; int jc1; int jc2; jc1 = u1->jobcount; jc2 = u2->jobcount; return ((jc1 > jc2) ? 1 : ((jc1 < jc2) ? -1 : 0));}/* * Preliminary sort of "normal" jobs. The ordering is dependant upon * whether it is currently primetime or not. */static voidsort_jobs_1st(void){ int (*criterion) (const void *, const void *); /* * Depending upon the time of day, use a different ordering routine. * This is where most of the policy is implemented, as the rest of * the scheduler code attempts to run jobs in as close to this order * as possible, assuming available resources, time, etc. */ criterion = compare_nonprime_batch; if (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) { if (schd_prime_time(0)) criterion = compare_prime_batch; }#ifdef HISTORICAL_CODE /* * On the SP2, there was a concept of "non-primetime interactive time". * There is no equivalent on the Origins. However, this code remains * for historical reasons. */ else if ( Batch_Time ) criterion = compare_nonprime_inter;#endif /* HISTORICAL_CODE */ /* * Sort the list of "normal" jobs, based upon the proper criterion * for the current time of day. */ qsort(normalQ, nnormalQ, sizeof *normalQ, criterion); /* * Sort the list of running jobs, in ascending order by the expected * time to completion. */ qsort(runningJobs, nJRs, sizeof *runningJobs, compare_running); return;}/* * qsort() function to order jobs during primetime. * * Order shortest jobs first (ascending order of walltime requested) * Break ties by sorting in ascending order of requested number of CPUs. */static intcompare_prime_batch(const void *e1, const void *e2){ Job *job1 = *(Job **)e1; Job *job2 = *(Job **)e2; /* Shortest to Longest */ if (job1->walltime > job2->walltime) return 1; if (job1->walltime < job2->walltime) return -1; /* Largest to Smallest */ if (job1->nodes > job2->nodes) return -1; if (job1->nodes < job2->nodes) return 1; /* Oldest to youngest. */ if (job1->eligible > job2->eligible) return -1; if (job1->eligible < job2->eligible) return 1; return 0;}/* * qsort() function to order jobs during non-primetime. * * Order largest jobs first (descending order of CPUs requested) * Break ties by sorting in ascending order of walltime requested. */static intcompare_nonprime_batch(const void *e1, const void *e2){ Job *job1 = *(Job **)e1; Job *job2 = *(Job **)e2; /* Largest to Smallest */ if (job1->nodes > job2->nodes) return -1; if (job1->nodes < job2->nodes) return 1; /* Shortest to Longest */ if (job1->walltime > job2->walltime) return 1; if (job1->walltime < job2->walltime) return -1; /* Oldest to youngest. */ if (job1->eligible > job2->eligible) return -1; if (job1->eligible < job2->eligible) return 1; return 0;}#ifdef HISTORICAL_CODE/* * old qsort() function to order jobs during primetime. * * Order interactive jobs before non-interactive (batch) jobs. * * For interactive jobs, break ties by sorting in descending order by * requested number of CPUs. * In batch jobs, break ties by sorting from shortest to longest * requested walltime. */static intcompare_prime(const void *e1, const void *e2){ Job *job1 = (Job *)e1; Job *job2 = (Job *)e2; /* * If one job or the other is interactive, favor the interactive job. * Otherwise, break the tie based on CPUs or walltime requested (for * interactive or batch jobs, respectively). If it's still a tie, * return 0 (no reordering). */ if ((job1->flags & JFLAGS_INTERACTIVE) != (job2->flags & JFLAGS_INTERACTIVE)) { return (job1->flags & JFLAGS_INTERACTIVE) ? -1 : 1; } if (job1->flags & JFLAGS_INTERACTIVE) { if (job1->nodes > job2->nodes) return -1; if (job1->nodes < job2->nodes) return 1; } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -