📄 getqueues.c
字号:
queue->ncpus_assn = 0; if (queue->mem_assn == UNSPECIFIED) queue->mem_assn = 0; if (queue->running == UNSPECIFIED) queue->running = 0; /* * Get the resources for this queue from the resource monitor (if * available). If the resmom is not accessible, disable the queue. * Don't bother checking if the queue is Stopped. */ if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0 && (queue->flags & QFLAGS_STOPPED) == 0) { queue->rsrcs = schd_get_resources(queue->exechost); if (queue->rsrcs != NULL) { /* Account for this queue's resources. */ queue->rsrcs->ncpus_alloc += queue->ncpus_assn; queue->rsrcs->mem_alloc += queue->mem_assn; if (queue->rsrcs->freemem < 1) { tmpval=queue->rsrcs->mem_total - queue->rsrcs->mem_alloc; if (tmpval < 1) queue->rsrcs->freemem = 0; else queue->rsrcs->freemem = tmpval; } queue->rsrcs->njobs += queue->running; queue->ncpus_max = ( queue->ncpus_max <= queue->rsrcs->ncpus_total ? queue->ncpus_max : queue->rsrcs->ncpus_total ); } else { (void)sprintf(log_buffer, "Can't get resources for %s@%s - marking unavailable.", queue->qname, queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); queue->flags |= QFLAGS_NODEDOWN; } } return (0);}int queue_claim_jobs(Queue *queue, Job **joblist_ptr){ Job *job, *prev, *next, *qtail, *longest; int moved; int running, queued, held, other; /* * The time at which this queue should be empty (i.e. when all jobs * currently running on it are completed). Keep track of the longest * running job and compute the empties_at value from that. */ longest = NULL; /* * Keep track of some statistics about what jobs have been found in the * list. These aren't really used (yet), but are easy to compute. */ running = 0; queued = 0; held = 0; other = 0; /* * The number of jobs that have been moved from the global list to the * per-queue list. */ moved = 0; /* * Find the last element of the list of jobs on this queue. This is * probably unnecessary (since this should always be called with * 'queue->jobs' pointing to NULL. Still, it doesn't hurt to try. */ qtail = NULL; if (queue->jobs) { for (qtail = queue->jobs; qtail->next != NULL; qtail = qtail->next) /* Do nothing - just walk to next-to-last element of list */ ; } prev = NULL; for (job = *joblist_ptr; job != NULL; job = next) { next = job->next; /* Wrong queue -- ignore this job. */ if (strcmp(job->qname, queue->qname)) { prev = job; continue; } /* * This job belongs to this queue. Remove it from the job list and * place it at the tail of the queue's job list. This is somewhat * complicated since we have to remove it from the joblist first. * If there is no "previous" job element, then the current job is * the head of the list. */ if (job == *joblist_ptr) { /* * This is the head of the list -- just point the list head to * the job's next pointer and now the job is "orphaned". */ *joblist_ptr = next; prev = *joblist_ptr; } else { /* * This job lies in the middle of the list somewhere. Jump over * it in the previous element, and we're done. Note that since * we skipped this job, the previous job pointer does not change. */ prev->next = job->next; } /* * Now 'job' is the only active handle on the job. Place it at the * tail of the queue's list. If 'qtail' is NULL, this is the first * job -- place it at the head of the list. Otherwise, place it after * the element pointed to by the 'qtail'. Either way, this is the * last element in the list, so point the qtail at it and clear its * next pointer. */ if (qtail == NULL) queue->jobs = job; else qtail->next = job; qtail = job; job->next = NULL; if (job->flags & JFLAGS_QNAME_LOCAL) { /* * The job has some memory that was allocated when it was created, * that is used to store the name of the queue on which it resides. * Since we know exactly what queue it lives on (the one pointed to * by 'queue', to be exact), we can free the storage and point the * 'job->qname' at 'queue->qname'. Also store a reference to the * owner queue in the job itself. */ free (job->qname); job->qname = queue->qname; job->queue = queue; /* * Turn off the flag -- job->qname is now a reference to a queue. */ job->flags &= ~JFLAGS_QNAME_LOCAL; } /* Now, count the jobs and increment the correct statistic. */ moved ++; switch (job->state) { case 'R': running ++; /* * Adjust the time of any job that has run over its expected * time to the JOB_OVERTIME_SLOP. */ if (job->time_left < 0) { job->time_left = JOB_OVERTIME_SLOP; } /* If this job will be running the longest, note that. */ if ((longest == NULL) || (job->time_left > longest->time_left)) { longest = job; } break; case 'Q': queued ++; break; case 'H': held ++; break; default: other ++; break; } } /* * If any jobs were running, then set the empty_by time to the absolute * time (in seconds) when all jobs should be completed. If none are * running, then set empty_by time to 0. */ if (longest) queue->empty_by = schd_TimeNow + longest->time_left; else queue->empty_by = 0; return (moved);}int queue_sanity(Queue *queue){ char *id = "queue_sanity"; Job *job; int queued, running; int is_sane; is_sane = 1; /* Assume the queue is sane for now. */ /* * Count running and queued jobs and make sure the numbers match up. */ queued = running = 0; for (job = queue->jobs; job != NULL; job = job->next) { switch (job->state) { case 'R': running ++; break; case 'Q': queued ++; break; default: /* Empty */ break; } } if (queue->running != running) { sprintf(log_buffer, "Queue '%s' expected %d running jobs, but found %d", queue->qname, queue->running, running); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); is_sane = 0; } if (queue->queued != queued) { sprintf(log_buffer, "Queue '%s' expected %d queued jobs, but found %d", queue->qname, queue->queued, queued); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); is_sane = 0; } if (queue->running && (queue->empty_by < schd_TimeNow)) { sprintf(log_buffer, "Queue '%s' was expected to be empty %ld seconds ago", queue->qname, (long)(schd_TimeNow - queue->empty_by)); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); is_sane = 0; } return (is_sane);}int get_node_status(){ char *id = "get_node_status"; QueueList *qptr; Batch_Status *bs, *bsp; AttrList *attr; static AttrList alist[] = {{NULL, ATTR_NODE_state, "", ""}}; /* Query the server for status of all nodes, and then save this * info in the appropraite queue struct. */ if ((bs = pbs_statnode(connector, NULL, alist, NULL)) == NULL) { sprintf(log_buffer, "pbs_statnode failed: %d", pbs_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } /* First lets assume all nodes are down; later we will revise * this if we learn otherwise; we want to assume down so that * we don't get hung trying to connect to a hung node later. */ for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) qptr->queue->flags |= QFLAGS_NODEDOWN; /* Process the list of nodes returned by the server. */ for (bsp = bs; bsp != NULL; bsp = bsp->next) { for (attr = bsp->attribs; attr != NULL; attr = attr->next) { if ((strstr(attr->value, ND_free)) || (strstr(attr->value, ND_busy)) || (strstr(attr->value, ND_reserve)) || (strstr(attr->value, "job-"))) { for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) { if (strstr(qptr->queue->exechost, bsp->name)) { qptr->queue->flags &= ~QFLAGS_NODEDOWN; break; } } } else { sprintf(log_buffer, "%s (state=%s)", bsp->name,attr->value); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } } } pbs_statfree(bs); return (0);}/* schd_get_queue_memory - query queue memory limit from the server. */size_t schd_get_queue_memory(char *qName){ char *id = "schd_get_queue_limits"; size_t mem_max, mem_default; Batch_Status *bs; AttrList *attr; static AttrList alist[] = { {&alist[1], ATTR_rescdflt, "", ""}, {NULL, ATTR_rescmax, "", ""} }; mem_default = (size_t)0; mem_max = (size_t)0; /* Ask the server for information about the specified queue. */ if ((bs = pbs_statque(connector, qName, alist, NULL)) == NULL) { sprintf(log_buffer, "pbs_statque failed, \"%s\" %d", qName, pbs_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (UNSPECIFIED); } /* Process the list of attributes returned by the server. */ for (attr = bs->attribs; attr != NULL; attr = attr->next) { /* Queue maximum resource usage. */ if (!strcmp(attr->name, ATTR_rescmax)) { if (!strcmp("mem", attr->resource)) { mem_max = schd_val2byte(attr->value); continue; } continue; } if (!strcmp(attr->name, ATTR_rescdflt)) { if (!strcmp("mem", attr->resource)) { mem_default = schd_val2byte(attr->value); continue; } } /* Ignore anything else */ } pbs_statfree(bs); if (mem_default != (size_t)0) return(mem_default); if (mem_max != (size_t)0) return(mem_max); return (UNSPECIFIED);}void schd_calc_suspended_jobs(void){ Job *job, *next; Queue *queue, *schd_find_queue(char *exechost); /* walk the job list and adjust resources "assigned" to suspended jobs. */ for (job = schd_SubmitQueue->queue->jobs; job != NULL; job = next) { next = job->next; /* * Adjust the count of allocated resources for jobs that are * suspended. The queue still reports these as being used, * but since the job is suspended, we can reuse the resources * (since what was why we suspended them to begin with!). */ if (job->flags & JFLAGS_SUSPENDED) { queue = schd_find_queue(job->exechost); if (queue->flags & QFLAGS_NODEDOWN) continue; if (queue != NULL) { queue->rsrcs->ncpus_alloc -= job->ncpus; queue->ncpus_assn -= job->ncpus; queue->rsrcs->mem_alloc -= job->memory; queue->mem_assn -= job->memory; queue->running -= 1; queue->rsrcs->njobs -= 1; } } }}Queue *schd_find_queue(char *exechost){ QueueList *qptr; for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) { if (!strcmp(qptr->queue->exechost, exechost)) { return(qptr->queue); } } return(NULL);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -