sched.c

}

/*
 * Potentially available exiting-child timeslices are
 * retrieved here - this way the parent does not get
 * penalized for creating too many threads.
 *
 * (this cannot be used to 'generate' timeslices
 * artificially, because any timeslice recovered here
 * was given away by the parent in the first place.)
 */
void sched_exit(task_t * p)
{
	__cli();
	current->time_slice += p->time_slice;
	if (unlikely(current->time_slice > MAX_TIMESLICE))
		current->time_slice = MAX_TIMESLICE;
	__sti();
	/*
	 * If the child was a (relative-) CPU hog then decrease
	 * the sleep_avg of the parent as well.
	 */
	if (p->sleep_avg < current->sleep_avg)
		current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
			p->sleep_avg) / (EXIT_WEIGHT + 1);
}

/**
 * schedule_tail - first thing a freshly forked thread must call.
 *
 * @prev: the thread we just switched away from.
 */
#if CONFIG_SMP || CONFIG_PREEMPT
asmlinkage void schedule_tail(task_t *prev)
{
	finish_arch_switch(this_rq());
	finish_arch_schedule(prev);
}
#endif

/*
 * context_switch - switch to the new MM and the new
 * thread's register state.
 */
static inline task_t * context_switch(task_t *prev, task_t *next)
{
	struct mm_struct *mm = next->mm;
	struct mm_struct *oldmm = prev->active_mm;

	if (unlikely(!mm)) {
		next->active_mm = oldmm;
		atomic_inc(&oldmm->mm_count);
		enter_lazy_tlb(oldmm, next, smp_processor_id());
	} else
		switch_mm(oldmm, mm, next, smp_processor_id());

	if (unlikely(!prev->mm)) {
		prev->active_mm = NULL;
		mmdrop(oldmm);
	}

	/* Here we just switch the register state and the stack. */
	switch_to(prev, next, prev);

	return prev;
}

/*
 * nr_running, nr_uninterruptible and nr_context_switches:
 *
 * externally visible scheduler statistics: current number of runnable
 * threads, current number of uninterruptible-sleeping threads, total
 * number of context switches performed since bootup.
 */
unsigned long nr_running(void)
{
	unsigned long i, sum = 0;

	for (i = 0; i < smp_num_cpus; i++)
		sum += cpu_rq(cpu_logical_map(i))->nr_running;

	return sum;
}

unsigned long nr_uninterruptible(void)
{
	unsigned long i, sum = 0;

	for (i = 0; i < smp_num_cpus; i++)
		sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;

	return sum;
}

unsigned long nr_context_switches(void)
{
	unsigned long i, sum = 0;

	for (i = 0; i < smp_num_cpus; i++)
		sum += cpu_rq(cpu_logical_map(i))->nr_switches;

	return sum;
}

/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock,
 * you need to do so manually before calling.
 */
static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
{
	if (rq1 == rq2)
		spin_lock(&rq1->lock);
	else {
		if (rq1 < rq2) {
			spin_lock(&rq1->lock);
			spin_lock(&rq2->lock);
		} else {
			spin_lock(&rq2->lock);
			spin_lock(&rq1->lock);
		}
	}
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock,
 * you need to do so manually after calling.
 */
static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
{
	spin_unlock(&rq1->lock);
	if (rq1 != rq2)
		spin_unlock(&rq2->lock);
}

#if CONFIG_SMP

/*
 * double_lock_balance - lock the busiest runqueue
 *
 * this_rq is locked already. Recalculate nr_running if we have to
 * drop the runqueue lock.
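 *
 * Locks are taken in ascending runqueue-address order here, the same
 * convention double_rq_lock() follows above: when the trylock on
 * busiest fails and busiest sits at the lower address, this_rq->lock
 * is dropped so that busiest->lock can be taken first, which prevents
 * an AB-BA deadlock against another CPU pulling in the opposite
 * direction.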
 */
static inline unsigned int double_lock_balance(runqueue_t *this_rq,
	runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
{
	if (unlikely(!spin_trylock(&busiest->lock))) {
		if (busiest < this_rq) {
			spin_unlock(&this_rq->lock);
			spin_lock(&busiest->lock);
			spin_lock(&this_rq->lock);
			/* Need to recalculate nr_running */
			if (idle || (this_rq->nr_running >
					this_rq->prev_nr_running[this_cpu]))
				nr_running = this_rq->nr_running;
			else
				nr_running = this_rq->prev_nr_running[this_cpu];
		} else
			spin_lock(&busiest->lock);
	}
	return nr_running;
}

/*
 * Current runqueue is empty, or rebalance tick: if there is an
 * imbalance (current runqueue is too short) then pull from
 * busiest runqueue(s).
 *
 * We call this with the current runqueue locked,
 * irqs disabled.
 */
static void load_balance(runqueue_t *this_rq, int idle)
{
	int imbalance, nr_running, load, max_load, idx, i, this_cpu = smp_processor_id();
	task_t *next = this_rq->idle, *tmp;
	runqueue_t *busiest, *rq_src;
	prio_array_t *array;
	struct list_head *head, *curr;

	/*
	 * We search all runqueues to find the most busy one.
	 * We do this lockless to reduce cache-bouncing overhead,
	 * we re-check the 'best' source CPU later on again, with
	 * the lock held.
	 *
	 * We fend off statistical fluctuations in runqueue lengths by
	 * saving the runqueue length during the previous load-balancing
	 * operation and using the smaller one of the current and saved
	 * lengths. If a runqueue is long enough for a longer amount of
	 * time then we recognize it and pull tasks from it.
	 *
	 * The 'current runqueue length' is a statistical maximum variable,
	 * for that one we take the longer one - to avoid fluctuations in
	 * the other direction. So for a load-balance to happen it needs
	 * stable long runqueue on the target CPU and stable short runqueue
	 * on the local runqueue.
	 *
	 * We make an exception if this CPU is about to become idle - in
	 * that case we are less picky about moving a task across CPUs and
	 * take what can be taken.
	 */
	if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
		nr_running = this_rq->nr_running;
	else
		nr_running = this_rq->prev_nr_running[this_cpu];

	busiest = NULL;
	max_load = 1;
	for (i = 0; i < smp_num_cpus; i++) {
		int logical = cpu_logical_map(i);

		rq_src = cpu_rq(logical);
		if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
			load = rq_src->nr_running;
		else
			load = this_rq->prev_nr_running[logical];
		this_rq->prev_nr_running[logical] = rq_src->nr_running;

		if ((load > max_load) && (rq_src != this_rq)) {
			busiest = rq_src;
			max_load = load;
		}
	}

	if (likely(!busiest))
		return;

	imbalance = (max_load - nr_running) / 2;

	/* It needs an at least ~25% imbalance to trigger balancing. */
	if (!idle && (imbalance < (max_load + 3)/4))
		return;

	nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running);
	/*
	 * Make sure nothing changed since we checked the
	 * runqueue length.
	 */
	if (busiest->nr_running <= nr_running + 1)
		goto out_unlock;

	/*
	 * We first consider expired tasks. Those will likely not be
	 * executed in the near future, and they are most likely to
	 * be cache-cold, thus switching CPUs has the least effect
	 * on them.
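	 *
	 * (Worked example of the ~25% trigger above: with max_load == 8 on
	 * the busiest CPU and a local nr_running == 4, imbalance ==
	 * (8 - 4) / 2 == 2, which is not below (8 + 3) / 4 == 2, so a
	 * non-idle rebalance goes ahead; at 7 vs. 4 the computed imbalance
	 * of 1 is below the threshold and nothing is pulled.)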
	 */
	if (busiest->expired->nr_active)
		array = busiest->expired;
	else
		array = busiest->active;

new_array:
	/* Start searching at priority 0: */
	idx = 0;
skip_bitmap:
	if (!idx)
		idx = sched_find_first_bit(array->bitmap);
	else
		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
	if (idx == MAX_PRIO) {
		if (array == busiest->expired) {
			array = busiest->active;
			goto new_array;
		}
		goto out_unlock;
	}

	head = array->queue + idx;
	curr = head->prev;
skip_queue:
	tmp = list_entry(curr, task_t, run_list);

	/*
	 * We do not migrate tasks that are:
	 * 1) running (obviously), or
	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
	 * 3) are cache-hot on their current CPU.
	 */

#define CAN_MIGRATE_TASK(p,rq,this_cpu)					\
	((jiffies - (p)->sleep_timestamp > cache_decay_ticks) &&	\
		((p) != (rq)->curr) &&					\
			((p)->cpus_allowed & (1UL << (this_cpu))))

	if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
		curr = curr->next;
		if (curr != head)
			goto skip_queue;
		idx++;
		goto skip_bitmap;
	}
	next = tmp;
	/*
	 * take the task out of the other runqueue and
	 * put it into this one:
	 */
	dequeue_task(next, array);
	busiest->nr_running--;
	set_task_cpu(next, this_cpu);
	this_rq->nr_running++;
	enqueue_task(next, this_rq->active);
	if (next->prio < current->prio)
		set_need_resched();
	if (!idle && --imbalance) {
		if (array == busiest->expired) {
			array = busiest->active;
			goto new_array;
		}
	}
out_unlock:
	spin_unlock(&busiest->lock);
}

/*
 * One of the idle_cpu_tick() and busy_cpu_tick() functions will
 * get called every timer tick, on every CPU. Our balancing action
 * frequency and balancing aggressivity depend on whether the CPU is
 * idle or not.
 *
 * busy-rebalance every 250 msecs. idle-rebalance every 1 msec. (or on
 * systems with HZ=100, every 10 msecs.)
 */
#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)

static inline void idle_tick(void)
{
	if (jiffies % IDLE_REBALANCE_TICK)
		return;
	spin_lock(&this_rq()->lock);
	load_balance(this_rq(), 1);
	spin_unlock(&this_rq()->lock);
}

#endif

/*
 * We place interactive tasks back into the active array, if possible.
 *
 * To guarantee that this does not starve expired tasks we ignore the
 * interactivity of a task if the first expired task had to wait more
 * than a 'reasonable' amount of time. This deadline timeout is
 * load-dependent, as the frequency of array switches decreases with
 * increasing number of running tasks:
 */
#define EXPIRED_STARVING(rq) \
		((rq)->expired_timestamp && \
		(jiffies - (rq)->expired_timestamp >= \
			STARVATION_LIMIT * ((rq)->nr_running) + 1))

/*
 * This function gets called by the timer code, with HZ frequency.
 * We call it with interrupts disabled.
 */
void scheduler_tick(int user_tick, int system)
{
	int cpu = smp_processor_id();
	runqueue_t *rq = this_rq();
	task_t *p = current;

	if (p == rq->idle) {
		if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
			kstat.per_cpu_system[cpu] += system;
#if CONFIG_SMP
		idle_tick();
#endif
		return;
	}
	if (TASK_NICE(p) > 0)
		kstat.per_cpu_nice[cpu] += user_tick;
	else
		kstat.per_cpu_user[cpu] += user_tick;
	kstat.per_cpu_system[cpu] += system;

	/* Task might have expired already, but not scheduled off yet */
	if (p->array != rq->active) {
		set_tsk_need_resched(p);
		return;
	}
	spin_lock(&rq->lock);
	if (unlikely(rt_task(p))) {
		/*
		 * RR tasks need a special form of timeslice management.
		 * FIFO tasks have no timeslices.
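		 *
		 * (That is: a SCHED_RR task that exhausts its slice below gets
		 * a fresh slice and is requeued behind any other tasks at its
		 * priority, while a SCHED_FIFO task keeps running until it
		 * blocks or yields - its time_slice is never decremented here.)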
		 */
		if ((p->policy == SCHED_RR) && !--p->time_slice) {
			p->time_slice = task_timeslice(p);
			set_tsk_need_resched(p);

			/* put it at the end of the queue: */
			dequeue_task(p, rq->active);
			enqueue_task(p, rq->active);
		}
		goto out;
	}
	/*
	 * The task was running during this tick - update the
	 * time slice counter and the sleep average. Note: we
	 * do not update a process's priority until it either
	 * goes to sleep or uses up its timeslice. This makes
	 * it possible for interactive tasks to use up their
	 * timeslices at their highest priority levels.
	 */
	if (p->sleep_avg)
		p->sleep_avg--;
	if (!--p->time_slice) {
		dequeue_task(p, rq->active);
		set_tsk_need_resched(p);
		p->prio = effective_prio(p);
		p->time_slice = task_timeslice(p);

		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
			if (!rq->expired_timestamp)
				rq->expired_timestamp = jiffies;
			enqueue_task(p, rq->expired);
		} else
			enqueue_task(p, rq->active);
	}
out:
#if CONFIG_SMP
	if (!(jiffies % BUSY_REBALANCE_TICK))
		load_balance(rq, 0);
#endif
	spin_unlock(&rq->lock);
}

void scheduling_functions_start_here(void) { }

/*
 * schedule() is the main scheduler function.
 */
asmlinkage void schedule(void)
{
	task_t *prev, *next;
	runqueue_t *rq;
	prio_array_t *array;
	struct list_head *queue;
	int idx;

	if (unlikely(in_interrupt()))
		BUG();

need_resched:
	preempt_disable();
	prev = current;
	rq = this_rq();

	release_kernel_lock(prev, smp_processor_id());
	prepare_arch_schedule(prev);
	prev->sleep_timestamp = jiffies;
	spin_lock_irq(&rq->lock);

	/*
	 * if entering from preempt_schedule, off a kernel preemption,
	 * go straight to picking the next task.
	 */
	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
		goto pick_next_task;

	switch (prev->state) {
	case TASK_INTERRUPTIBLE:
		if (unlikely(signal_pending(prev))) {
			prev->state = TASK_RUNNING;
			break;
		}
	default:
		deactivate_task(prev, rq);
	case TASK_RUNNING:
		;
	}
pick_next_task:
	if (unlikely(!rq->nr_running)) {
#if CONFIG_SMP
		load_balance(rq, 1);
		if (rq->nr_running)
			goto pick_next_task;
#endif
		next = rq->idle;
		rq->expired_timestamp = 0;
		goto switch_tasks;
	}

	array = rq->active;
	if (unlikely(!array->nr_active)) {
		/*
		 * Switch the active and expired arrays.
		 */
		rq->active = rq->expired;
		rq->expired = array;
		array = rq->active;
		rq->expired_timestamp = 0;
	}

	idx = sched_find_first_bit(array->bitmap);
	queue = array->queue + idx;
	next = list_entry(queue->next, task_t, run_list);

switch_tasks:
	prefetch(next);
	clear_tsk_need_resched(prev);

	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;

		prepare_arch_switch(rq);
		TRACE_SCHEDCHANGE(prev, next);
		prev = context_switch(prev, next);
		barrier();
		rq = this_rq();
		finish_arch_switch(rq);
	} else
		spin_unlock_irq(&rq->lock);
	finish_arch_schedule(prev);

	reacquire_kernel_lock(current);
	preempt_enable_no_resched();
	if (need_resched())
		goto need_resched;
}
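
/*
 * Illustrative, self-contained user-space sketch (not part of sched.c):
 * the constant-time pick that schedule() performs above boils down to
 * "find the first set bit in a per-priority bitmap, then take the head
 * of that priority's queue". The example_* names below are simplified
 * stand-ins for the kernel's prio_array_t, sched_find_first_bit() and
 * enqueue_task(), not the real interfaces.
 */
#include <stdio.h>
#include <string.h>

#define EXAMPLE_MAX_PRIO	140	/* 100 RT levels + 40 nice levels, as in the O(1) scheduler */
#define BITS_PER_LONG		(8 * sizeof(unsigned long))
#define BITMAP_WORDS		((EXAMPLE_MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct example_prio_array {
	unsigned long bitmap[BITMAP_WORDS];	/* one bit per priority level */
	int nr_queued[EXAMPLE_MAX_PRIO];	/* stand-in for the per-priority task lists */
};

/* Simplified sched_find_first_bit(): lowest set bit == highest priority. */
static int example_find_first_bit(const unsigned long *bitmap, int bits)
{
	int i;

	for (i = 0; i < bits; i++)
		if (bitmap[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
			return i;
	return bits;	/* empty, like idx == MAX_PRIO above */
}

/* Simplified enqueue_task(): count a task at a priority and mark its bitmap bit. */
static void example_enqueue(struct example_prio_array *array, int prio)
{
	array->nr_queued[prio]++;
	array->bitmap[prio / BITS_PER_LONG] |= 1UL << (prio % BITS_PER_LONG);
}

int main(void)
{
	struct example_prio_array array;
	int idx;

	memset(&array, 0, sizeof(array));
	example_enqueue(&array, 120);	/* a nice-0 time-sharing task */
	example_enqueue(&array, 50);	/* roughly where a higher-priority RT task would sit */

	/* Mirrors the pick in schedule(): lowest index == highest priority wins. */
	idx = example_find_first_bit(array.bitmap, EXAMPLE_MAX_PRIO);
	printf("next task comes from priority queue %d\n", idx);	/* prints 50 */
	return 0;
}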