sched.c
	switch_to(prev, next, prev);

	return prev;
}

/*
 * nr_running, nr_uninterruptible and nr_context_switches:
 *
 * externally visible scheduler statistics: current number of runnable
 * threads, current number of uninterruptible-sleeping threads, total
 * number of context switches performed since bootup.
 */
unsigned long nr_running(void)
{
	unsigned long i, sum = 0;

	for_each_online_cpu(i)
		sum += cpu_rq(i)->nr_running;

	return sum;
}

unsigned long nr_uninterruptible(void)
{
	unsigned long i, sum = 0;

	for_each_possible_cpu(i)
		sum += cpu_rq(i)->nr_uninterruptible;

	/*
	 * Since we read the counters lockless, it might be slightly
	 * inaccurate. Do not allow it to go below zero though:
	 */
	if (unlikely((long)sum < 0))
		sum = 0;

	return sum;
}

unsigned long long nr_context_switches(void)
{
	int i;
	unsigned long long sum = 0;

	for_each_possible_cpu(i)
		sum += cpu_rq(i)->nr_switches;

	return sum;
}

unsigned long nr_iowait(void)
{
	unsigned long i, sum = 0;

	for_each_possible_cpu(i)
		sum += atomic_read(&cpu_rq(i)->nr_iowait);

	return sum;
}

unsigned long nr_active(void)
{
	unsigned long i, running = 0, uninterruptible = 0;

	for_each_online_cpu(i) {
		running += cpu_rq(i)->nr_running;
		uninterruptible += cpu_rq(i)->nr_uninterruptible;
	}

	if (unlikely((long)uninterruptible < 0))
		uninterruptible = 0;

	return running + uninterruptible;
}

#ifdef CONFIG_SMP

/*
 * Is this task likely cache-hot:
 */
static inline int
task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
{
	return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
}

/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock,
 * you need to do so manually before calling.
 */
static void double_rq_lock(struct rq *rq1, struct rq *rq2)
	__acquires(rq1->lock)
	__acquires(rq2->lock)
{
	BUG_ON(!irqs_disabled());
	if (rq1 == rq2) {
		spin_lock(&rq1->lock);
		__acquire(rq2->lock);	/* Fake it out ;) */
	} else {
		if (rq1 < rq2) {
			spin_lock(&rq1->lock);
			spin_lock(&rq2->lock);
		} else {
			spin_lock(&rq2->lock);
			spin_lock(&rq1->lock);
		}
	}
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock,
 * you need to do so manually after calling.
 */
static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
	__releases(rq1->lock)
	__releases(rq2->lock)
{
	spin_unlock(&rq1->lock);
	if (rq1 != rq2)
		spin_unlock(&rq2->lock);
	else
		__release(rq2->lock);
}

/*
 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
 */
static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
	__releases(this_rq->lock)
	__acquires(busiest->lock)
	__acquires(this_rq->lock)
{
	if (unlikely(!irqs_disabled())) {
		/* printk() doesn't work well under rq->lock */
		spin_unlock(&this_rq->lock);
		BUG_ON(1);
	}
	if (unlikely(!spin_trylock(&busiest->lock))) {
		if (busiest < this_rq) {
			spin_unlock(&this_rq->lock);
			spin_lock(&busiest->lock);
			spin_lock(&this_rq->lock);
		} else
			spin_lock(&busiest->lock);
	}
}
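
Both double_rq_lock() and the trylock fallback in double_lock_balance() avoid ABBA deadlock with one rule: whenever two runqueue locks must be held, they are acquired in a fixed global order, lowest address first. The following user-space sketch (not part of sched.c; struct queue and the double_queue_* helpers are illustrative names) shows the same ordering rule with POSIX mutexes.

	/* Illustration only, not part of sched.c: address-ordered locking of two
	 * queues, so concurrent lockers can never deadlock against each other. */
	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>

	struct queue {
		pthread_mutex_t lock;
		int nr_items;
	};

	static void double_queue_lock(struct queue *q1, struct queue *q2)
	{
		if (q1 == q2) {
			pthread_mutex_lock(&q1->lock);		/* only one lock to take */
		} else if ((uintptr_t)q1 < (uintptr_t)q2) {	/* lower address first */
			pthread_mutex_lock(&q1->lock);
			pthread_mutex_lock(&q2->lock);
		} else {
			pthread_mutex_lock(&q2->lock);
			pthread_mutex_lock(&q1->lock);
		}
	}

	static void double_queue_unlock(struct queue *q1, struct queue *q2)
	{
		pthread_mutex_unlock(&q1->lock);
		if (q1 != q2)
			pthread_mutex_unlock(&q2->lock);
	}

	int main(void)
	{
		struct queue a = { .lock = PTHREAD_MUTEX_INITIALIZER, .nr_items = 3 };
		struct queue b = { .lock = PTHREAD_MUTEX_INITIALIZER, .nr_items = 5 };

		/* No matter which order the caller names the queues in, the locks
		 * are taken in the same global (address) order. */
		double_queue_lock(&b, &a);
		printf("%d items total\n", a.nr_items + b.nr_items);
		double_queue_unlock(&b, &a);
		return 0;
	}

double_lock_balance() adds a fast path on top of this ordering: it first tries spin_trylock(&busiest->lock), and only when that fails does it drop this_rq->lock and retake both in address order; when this_rq already has the lower address, simply blocking on busiest->lock is consistent with that order.
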
 */
static void sched_migrate_task(struct task_struct *p, int dest_cpu)
{
	struct migration_req req;
	unsigned long flags;
	struct rq *rq;

	rq = task_rq_lock(p, &flags);
	if (!cpu_isset(dest_cpu, p->cpus_allowed)
	    || unlikely(cpu_is_offline(dest_cpu)))
		goto out;

	/* force the process onto the specified CPU */
	if (migrate_task(p, dest_cpu, &req)) {
		/* Need to wait for migration thread (might exit: take ref). */
		struct task_struct *mt = rq->migration_thread;

		get_task_struct(mt);
		task_rq_unlock(rq, &flags);
		wake_up_process(mt);
		put_task_struct(mt);
		wait_for_completion(&req.done);

		return;
	}
out:
	task_rq_unlock(rq, &flags);
}

/*
 * sched_exec - execve() is a valuable balancing opportunity, because at
 * this point the task has the smallest effective memory and cache footprint.
 */
void sched_exec(void)
{
	int new_cpu, this_cpu = get_cpu();

	new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
	put_cpu();
	if (new_cpu != this_cpu)
		sched_migrate_task(current, new_cpu);
}

/*
 * pull_task - move a task from a remote runqueue to the local runqueue.
 * Both runqueues must be locked.
 */
static void pull_task(struct rq *src_rq, struct prio_array *src_array,
		      struct task_struct *p, struct rq *this_rq,
		      struct prio_array *this_array, int this_cpu)
{
	dequeue_task(p, src_array);
	dec_nr_running(p, src_rq);
	set_task_cpu(p, this_cpu);
	inc_nr_running(p, this_rq);
	enqueue_task(p, this_array);
	p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
				+ this_rq->most_recent_timestamp;
	/*
	 * Note that idle threads have a prio of MAX_PRIO, for this test
	 * to be always true for them.
	 */
	if (TASK_PREEMPTS_CURR(p, this_rq))
		resched_task(this_rq->curr);
}

/*
 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
 */
static
int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
		     struct sched_domain *sd, enum idle_type idle,
		     int *all_pinned)
{
	/*
	 * We do not migrate tasks that are:
	 * 1) running (obviously), or
	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
	 * 3) are cache-hot on their current CPU.
	 */
	if (!cpu_isset(this_cpu, p->cpus_allowed))
		return 0;
	*all_pinned = 0;

	if (task_running(rq, p))
		return 0;

	/*
	 * Aggressive migration if:
	 * 1) task is cache cold, or
	 * 2) too many balance attempts have failed.
	 */
	if (sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
		if (task_hot(p, rq->most_recent_timestamp, sd))
			schedstat_inc(sd, lb_hot_gained[idle]);
#endif
		return 1;
	}

	if (task_hot(p, rq->most_recent_timestamp, sd))
		return 0;
	return 1;
}

#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
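
pull_task() rebases the migrated task's timestamp because most_recent_timestamp is a per-runqueue clock: the task's age (now minus timestamp) is preserved, but re-expressed against the destination queue's clock. A minimal sketch of that arithmetic (rebase_timestamp() is an illustrative helper, not a kernel function):

	/* Illustration only, not part of sched.c: re-expressing a task's timestamp
	 * against another runqueue's clock while keeping its "age" constant.
	 * Unsigned arithmetic lets the intermediate underflow wrap harmlessly,
	 * exactly as in the kernel expression this mirrors. */
	#include <stdio.h>

	static unsigned long long rebase_timestamp(unsigned long long ts,
						   unsigned long long src_now,
						   unsigned long long dst_now)
	{
		return (ts - src_now) + dst_now;
	}

	int main(void)
	{
		/* Hypothetical readings: the source CPU's clock runs well ahead
		 * of the destination CPU's clock. */
		unsigned long long src_now = 1000, dst_now = 400, ts = 940;

		/* The task last ran 60 units before src_now, so after rebasing
		 * it is also 60 units before dst_now: prints 340. */
		printf("%llu\n", rebase_timestamp(ts, src_now, dst_now));
		return 0;
	}
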

/*
 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
 * load from busiest to this_rq, as part of a balancing operation within
 * "domain". Returns the number of tasks moved.
 *
 * Called with both runqueues locked.
 */
static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
		      unsigned long max_nr_move, unsigned long max_load_move,
		      struct sched_domain *sd, enum idle_type idle,
		      int *all_pinned)
{
	int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
	    best_prio_seen, skip_for_load;
	struct prio_array *array, *dst_array;
	struct list_head *head, *curr;
	struct task_struct *tmp;
	long rem_load_move;

	if (max_nr_move == 0 || max_load_move == 0)
		goto out;

	rem_load_move = max_load_move;
	pinned = 1;
	this_best_prio = rq_best_prio(this_rq);
	best_prio = rq_best_prio(busiest);
	/*
	 * Enable handling of the case where there is more than one task
	 * with the best priority.  If the currently running task is one
	 * of those with prio==best_prio we know it won't be moved
	 * and therefore it's safe to override the skip (based on load) of
	 * any task we find with that prio.
	 */
	best_prio_seen = best_prio == busiest->curr->prio;

	/*
	 * We first consider expired tasks. Those will likely not be
	 * executed in the near future, and they are most likely to
	 * be cache-cold, thus switching CPUs has the least effect
	 * on them.
	 */
	if (busiest->expired->nr_active) {
		array = busiest->expired;
		dst_array = this_rq->expired;
	} else {
		array = busiest->active;
		dst_array = this_rq->active;
	}

new_array:
	/* Start searching at priority 0: */
	idx = 0;
skip_bitmap:
	if (!idx)
		idx = sched_find_first_bit(array->bitmap);
	else
		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
	if (idx >= MAX_PRIO) {
		if (array == busiest->expired && busiest->active->nr_active) {
			array = busiest->active;
			dst_array = this_rq->active;
			goto new_array;
		}
		goto out;
	}

	head = array->queue + idx;
	curr = head->prev;
skip_queue:
	tmp = list_entry(curr, struct task_struct, run_list);

	curr = curr->prev;

	/*
	 * To help distribute high priority tasks across CPUs we don't
	 * skip a task if it will be the highest priority task (i.e. smallest
	 * prio value) on its new queue regardless of its load weight.
	 */
	skip_for_load = tmp->load_weight > rem_load_move;
	if (skip_for_load && idx < this_best_prio)
		skip_for_load = !best_prio_seen && idx == best_prio;
	if (skip_for_load ||
	    !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {

		best_prio_seen |= idx == best_prio;
		if (curr != head)
			goto skip_queue;
		idx++;
		goto skip_bitmap;
	}

	pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
	pulled++;
	rem_load_move -= tmp->load_weight;

	/*
	 * We only want to steal up to the prescribed number of tasks
	 * and the prescribed amount of weighted load.
	 */
	if (pulled < max_nr_move && rem_load_move > 0) {
		if (idx < this_best_prio)
			this_best_prio = idx;
		if (curr != head)
			goto skip_queue;
		idx++;
		goto skip_bitmap;
	}
out:
	/*
	 * Right now, this is the only place pull_task() is called,
	 * so we can safely collect pull_task() stats here rather than
	 * inside pull_task().
	 */
	schedstat_add(sd, lb_gained[idle], pulled);

	if (all_pinned)
		*all_pinned = pinned;
	return pulled;
}
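
move_tasks() walks the busiest queue's prio_array by priority: sched_find_first_bit()/find_next_bit() scan a bitmap in which each set bit marks a non-empty per-priority list, so the next candidate priority is found without touching empty queues. A standalone sketch of that lookup, using a plain bit-by-bit loop instead of the kernel's word-at-a-time helpers (the sizes and find_next_prio() below are illustrative):

	/* Illustration only, not part of sched.c: finding the next non-empty
	 * priority level via a bitmap, one bit per priority. */
	#include <limits.h>
	#include <stdio.h>

	#define MAX_PRIO	140	/* illustrative number of priority levels */
	#define BITS_PER_LONG	(CHAR_BIT * sizeof(unsigned long))
	#define BITMAP_WORDS	((MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG)

	/* Return the first set bit at or above 'start', or MAX_PRIO if none. */
	static int find_next_prio(const unsigned long *bitmap, int start)
	{
		for (int idx = start; idx < MAX_PRIO; idx++)
			if (bitmap[idx / BITS_PER_LONG] & (1UL << (idx % BITS_PER_LONG)))
				return idx;
		return MAX_PRIO;
	}

	int main(void)
	{
		unsigned long bitmap[BITMAP_WORDS] = { 0 };

		/* Mark priorities 17 and 100 as having queued tasks. */
		bitmap[17 / BITS_PER_LONG]  |= 1UL << (17 % BITS_PER_LONG);
		bitmap[100 / BITS_PER_LONG] |= 1UL << (100 % BITS_PER_LONG);

		printf("%d\n", find_next_prio(bitmap, 0));	/* 17  */
		printf("%d\n", find_next_prio(bitmap, 18));	/* 100 */
		printf("%d\n", find_next_prio(bitmap, 101));	/* 140 */
		return 0;
	}
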

/*
 * find_busiest_group finds and returns the busiest CPU group within the
 * domain. It calculates and returns the amount of weighted load which
 * should be moved to restore balance via the imbalance parameter.
 */
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
		   cpumask_t *cpus, int *balance)
{
	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
	unsigned long max_pull;
	unsigned long busiest_load_per_task, busiest_nr_running;
	unsigned long this_load_per_task, this_nr_running;
	int load_idx;
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
	int power_savings_balance = 1;
	unsigned long leader_nr_running = 0, min_load_per_task = 0;
	unsigned long min_nr_running = ULONG_MAX;
	struct sched_group *group_min = NULL, *group_leader = NULL;
#endif

	max_load = this_load = total_load = total_pwr = 0;
	busiest_load_per_task = busiest_nr_running = 0;
	this_load_per_task = this_nr_running = 0;
	if (idle == NOT_IDLE)
		load_idx = sd->busy_idx;
	else if (idle == NEWLY_IDLE)
		load_idx = sd->newidle_idx;
	else
		load_idx = sd->idle_idx;

	do {
		unsigned long load, group_capacity;
		int local_group;
		int i;
		unsigned int balance_cpu = -1, first_idle_cpu = 0;
		unsigned long sum_nr_running, sum_weighted_load;

		local_group = cpu_isset(this_cpu, group->cpumask);

		if (local_group)
			balance_cpu = first_cpu(group->cpumask);

		/* Tally up the load of all CPUs in the group */
		sum_weighted_load = sum_nr_running = avg_load = 0;

		for_each_cpu_mask(i, group->cpumask) {
			struct rq *rq;

			if (!cpu_isset(i, *cpus))
				continue;

			rq = cpu_rq(i);

			if (*sd_idle && !idle_cpu(i))
				*sd_idle = 0;

			/* Bias balancing toward cpus of our domain */
			if (local_group) {
				if (idle_cpu(i) && !first_idle_cpu) {
					first_idle_cpu = 1;
					balance_cpu = i;
				}

				load = target_load(i, load_idx);
			} else
				load = source_load(i, load_idx);

			avg_load += load;
			sum_nr_running += rq->nr_running;
			sum_weighted_load += rq->raw_weighted_load;
		}

		/*
		 * First idle cpu or the first cpu(busiest) in this sched group
		 * is eligible for doing load balancing at this and above
		 * domains.
		 */
		if (local_group && balance_cpu != this_cpu && balance) {
			*balance = 0;
			goto ret;
		}

		total_load += avg_load;
		total_pwr += group->__cpu_power;

		/* Adjust by relative CPU power of the group */
		avg_load = sg_div_cpu_power(group,
				avg_load * SCHED_LOAD_SCALE);

		group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;

		if (local_group) {
			this_load = avg_load;
			this = group;
			this_nr_running = sum_nr_running;
			this_load_per_task = sum_weighted_load;
		} else if (avg_load > max_load &&
			   sum_nr_running > group_capacity) {
			max_load = avg_load;
			busiest = group;
			busiest_nr_running = sum_nr_running;
			busiest_load_per_task = sum_weighted_load;
		}

#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
		/*
		 * Busy pro
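
The "Adjust by relative CPU power" step above normalizes a group's summed load by its compute capacity, so that groups with different CPU counts (or SMT siblings sharing a core) compare on one scale, and group_capacity falls out of the same fixed-point unit. A standalone sketch of that normalization; struct group_stats, scaled_load() and the SCHED_LOAD_SCALE value used here are placeholders for the example, not the kernel's definitions:

	/* Illustration only, not part of sched.c: normalizing a group's load by
	 * its CPU power so groups of different capacity compare on one scale. */
	#include <stdio.h>

	#define SCHED_LOAD_SCALE 128UL	/* placeholder fixed-point unit for this sketch */

	struct group_stats {
		unsigned long raw_load;		/* summed load of the member CPUs */
		unsigned long cpu_power;	/* SCHED_LOAD_SCALE per full-strength CPU */
	};

	static unsigned long scaled_load(const struct group_stats *g)
	{
		/* Mirrors: avg_load = sg_div_cpu_power(group, avg_load * SCHED_LOAD_SCALE) */
		return g->raw_load * SCHED_LOAD_SCALE / g->cpu_power;
	}

	int main(void)
	{
		/* Two groups carrying the same raw load, one with twice the capacity. */
		struct group_stats two_cpus = { .raw_load = 256, .cpu_power = 2 * SCHED_LOAD_SCALE };
		struct group_stats one_cpu  = { .raw_load = 256, .cpu_power = 1 * SCHED_LOAD_SCALE };

		printf("two-CPU group: %lu\n", scaled_load(&two_cpus));	/* 128 */
		printf("one-CPU group: %lu\n", scaled_load(&one_cpu));	/* 256 */

		/* A group's capacity in "tasks it can run at full load": */
		printf("capacity: %lu\n", two_cpus.cpu_power / SCHED_LOAD_SCALE);	/* 2 */
		return 0;
	}
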