📄 sched.c
		hrtick_clear(cpu_rq(cpu));
		return NOTIFY_OK;
	}
	return NOTIFY_DONE;
}

static __init void init_hrtick(void)
{
	hotcpu_notifier(hotplug_hrtick, 0);
}
#else
/*
 * Called to set the hrtick timer state.
 *
 * called with rq->lock held and irqs disabled
 */
static void hrtick_start(struct rq *rq, u64 delay)
{
	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
}

static void init_hrtick(void)
{
}
#endif /* CONFIG_SMP */

static void init_rq_hrtick(struct rq *rq)
{
#ifdef CONFIG_SMP
	rq->hrtick_csd_pending = 0;

	rq->hrtick_csd.flags = 0;
	rq->hrtick_csd.func = __hrtick_start;
	rq->hrtick_csd.info = rq;
#endif

	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	rq->hrtick_timer.function = hrtick;
	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
}
#else
static inline void hrtick_clear(struct rq *rq)
{
}

static inline void init_rq_hrtick(struct rq *rq)
{
}

static inline void init_hrtick(void)
{
}
#endif

/*
 * resched_task - mark a task 'to be rescheduled now'.
 *
 * On UP this means the setting of the need_resched flag, on SMP it
 * might also involve a cross-CPU call to trigger the scheduler on
 * the target CPU.
 */
#ifdef CONFIG_SMP

#ifndef tsk_is_polling
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif

static void resched_task(struct task_struct *p)
{
	int cpu;

	assert_spin_locked(&task_rq(p)->lock);

	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
		return;

	set_tsk_thread_flag(p, TIF_NEED_RESCHED);

	cpu = task_cpu(p);
	if (cpu == smp_processor_id())
		return;

	/* NEED_RESCHED must be visible before we test polling */
	smp_mb();
	if (!tsk_is_polling(p))
		smp_send_reschedule(cpu);
}

static void resched_cpu(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	if (!spin_trylock_irqsave(&rq->lock, flags))
		return;
	resched_task(cpu_curr(cpu));
	spin_unlock_irqrestore(&rq->lock, flags);
}

#ifdef CONFIG_NO_HZ
/*
 * When add_timer_on() enqueues a timer into the timer wheel of an
 * idle CPU then this timer might expire before the next timer event
 * which is scheduled to wake up that CPU. In case of a completely
 * idle system the next event might even be infinite time into the
 * future. wake_up_idle_cpu() ensures that the CPU is woken up and
 * leaves the inner idle loop so the newly added timer is taken into
 * account when the CPU goes back to idle and evaluates the timer
 * wheel for the next timer event.
 */
void wake_up_idle_cpu(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	if (cpu == smp_processor_id())
		return;

	/*
	 * This is safe, as this function is called with the timer
	 * wheel base lock of (cpu) held. When the CPU is on the way
	 * to idle and has not yet set rq->curr to idle then it will
	 * be serialized on the timer wheel base lock and take the new
	 * timer into account automatically.
	 */
	if (rq->curr != rq->idle)
		return;

	/*
	 * We can set TIF_RESCHED on the idle task of the other CPU
	 * lockless. The worst case is that the other CPU runs the
	 * idle task through an additional NOOP schedule()
	 */
	set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);

	/* NEED_RESCHED must be visible before we test polling */
	smp_mb();
	if (!tsk_is_polling(rq->idle))
		smp_send_reschedule(cpu);
}
#endif /* CONFIG_NO_HZ */

#else /* !CONFIG_SMP */
static void resched_task(struct task_struct *p)
{
	assert_spin_locked(&task_rq(p)->lock);
	set_tsk_need_resched(p);
}
#endif /* CONFIG_SMP */

#if BITS_PER_LONG == 32
# define WMULT_CONST	(~0UL)
#else
# define WMULT_CONST	(1UL << 32)
#endif

#define WMULT_SHIFT	32

/*
 * Shift right and round:
 */
#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))

/*
 * delta *= weight / lw
 */
static unsigned long
calc_delta_mine(unsigned long delta_exec, unsigned long weight,
		struct load_weight *lw)
{
	u64 tmp;

	if (!lw->inv_weight) {
		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
			lw->inv_weight = 1;
		else
			lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
				/ (lw->weight+1);
	}

	tmp = (u64)delta_exec * weight;
	/*
	 * Check whether we'd overflow the 64-bit multiplication:
	 */
	if (unlikely(tmp > WMULT_CONST))
		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
			WMULT_SHIFT/2);
	else
		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);

	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}

static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
	lw->weight += inc;
	lw->inv_weight = 0;
}

static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
	lw->weight -= dec;
	lw->inv_weight = 0;
}
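/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: the
 * fixed-point trick used by calc_delta_mine() above, redone as a tiny
 * stand-alone user-space program. inv_weight approximates 2^32 divided by
 * the load weight, so scaling delta_exec by weight over lw->weight becomes
 * a multiply followed by a rounded 32-bit shift (the SRR() idea). The
 * DEMO_* names and all numeric values below are made up for demonstration,
 * and the rounding is simplified compared to the kernel's inv_weight
 * computation.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_WMULT_SHIFT 32
/* shift right and round, same idea as the kernel's SRR() macro */
#define DEMO_SRR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

int main(void)
{
	uint64_t delta_exec = 4000000;		/* 4 ms of runtime, in ns */
	uint64_t weight     = 1024;		/* weight of a nice-0 task */
	uint64_t lw_weight  = 1024 + 820;	/* queue load: nice 0 + nice 1 */
	uint64_t inv_weight = ((1ULL << 32) + lw_weight / 2) / lw_weight;

	/* delta_exec * weight / lw_weight, without a 64-bit division */
	uint64_t approx = DEMO_SRR(delta_exec * weight * inv_weight,
				   DEMO_WMULT_SHIFT);
	uint64_t exact  = delta_exec * weight / lw_weight;

	printf("approx %llu vs exact %llu\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}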
/*
 * To aid in avoiding the subversion of "niceness" due to uneven distribution
 * of tasks with abnormal "nice" values across CPUs the contribution that
 * each task makes to its run queue's load is weighted according to its
 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
 * scaled version of the new time slice allocation that they receive on time
 * slice expiry etc.
 */

#define WEIGHT_IDLEPRIO		2
#define WMULT_IDLEPRIO		(1 << 31)

/*
 * Nice levels are multiplicative, with a gentle 10% change for every
 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
 * nice 1, it will get ~10% less CPU time than another CPU-bound task
 * that remained on nice 0.
 *
 * The "10% effect" is relative and cumulative: from _any_ nice level,
 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
 * If a task goes up by ~10% and another task goes down by ~10% then
 * the relative distance between them is ~25%.)
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
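/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: what the
 * ~1.25 step between neighbouring prio_to_weight[] entries above means in
 * practice. Two CPU-bound tasks one nice level apart split the CPU in
 * proportion to their weights, so each ends up roughly 10% away from the
 * even split it would get at equal nice. The two weights are copied from
 * the table above; everything else is made up for demonstration.
 */
#include <stdio.h>

int main(void)
{
	const double nice0_weight = 1024;	/* prio_to_weight[20], nice  0 */
	const double nice1_weight = 820;	/* prio_to_weight[21], nice +1 */
	const double total = nice0_weight + nice1_weight;

	/* step ratio is ~1.25; shares come out near 55.5% and 44.5% */
	printf("step ratio %.3f, CPU shares %.1f%% vs %.1f%%\n",
	       nice0_weight / nice1_weight,
	       100.0 * nice0_weight / total,
	       100.0 * nice1_weight / total);
	return 0;
}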
/*
 * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
 *
 * In cases where the weight does not change often, we can use the
 * precalculated inverse to speed up arithmetics by turning divisions
 * into multiplications:
 */
static const u32 prio_to_wmult[40] = {
 /* -20 */     48388,     59856,     76040,     92818,    118348,
 /* -15 */    147320,    184698,    229616,    287308,    360437,
 /* -10 */    449829,    563644,    704093,    875809,   1099582,
 /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
 /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
 /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
 /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
 /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};

static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);

/*
 * runqueue iterator, to support SMP load-balancing between different
 * scheduling classes, without having to expose their internal data
 * structures to the load-balancing proper:
 */
struct rq_iterator {
	void *arg;
	struct task_struct *(*start)(void *);
	struct task_struct *(*next)(void *);
};

#ifdef CONFIG_SMP
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
	      unsigned long max_load_move, struct sched_domain *sd,
	      enum cpu_idle_type idle, int *all_pinned,
	      int *this_best_prio, struct rq_iterator *iterator);

static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
		   struct sched_domain *sd, enum cpu_idle_type idle,
		   struct rq_iterator *iterator);
#endif

#ifdef CONFIG_CGROUP_CPUACCT
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
#else
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
#endif

static inline void inc_cpu_load(struct rq *rq, unsigned long load)
{
	update_load_add(&rq->load, load);
}

static inline void dec_cpu_load(struct rq *rq, unsigned long load)
{
	update_load_sub(&rq->load, load);
}

#ifdef CONFIG_SMP
static unsigned long source_load(int cpu, int type);
static unsigned long target_load(int cpu, int type);
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);

static unsigned long cpu_avg_load_per_task(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	if (rq->nr_running)
		rq->avg_load_per_task = rq->load.weight / rq->nr_running;

	return rq->avg_load_per_task;
}

#ifdef CONFIG_FAIR_GROUP_SCHED

typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);

/*
 * Iterate the full tree, calling @down when first entering a node and @up when
 * leaving it for the final time.
 */
static void
walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
{
	struct task_group *parent, *child;

	rcu_read_lock();
	parent = &root_task_group;
down:
	(*down)(parent, cpu, sd);
	list_for_each_entry_rcu(child, &parent->children, siblings) {
		parent = child;
		goto down;

up:
		continue;
	}
	(*up)(parent, cpu, sd);

	child = parent;
	parent = parent->parent;
	if (parent)
		goto up;
	rcu_read_unlock();
}

static void __set_se_shares(struct sched_entity *se, unsigned long shares);

/*
 * Calculate and set the cpu's group shares.
 */
static void
__update_group_shares_cpu(struct task_group *tg, int cpu,
			  unsigned long sd_shares, unsigned long sd_rq_weight)
{
	int boost = 0;
	unsigned long shares;
	unsigned long rq_weight;

	if (!tg->se[cpu])
		return;

	rq_weight = tg->cfs_rq[cpu]->load.weight;

	/*
	 * If there are currently no tasks on the cpu pretend there is one of
	 * average load so that when a new task gets to run here it will not
	 * get delayed by group starvation.
	 */
	if (!rq_weight) {
		boost = 1;
		rq_weight = NICE_0_LOAD;
	}

	if (unlikely(rq_weight > sd_rq_weight))
		rq_weight = sd_rq_weight;

	/*
	 *           \Sum shares * rq_weight
	 * shares =  -----------------------
	 *               \Sum rq_weight
	 *
	 */
	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);

	/*
	 * record the actual number of shares, not the boosted amount.
	 */
	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
	tg->cfs_rq[cpu]->rq_weight = rq_weight;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	else if (shares > MAX_SHARES)
		shares = MAX_SHARES;

	__set_se_shares(tg->se[cpu], shares);
}
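/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: the share
 * split performed by __update_group_shares_cpu() above. A group's shares
 * are distributed across CPUs in proportion to the runqueue weight the
 * group carries on each CPU (the "+ 1" mirrors the divide-by-zero guard in
 * the kernel code). All numbers below are made up for demonstration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long tg_shares = 1024;			/* group's total shares */
	unsigned long rq_weight[2] = { 3072, 1024 };	/* group load on cpu0, cpu1 */
	unsigned long sd_rq_weight = rq_weight[0] + rq_weight[1];

	for (int cpu = 0; cpu < 2; cpu++) {
		/* shares_i = tg_shares * rq_weight_i / total rq_weight */
		unsigned long shares = tg_shares * rq_weight[cpu] /
				       (sd_rq_weight + 1);

		printf("cpu%d gets %lu of %lu shares\n",
		       cpu, shares, tg_shares);
	}
	return 0;
}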
/*
 * Re-compute the task group their per cpu shares over the given domain.
 * This needs to be done in a bottom-up fashion because the rq weight of a
 * parent group depends on the shares of its child groups.
 */
static void
tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
{
	unsigned long rq_weight = 0;
	unsigned long shares = 0;
	int i;

	for_each_cpu_mask(i, sd->span) {
		rq_weight += tg->cfs_rq[i]->load.weight;
		shares += tg->cfs_rq[i]->shares;
	}

	if ((!shares && rq_weight) || shares > tg->shares)
		shares = tg->shares;

	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
		shares = tg->shares;

	if (!rq_weight)
		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;

	for_each_cpu_mask(i, sd->span) {
		struct rq *rq = cpu_rq(i);
		unsigned long flags;

		spin_lock_irqsave(&rq->lock, flags);
		__update_group_shares_cpu(tg, i, shares, rq_weight);
		spin_unlock_irqrestore(&rq->lock, flags);
	}
}

/*
 * Compute the cpu's hierarchical load factor for each task group.
 * This needs to be done in a top-down fashion because the load of a child
 * group is a fraction of its parents load.
 */
static void
tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
{
	unsigned long load;

	if (!tg->parent) {
		load = cpu_rq(cpu)->load.weight;
	} else {
		load = tg->parent->cfs_rq[cpu]->h_load;
		load *= tg->cfs_rq[cpu]->shares;
		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
	}

	tg->cfs_rq[cpu]->h_load = load;
}

static void
tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
{
}

static void update_shares(struct sched_domain *sd)
{
	u64 now = cpu_clock(raw_smp_processor_id());
	s64 elapsed = now - sd->last_update;

	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
		sd->last_update = now;
		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
	}
}

static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
{
	spin_unlock(&rq->lock);
	update_shares(sd);
	spin_lock(&rq->lock);
}

static void update_h_load(int cpu)
{
	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
}

#else

static inline void update_shares(struct sched_domain *sd)
{
}

static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
{
}

#endif

#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
{
#ifdef CONFIG_SMP
	cfs_rq->shares = shares;
#endif
}
#endif

#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
#include "sched_rt.c"
#ifdef CONFIG_SCHED_DEBUG
# include "sched_debug.c"
#endif

#define sched_class_highest (&rt_sched_class)
#define for_each_class(class) \
   for (class = sched_class_highest; class; class = class->next)

static void inc_nr_running(struct rq *rq)
{
	rq->nr_running++;
}

static void dec_nr_running(struct rq *rq)
{
	rq->nr_running--;
}

static void set_load_weight(struct task_struct *p)
{
	if (task_has_rt_policy(p)) {
		p->se.load.weight = prio_to_weight[0] * 2;
		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
		return;
	}

	/*
	 * SCHED_IDLE tasks get minimal weight: