📄 sched.c
		hrtick_clear(cpu_rq(cpu));
		return NOTIFY_OK;
	}
	return NOTIFY_DONE;
}

static __init void init_hrtick(void)
{
	hotcpu_notifier(hotplug_hrtick, 0);
}
#else
/*
 * Called to set the hrtick timer state.
 *
 * called with rq->lock held and irqs disabled
 */
static void hrtick_start(struct rq *rq, u64 delay)
{
	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
}

static void init_hrtick(void)
{
}
#endif /* CONFIG_SMP */

static void init_rq_hrtick(struct rq *rq)
{
#ifdef CONFIG_SMP
	rq->hrtick_csd_pending = 0;

	rq->hrtick_csd.flags = 0;
	rq->hrtick_csd.func = __hrtick_start;
	rq->hrtick_csd.info = rq;
#endif

	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	rq->hrtick_timer.function = hrtick;
	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
}
#else
static inline void hrtick_clear(struct rq *rq)
{
}

static inline void init_rq_hrtick(struct rq *rq)
{
}

static inline void init_hrtick(void)
{
}
#endif

/*
 * resched_task - mark a task 'to be rescheduled now'.
 *
 * On UP this means the setting of the need_resched flag, on SMP it
 * might also involve a cross-CPU call to trigger the scheduler on
 * the target CPU.
 */
#ifdef CONFIG_SMP

#ifndef tsk_is_polling
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif

static void resched_task(struct task_struct *p)
{
	int cpu;

	assert_spin_locked(&task_rq(p)->lock);

	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
		return;

	set_tsk_thread_flag(p, TIF_NEED_RESCHED);

	cpu = task_cpu(p);
	if (cpu == smp_processor_id())
		return;

	/* NEED_RESCHED must be visible before we test polling */
	smp_mb();
	if (!tsk_is_polling(p))
		smp_send_reschedule(cpu);
}

static void resched_cpu(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	if (!spin_trylock_irqsave(&rq->lock, flags))
		return;
	resched_task(cpu_curr(cpu));
	spin_unlock_irqrestore(&rq->lock, flags);
}

#ifdef CONFIG_NO_HZ
/*
 * When add_timer_on() enqueues a timer into the timer wheel of an
 * idle CPU then this timer might expire before the next timer event
 * which is scheduled to wake up that CPU. In case of a completely
 * idle system the next event might even be infinite time into the
 * future. wake_up_idle_cpu() ensures that the CPU is woken up and
 * leaves the inner idle loop so the newly added timer is taken into
 * account when the CPU goes back to idle and evaluates the timer
 * wheel for the next timer event.
 */
void wake_up_idle_cpu(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	if (cpu == smp_processor_id())
		return;

	/*
	 * This is safe, as this function is called with the timer
	 * wheel base lock of (cpu) held. When the CPU is on the way
	 * to idle and has not yet set rq->curr to idle then it will
	 * be serialized on the timer wheel base lock and take the new
	 * timer into account automatically.
	 */
	if (rq->curr != rq->idle)
		return;

	/*
	 * We can set TIF_RESCHED on the idle task of the other CPU
	 * lockless. The worst case is that the other CPU runs the
	 * idle task through an additional NOOP schedule()
	 */
	set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);

	/* NEED_RESCHED must be visible before we test polling */
	smp_mb();
	if (!tsk_is_polling(rq->idle))
		smp_send_reschedule(cpu);
}
#endif /* CONFIG_NO_HZ */

#else /* !CONFIG_SMP */
static void resched_task(struct task_struct *p)
{
	assert_spin_locked(&task_rq(p)->lock);
	set_tsk_need_resched(p);
}
#endif /* CONFIG_SMP */

#if BITS_PER_LONG == 32
# define WMULT_CONST	(~0UL)
#else
# define WMULT_CONST	(1UL << 32)
#endif

#define WMULT_SHIFT	32

/*
 * Shift right and round:
 */
#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))

/*
 * delta *= weight / lw
 */
static unsigned long
calc_delta_mine(unsigned long delta_exec, unsigned long weight,
		struct load_weight *lw)
{
	u64 tmp;

	if (!lw->inv_weight) {
		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
			lw->inv_weight = 1;
		else
			lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
				/ (lw->weight+1);
	}

	tmp = (u64)delta_exec * weight;
	/*
	 * Check whether we'd overflow the 64-bit multiplication:
	 */
	if (unlikely(tmp > WMULT_CONST))
		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
			WMULT_SHIFT/2);
	else
		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);

	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}

static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
	lw->weight += inc;
	lw->inv_weight = 0;
}

static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
	lw->weight -= dec;
	lw->inv_weight = 0;
}
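/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: the
 * fixed-point trick used by calc_delta_mine() above, redone as a tiny
 * stand-alone user-space program. inv_weight approximates 2^32 divided by
 * the load weight, so scaling delta_exec by weight over lw->weight becomes
 * a multiply followed by a rounded 32-bit shift (the SRR() idea). The
 * DEMO_* names and all numeric values below are made up for demonstration,
 * and the rounding is simplified compared to the kernel's inv_weight
 * computation.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_WMULT_SHIFT 32
/* shift right and round, same idea as the kernel's SRR() macro */
#define DEMO_SRR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

int main(void)
{
	uint64_t delta_exec = 4000000;		/* 4 ms of runtime, in ns */
	uint64_t weight     = 1024;		/* weight of a nice-0 task */
	uint64_t lw_weight  = 1024 + 820;	/* queue load: nice 0 + nice 1 */
	uint64_t inv_weight = ((1ULL << 32) + lw_weight / 2) / lw_weight;

	/* delta_exec * weight / lw_weight, without a 64-bit division */
	uint64_t approx = DEMO_SRR(delta_exec * weight * inv_weight,
				   DEMO_WMULT_SHIFT);
	uint64_t exact  = delta_exec * weight / lw_weight;

	printf("approx %llu vs exact %llu\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}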
/*
 * To aid in avoiding the subversion of "niceness" due to uneven distribution
 * of tasks with abnormal "nice" values across CPUs the contribution that
 * each task makes to its run queue's load is weighted according to its
 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
 * scaled version of the new time slice allocation that they receive on time
 * slice expiry etc.
 */

#define WEIGHT_IDLEPRIO		2
#define WMULT_IDLEPRIO		(1 << 31)

/*
 * Nice levels are multiplicative, with a gentle 10% change for every
 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
 * nice 1, it will get ~10% less CPU time than another CPU-bound task
 * that remained on nice 0.
 *
 * The "10% effect" is relative and cumulative: from _any_ nice level,
 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
 * If a task goes up by ~10% and another task goes down by ~10% then
 * the relative distance between them is ~25%.)
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
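/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: what the
 * ~1.25 step between neighbouring prio_to_weight[] entries above means in
 * practice. Two CPU-bound tasks one nice level apart split the CPU in
 * proportion to their weights, so each ends up roughly 10% away from the
 * even split it would get at equal nice. The two weights are copied from
 * the table above; everything else is made up for demonstration.
 */
#include <stdio.h>

int main(void)
{
	const double nice0_weight = 1024;	/* prio_to_weight[20], nice  0 */
	const double nice1_weight = 820;	/* prio_to_weight[21], nice +1 */
	const double total = nice0_weight + nice1_weight;

	/* step ratio is ~1.25; shares come out near 55.5% and 44.5% */
	printf("step ratio %.3f, CPU shares %.1f%% vs %.1f%%\n",
	       nice0_weight / nice1_weight,
	       100.0 * nice0_weight / total,
	       100.0 * nice1_weight / total);
	return 0;
}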
/*
 * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
 *
 * In cases where the weight does not change often, we can use the
 * precalculated inverse to speed up arithmetics by turning divisions
 * into multiplications:
 */
static const u32 prio_to_wmult[40] = {
 /* -20 */     48388,     59856,     76040,     92818,    118348,
 /* -15 */    147320,    184698,    229616,    287308,    360437,
 /* -10 */    449829,    563644,    704093,    875809,   1099582,
 /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
 /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
 /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
 /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
 /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};

static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);

/*
 * runqueue iterator, to support SMP load-balancing between different
 * scheduling classes, without having to expose their internal data
 * structures to the load-balancing proper:
 */
struct rq_iterator {
	void *arg;
	struct task_struct *(*start)(void *);
	struct task_struct *(*next)(void *);
};

#ifdef CONFIG_SMP
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
	      unsigned long max_load_move, struct sched_domain *sd,
	      enum cpu_idle_type idle, int *all_pinned,
	      int *this_best_prio, struct rq_iterator *iterator);

static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
		   struct sched_domain *sd, enum cpu_idle_type idle,
		   struct rq_iterator *iterator);
#endif

#ifdef CONFIG_CGROUP_CPUACCT
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
#else
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
#endif

static inline void inc_cpu_load(struct rq *rq, unsigned long load)
{
	update_load_add(&rq->load, load);
}

static inline void dec_cpu_load(struct rq *rq, unsigned long load)
{
	update_load_sub(&rq->load, load);
}

#ifdef CONFIG_SMP
static unsigned long source_load(int cpu, int type);
static unsigned long target_load(int cpu, int type);
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);

static unsigned long cpu_avg_load_per_task(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	if (rq->nr_running)
		rq->avg_load_per_task = rq->load.weight / rq->nr_running;

	return rq->avg_load_per_task;
}

#ifdef CONFIG_FAIR_GROUP_SCHED

typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);

/*
 * Iterate the full tree, calling @down when first entering a node and @up when
 * leaving it for the final time.
 */
static void
walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
{
	struct task_group *parent, *child;

	rcu_read_lock();
	parent = &root_task_group;
down:
	(*down)(parent, cpu, sd);
	list_for_each_entry_rcu(child, &parent->children, siblings) {
		parent = child;
		goto down;

up:
		continue;
	}
	(*up)(parent, cpu, sd);

	child = parent;
	parent = parent->parent;
	if (parent)
		goto up;
	rcu_read_unlock();
}

static void __set_se_shares(struct sched_entity *se, unsigned long shares);

/*
 * Calculate and set the cpu's group shares.
 */
static void
__update_group_shares_cpu(struct task_group *tg, int cpu,
			  unsigned long sd_shares, unsigned long sd_rq_weight)
{
	int boost = 0;
	unsigned long shares;
	unsigned long rq_weight;

	if (!tg->se[cpu])
		return;

	rq_weight = tg->cfs_rq[cpu]->load.weight;

	/*
	 * If there are currently no tasks on the cpu pretend there is one of
	 * average load so that when a new task gets to run here it will not
	 * get delayed by group starvation.
	 */
	if (!rq_weight) {
		boost = 1;
		rq_weight = NICE_0_LOAD;
	}

	if (unlikely(rq_weight > sd_rq_weight))
		rq_weight = sd_rq_weight;

	/*
	 *           \Sum shares * rq_weight
	 * shares =  -----------------------
	 *               \Sum rq_weight
	 *
	 */
	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);

	/*
	 * record the actual number of shares, not the boosted amount.
	 */
	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
	tg->cfs_rq[cpu]->rq_weight = rq_weight;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	else if (shares > MAX_SHARES)
		shares = MAX_SHARES;

	__set_se_shares(tg->se[cpu], shares);
}
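/*
 * Editor's aside -- an illustrative sketch, not part of sched.c: the share
 * split performed by __update_group_shares_cpu() above. A group's shares
 * are distributed across CPUs in proportion to the runqueue weight the
 * group carries on each CPU (the "+ 1" mirrors the divide-by-zero guard in
 * the kernel code). All numbers below are made up for demonstration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long tg_shares = 1024;			/* group's total shares */
	unsigned long rq_weight[2] = { 3072, 1024 };	/* group load on cpu0, cpu1 */
	unsigned long sd_rq_weight = rq_weight[0] + rq_weight[1];

	for (int cpu = 0; cpu < 2; cpu++) {
		/* shares_i = tg_shares * rq_weight_i / total rq_weight */
		unsigned long shares = tg_shares * rq_weight[cpu] /
				       (sd_rq_weight + 1);

		printf("cpu%d gets %lu of %lu shares\n",
		       cpu, shares, tg_shares);
	}
	return 0;
}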
/*
 * Re-compute the task group their per cpu shares over the given domain.
 * This needs to be done in a bottom-up fashion because the rq weight of a
 * parent group depends on the shares of its child groups.
 */
static void
tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
{
	unsigned long rq_weight = 0;
	unsigned long shares = 0;
	int i;

	for_each_cpu_mask(i, sd->span) {
		rq_weight += tg->cfs_rq[i]->load.weight;
		shares += tg->cfs_rq[i]->shares;
	}

	if ((!shares && rq_weight) || shares > tg->shares)
		shares = tg->shares;

	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
		shares = tg->shares;

	if (!rq_weight)
		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;

	for_each_cpu_mask(i, sd->span) {
		struct rq *rq = cpu_rq(i);
		unsigned long flags;

		spin_lock_irqsave(&rq->lock, flags);
		__update_group_shares_cpu(tg, i, shares, rq_weight);
		spin_unlock_irqrestore(&rq->lock, flags);
	}
}

/*
 * Compute the cpu's hierarchical load factor for each task group.
 * This needs to be done in a top-down fashion because the load of a child
 * group is a fraction of its parents load.
 */
static void
tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
{
	unsigned long load;

	if (!tg->parent) {
		load = cpu_rq(cpu)->load.weight;
	} else {
		load = tg->parent->cfs_rq[cpu]->h_load;
		load *= tg->cfs_rq[cpu]->shares;
		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
	}

	tg->cfs_rq[cpu]->h_load = load;
}

static void
tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
{
}

static void update_shares(struct sched_domain *sd)
{
	u64 now = cpu_clock(raw_smp_processor_id());
	s64 elapsed = now - sd->last_update;

	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
		sd->last_update = now;
		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
	}
}

static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
{
	spin_unlock(&rq->lock);
	update_shares(sd);
	spin_lock(&rq->lock);
}

static void update_h_load(int cpu)
{
	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
}

#else

static inline void update_shares(struct sched_domain *sd)
{
}

static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
{
}

#endif

#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
{
#ifdef CONFIG_SMP
	cfs_rq->shares = shares;
#endif
}
#endif

#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
#include "sched_rt.c"
#ifdef CONFIG_SCHED_DEBUG
# include "sched_debug.c"
#endif

#define sched_class_highest (&rt_sched_class)
#define for_each_class(class) \
   for (class = sched_class_highest; class; class = class->next)

static void inc_nr_running(struct rq *rq)
{
	rq->nr_running++;
}

static void dec_nr_running(struct rq *rq)
{
	rq->nr_running--;
}

static void set_load_weight(struct task_struct *p)
{
	if (task_has_rt_policy(p)) {
		p->se.load.weight = prio_to_weight[0] * 2;
		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
		return;
	}

	/*
	 * SCHED_IDLE tasks get minimal weight: