sched_fair.c
{
}
#endif

static void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	update_load_add(&cfs_rq->load, se->load.weight);
	if (!parent_entity(se))
		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
	if (entity_is_task(se))
		add_cfs_task_weight(cfs_rq, se->load.weight);
	cfs_rq->nr_running++;
	se->on_rq = 1;
	list_add(&se->group_node, &cfs_rq->tasks);
}

static void
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	update_load_sub(&cfs_rq->load, se->load.weight);
	if (!parent_entity(se))
		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
	if (entity_is_task(se))
		add_cfs_task_weight(cfs_rq, -se->load.weight);
	cfs_rq->nr_running--;
	se->on_rq = 0;
	list_del_init(&se->group_node);
}

static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHEDSTATS
	if (se->sleep_start) {
		u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
		struct task_struct *tsk = task_of(se);

		if ((s64)delta < 0)
			delta = 0;

		if (unlikely(delta > se->sleep_max))
			se->sleep_max = delta;

		se->sleep_start = 0;
		se->sum_sleep_runtime += delta;

		account_scheduler_latency(tsk, delta >> 10, 1);
	}
	if (se->block_start) {
		u64 delta = rq_of(cfs_rq)->clock - se->block_start;
		struct task_struct *tsk = task_of(se);

		if ((s64)delta < 0)
			delta = 0;

		if (unlikely(delta > se->block_max))
			se->block_max = delta;

		se->block_start = 0;
		se->sum_sleep_runtime += delta;

		/*
		 * Blocking time is in units of nanosecs, so shift by 20 to
		 * get a milliseconds-range estimation of the amount of
		 * time that the task spent sleeping:
		 */
		if (unlikely(prof_on == SLEEP_PROFILING)) {
			profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
				     delta >> 20);
		}
		account_scheduler_latency(tsk, delta >> 10, 0);
	}
#endif
}

static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
	s64 d = se->vruntime - cfs_rq->min_vruntime;

	if (d < 0)
		d = -d;

	if (d > 3*sysctl_sched_latency)
		schedstat_inc(cfs_rq, nr_spread_over);
#endif
}

static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
	u64 vruntime;

	if (first_fair(cfs_rq)) {
		vruntime = min_vruntime(cfs_rq->min_vruntime,
				__pick_next_entity(cfs_rq)->vruntime);
	} else
		vruntime = cfs_rq->min_vruntime;

	/*
	 * The 'current' period is already promised to the current tasks,
	 * however the extra weight of the new task will slow them down a
	 * little, place the new task so that it fits in the slot that
	 * stays open at the end.
	 */
	if (initial && sched_feat(START_DEBIT))
		vruntime += sched_vslice_add(cfs_rq, se);

	if (!initial) {
		/* sleeps up to a single latency don't count. */
		if (sched_feat(NEW_FAIR_SLEEPERS)) {
			unsigned long thresh = sysctl_sched_latency;

			/*
			 * convert the sleeper threshold into virtual time
			 */
			if (sched_feat(NORMALIZED_SLEEPER))
				thresh = calc_delta_fair(thresh, se);

			vruntime -= thresh;
		}

		/* ensure we never gain time by being placed backwards. */
		vruntime = max_vruntime(se->vruntime, vruntime);
	}

	se->vruntime = vruntime;
}

static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
{
	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);
	account_entity_enqueue(cfs_rq, se);

	if (wakeup) {
		place_entity(cfs_rq, se, 0);
		enqueue_sleeper(cfs_rq, se);
	}

	update_stats_enqueue(cfs_rq, se);
	check_spread(cfs_rq, se);
	if (se != cfs_rq->curr)
		__enqueue_entity(cfs_rq, se);
}
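/*
 * Illustrative example (editor's sketch, not part of the original file):
 * the arithmetic behind place_entity() for a waking sleeper. Assume
 * sysctl_sched_latency = 20ms and cfs_rq->min_vruntime = 100ms. With
 * NEW_FAIR_SLEEPERS the candidate position is 100ms - 20ms = 80ms (the
 * 20ms threshold is first scaled to virtual time by calc_delta_fair()
 * when NORMALIZED_SLEEPER is set). The final max_vruntime() clamp means
 * that if the task's old vruntime was 95ms it stays at 95ms: a task can
 * never gain time by being re-placed backwards, and a long sleeper earns
 * at most one latency period of credit.
 */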
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);

	update_stats_dequeue(cfs_rq, se);
	if (sleep) {
#ifdef CONFIG_SCHEDSTATS
		if (entity_is_task(se)) {
			struct task_struct *tsk = task_of(se);

			if (tsk->state & TASK_INTERRUPTIBLE)
				se->sleep_start = rq_of(cfs_rq)->clock;
			if (tsk->state & TASK_UNINTERRUPTIBLE)
				se->block_start = rq_of(cfs_rq)->clock;
		}
#endif
	}

	if (se != cfs_rq->curr)
		__dequeue_entity(cfs_rq, se);
	account_entity_dequeue(cfs_rq, se);
}

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
	unsigned long ideal_runtime, delta_exec;

	ideal_runtime = sched_slice(cfs_rq, curr);
	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
	if (delta_exec > ideal_runtime)
		resched_task(rq_of(cfs_rq)->curr);
}

static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/* 'current' is not kept within the tree. */
	if (se->on_rq) {
		/*
		 * Any task has to be enqueued before it gets to execute on
		 * a CPU. So account for the time it spent waiting on the
		 * runqueue.
		 */
		update_stats_wait_end(cfs_rq, se);
		__dequeue_entity(cfs_rq, se);
	}

	update_stats_curr_start(cfs_rq, se);
	cfs_rq->curr = se;
#ifdef CONFIG_SCHEDSTATS
	/*
	 * Track our maximum slice length, if the CPU's load is at
	 * least twice that of our own weight (i.e. don't track it
	 * when there are only lesser-weight tasks around):
	 */
	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
		se->slice_max = max(se->slice_max,
			se->sum_exec_runtime - se->prev_sum_exec_runtime);
	}
#endif
	se->prev_sum_exec_runtime = se->sum_exec_runtime;
}

static struct sched_entity *
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	struct rq *rq = rq_of(cfs_rq);
	u64 pair_slice = rq->clock - cfs_rq->pair_start;

	if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
		cfs_rq->pair_start = rq->clock;
		return se;
	}

	return cfs_rq->next;
}

static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
	struct sched_entity *se = NULL;

	if (first_fair(cfs_rq)) {
		se = __pick_next_entity(cfs_rq);
		se = pick_next(cfs_rq, se);
		set_next_entity(cfs_rq, se);
	}

	return se;
}

static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
{
	/*
	 * If still on the runqueue then deactivate_task()
	 * was not called and update_curr() has to be done:
	 */
	if (prev->on_rq)
		update_curr(cfs_rq);

	check_spread(cfs_rq, prev);
	if (prev->on_rq) {
		update_stats_wait_start(cfs_rq, prev);
		/* Put 'current' back into the tree. */
		__enqueue_entity(cfs_rq, prev);
	}
	cfs_rq->curr = NULL;
}
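/*
 * Illustrative example (editor's sketch, not part of the original file):
 * the numbers behind check_preempt_tick(). sched_slice() divides the
 * latency period among runnable entities in proportion to their weight,
 * so with three equal-weight tasks and sysctl_sched_latency = 20ms each
 * ideal_runtime is roughly 20ms/3 = 6.67ms. Once
 * sum_exec_runtime - prev_sum_exec_runtime exceeds that slice, the
 * periodic tick marks the current task for rescheduling.
 */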
static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);

#ifdef CONFIG_SCHED_HRTICK
	/*
	 * queued ticks are scheduled to match the slice, so don't bother
	 * validating it and just reschedule.
	 */
	if (queued) {
		resched_task(rq_of(cfs_rq)->curr);
		return;
	}
	/*
	 * don't let the period tick interfere with the hrtick preemption
	 */
	if (!sched_feat(DOUBLE_TICK) &&
			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
		return;
#endif

	if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
		check_preempt_tick(cfs_rq, curr);
}

/**************************************************
 * CFS operations on tasks:
 */

#ifdef CONFIG_SCHED_HRTICK
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
	struct sched_entity *se = &p->se;
	struct cfs_rq *cfs_rq = cfs_rq_of(se);

	WARN_ON(task_rq(p) != rq);

	if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
		u64 slice = sched_slice(cfs_rq, se);
		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
		s64 delta = slice - ran;

		if (delta < 0) {
			if (rq->curr == p)
				resched_task(p);
			return;
		}

		/*
		 * Don't schedule slices shorter than 10000ns, that just
		 * doesn't make sense. Rely on vruntime for fairness.
		 */
		if (rq->curr != p)
			delta = max_t(s64, 10000LL, delta);

		hrtick_start(rq, delta);
	}
}
#else /* !CONFIG_SCHED_HRTICK */
static inline void
hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
}
#endif

/*
 * The enqueue_task method is called before nr_running is
 * increased. Here we update the fair scheduling stats and
 * then put the task into the rbtree:
 */
static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
		cfs_rq = cfs_rq_of(se);
		enqueue_entity(cfs_rq, se, wakeup);
		wakeup = 1;
	}

	hrtick_start_fair(rq, rq->curr);
}

/*
 * The dequeue_task method is called before nr_running is
 * decreased. We remove the task from the rbtree and
 * update the fair scheduling stats:
 */
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		dequeue_entity(cfs_rq, se, sleep);
		/* Don't dequeue parent if it has other entities besides us */
		if (cfs_rq->load.weight)
			break;
		sleep = 1;
	}

	hrtick_start_fair(rq, rq->curr);
}

/*
 * sched_yield() support is very simple - we dequeue and enqueue.
 *
 * If compat_yield is turned on then we requeue to the end of the tree.
 */
static void yield_task_fair(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
	struct sched_entity *rightmost, *se = &curr->se;

	/*
	 * Are we the only task in the tree?
	 */
	if (unlikely(cfs_rq->nr_running == 1))
		return;

	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
		update_rq_clock(rq);
		/*
		 * Update run-time statistics of the 'current'.
		 */
		update_curr(cfs_rq);

		return;
	}
	/*
	 * Find the rightmost entry in the rbtree:
	 */
	rightmost = __pick_last_entity(cfs_rq);
	/*
	 * Already in the rightmost position?
	 */
	if (unlikely(!rightmost || rightmost->vruntime < se->vruntime))
		return;

	/*
	 * Minimally necessary key value to be last in the tree:
	 * Upon rescheduling, sched_class::put_prev_task() will place
	 * 'current' within the tree based on its new key value.
	 */
	se->vruntime = rightmost->vruntime + 1;
}
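/*
 * Illustrative example (editor's sketch, not part of the original file):
 * with sysctl_sched_compat_yield enabled, a task calling sched_yield()
 * gets its vruntime set just past the rightmost entity's, so it re-enters
 * the rbtree as the last candidate and every other runnable task runs
 * before it. With the default (compat_yield off), yield merely updates
 * the clock and runtime statistics, and the yielding task may well be
 * picked again almost immediately.
 */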
/*
 * wake_idle() will wake a task on an idle cpu if task->cpu is
 * not idle and an idle cpu is available. The span of cpus to
 * search starts with cpus closest then further out as needed,
 * so we always favor a closer, idle cpu.
 * Domains may include CPUs that are not usable for migration,
 * hence we need to mask them out (cpu_active_map)
 *
 * Returns the CPU we should wake onto.
 */
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
static int wake_idle(int cpu, struct task_struct *p)
{
	cpumask_t tmp;
	struct sched_domain *sd;
	int i;

	/*
	 * If it is idle, then it is the best cpu to run this task.
	 *
	 * This cpu is also the best, if it has more than one task already.
	 * Siblings must also be busy (in most cases) as they didn't already
	 * pick up the extra load from this cpu and hence we need not check
	 * sibling runqueue info. This will avoid the checks and cache miss
	 * penalties associated with that.
	 */
	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
		return cpu;

	for_each_domain(cpu, sd) {
		if ((sd->flags & SD_WAKE_IDLE)
		    || ((sd->flags & SD_WAKE_IDLE_FAR)
			&& !task_hot(p, task_rq(p)->clock, sd))) {
			cpus_and(tmp, sd->span, p->cpus_allowed);
			cpus_and(tmp, tmp, cpu_active_map);
			for_each_cpu_mask_nr(i, tmp) {
				if (idle_cpu(i)) {
					if (i != task_cpu(p)) {
						schedstat_inc(p,
						       se.nr_wakeups_idle);
					}
					return i;
				}
			}
		} else {
			break;
		}
	}
	return cpu;
}
#else /* !ARCH_HAS_SCHED_WAKE_IDLE */
static inline int wake_idle(int cpu, struct task_struct *p)
{
	return cpu;
}
#endif

#ifdef CONFIG_SMP

static const struct sched_class fair_sched_class;

#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * effective_load() calculates the load change as seen from the root_task_group
 *
 * Adding load to a group doesn't make a group heavier, but can cause movement
 * of group shares between cpus. Assuming the shares were perfectly aligned one
 * can calculate the shift in shares.
 *
 * The problem is that perfectly aligning the shares is rather expensive, hence
 * we try to avoid doing that too often - see update_shares(), which ratelimits
 * this change.
 *
 * We compensate this by not only taking the current delta into account, but
 * also considering the delta between when the shares were last adjusted and
 * now.
 *
 * We still saw a performance dip, and some tracing taught us that between
 * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
 * significantly. Therefore try to bias the error in direction of failing
 * the affine wakeup.
 *
 */
static long effective_load(struct task_group *tg, int cpu,
		long wl, long wg)
{
	struct sched_entity *se = tg->se[cpu];
	long more_w;

	if (!tg->parent)
		return wl;

	/*
	 * By not taking the decrease of shares on the other cpu into
	 * account our error leans towards reducing the affine wakeups.
	 */
	if (!wl && sched_feat(ASYM_EFF_LOAD))
		return wl;

	/*
	 * Instead of using this increment, also add the difference
	 * between when the shares were last updated and now.
	 */
	more_w = se->my_q->load.weight - se->my_q->rq_weight;
	wl += more_w;
	wg += more_w;

	for_each_sched_entity(se) {
#define D(n) (likely(n) ? (n) : 1)

		long S, rw, s, a, b;

		S = se->my_q->tg->shares;
		s = se->my_q->shares;
		rw = se->my_q->rq_weight;

		a = S*(rw + wl);
		b = S*rw + s*wg;

		wl = s*(a-b)/D(b);
		/*
		 * Assume the group is already running and will
		 * thus already be accounted for in the weight.
		 *
		 * That is, moving shares between CPUs, does not
		 * alter the group weight.
		 */
		wg = 0;
#undef D
	}

	return wl;
}
#else

static inline unsigned long effective_load(struct task_group *tg, int cpu,
		unsigned long wl, unsigned long wg)
{
	return wl;
}

#endif

static int
wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
	    int idx, unsigned long load, unsigned long this_load,
	    unsigned int imbalance)
{
	struct task_struct *curr = this_rq->curr;
	struct task_group *tg;
	unsigned long tl = this_load;
	unsigned long tl_per_task;
	unsigned long weight;
	int balanced;
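	/*
	 * Illustrative example (editor's sketch, not part of the original
	 * file): one level of the effective_load() loop above with made-up
	 * numbers. Let the group have S = tg->shares = 1024, a queue weight
	 * rw = 2048 on this cpu with s = 512 local shares, and let a task
	 * of weight wl = wg = 1024 be added. Then:
	 *
	 *	a = S*(rw + wl) = 1024 * 3072 = 3145728
	 *	b = S*rw + s*wg = 2097152 + 524288 = 2621440
	 *	wl = s*(a - b)/b = 512 * 524288 / 2621440 = 102
	 *
	 * i.e. adding 1024 units of task weight inside the group shows up
	 * as only ~102 units of weight change at the parent level, because
	 * the group's total weight is bounded by its shares.
	 */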