
📄 sched_fair.c

📁 CFS scheduler code from the Linux kernel
💻 C
📖 Page 1 of 3
/*
 * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
 *
 *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 *  Interactivity improvements by Mike Galbraith
 *  (C) 2007 Mike Galbraith <efault@gmx.de>
 *
 *  Various enhancements by Dmitry Adamushko.
 *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
 *
 *  Group scheduling enhancements by Srivatsa Vaddagiri
 *  Copyright IBM Corporation, 2007
 *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 *
 *  Scaled math optimizations by Thomas Gleixner
 *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
 *
 *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/latencytop.h>

/*
 * Targeted preemption latency for CPU-bound tasks:
 * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
 *
 * NOTE: this latency value is not the same as the concept of
 * 'timeslice length' - timeslices in CFS are of variable length
 * and have no persistent notion like in traditional, time-slice
 * based scheduling concepts.
 *
 * (to see the precise effective timeslice length of your workload,
 *  run vmstat and monitor the context-switches (cs) field)
 */
unsigned int sysctl_sched_latency = 20000000ULL;

/*
 * Minimal preemption granularity for CPU-bound tasks:
 * (default: 4 msec * (1 + ilog(ncpus)), units: nanoseconds)
 */
unsigned int sysctl_sched_min_granularity = 4000000ULL;

/*
 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
 */
static unsigned int sched_nr_latency = 5;

/*
 * After fork, child runs first. (default) If set to 0 then
 * parent will (try to) run first.
 */
const_debug unsigned int sysctl_sched_child_runs_first = 1;

/*
 * sys_sched_yield() compat mode
 *
 * This option switches the aggressive yield implementation of the
 * old scheduler back on.
 */
unsigned int __read_mostly sysctl_sched_compat_yield;
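/*
 * Worked example (illustrative, not part of the upstream file): with the
 * single-CPU defaults above (ilog(ncpus) == 0), the knobs relate as
 *
 *   sched_nr_latency = sysctl_sched_latency / sysctl_sched_min_granularity
 *                    = 20 ms / 4 ms
 *                    = 5
 *
 * i.e. up to 5 runnable tasks still fit into one 20 ms latency period before
 * the period itself has to be stretched (see __sched_period() below).
 */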
/*
 * SCHED_OTHER wake-up granularity.
 * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
 *
 * This option delays the preemption effects of decoupled workloads
 * and reduces their over-scheduling. Synchronous workloads will still
 * have immediate wakeup/sleep latencies.
 */
unsigned int sysctl_sched_wakeup_granularity = 5000000UL;

const_debug unsigned int sysctl_sched_migration_cost = 500000UL;

/**************************************************************
 * CFS operations on generic schedulable entities:
 */

static inline struct task_struct *task_of(struct sched_entity *se)
{
	return container_of(se, struct task_struct, se);
}

#ifdef CONFIG_FAIR_GROUP_SCHED

/* cpu runqueue to which this cfs_rq is attached */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
	return cfs_rq->rq;
}

/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se)	(!se->my_q)

/* Walk up scheduling entities hierarchy */
#define for_each_sched_entity(se) \
		for (; se; se = se->parent)

static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
{
	return p->se.cfs_rq;
}

/* runqueue on which this entity is (to be) queued */
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
	return se->cfs_rq;
}

/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
	return grp->my_q;
}

/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
 * another cpu ('this_cpu')
 */
static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
{
	return cfs_rq->tg->cfs_rq[this_cpu];
}

/* Iterate through all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)

/* Do the two (enqueued) entities belong to the same group? */
static inline int
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
	if (se->cfs_rq == pse->cfs_rq)
		return 1;

	return 0;
}

static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
	return se->parent;
}

#else	/* CONFIG_FAIR_GROUP_SCHED */

static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
	return container_of(cfs_rq, struct rq, cfs);
}

#define entity_is_task(se)	1

#define for_each_sched_entity(se) \
		for (; se; se = NULL)

static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
{
	return &task_rq(p)->cfs;
}

static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
	struct task_struct *p = task_of(se);
	struct rq *rq = task_rq(p);

	return &rq->cfs;
}

/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
	return NULL;
}

static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
{
	return &cpu_rq(this_cpu)->cfs;
}

#define for_each_leaf_cfs_rq(rq, cfs_rq) \
		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)

static inline int
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
	return 1;
}

static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
	return NULL;
}

#endif	/* CONFIG_FAIR_GROUP_SCHED */

/**************************************************************
 * Scheduling class tree data structure manipulation methods:
 */

static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta > 0)
		min_vruntime = vruntime;

	return min_vruntime;
}

static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta < 0)
		min_vruntime = vruntime;

	return min_vruntime;
}

static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	return se->vruntime - cfs_rq->min_vruntime;
}
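/*
 * Worked example (illustrative, not part of the upstream file): the helpers
 * above compare u64 vruntimes through a signed difference, so the result
 * stays correct even after the counters wrap around. Shortly after a wrap:
 *
 *   vruntime     = 10                  (already wrapped)
 *   min_vruntime = ULLONG_MAX - 5      (not yet wrapped)
 *
 *   (s64)(vruntime - min_vruntime) = 16 > 0
 *
 * so max_vruntime() picks the wrapped value as the larger one, whereas a
 * plain "vruntime > min_vruntime" comparison would get this case wrong.
 */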
/*
 * Enqueue an entity into the rb-tree:
 */
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
	struct rb_node *parent = NULL;
	struct sched_entity *entry;
	s64 key = entity_key(cfs_rq, se);
	int leftmost = 1;

	/*
	 * Find the right place in the rbtree:
	 */
	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct sched_entity, run_node);
		/*
		 * We don't care about collisions. Nodes with
		 * the same key stay together.
		 */
		if (key < entity_key(cfs_rq, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	/*
	 * Maintain a cache of leftmost tree entries (it is frequently
	 * used):
	 */
	if (leftmost) {
		cfs_rq->rb_leftmost = &se->run_node;
		/*
		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
		 * value tracking the leftmost vruntime in the tree.
		 */
		cfs_rq->min_vruntime =
			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
	}

	rb_link_node(&se->run_node, parent, link);
	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
}

static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if (cfs_rq->rb_leftmost == &se->run_node) {
		struct rb_node *next_node;
		struct sched_entity *next;

		next_node = rb_next(&se->run_node);
		cfs_rq->rb_leftmost = next_node;

		if (next_node) {
			next = rb_entry(next_node,
					struct sched_entity, run_node);
			cfs_rq->min_vruntime =
				max_vruntime(cfs_rq->min_vruntime,
					     next->vruntime);
		}
	}

	if (cfs_rq->next == se)
		cfs_rq->next = NULL;

	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}

static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
{
	return cfs_rq->rb_leftmost;
}

static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
{
	return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
}

static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);

	if (!last)
		return NULL;

	return rb_entry(last, struct sched_entity, run_node);
}

/**************************************************************
 * Scheduling class statistics methods:
 */

#ifdef CONFIG_SCHED_DEBUG
int sched_nr_latency_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);

	if (ret || !write)
		return ret;

	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
					sysctl_sched_min_granularity);

	return 0;
}
#endif

/*
 * delta *= w / rw
 */
static inline unsigned long
calc_delta_weight(unsigned long delta, struct sched_entity *se)
{
	for_each_sched_entity(se) {
		delta = calc_delta_mine(delta,
				se->load.weight, &cfs_rq_of(se)->load);
	}

	return delta;
}

/*
 * delta *= rw / w
 */
static inline unsigned long
calc_delta_fair(unsigned long delta, struct sched_entity *se)
{
	for_each_sched_entity(se) {
		delta = calc_delta_mine(delta,
				cfs_rq_of(se)->load.weight, &se->load);
	}

	return delta;
}

/*
 * The idea is to set a period in which each task runs once.
 *
 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
 * this period because otherwise the slices get too small.
 *
 * p = (nr <= nl) ? l : l*nr/nl
 */
static u64 __sched_period(unsigned long nr_running)
{
	u64 period = sysctl_sched_latency;
	unsigned long nr_latency = sched_nr_latency;

	if (unlikely(nr_running > nr_latency)) {
		period = sysctl_sched_min_granularity;
		period *= nr_running;
	}

	return period;
}
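/*
 * Worked example (illustrative, not part of the upstream file): with the
 * single-CPU defaults (l = 20 ms, nl = 5, min granularity 4 ms):
 *
 *   nr_running = 3   ->  period = 20 ms             (3 <= 5, latency kept)
 *   nr_running = 10  ->  period = 10 * 4 ms = 40 ms
 *
 * Once more than sched_nr_latency tasks are runnable, the period is
 * stretched so each task still receives at least the minimum granularity.
 */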
/*
 * We calculate the wall-time slice from the period by taking a part
 * proportional to the weight.
 *
 * s = p*w/rw
 */
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
}

/*
 * We calculate the vruntime slice of a to be inserted task
 *
 * vs = s*rw/w = p
 */
static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	unsigned long nr_running = cfs_rq->nr_running;

	if (!se->on_rq)
		nr_running++;

	return __sched_period(nr_running);
}

/*
 * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
 * that it favours >=0 over <0.
 *
 *   -20         |
 *               |
 *     0 --------+-------
 *             .'
 *    19     .'
 *
 */
static unsigned long
calc_delta_asym(unsigned long delta, struct sched_entity *se)
{
	struct load_weight lw = {
		.weight = NICE_0_LOAD,
		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
	};

	for_each_sched_entity(se) {
		struct load_weight *se_lw = &se->load;
		unsigned long rw = cfs_rq_of(se)->load.weight;

#ifdef CONFIG_FAIR_SCHED_GROUP
		struct cfs_rq *cfs_rq = se->my_q;
		struct task_group *tg = NULL;

		if (cfs_rq)
			tg = cfs_rq->tg;

		if (tg && tg->shares < NICE_0_LOAD) {
			/*
			 * scale shares to what it would have been had
			 * tg->weight been NICE_0_LOAD:
			 *
			 *   weight = 1024 * shares / tg->weight
			 */
			lw.weight *= se->load.weight;
			lw.weight /= tg->shares;

			lw.inv_weight = 0;

			se_lw = &lw;
			rw += lw.weight - se->load.weight;
		} else
#endif

		if (se->load.weight < NICE_0_LOAD) {
			se_lw = &lw;
			rw += NICE_0_LOAD - se->load.weight;
		}

		delta = calc_delta_mine(delta, rw, se_lw);
	}

	return delta;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static inline void
__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
	      unsigned long delta_exec)
{
	unsigned long delta_exec_weighted;

	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));

	curr->sum_exec_runtime += delta_exec;
	schedstat_add(cfs_rq, exec_clock, delta_exec);
	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
	curr->vruntime += delta_exec_weighted;
}

static void update_curr(struct cfs_rq *cfs_rq)
{
	struct sched_entity *curr = cfs_rq->curr;
	u64 now = rq_of(cfs_rq)->clock;
	unsigned long delta_exec;

	if (unlikely(!curr))
		return;

	/*
	 * Get the amount of time the current task was running
	 * since the last time we changed load (this cannot
	 * overflow on 32 bits):
	 */
	delta_exec = (unsigned long)(now - curr->exec_start);

	__update_curr(cfs_rq, curr, delta_exec);
	curr->exec_start = now;

	if (entity_is_task(curr)) {
		struct task_struct *curtask = task_of(curr);

		cpuacct_charge(curtask, delta_exec);
	}
}

static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
}
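/*
 * Worked example (illustrative, not part of the upstream file): for
 * sched_slice() above, s = p*w/rw. With two runnable tasks of weight
 * 1024 (nice 0) and 2048 (the weights are just example numbers) and the
 * default 20 ms period:
 *
 *   rw = 1024 + 2048 = 3072
 *   slice(weight 1024) = 20 ms * 1024 / 3072 ~=  6.7 ms
 *   slice(weight 2048) = 20 ms * 2048 / 3072 ~= 13.3 ms
 *
 * The vruntime slice from sched_vslice_add(), by contrast, is
 * weight-independent and simply equals the period p.
 */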
/*
 * Task is being enqueued - update stats:
 */
static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/*
	 * Are we enqueueing a waiting task? (for current tasks
	 * a dequeue/enqueue event is a NOP)
	 */
	if (se != cfs_rq->curr)
		update_stats_wait_start(cfs_rq, se);
}

static void
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	schedstat_set(se->wait_max, max(se->wait_max,
			rq_of(cfs_rq)->clock - se->wait_start));
	schedstat_set(se->wait_count, se->wait_count + 1);
	schedstat_set(se->wait_sum, se->wait_sum +
			rq_of(cfs_rq)->clock - se->wait_start);
	schedstat_set(se->wait_start, 0);
}

static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/*
	 * Mark the end of the wait period if dequeueing a
	 * waiting task:
	 */
	if (se != cfs_rq->curr)
		update_stats_wait_end(cfs_rq, se);
}

/*
 * We are picking a new current task - update its stats:
 */
static inline void
update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/*
	 * We are starting a new run period:
	 */
	se->exec_start = rq_of(cfs_rq)->clock;
}

/**************************************************
 * Scheduling class queueing methods:
 */

#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
static void
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
{
	cfs_rq->task_weight += weight;
}
#else
static inline void
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
