📄 sched.c
/*
 *  kernel/sched.c
 *
 *  Kernel scheduler and related syscalls
 *
 *  Copyright (C) 1991-2002  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *              make semaphores SMP safe
 *  1998-11-19  Implemented schedule_timeout() and related stuff
 *              by Andrea Arcangeli
 *  2002-01-04  New ultra-scalable O(1) scheduler by Ingo Molnar:
 *              hybrid priority-list and round-robin design with
 *              an array-switch method of distributing timeslices
 *              and per-CPU runqueues.  Cleanups and useful suggestions
 *              by Davide Libenzi, preemptible kernel bits by Robert Love.
 *  2003-09-03  Interactivity tuning by Con Kolivas.
 *  2004-04-02  Scheduler domains code by Nick Piggin
 *  2007-04-15  Work begun on replacing all interactivity tuning with a
 *              fair scheduling design by Con Kolivas.
 *  2007-05-05  Load balancing (smp-nice) and other improvements
 *              by Peter Williams
 *  2007-05-06  Interactivity improvements to CFS by Mike Galbraith
 *  2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
 *  2007-11-29  RT balancing improvements by Steven Rostedt, Gregory Haskins,
 *              Thomas Gleixner, Mike Kravetz
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>
#include <asm/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/freezer.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/pid_namespace.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/timer.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
#include <linux/seq_file.h>
#include <linux/sysctl.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/tsacct_kern.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>
#include <linux/pagemap.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>

#include "sched_cpupri.h"

/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p)            PRIO_TO_NICE((p)->static_prio)

/*
 * 'User priority' is the nice value converted to something we
 * can work with better when scaling various scheduler parameters,
 * it's a [ 0 ... 39 ] range.
 */
#define USER_PRIO(p)            ((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p)       USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO           (USER_PRIO(MAX_PRIO))

/*
 * Helpers for converting nanosecond timing to jiffy resolution
 */
#define NS_TO_JIFFIES(TIME)     ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))

#define NICE_0_LOAD             SCHED_LOAD_SCALE
#define NICE_0_SHIFT            SCHED_LOAD_SHIFT

/*
 * These are the 'tuning knobs' of the scheduler:
 *
 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
 * Timeslices get refilled after they expire.
 */
#define DEF_TIMESLICE           (100 * HZ / 1000)

/*
 * single value that denotes runtime == period, ie unlimited time.
 */
#define RUNTIME_INF             ((u64)~0ULL)
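/*
 * A quick worked example of the nice <-> priority mapping above, as a
 * stand-alone user-space sketch (it assumes the usual MAX_RT_PRIO == 100
 * and MAX_PRIO == 140; the snippet is illustrative and not part of this
 * file's API):
 */
#if 0
#include <stdio.h>

#define MAX_RT_PRIO 100
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)
#define USER_PRIO(p)            ((p) - MAX_RT_PRIO)

int main(void)
{
        /* nice -20 -> prio 100, nice 0 -> prio 120, nice 19 -> prio 139 */
        printf("%d %d %d\n",
               NICE_TO_PRIO(-20), NICE_TO_PRIO(0), NICE_TO_PRIO(19));
        /* and back again, plus the 0..39 'user priority' view */
        printf("%d %d\n", PRIO_TO_NICE(139), USER_PRIO(139));   /* 19 39 */
        return 0;
}
#endif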
#ifdef CONFIG_SMP
/*
 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
 * Since cpu_power is a 'constant', we can use a reciprocal divide.
 */
static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
{
        return reciprocal_divide(load, sg->reciprocal_cpu_power);
}

/*
 * Each time a sched group cpu_power is changed,
 * we must compute its reciprocal value
 */
static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
{
        sg->__cpu_power += val;
        sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
}
#endif

static inline int rt_policy(int policy)
{
        if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
                return 1;
        return 0;
}

static inline int task_has_rt_policy(struct task_struct *p)
{
        return rt_policy(p->policy);
}

/*
 * This is the priority-queue data structure of the RT scheduling class:
 */
struct rt_prio_array {
        DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
        struct list_head queue[MAX_RT_PRIO];
};

struct rt_bandwidth {
        /* nests inside the rq lock: */
        spinlock_t              rt_runtime_lock;
        ktime_t                 rt_period;
        u64                     rt_runtime;
        struct hrtimer          rt_period_timer;
};

static struct rt_bandwidth def_rt_bandwidth;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
        struct rt_bandwidth *rt_b =
                container_of(timer, struct rt_bandwidth, rt_period_timer);
        ktime_t now;
        int overrun;
        int idle = 0;

        for (;;) {
                now = hrtimer_cb_get_time(timer);
                overrun = hrtimer_forward(timer, now, rt_b->rt_period);

                if (!overrun)
                        break;

                idle = do_sched_rt_period_timer(rt_b, overrun);
        }

        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

static
void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
        rt_b->rt_period = ns_to_ktime(period);
        rt_b->rt_runtime = runtime;

        spin_lock_init(&rt_b->rt_runtime_lock);

        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
        rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        ktime_t now;

        if (rt_b->rt_runtime == RUNTIME_INF)
                return;

        if (hrtimer_active(&rt_b->rt_period_timer))
                return;

        spin_lock(&rt_b->rt_runtime_lock);
        for (;;) {
                if (hrtimer_active(&rt_b->rt_period_timer))
                        break;

                now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
                hrtimer_start(&rt_b->rt_period_timer,
                              rt_b->rt_period_timer.expires,
                              HRTIMER_MODE_ABS);
        }
        spin_unlock(&rt_b->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        hrtimer_cancel(&rt_b->rt_period_timer);
}
#endif

/*
 * sched_domains_mutex serializes calls to arch_init_sched_domains,
 * detach_destroy_domains and partition_sched_domains.
 */
static DEFINE_MUTEX(sched_domains_mutex);
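/*
 * sg_div_cpu_power() above turns "load / cpu_power" into a multiply and a
 * shift: because cpu_power changes rarely, its 32-bit fixed-point reciprocal
 * is precomputed once (see sg_inc_cpu_power()) and reused for every divide.
 * Below is a stand-alone sketch of the same trick, in the spirit of
 * <linux/reciprocal_div.h>; the names and details here are illustrative,
 * not the kernel's exact implementation.
 */
#if 0
#include <stdint.h>
#include <assert.h>

/* roughly reciprocal_value(): ceil(2^32 / k) */
static uint32_t recip_value(uint32_t k)
{
        return (uint32_t)((((uint64_t)1 << 32) + k - 1) / k);
}

/* roughly reciprocal_divide(): a / k done as one multiply and one shift */
static uint32_t recip_divide(uint32_t a, uint32_t r)
{
        return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
        uint32_t cpu_power = 1024;      /* a SCHED_LOAD_SCALE-like value */
        uint32_t r = recip_value(cpu_power);

        assert(recip_divide(4096, r) == 4096 / cpu_power);      /* == 4 */
        return 0;
}
#endif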
#ifdef CONFIG_GROUP_SCHED

#include <linux/cgroup.h>

struct cfs_rq;

static LIST_HEAD(task_groups);

/* task group related information */
struct task_group {
#ifdef CONFIG_CGROUP_SCHED
        struct cgroup_subsys_state css;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
        /* schedulable entities of this group on each cpu */
        struct sched_entity **se;
        /* runqueue "owned" by this group on each cpu */
        struct cfs_rq **cfs_rq;
        unsigned long shares;
#endif

#ifdef CONFIG_RT_GROUP_SCHED
        struct sched_rt_entity **rt_se;
        struct rt_rq **rt_rq;

        struct rt_bandwidth rt_bandwidth;
#endif

        struct rcu_head rcu;
        struct list_head list;

        struct task_group *parent;
        struct list_head siblings;
        struct list_head children;
};

#ifdef CONFIG_USER_SCHED

/*
 * Root task group.
 *      Every UID task group (including init_task_group aka UID-0) will
 *      be a child to this group.
 */
struct task_group root_task_group;

#ifdef CONFIG_FAIR_GROUP_SCHED
/* Default task group's sched entity on each cpu */
static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
/* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
#endif /* CONFIG_FAIR_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
#endif /* CONFIG_RT_GROUP_SCHED */
#else /* !CONFIG_FAIR_GROUP_SCHED */
#define root_task_group init_task_group
#endif /* CONFIG_FAIR_GROUP_SCHED */

/* task_group_lock serializes add/remove of task groups and also changes to
 * a task group's cpu shares.
 */
static DEFINE_SPINLOCK(task_group_lock);

#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_USER_SCHED
# define INIT_TASK_GROUP_LOAD   (2*NICE_0_LOAD)
#else /* !CONFIG_USER_SCHED */
# define INIT_TASK_GROUP_LOAD   NICE_0_LOAD
#endif /* CONFIG_USER_SCHED */

/*
 * A weight of 0 or 1 can cause arithmetics problems.
 * A weight of a cfs_rq is the sum of weights of which entities
 * are queued on this cfs_rq, so a weight of a entity should not be
 * too large, so as the shares value of a task group.
 * (The default weight is 1024 - so there's no practical
 *  limitation from this.)
 */
#define MIN_SHARES      2
#define MAX_SHARES      (1UL << 18)

static int init_task_group_load = INIT_TASK_GROUP_LOAD;
#endif

/* Default task group.
 *      Every task in system belong to this group at bootup.
 */
struct task_group init_task_group;
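/*
 * MIN_SHARES/MAX_SHARES above bound a group's cpu shares because, as the
 * comment notes, CFS weights of 0 or 1 (and absurdly large weights) break
 * the weight arithmetic.  An illustrative sketch of how a share-setting
 * path might clamp a user-supplied value into that range (the helper name
 * is hypothetical, not a function defined in this file):
 */
#if 0
static unsigned long clamp_group_shares(unsigned long shares)
{
        if (shares < MIN_SHARES)
                shares = MIN_SHARES;
        if (shares > MAX_SHARES)
                shares = MAX_SHARES;
        return shares;
}
#endif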
/* return group to which a task belongs */
static inline struct task_group *task_group(struct task_struct *p)
{
        struct task_group *tg;

#ifdef CONFIG_USER_SCHED
        tg = p->user->tg;
#elif defined(CONFIG_CGROUP_SCHED)
        tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                struct task_group, css);
#else
        tg = &init_task_group;
#endif
        return tg;
}

/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
        p->se.parent = task_group(p)->se[cpu];
#endif

#ifdef CONFIG_RT_GROUP_SCHED
        p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
        p->rt.parent = task_group(p)->rt_se[cpu];
#endif
}

#else

static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
static inline struct task_group *task_group(struct task_struct *p)
{
        return NULL;
}

#endif  /* CONFIG_GROUP_SCHED */

/* CFS-related fields in a runqueue */
struct cfs_rq {
        struct load_weight load;
        unsigned long nr_running;

        u64 exec_clock;
        u64 min_vruntime;
        u64 pair_start;

        struct rb_root tasks_timeline;
        struct rb_node *rb_leftmost;

        struct list_head tasks;
        struct list_head *balance_iterator;

        /*
         * 'curr' points to currently running entity on this cfs_rq.
         * It is set to NULL otherwise (i.e when none are currently running).
         */
        struct sched_entity *curr, *next;

        unsigned long nr_spread_over;

#ifdef CONFIG_FAIR_GROUP_SCHED
        struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */

        /*
         * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
         * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
         * (like users, containers etc.)
         *
         * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
         * list is used during load balance.
         */
        struct list_head leaf_cfs_rq_list;
        struct task_group *tg;  /* group that "owns" this runqueue */

#ifdef CONFIG_SMP
        /*
         * the part of load.weight contributed by tasks
         */
        unsigned long task_weight;

        /*
         * h_load = weight * f(tg)
         *
         * Where f(tg) is the recursive weight fraction assigned to
         * this group.
         */
        unsigned long h_load;

        /*
         * this cpu's part of tg->shares
         */
        unsigned long shares;

        /*
         * load.weight at the time we set shares
         */
        unsigned long rq_weight;
#endif
#endif
};

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
        struct rt_prio_array active;
        unsigned long rt_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
        int highest_prio; /* highest queued rt task prio */
#endif
#ifdef CONFIG_SMP
        unsigned long rt_nr_migratory;
        int overloaded;
#endif
        int rt_throttled;
        u64 rt_time;
        u64 rt_runtime;
        /* Nests inside the rq lock: */
        spinlock_t rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
        unsigned long rt_nr_boosted;

        struct rq *rq;
        struct list_head leaf_rt_rq_list;
        struct task_group *tg;
        struct sched_rt_entity *rt_se;
#endif
};
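/*
 * struct rt_prio_array (embedded above as rt_rq.active) is the O(1) RT
 * runqueue: one list per priority level plus a bitmap of non-empty levels,
 * so picking the next RT task is a find-first-set-bit followed by taking
 * the head of that list.  A minimal user-space sketch of the same pattern
 * (names are illustrative; plain arrays stand in for the kernel's bitmap
 * and list helpers):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define TOY_NPRIO 100                   /* mirrors MAX_RT_PRIO */

struct toy_rt_array {
        uint64_t bitmap[2];             /* bit set => that priority queue is non-empty */
        int      queued[TOY_NPRIO];     /* stand-in for the per-priority task lists */
};

static void toy_enqueue(struct toy_rt_array *a, int prio)
{
        a->queued[prio]++;
        a->bitmap[prio / 64] |= 1ULL << (prio % 64);
}

/* lowest numbered set bit == highest RT priority */
static int toy_pick_next(const struct toy_rt_array *a)
{
        for (int w = 0; w < 2; w++)
                if (a->bitmap[w])
                        return w * 64 + __builtin_ctzll(a->bitmap[w]);
        return -1;                      /* no runnable RT task */
}

int main(void)
{
        struct toy_rt_array a = { { 0 }, { 0 } };

        toy_enqueue(&a, 42);
        toy_enqueue(&a, 7);
        printf("next prio: %d\n", toy_pick_next(&a));   /* 7 */
        return 0;
}
#endif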
#ifdef CONFIG_SMP

/*
 * We add the notion of a root-domain which will be used to define per-domain
 * variables. Each exclusive cpuset essentially defines an island domain by
 * fully partitioning the member cpus from any other cpuset. Whenever a new
 * exclusive cpuset is created, we also create and attach a new root-domain
 * object.
 *
 */
struct root_domain {
        atomic_t refcount;
        cpumask_t span;
        cpumask_t online;

        /*
         * The "RT overload" flag: it gets set if a CPU has more than
         * one runnable RT task.
         */
        cpumask_t rto_mask;
        atomic_t rto_count;
#ifdef CONFIG_SMP
        struct cpupri cpupri;
#endif
};

/*
 * By default the system creates a single root-domain with all cpus as
 * members (mimicking the global state we have today).
 */
static struct root_domain def_root_domain;

#endif

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
        /* runqueue lock: */
        spinlock_t lock;

        /*
         * nr_running and cpu_load should be in the same cacheline because
         * remote CPUs use both these fields when doing load calculation.
         */
        unsigned long nr_running;
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
        unsigned char idle_at_tick;
#ifdef CONFIG_NO_HZ
        unsigned long last_tick_seen;
        unsigned char in_nohz_recently;
#endif
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
        u64 nr_switches;

        struct cfs_rq cfs;
        struct rt_rq rt;

#ifdef CONFIG_FAIR_GROUP_SCHED
        /* list of leaf cfs_rq on this cpu: */
        struct list_head leaf_cfs_rq_list;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
        struct list_head leaf_rt_rq_list;
#endif

        /*
         * This is part of a global counter where only the total sum