/*
 *  kernel/sched.c
 *
 *  Kernel scheduler and related syscalls
 *
 *  Copyright (C) 1991-2002  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *		make semaphores SMP safe
 *  1998-11-19	Implemented schedule_timeout() and related stuff
 *		by Andrea Arcangeli
 *  2002-01-04	New ultra-scalable O(1) scheduler by Ingo Molnar:
 *		hybrid priority-list and round-robin design with
 *		an array-switch method of distributing timeslices
 *		and per-CPU runqueues.  Cleanups and useful suggestions
 *		by Davide Libenzi, preemptible kernel bits by Robert Love.
 *  2003-09-03	Interactivity tuning by Con Kolivas.
 *  2004-04-02	Scheduler domains code by Nick Piggin
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>
#include <asm/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/freezer.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/timer.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/tsacct_kern.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <linux/reciprocal_div.h>

#include <asm/tlb.h>
#include <asm/unistd.h>

/*
 * Scheduler clock - returns current time in nanosec units.
 * This is default implementation.
 * Architectures and sub-architectures can override this.
 */
unsigned long long __attribute__((weak)) sched_clock(void)
{
	return (unsigned long long)jiffies * (1000000000 / HZ);
}

/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)

/*
 * 'User priority' is the nice value converted to something we
 * can work with better when scaling various scheduler parameters,
 * it's a [ 0 ... 39 ] range.
 */
#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))

/*
 * Some helpers for converting nanosecond timing to jiffy resolution
 */
#define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
#define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))

/*
 * These are the 'tuning knobs' of the scheduler:
 *
 * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger),
 * default timeslice is 100 msecs, maximum timeslice is 800 msecs.
 * Timeslices get refilled after they expire.
 */
#define MIN_TIMESLICE		max(5 * HZ / 1000, 1)
#define DEF_TIMESLICE		(100 * HZ / 1000)
#define ON_RUNQUEUE_WEIGHT	 30
#define CHILD_PENALTY		 95
#define PARENT_PENALTY		100
#define EXIT_WEIGHT		  3
#define PRIO_BONUS_RATIO	 25
#define MAX_BONUS		(MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
#define INTERACTIVE_DELTA	  2
#define MAX_SLEEP_AVG		(DEF_TIMESLICE * MAX_BONUS)
#define STARVATION_LIMIT	(MAX_SLEEP_AVG)
#define NS_MAX_SLEEP_AVG	(JIFFIES_TO_NS(MAX_SLEEP_AVG))
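
/*
 * Worked example for the conversions above (illustrative only; assumes the
 * usual MAX_RT_PRIO == 100 and MAX_PRIO == 140 from <linux/sched.h>, and
 * HZ == 1000):
 *
 *   NICE_TO_PRIO(-20) == 100, NICE_TO_PRIO(0) == 120, NICE_TO_PRIO(19) == 139
 *   PRIO_TO_NICE(NICE_TO_PRIO(n)) == n, so the two macros are exact inverses
 *   USER_PRIO() maps that range onto [ 0 ... 39 ], hence MAX_USER_PRIO == 40
 *   MAX_BONUS == 40 * 25 / 100 == 10
 *   MIN_TIMESLICE == 5 jiffies, DEF_TIMESLICE == 100 jiffies,
 *   MAX_SLEEP_AVG == 100 * 10 == 1000 jiffies
 */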

/*
 * If a task is 'interactive' then we reinsert it in the active
 * array after it has expired its current timeslice. (it will not
 * continue to run immediately, it will still roundrobin with
 * other interactive tasks.)
 *
 * This part scales the interactivity limit depending on niceness.
 *
 * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
 * Here are a few examples of different nice levels:
 *
 *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
 *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
 *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
 *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
 *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
 *
 * (the X axis represents the possible -5 ... 0 ... +5 dynamic
 *  priority range a task can explore, a value of '1' means the
 *  task is rated interactive.)
 *
 * Ie. nice +19 tasks can never get 'interactive' enough to be
 * reinserted into the active array. And only heavily CPU-hog nice -20
 * tasks will be expired. Default nice 0 tasks are somewhere between,
 * it takes some effort for them to get interactive, but it's not
 * too hard.
 */
#define CURRENT_BONUS(p) \
	(NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
		MAX_SLEEP_AVG)

#define GRANULARITY	(10 * HZ / 1000 ? : 1)

#ifdef CONFIG_SMP
#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
			num_online_cpus())
#else
#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
#endif

#define SCALE(v1,v1_max,v2_max) \
	(v1) * (v2_max) / (v1_max)

#define DELTA(p) \
	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
		INTERACTIVE_DELTA)

#define TASK_INTERACTIVE(p) \
	((p)->prio <= (p)->static_prio - DELTA(p))

#define INTERACTIVE_SLEEP(p) \
	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
		(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))

#define TASK_PREEMPTS_CURR(p, rq) \
	((p)->prio < (rq)->curr->prio)

#define SCALE_PRIO(x, prio) \
	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)

static unsigned int static_prio_timeslice(int static_prio)
{
	if (static_prio < NICE_TO_PRIO(0))
		return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
	else
		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
}
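
/*
 * Worked example for the timeslice scaling above (illustrative only; assumes
 * MAX_PRIO == 140, MAX_USER_PRIO == 40 and HZ == 1000, so DEF_TIMESLICE == 100
 * jiffies and MIN_TIMESLICE == 5):
 *
 *   nice -20 (static_prio 100): SCALE_PRIO(400, 100) == 400 * 40 / 20 == 800ms
 *   nice   0 (static_prio 120): SCALE_PRIO(100, 120) == 100 * 20 / 20 == 100ms
 *   nice +19 (static_prio 139): SCALE_PRIO(100, 139) == max(5, 5)     ==   5ms
 *
 * which matches the [800ms ... 100ms ... 5ms] range documented at
 * task_timeslice() below.
 */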

#ifdef CONFIG_SMP
/*
 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
 * Since cpu_power is a 'constant', we can use a reciprocal divide.
 */
static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
{
	return reciprocal_divide(load, sg->reciprocal_cpu_power);
}

/*
 * Each time a sched group cpu_power is changed,
 * we must compute its reciprocal value
 */
static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
{
	sg->__cpu_power += val;
	sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
}
#endif

/*
 * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
 * to time slice values: [800ms ... 100ms ... 5ms]
 *
 * The higher a thread's priority, the bigger timeslices
 * it gets during one round of execution. But even the lowest
 * priority thread gets MIN_TIMESLICE worth of execution time.
 */
static inline unsigned int task_timeslice(struct task_struct *p)
{
	return static_prio_timeslice(p->static_prio);
}

/*
 * These are the runqueue data structures:
 */
struct prio_array {
	unsigned int nr_active;
	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_PRIO];
};

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
	spinlock_t lock;

	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	unsigned long nr_running;
	unsigned long raw_weighted_load;
#ifdef CONFIG_SMP
	unsigned long cpu_load[3];
	unsigned char idle_at_tick;
#ifdef CONFIG_NO_HZ
	unsigned char in_nohz_recently;
#endif
#endif
	unsigned long long nr_switches;

	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long nr_uninterruptible;

	unsigned long expired_timestamp;
	/* Cached timestamp set by update_cpu_clock() */
	unsigned long long most_recent_timestamp;
	struct task_struct *curr, *idle;
	unsigned long next_balance;
	struct mm_struct *prev_mm;
	struct prio_array *active, *expired, arrays[2];
	int best_expired_prio;
	atomic_t nr_iowait;

#ifdef CONFIG_SMP
	struct sched_domain *sd;

	/* For active balancing */
	int active_balance;
	int push_cpu;
	int cpu;		/* cpu of this runqueue */

	struct task_struct *migration_thread;
	struct list_head migration_queue;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info rq_sched_info;

	/* sys_sched_yield() stats */
	unsigned long yld_exp_empty;
	unsigned long yld_act_empty;
	unsigned long yld_both_empty;
	unsigned long yld_cnt;

	/* schedule() stats */
	unsigned long sched_switch;
	unsigned long sched_cnt;
	unsigned long sched_goidle;

	/* try_to_wake_up() stats */
	unsigned long ttwu_cnt;
	unsigned long ttwu_local;
#endif
	struct lock_class_key rq_lock_key;
};

static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp;
static DEFINE_MUTEX(sched_hotcpu_mutex);

static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}

/*
 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 * See detach_destroy_domains: synchronize_sched for details.
 *
 * The domain tree of any CPU may only be accessed from within
 * preempt-disabled sections.
 */
#define for_each_domain(cpu, __sd) \
	for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)

#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
#define this_rq()		(&__get_cpu_var(runqueues))
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)

#ifndef prepare_arch_switch
# define prepare_arch_switch(next)	do { } while (0)
#endif
#ifndef finish_arch_switch
# define finish_arch_switch(prev)	do { } while (0)
#endif

#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline int task_running(struct rq *rq, struct task_struct *p)
{
	return rq->curr == p;
}

static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
}

static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_DEBUG_SPINLOCK
	/* this is a valid case when another task releases the spinlock */
	rq->lock.owner = current;
#endif
	/*
	 * If we are tracking spinlock dependencies then we have to
	 * fix up the runqueue lock - which gets 'carried over' from
	 * prev into current:
	 */
	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);

	spin_unlock_irq(&rq->lock);
}

#else /* __ARCH_WANT_UNLOCKED_CTXSW */
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
	return p->oncpu;
#else
	return rq->curr == p;
#endif
}

static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
	/*
	 * We can optimise this out completely for !SMP, because the
	 * SMP rebalancing from interrupt is the only thing that cares
	 * here.
	 */
	next->oncpu = 1;
#endif
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	spin_unlock_irq(&rq->lock);
#else
	spin_unlock(&rq->lock);
#endif
}

static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
	/*
	 * After ->oncpu is cleared, the task can be moved to a different CPU.
	 * We must ensure this doesn't happen until the switch is completely
	 * finished.
	 */
	smp_wmb();
	prev->oncpu = 0;
#endif
#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	local_irq_enable();
#endif
}
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */

/*
 * __task_rq_lock - lock the runqueue a given task resides on.
 * Must be called interrupts disabled.
 */
static inline struct rq *__task_rq_lock(struct task_struct *p)
	__acquires(rq->lock)
{
	struct rq *rq;

repeat_lock_task:
	rq = task_rq(p);
	spin_lock(&rq->lock);
	if (unlikely(rq != task_rq(p))) {
		spin_unlock(&rq->lock);
		goto repeat_lock_task;
	}
	return rq;
}

/*
 * task_rq_lock - lock the runqueue a given task resides on and disable
 * interrupts. Note the ordering: we can safely lookup the task_rq without
 * explicitly disabling preemption.
 */
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
	__acquires(rq->lock)
{
	struct rq *rq;

repeat_lock_task:
	local_irq_save(*flags);
	rq = task_rq(p);
	spin_lock(&rq->lock);
	if (unlikely(rq != task_rq(p))) {
		spin_unlock_irqrestore(&rq->lock, *flags);
		goto repeat_lock_task;
	}
	return rq;
}

static inline void __task_rq_unlock(struct rq *rq)
	__releases(rq->lock)
{
	spin_unlock(&rq->lock);
}

static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
	__releases(rq->lock)
{
	spin_unlock_irqrestore(&rq->lock, *flags);
}
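
/*
 * Typical usage sketch of the helpers above (illustrative only, not code
 * from this file):
 *
 *	struct rq *rq;
 *	unsigned long flags;
 *
 *	rq = task_rq_lock(p, &flags);
 *	... 'p' cannot change runqueues while rq->lock is held ...
 *	task_rq_unlock(rq, &flags);
 *
 * The repeat_lock_task loop is needed because the task may migrate to another
 * CPU between the task_rq(p) lookup and the moment rq->lock is actually taken;
 * rechecking task_rq(p) under the lock makes the result stable.
 */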

#ifdef CONFIG_SCHEDSTATS
/*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
#define SCHEDSTAT_VERSION 14

static int show_schedstat(struct seq_file *seq, void *v)
{
	int cpu;

	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
	seq_printf(seq, "timestamp %lu\n", jiffies);
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
		struct sched_domain *sd;
		int dcnt = 0;
#endif

		/* runqueue-specific stats */
		seq_printf(seq,
		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
		    cpu, rq->yld_both_empty,
		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
		    rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
		    rq->ttwu_cnt, rq->ttwu_local,
		    rq->rq_sched_info.cpu_time,
		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);

		seq_printf(seq, "\n");

#ifdef CONFIG_SMP
		/* domain-specific stats */