📄 sched.c
/*
 *  linux/kernel/sched.c
 *
 *  Kernel scheduler and related syscalls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *              make semaphores SMP safe
 *  1998-11-19  Implemented schedule_timeout() and related stuff
 *              by Andrea Arcangeli
 *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (type getpid()), which just extract a field from
 * current-task
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/nmi.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/completion.h>
#include <linux/prefetch.h>
#include <linux/compiler.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>

extern void timer_bh(void);
extern void tqueue_bh(void);
extern void immediate_bh(void);

/*
 * scheduler variables
 */

unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */

extern void mem_use(void);

/*
 * Scheduling quanta.
 *
 * NOTE! The unix "nice" value influences how long a process
 * gets. The nice value ranges from -20 to +19, where a -20
 * is a "high-priority" task, and a "+10" is a low-priority
 * task.
 *
 * We want the time-slice to be around 50ms or so, so this
 * calculation depends on the value of HZ.
 */
#if HZ < 200
#define TICK_SCALE(x)   ((x) >> 2)
#elif HZ < 400
#define TICK_SCALE(x)   ((x) >> 1)
#elif HZ < 800
#define TICK_SCALE(x)   (x)
#elif HZ < 1600
#define TICK_SCALE(x)   ((x) << 1)
#else
#define TICK_SCALE(x)   ((x) << 2)
#endif

#define NICE_TO_TICKS(nice)     (TICK_SCALE(20-(nice))+1)
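/*
 * Worked example (illustrative, not part of the original file; it assumes
 * the common i386 setting HZ == 100, so TICK_SCALE(x) is (x) >> 2 and one
 * tick is 10ms):
 *
 *      NICE_TO_TICKS(0)   == (20 >> 2) + 1 == 6 ticks   (~60ms slice)
 *      NICE_TO_TICKS(-20) == (40 >> 2) + 1 == 11 ticks  (~110ms slice)
 *      NICE_TO_TICKS(+19) == ( 1 >> 2) + 1 == 1 tick    (~10ms slice)
 *
 * which is how the "around 50ms" target above scales with the nice value.
 */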
/*
 * Init task must be ok at boot for the ix86 as we will check its signals
 * via the SMP irq return path.
 */
struct task_struct * init_tasks[NR_CPUS] = {&init_task, };

/*
 * The tasklist_lock protects the linked list of processes.
 *
 * The runqueue_lock locks the parts that actually access
 * and change the run-queues, and have to be interrupt-safe.
 *
 * If both locks are to be concurrently held, the runqueue_lock
 * nests inside the tasklist_lock.
 *
 * task->alloc_lock nests inside tasklist_lock.
 */
spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;      /* outer */

static LIST_HEAD(runqueue_head);

/*
 * We align per-CPU scheduling data on cacheline boundaries,
 * to prevent cacheline ping-pong.
 */
static union {
        struct schedule_data {
                struct task_struct * curr;
                cycles_t last_schedule;
        } schedule_data;
        char __pad [SMP_CACHE_BYTES];
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};

#define cpu_curr(cpu)           aligned_data[(cpu)].schedule_data.curr
#define last_schedule(cpu)      aligned_data[(cpu)].schedule_data.last_schedule

struct kernel_stat kstat;
extern struct task_struct *child_reaper;

#ifdef CONFIG_SMP

#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
#define can_schedule(p,cpu) \
        ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))

#else

#define idle_task(cpu) (&init_task)
#define can_schedule(p,cpu) (1)

#endif

void scheduling_functions_start_here(void) { }

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
        int weight;

        /*
         * select the current process after every other
         * runnable process, but before the idle thread.
         * Also, dont trigger a counter recalculation.
         */
        weight = -1;
        if (p->policy & SCHED_YIELD)
                goto out;

        /*
         * Non-RT process - normal case first.
         */
        if (p->policy == SCHED_FB) {
                /*
                 * Give the process a first-approximation goodness value
                 * according to the number of clock-ticks it has left.
                 *
                 * Don't do any other calculations if the time slice is
                 * over..
                 */
                weight = p->fb*300+p->counter;
                if (!weight){
                        p->fb=0;
                        goto out;
                }

#ifdef CONFIG_SMP
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current MM */
                if (p->mm == this_mm || !p->mm)
                        weight += 1;
                weight += 20 - p->nice;
                goto out;
        }

        /*
         * Realtime process, select the first one on the
         * runqueue (taking priorities within processes
         * into account).
         */
        weight = 1000 + p->rt_priority;
out:
        return weight;
}

/*
 * the 'goodness value' of replacing a process on a given CPU.
 * positive value means 'replace', zero or negative means 'dont'.
 */
static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
{
        return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
}
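/*
 * Worked example (illustrative figures, not taken from the original file):
 * for a SCHED_FB task with p->fb == 1, p->counter == 6 and nice == 0,
 * goodness() above starts from 1*300 + 6 == 306, adds PROC_CHANGE_PENALTY
 * if it last ran on this_cpu (SMP only), adds 1 if it shares the current
 * mm, and adds 20 - nice == 20.  A realtime task is weighted
 * 1000 + rt_priority instead, and a task marked SCHED_YIELD is pinned at
 * -1.  preemption_goodness() is simply the difference between two such
 * weights; reschedule_idle() below treats a strictly positive difference
 * as "worth preempting".
 */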
/*
 * This is ugly, but reschedule_idle() is very timing-critical.
 * We are called with the runqueue spinlock held and we must
 * not claim the tasklist_lock.
 */
static FASTCALL(void reschedule_idle(struct task_struct * p));

static void reschedule_idle(struct task_struct * p)
{
#ifdef CONFIG_SMP
        int this_cpu = smp_processor_id();
        struct task_struct *tsk, *target_tsk;
        int cpu, best_cpu, i, max_prio;
        cycles_t oldest_idle;

        /*
         * shortcut if the woken up task's last CPU is
         * idle now.
         */
        best_cpu = p->processor;
        if (can_schedule(p, best_cpu)) {
                tsk = idle_task(best_cpu);
                if (cpu_curr(best_cpu) == tsk) {
                        int need_resched;
send_now_idle:
                        /*
                         * If need_resched == -1 then we can skip sending
                         * the IPI altogether, tsk->need_resched is
                         * actively watched by the idle thread.
                         */
                        need_resched = tsk->need_resched;
                        tsk->need_resched = 1;
                        if ((best_cpu != this_cpu) && !need_resched)
                                smp_send_reschedule(best_cpu);
                        return;
                }
        }

        /*
         * We know that the preferred CPU has a cache-affine current
         * process, lets try to find a new idle CPU for the woken-up
         * process. Select the least recently active idle CPU. (that
         * one will have the least active cache context.) Also find
         * the executing process which has the least priority.
         */
        oldest_idle = (cycles_t) -1;
        target_tsk = NULL;
        max_prio = 0;

        for (i = 0; i < smp_num_cpus; i++) {
                cpu = cpu_logical_map(i);
                if (!can_schedule(p, cpu))
                        continue;
                tsk = cpu_curr(cpu);
                /*
                 * We use the first available idle CPU. This creates
                 * a priority list between idle CPUs, but this is not
                 * a problem.
                 */
                if (tsk == idle_task(cpu)) {
#if defined(__i386__) && defined(CONFIG_SMP)
                        /*
                         * Check if two siblings are idle in the same
                         * physical package. Use them if found.
                         */
                        if (smp_num_siblings == 2) {
                                if (cpu_curr(cpu_sibling_map[cpu]) ==
                                    idle_task(cpu_sibling_map[cpu])) {
                                        oldest_idle = last_schedule(cpu);
                                        target_tsk = tsk;
                                        break;
                                }
                        }
#endif
                        if (last_schedule(cpu) < oldest_idle) {
                                oldest_idle = last_schedule(cpu);
                                target_tsk = tsk;
                        }
                } else {
                        if (oldest_idle == -1ULL) {
                                int prio = preemption_goodness(tsk, p, cpu);

                                if (prio > max_prio) {
                                        max_prio = prio;
                                        target_tsk = tsk;
                                }
                        }
                }
        }
        tsk = target_tsk;
        if (tsk) {
                if (oldest_idle != -1ULL) {
                        best_cpu = tsk->processor;
                        goto send_now_idle;
                }
                tsk->need_resched = 1;
                if (tsk->processor != this_cpu)
                        smp_send_reschedule(tsk->processor);
        }
        return;

#else /* UP */
        int this_cpu = smp_processor_id();
        struct task_struct *tsk;

        tsk = cpu_curr(this_cpu);
        if (preemption_goodness(tsk, p, this_cpu) > 0)
                tsk->need_resched = 1;
#endif
}

/*
 * Careful!
 *
 * This has to add the process to the _beginning_ of the
 * run-queue, not the end. See the comment about "This is
 * subtle" in the scheduler proper..
 */
static inline void add_to_runqueue(struct task_struct * p)
{
        list_add(&p->run_list, &runqueue_head);
        nr_running++;
}

static inline void move_last_runqueue(struct task_struct * p)
{
        list_del(&p->run_list);
        list_add_tail(&p->run_list, &runqueue_head);
}

static inline void move_first_runqueue(struct task_struct * p)
{
        list_del(&p->run_list);
        list_add(&p->run_list, &runqueue_head);
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
static inline int try_to_wake_up(struct task_struct * p, int synchronous)
{
        unsigned long flags;
        int success = 0;

        /*
         * We want the common case fall through straight, thus the goto.
         */
        spin_lock_irqsave(&runqueue_lock, flags);
        p->state = TASK_RUNNING;
        if (task_on_runqueue(p))
                goto out;
        add_to_runqueue(p);
        if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
                reschedule_idle(p);
        success = 1;
out:
        spin_unlock_irqrestore(&runqueue_lock, flags);
        return success;
}

inline int wake_up_process(struct task_struct * p)
{
        return try_to_wake_up(p, 0);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        wake_up_process(p);
}
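/*
 * Sketch of the sleep side that pairs with wake_up_process() above
 * (hypothetical helper, not part of the original file): the task state is
 * set before the condition is re-checked, so a wake-up that slips in
 * between the check and schedule() simply resets the state to TASK_RUNNING
 * and is not lost.
 */
static inline void example_wait_for_flag(volatile int *flag)
{
        while (!*flag) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (*flag)
                        break;
                schedule();
        }
        set_current_state(TASK_RUNNING);
}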
/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long schedule_timeout(signed long timeout)
{
        struct timer_list timer;
        unsigned long expire;

        switch (timeout)
        {
        case MAX_SCHEDULE_TIMEOUT:
                /*
                 * These two special cases are useful to be comfortable
                 * in the caller. Nothing more. We could take
                 * MAX_SCHEDULE_TIMEOUT from one of the negative value
                 * but I' d like to return a valid offset (>=0) to allow
                 * the caller to do everything it want with the retval.
                 */
                schedule();
                goto out;
        default:
                /*
                 * Another bit of PARANOID. Note that the retval will be
                 * 0 since no piece of kernel is supposed to do a check
                 * for a negative retval of schedule_timeout() (since it
                 * should never happens anyway). You just have the printk()
                 * that will tell you if something is gone wrong and where.
                 */
                if (timeout < 0)
                {
                        printk(KERN_ERR "schedule_timeout: wrong timeout "
                                "value %lx from %p\n", timeout,
                                __builtin_return_address(0));
                        current->state = TASK_RUNNING;
                        goto out;
                }
        }

        expire = timeout + jiffies;

        init_timer(&timer);
        timer.expires = expire;
        timer.data = (unsigned long) current;
        timer.function = process_timeout;

        add_timer(&timer);
        schedule();
        del_timer_sync(&timer);

        timeout = expire - jiffies;

 out:
        return timeout < 0 ? 0 : timeout;
}
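/*
 * Usage sketch (hypothetical helper, not part of the original file):
 * sleep for roughly one second, returning early if a signal arrives,
 * exactly as the kerneldoc above describes for TASK_INTERRUPTIBLE.
 */
static inline signed long example_sleep_about_a_second(void)
{
        signed long remaining;

        set_current_state(TASK_INTERRUPTIBLE);
        remaining = schedule_timeout(HZ);       /* HZ jiffies == ~1 second */

        /* a non-zero result means a signal ended the sleep early */
        return remaining;
}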