sched.c
		preempt_disable();
		for_each_domain(cpu, sd) {
			enum idle_type itype;
			char mask_str[NR_CPUS];

			cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
					itype++) {
				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
						"%lu",
				    sd->lb_cnt[itype],
				    sd->lb_balanced[itype],
				    sd->lb_failed[itype],
				    sd->lb_imbalance[itype],
				    sd->lb_gained[itype],
				    sd->lb_hot_gained[itype],
				    sd->lb_nobusyq[itype],
				    sd->lb_nobusyg[itype]);
			}
			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
			    " %lu %lu %lu\n",
			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
			    sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
			    sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
			    sd->ttwu_move_balance);
		}
		preempt_enable();
#endif
	}
	return 0;
}

static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *buf = kmalloc(size, GFP_KERNEL);
	struct seq_file *m;
	int res;

	if (!buf)
		return -ENOMEM;
	res = single_open(file, show_schedstat, NULL);
	if (!res) {
		m = file->private_data;
		m->buf = buf;
		m->size = size;
	} else
		kfree(buf);
	return res;
}

const struct file_operations proc_schedstat_operations = {
	.open    = schedstat_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
{
	if (rq) {
		rq->rq_sched_info.run_delay += delta_jiffies;
		rq->rq_sched_info.pcnt++;
	}
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
{
	if (rq)
		rq->rq_sched_info.cpu_time += delta_jiffies;
}
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
#else /* !CONFIG_SCHEDSTATS */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
{}
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
{}
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
#endif

/*
 * this_rq_lock - lock this runqueue and disable interrupts.
 */
static inline struct rq *this_rq_lock(void)
	__acquires(rq->lock)
{
	struct rq *rq;

	local_irq_disable();
	rq = this_rq();
	spin_lock(&rq->lock);

	return rq;
}

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
 * Called when a process is dequeued from the active array and given
 * the cpu.  We should note that with the exception of interactive
 * tasks, the expired queue will become the active queue after the active
 * queue is empty, without explicitly dequeuing and requeuing tasks in the
 * expired queue.  (Interactive tasks may be requeued directly to the
 * active queue, thus delaying tasks in the expired queue from running;
 * see scheduler_tick()).
 *
 * This function is only called from sched_info_arrive(), rather than
 * dequeue_task(). Even though a task may be queued and dequeued multiple
 * times as it is shuffled about, we're really interested in knowing how
 * long it was from the *first* time it was queued to the time that it
 * finally hit a cpu.
 */
static inline void sched_info_dequeued(struct task_struct *t)
{
	t->sched_info.last_queued = 0;
}
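/*
 * Illustrative sketch (not part of sched.c): a minimal userspace reader
 * that mirrors the buffer-sizing heuristic used by schedstat_open() above,
 * i.e. one page per 32 online CPUs plus one.  The field layout of
 * /proc/schedstat is version-dependent, so this only dumps the raw text.
 * Compile and run it on a kernel built with CONFIG_SCHEDSTATS.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	long cpus = sysconf(_SC_NPROCESSORS_ONLN);
	/* same sizing heuristic as schedstat_open() */
	size_t size = (size_t)page * (size_t)(1 + cpus / 32);
	char *buf = malloc(size);
	FILE *fp = fopen("/proc/schedstat", "r");
	size_t n;

	if (!buf || !fp)
		return 1;
	n = fread(buf, 1, size - 1, fp);
	buf[n] = '\0';
	fputs(buf, stdout);
	fclose(fp);
	free(buf);
	return 0;
}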
/*
 * Called when a task finally hits the cpu.  We can now calculate how
 * long it was waiting to run.  We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct task_struct *t)
{
	unsigned long now = jiffies, delta_jiffies = 0;

	if (t->sched_info.last_queued)
		delta_jiffies = now - t->sched_info.last_queued;
	sched_info_dequeued(t);
	t->sched_info.run_delay += delta_jiffies;
	t->sched_info.last_arrival = now;
	t->sched_info.pcnt++;

	rq_sched_info_arrive(task_rq(t), delta_jiffies);
}

/*
 * Called when a process is queued into either the active or expired
 * array.  The time is noted and later used to determine how long we
 * had to wait for us to reach the cpu.  Since the expired queue will
 * become the active queue after active queue is empty, without dequeuing
 * and requeuing any tasks, we are interested in queuing to either. It
 * is unusual but not impossible for tasks to be dequeued and immediately
 * requeued in the same or another array: this can happen in sched_yield(),
 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
 * to runqueue.
 *
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is already not set.  It's assumed that
 * sched_info_dequeued() will clear that stamp when appropriate.
 */
static inline void sched_info_queued(struct task_struct *t)
{
	if (unlikely(sched_info_on()))
		if (!t->sched_info.last_queued)
			t->sched_info.last_queued = jiffies;
}

/*
 * Called when a process ceases being the active-running process, either
 * voluntarily or involuntarily.  Now we can calculate how long we ran.
 */
static inline void sched_info_depart(struct task_struct *t)
{
	unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;

	t->sched_info.cpu_time += delta_jiffies;
	rq_sched_info_depart(task_rq(t), delta_jiffies);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice.  (This may also be called when switching to or from
 * the idle task.)  We are only called when prev != next.
 */
static inline void
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	struct rq *rq = task_rq(prev);

	/*
	 * prev now departs the cpu.  It's not interesting to record
	 * stats about how efficient we were at scheduling the idle
	 * process, however.
	 */
	if (prev != rq->idle)
		sched_info_depart(prev);

	if (next != rq->idle)
		sched_info_arrive(next);
}
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (unlikely(sched_info_on()))
		__sched_info_switch(prev, next);
}
#else
#define sched_info_queued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */

/*
 * Adding/removing a task to/from a priority array:
 */
static void dequeue_task(struct task_struct *p, struct prio_array *array)
{
	array->nr_active--;
	list_del(&p->run_list);
	if (list_empty(array->queue + p->prio))
		__clear_bit(p->prio, array->bitmap);
}

static void enqueue_task(struct task_struct *p, struct prio_array *array)
{
	sched_info_queued(p);
	list_add_tail(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	array->nr_active++;
	p->array = array;
}
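/*
 * Illustrative sketch (not part of sched.c): a self-contained userspace
 * model of the run_delay bookkeeping performed by sched_info_queued() and
 * sched_info_arrive() above, with a plain counter standing in for jiffies.
 * The names (struct demo_sched_info, demo_*) are invented for this sketch.
 */
#include <stdio.h>

struct demo_sched_info {
	unsigned long last_queued;	/* 0 means "not waiting for a cpu" */
	unsigned long last_arrival;
	unsigned long run_delay;
	unsigned long pcnt;
};

static void demo_queued(struct demo_sched_info *si, unsigned long now)
{
	if (!si->last_queued)		/* only the *first* enqueue is recorded */
		si->last_queued = now;
}

static void demo_arrive(struct demo_sched_info *si, unsigned long now)
{
	unsigned long delta = 0;

	if (si->last_queued)
		delta = now - si->last_queued;
	si->last_queued = 0;		/* mirrors sched_info_dequeued() */
	si->run_delay += delta;
	si->last_arrival = now;
	si->pcnt++;
}

int main(void)
{
	struct demo_sched_info si = { 0 };

	demo_queued(&si, 100);		/* woken up, placed on a runqueue    */
	demo_queued(&si, 105);		/* requeued; first timestamp is kept */
	demo_arrive(&si, 130);		/* finally gets the cpu              */
	printf("run_delay=%lu pcnt=%lu\n", si.run_delay, si.pcnt);	/* 30, 1 */
	return 0;
}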
/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task(struct task_struct *p, struct prio_array *array)
{
	list_move_tail(&p->run_list, array->queue + p->prio);
}

static inline void
enqueue_task_head(struct task_struct *p, struct prio_array *array)
{
	list_add(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	array->nr_active++;
	p->array = array;
}

/*
 * __normal_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into the -5 ... 0 ... +5 bonus/penalty range.
 *
 * We use 25% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 *
 * Both properties are important to certain workloads.
 */
static inline int __normal_prio(struct task_struct *p)
{
	int bonus, prio;

	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;

	prio = p->static_prio - bonus;
	if (prio < MAX_RT_PRIO)
		prio = MAX_RT_PRIO;
	if (prio > MAX_PRIO-1)
		prio = MAX_PRIO-1;
	return prio;
}

/*
 * To aid in avoiding the subversion of "niceness" due to uneven distribution
 * of tasks with abnormal "nice" values across CPUs the contribution that
 * each task makes to its run queue's load is weighted according to its
 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
 * scaled version of the new time slice allocation that they receive on time
 * slice expiry etc.
 */

/*
 * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE
 * If static_prio_timeslice() is ever changed to break this assumption then
 * this code will need modification
 */
#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
#define LOAD_WEIGHT(lp) \
	(((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
#define PRIO_TO_LOAD_WEIGHT(prio) \
	LOAD_WEIGHT(static_prio_timeslice(prio))
#define RTPRIO_TO_LOAD_WEIGHT(rp) \
	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))

static void set_load_weight(struct task_struct *p)
{
	if (has_rt_policy(p)) {
#ifdef CONFIG_SMP
		if (p == task_rq(p)->migration_thread)
			/*
			 * The migration thread does the actual balancing.
			 * Giving its load any weight will skew balancing
			 * adversely.
			 */
			p->load_weight = 0;
		else
#endif
			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
	} else
		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
}

static inline void
inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
	rq->raw_weighted_load += p->load_weight;
}

static inline void
dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
	rq->raw_weighted_load -= p->load_weight;
}

static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
{
	rq->nr_running++;
	inc_raw_weighted_load(rq, p);
}

static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
{
	rq->nr_running--;
	dec_raw_weighted_load(rq, p);
}

/*
 * Calculate the expected normal priority: i.e. priority
 * without taking RT-inheritance into account. Might be
 * boosted by interactivity modifiers. Changes upon fork,
 * setprio syscalls, and whenever the interactivity
 * estimator recalculates.
 */
static inline int normal_prio(struct task_struct *p)
{
	int prio;

	if (has_rt_policy(p))
		prio = MAX_RT_PRIO-1 - p->rt_priority;
	else
		prio = __normal_prio(p);
	return prio;
}
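/*
 * Illustrative sketch (not part of sched.c): the bonus/penalty clamp in
 * __normal_prio() above, modelled in userspace.  The constants below
 * (MAX_BONUS = 10, MAX_RT_PRIO = 100, MAX_PRIO = 140, nice 0 == static
 * priority 120) match this scheduler, but treat them as assumptions of
 * the sketch rather than definitions taken from this file.
 */
#include <stdio.h>

#define DEMO_MAX_BONUS		10
#define DEMO_MAX_RT_PRIO	100
#define DEMO_MAX_PRIO		140

/* bonus plays the role of CURRENT_BONUS(p): 0 (pure cpu hog) .. 10 (always sleeping) */
static int demo_normal_prio(int static_prio, int bonus)
{
	int prio = static_prio - (bonus - DEMO_MAX_BONUS / 2);

	if (prio < DEMO_MAX_RT_PRIO)
		prio = DEMO_MAX_RT_PRIO;
	if (prio > DEMO_MAX_PRIO - 1)
		prio = DEMO_MAX_PRIO - 1;
	return prio;
}

int main(void)
{
	/* a nice-0 task can swing 5 levels either way around 120 */
	printf("cpu hog:     %d\n", demo_normal_prio(120, 0));		/* 125 */
	printf("neutral:     %d\n", demo_normal_prio(120, 5));		/* 120 */
	printf("interactive: %d\n", demo_normal_prio(120, 10));		/* 115 */
	return 0;
}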
/*
 * Calculate the current priority, i.e. the priority
 * taken into account by the scheduler. This value might
 * be boosted by RT tasks, or might be boosted by
 * interactivity modifiers. Will be RT if the task got
 * RT-boosted. If not then it returns p->normal_prio.
 */
static int effective_prio(struct task_struct *p)
{
	p->normal_prio = normal_prio(p);
	/*
	 * If we are RT tasks or we were boosted to RT priority,
	 * keep the priority unchanged. Otherwise, update priority
	 * to the normal priority:
	 */
	if (!rt_prio(p->prio))
		return p->normal_prio;
	return p->prio;
}

/*
 * __activate_task - move a task to the runqueue.
 */
static void __activate_task(struct task_struct *p, struct rq *rq)
{
	struct prio_array *target = rq->active;

	if (batch_task(p))
		target = rq->expired;
	enqueue_task(p, target);
	inc_nr_running(p, rq);
}

/*
 * __activate_idle_task - move idle task to the _front_ of runqueue.
 */
static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
{
	enqueue_task_head(p, rq->active);
	inc_nr_running(p, rq);
}

/*
 * Recalculate p->normal_prio and p->prio after having slept,
 * updating the sleep-average too:
 */
static int recalc_task_prio(struct task_struct *p, unsigned long long now)
{
	/* Caller must always ensure 'now >= p->timestamp' */
	unsigned long sleep_time = now - p->timestamp;

	if (batch_task(p))
		sleep_time = 0;

	if (likely(sleep_time > 0)) {
		/*
		 * This ceiling is set to the lowest priority that would allow
		 * a task to be reinserted into the active array on timeslice
		 * completion.
		 */
		unsigned long ceiling = INTERACTIVE_SLEEP(p);

		if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) {
			/*
			 * Prevents user tasks from achieving best priority
			 * with one single large enough sleep.
			 */
			p->sleep_avg = ceiling;
			/*
			 * Using INTERACTIVE_SLEEP() as a ceiling places a
			 * nice(0) task 1ms sleep away from promotion, and
			 * gives it 700ms to round-robin with no chance of
			 * being demoted.  This is more than generous, so
			 * mark this sleep as non-interactive to prevent the
			 * on-runqueue bonus logic from intervening should
			 * this task not receive cpu immediately.
			 */
			p->sleep_type = SLEEP_NONINTERACTIVE;
		} else {
			/*
			 * Tasks waking from uninterruptible sleep are
			 * limited in their sleep_avg rise as they
			 * are likely to be waiting on I/O
			 */
			if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
				if (p->sleep_avg >= ceiling)
					sleep_time = 0;
				else if (p->sleep_avg + sleep_time >=
					 ceiling) {
					p->sleep_avg = ceiling;
					sleep_time = 0;
				}
			}

			/*
			 * This code gives a bonus to interactive tasks.
			 *
			 * The boost works by updating the 'average sleep time'
			 * value here, based on ->timestamp. The more time a
			 * task spends sleeping, the higher the average gets -
			 * and the higher the priority boost gets as well.
			 */
			p->sleep_avg += sleep_time;
		}
		if (p->sleep_avg > NS_MAX_SLEEP_AVG)
			p->sleep_avg = NS_MAX_SLEEP_AVG;
	}

	return effective_prio(p);
}

/*
 * activate_task - move a task to the runqueue and do priority recalculation
 *
 * Update all the scheduling statistics stuff. (sleep average
 * calculation, priority modifiers, etc.)
 */
static void activate_task(struct task_struct *p, struct rq *rq, int local)
{
	unsigned long long now;

	if (rt_task(p))
		goto out;

	now = sched_clock();
#ifdef CONFIG_SMP
	if (!local) {
		/* Compensate for drifting sched_clock */
		struct rq *this_rq = this_rq();
		now = (now - this_rq->most_recent_timestamp)
			+ rq->most_recent_timestamp;
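/*
 * Illustrative sketch (not part of sched.c): the sched_clock drift
 * compensation used by activate_task() above.  A timestamp taken on the
 * local cpu is rebased onto the target runqueue's clock by subtracting
 * the local reference stamp and adding the remote one, preserving the
 * offset between "now" and the local stamp.  All values below are made
 * up for the illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long now = 1000500;		/* sched_clock() on this cpu      */
	unsigned long long this_rq_ts = 1000000;	/* this cpu's most recent stamp   */
	unsigned long long rq_ts = 2000000;		/* target cpu's most recent stamp */

	/* same rebasing as activate_task(): keep the 500-unit offset, swap the base */
	unsigned long long rebased = (now - this_rq_ts) + rq_ts;

	printf("rebased timestamp: %llu\n", rebased);	/* 2000500 */
	return 0;
}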