sched.c
		preempt_disable();
		for_each_domain(cpu, sd) {
			enum idle_type itype;
			char mask_str[NR_CPUS];

			cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
					itype++) {
				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
						"%lu",
				    sd->lb_cnt[itype],
				    sd->lb_balanced[itype],
				    sd->lb_failed[itype],
				    sd->lb_imbalance[itype],
				    sd->lb_gained[itype],
				    sd->lb_hot_gained[itype],
				    sd->lb_nobusyq[itype],
				    sd->lb_nobusyg[itype]);
			}
			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
			    " %lu %lu %lu\n",
			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
			    sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
			    sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
			    sd->ttwu_move_balance);
		}
		preempt_enable();
#endif
	}
	return 0;
}

static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *buf = kmalloc(size, GFP_KERNEL);
	struct seq_file *m;
	int res;

	if (!buf)
		return -ENOMEM;
	res = single_open(file, show_schedstat, NULL);
	if (!res) {
		m = file->private_data;
		m->buf = buf;
		m->size = size;
	} else
		kfree(buf);
	return res;
}

const struct file_operations proc_schedstat_operations = {
	.open    = schedstat_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
{
	if (rq) {
		rq->rq_sched_info.run_delay += delta_jiffies;
		rq->rq_sched_info.pcnt++;
	}
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
{
	if (rq)
		rq->rq_sched_info.cpu_time += delta_jiffies;
}
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
#else /* !CONFIG_SCHEDSTATS */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
{}
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
{}
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
#endif

/*
 * this_rq_lock - lock this runqueue and disable interrupts.
 */
static inline struct rq *this_rq_lock(void)
	__acquires(rq->lock)
{
	struct rq *rq;

	local_irq_disable();
	rq = this_rq();
	spin_lock(&rq->lock);

	return rq;
}

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
 * Called when a process is dequeued from the active array and given
 * the cpu.  We should note that with the exception of interactive
 * tasks, the expired queue will become the active queue after the active
 * queue is empty, without explicitly dequeuing and requeuing tasks in the
 * expired queue.  (Interactive tasks may be requeued directly to the
 * active queue, thus delaying tasks in the expired queue from running;
 * see scheduler_tick()).
 *
 * This function is only called from sched_info_arrive(), rather than
 * dequeue_task(). Even though a task may be queued and dequeued multiple
 * times as it is shuffled about, we're really interested in knowing how
 * long it was from the *first* time it was queued to the time that it
 * finally hit a cpu.
 */
static inline void sched_info_dequeued(struct task_struct *t)
{
	t->sched_info.last_queued = 0;
}
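/*
 * Illustrative sketch (not part of sched.c): a minimal userspace reader
 * that mirrors the buffer-sizing heuristic used by schedstat_open() above,
 * i.e. one page per 32 online CPUs plus one.  The field layout of
 * /proc/schedstat is version-dependent, so this only dumps the raw text.
 * Compile and run it on a kernel built with CONFIG_SCHEDSTATS.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	long cpus = sysconf(_SC_NPROCESSORS_ONLN);
	/* same sizing heuristic as schedstat_open() */
	size_t size = (size_t)page * (size_t)(1 + cpus / 32);
	char *buf = malloc(size);
	FILE *fp = fopen("/proc/schedstat", "r");
	size_t n;

	if (!buf || !fp)
		return 1;
	n = fread(buf, 1, size - 1, fp);
	buf[n] = '\0';
	fputs(buf, stdout);
	fclose(fp);
	free(buf);
	return 0;
}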
/*
 * Called when a task finally hits the cpu.  We can now calculate how
 * long it was waiting to run.  We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct task_struct *t)
{
	unsigned long now = jiffies, delta_jiffies = 0;

	if (t->sched_info.last_queued)
		delta_jiffies = now - t->sched_info.last_queued;
	sched_info_dequeued(t);
	t->sched_info.run_delay += delta_jiffies;
	t->sched_info.last_arrival = now;
	t->sched_info.pcnt++;

	rq_sched_info_arrive(task_rq(t), delta_jiffies);
}

/*
 * Called when a process is queued into either the active or expired
 * array.  The time is noted and later used to determine how long we
 * had to wait for us to reach the cpu.  Since the expired queue will
 * become the active queue after active queue is empty, without dequeuing
 * and requeuing any tasks, we are interested in queuing to either. It
 * is unusual but not impossible for tasks to be dequeued and immediately
 * requeued in the same or another array: this can happen in sched_yield(),
 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
 * to runqueue.
 *
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is already not set.  It's assumed that
 * sched_info_dequeued() will clear that stamp when appropriate.
 */
static inline void sched_info_queued(struct task_struct *t)
{
	if (unlikely(sched_info_on()))
		if (!t->sched_info.last_queued)
			t->sched_info.last_queued = jiffies;
}

/*
 * Called when a process ceases being the active-running process, either
 * voluntarily or involuntarily.  Now we can calculate how long we ran.
 */
static inline void sched_info_depart(struct task_struct *t)
{
	unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;

	t->sched_info.cpu_time += delta_jiffies;
	rq_sched_info_depart(task_rq(t), delta_jiffies);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice.  (This may also be called when switching to or from
 * the idle task.)  We are only called when prev != next.
 */
static inline void
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	struct rq *rq = task_rq(prev);

	/*
	 * prev now departs the cpu.  It's not interesting to record
	 * stats about how efficient we were at scheduling the idle
	 * process, however.
	 */
	if (prev != rq->idle)
		sched_info_depart(prev);

	if (next != rq->idle)
		sched_info_arrive(next);
}
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (unlikely(sched_info_on()))
		__sched_info_switch(prev, next);
}
#else
#define sched_info_queued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */

/*
 * Adding/removing a task to/from a priority array:
 */
static void dequeue_task(struct task_struct *p, struct prio_array *array)
{
	array->nr_active--;
	list_del(&p->run_list);
	if (list_empty(array->queue + p->prio))
		__clear_bit(p->prio, array->bitmap);
}

static void enqueue_task(struct task_struct *p, struct prio_array *array)
{
	sched_info_queued(p);
	list_add_tail(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	array->nr_active++;
	p->array = array;
}
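/*
 * Illustrative sketch (not part of sched.c): a self-contained userspace
 * model of the run_delay bookkeeping performed by sched_info_queued() and
 * sched_info_arrive() above, with a plain counter standing in for jiffies.
 * The names (struct demo_sched_info, demo_*) are invented for this sketch.
 */
#include <stdio.h>

struct demo_sched_info {
	unsigned long last_queued;	/* 0 means "not waiting for a cpu" */
	unsigned long last_arrival;
	unsigned long run_delay;
	unsigned long pcnt;
};

static void demo_queued(struct demo_sched_info *si, unsigned long now)
{
	if (!si->last_queued)		/* only the *first* enqueue is recorded */
		si->last_queued = now;
}

static void demo_arrive(struct demo_sched_info *si, unsigned long now)
{
	unsigned long delta = 0;

	if (si->last_queued)
		delta = now - si->last_queued;
	si->last_queued = 0;		/* mirrors sched_info_dequeued() */
	si->run_delay += delta;
	si->last_arrival = now;
	si->pcnt++;
}

int main(void)
{
	struct demo_sched_info si = { 0 };

	demo_queued(&si, 100);		/* woken up, placed on a runqueue    */
	demo_queued(&si, 105);		/* requeued; first timestamp is kept */
	demo_arrive(&si, 130);		/* finally gets the cpu              */
	printf("run_delay=%lu pcnt=%lu\n", si.run_delay, si.pcnt);	/* 30, 1 */
	return 0;
}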
/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task(struct task_struct *p, struct prio_array *array)
{
	list_move_tail(&p->run_list, array->queue + p->prio);
}

static inline void
enqueue_task_head(struct task_struct *p, struct prio_array *array)
{
	list_add(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	array->nr_active++;
	p->array = array;
}

/*
 * __normal_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into the -5 ... 0 ... +5 bonus/penalty range.
 *
 * We use 25% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 *
 * Both properties are important to certain workloads.
 */
static inline int __normal_prio(struct task_struct *p)
{
	int bonus, prio;

	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;

	prio = p->static_prio - bonus;
	if (prio < MAX_RT_PRIO)
		prio = MAX_RT_PRIO;
	if (prio > MAX_PRIO-1)
		prio = MAX_PRIO-1;
	return prio;
}

/*
 * To aid in avoiding the subversion of "niceness" due to uneven distribution
 * of tasks with abnormal "nice" values across CPUs the contribution that
 * each task makes to its run queue's load is weighted according to its
 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
 * scaled version of the new time slice allocation that they receive on time
 * slice expiry etc.
 */

/*
 * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE
 * If static_prio_timeslice() is ever changed to break this assumption then
 * this code will need modification
 */
#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
#define LOAD_WEIGHT(lp) \
	(((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
#define PRIO_TO_LOAD_WEIGHT(prio) \
	LOAD_WEIGHT(static_prio_timeslice(prio))
#define RTPRIO_TO_LOAD_WEIGHT(rp) \
	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))

static void set_load_weight(struct task_struct *p)
{
	if (has_rt_policy(p)) {
#ifdef CONFIG_SMP
		if (p == task_rq(p)->migration_thread)
			/*
			 * The migration thread does the actual balancing.
			 * Giving its load any weight will skew balancing
			 * adversely.
			 */
			p->load_weight = 0;
		else
#endif
			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
	} else
		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
}

static inline void
inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
	rq->raw_weighted_load += p->load_weight;
}

static inline void
dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
	rq->raw_weighted_load -= p->load_weight;
}

static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
{
	rq->nr_running++;
	inc_raw_weighted_load(rq, p);
}

static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
{
	rq->nr_running--;
	dec_raw_weighted_load(rq, p);
}

/*
 * Calculate the expected normal priority: i.e. priority
 * without taking RT-inheritance into account. Might be
 * boosted by interactivity modifiers. Changes upon fork,
 * setprio syscalls, and whenever the interactivity
 * estimator recalculates.
 */
static inline int normal_prio(struct task_struct *p)
{
	int prio;

	if (has_rt_policy(p))
		prio = MAX_RT_PRIO-1 - p->rt_priority;
	else
		prio = __normal_prio(p);
	return prio;
}
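/*
 * Illustrative sketch (not part of sched.c): the bonus/penalty clamp in
 * __normal_prio() above, modelled in userspace.  The constants below
 * (MAX_BONUS = 10, MAX_RT_PRIO = 100, MAX_PRIO = 140, nice 0 == static
 * priority 120) match this scheduler, but treat them as assumptions of
 * the sketch rather than definitions taken from this file.
 */
#include <stdio.h>

#define DEMO_MAX_BONUS		10
#define DEMO_MAX_RT_PRIO	100
#define DEMO_MAX_PRIO		140

/* bonus plays the role of CURRENT_BONUS(p): 0 (pure cpu hog) .. 10 (always sleeping) */
static int demo_normal_prio(int static_prio, int bonus)
{
	int prio = static_prio - (bonus - DEMO_MAX_BONUS / 2);

	if (prio < DEMO_MAX_RT_PRIO)
		prio = DEMO_MAX_RT_PRIO;
	if (prio > DEMO_MAX_PRIO - 1)
		prio = DEMO_MAX_PRIO - 1;
	return prio;
}

int main(void)
{
	/* a nice-0 task can swing 5 levels either way around 120 */
	printf("cpu hog:     %d\n", demo_normal_prio(120, 0));		/* 125 */
	printf("neutral:     %d\n", demo_normal_prio(120, 5));		/* 120 */
	printf("interactive: %d\n", demo_normal_prio(120, 10));		/* 115 */
	return 0;
}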
/*
 * Calculate the current priority, i.e. the priority
 * taken into account by the scheduler. This value might
 * be boosted by RT tasks, or might be boosted by
 * interactivity modifiers. Will be RT if the task got
 * RT-boosted. If not then it returns p->normal_prio.
 */
static int effective_prio(struct task_struct *p)
{
	p->normal_prio = normal_prio(p);
	/*
	 * If we are RT tasks or we were boosted to RT priority,
	 * keep the priority unchanged. Otherwise, update priority
	 * to the normal priority:
	 */
	if (!rt_prio(p->prio))
		return p->normal_prio;
	return p->prio;
}

/*
 * __activate_task - move a task to the runqueue.
 */
static void __activate_task(struct task_struct *p, struct rq *rq)
{
	struct prio_array *target = rq->active;

	if (batch_task(p))
		target = rq->expired;
	enqueue_task(p, target);
	inc_nr_running(p, rq);
}

/*
 * __activate_idle_task - move idle task to the _front_ of runqueue.
 */
static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
{
	enqueue_task_head(p, rq->active);
	inc_nr_running(p, rq);
}

/*
 * Recalculate p->normal_prio and p->prio after having slept,
 * updating the sleep-average too:
 */
static int recalc_task_prio(struct task_struct *p, unsigned long long now)
{
	/* Caller must always ensure 'now >= p->timestamp' */
	unsigned long sleep_time = now - p->timestamp;

	if (batch_task(p))
		sleep_time = 0;

	if (likely(sleep_time > 0)) {
		/*
		 * This ceiling is set to the lowest priority that would allow
		 * a task to be reinserted into the active array on timeslice
		 * completion.
		 */
		unsigned long ceiling = INTERACTIVE_SLEEP(p);

		if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) {
			/*
			 * Prevents user tasks from achieving best priority
			 * with one single large enough sleep.
			 */
			p->sleep_avg = ceiling;
			/*
			 * Using INTERACTIVE_SLEEP() as a ceiling places a
			 * nice(0) task 1ms sleep away from promotion, and
			 * gives it 700ms to round-robin with no chance of
			 * being demoted.  This is more than generous, so
			 * mark this sleep as non-interactive to prevent the
			 * on-runqueue bonus logic from intervening should
			 * this task not receive cpu immediately.
			 */
			p->sleep_type = SLEEP_NONINTERACTIVE;
		} else {
			/*
			 * Tasks waking from uninterruptible sleep are
			 * limited in their sleep_avg rise as they
			 * are likely to be waiting on I/O
			 */
			if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
				if (p->sleep_avg >= ceiling)
					sleep_time = 0;
				else if (p->sleep_avg + sleep_time >=
					 ceiling) {
					p->sleep_avg = ceiling;
					sleep_time = 0;
				}
			}

			/*
			 * This code gives a bonus to interactive tasks.
			 *
			 * The boost works by updating the 'average sleep time'
			 * value here, based on ->timestamp. The more time a
			 * task spends sleeping, the higher the average gets -
			 * and the higher the priority boost gets as well.
			 */
			p->sleep_avg += sleep_time;
		}
		if (p->sleep_avg > NS_MAX_SLEEP_AVG)
			p->sleep_avg = NS_MAX_SLEEP_AVG;
	}

	return effective_prio(p);
}

/*
 * activate_task - move a task to the runqueue and do priority recalculation
 *
 * Update all the scheduling statistics stuff. (sleep average
 * calculation, priority modifiers, etc.)
 */
static void activate_task(struct task_struct *p, struct rq *rq, int local)
{
	unsigned long long now;

	if (rt_task(p))
		goto out;

	now = sched_clock();
#ifdef CONFIG_SMP
	if (!local) {
		/* Compensate for drifting sched_clock */
		struct rq *this_rq = this_rq();
		now = (now - this_rq->most_recent_timestamp)
			+ rq->most_recent_timestamp;
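/*
 * Illustrative sketch (not part of sched.c): the sched_clock drift
 * compensation used by activate_task() above.  A timestamp taken on the
 * local cpu is rebased onto the target runqueue's clock by subtracting
 * the local reference stamp and adding the remote one, preserving the
 * offset between "now" and the local stamp.  All values below are made
 * up for the illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long now = 1000500;		/* sched_clock() on this cpu      */
	unsigned long long this_rq_ts = 1000000;	/* this cpu's most recent stamp   */
	unsigned long long rq_ts = 2000000;		/* target cpu's most recent stamp */

	/* same rebasing as activate_task(): keep the 500-unit offset, swap the base */
	unsigned long long rebased = (now - this_rq_ts) + rq_ts;

	printf("rebased timestamp: %llu\n", rebased);	/* 2000500 */
	return 0;
}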