📄 sched.c
字号:
/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/pid_namespace.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

/*
 * Global runqueue of contexts waiting for an spu, indexed by priority.
 */
struct spu_prio_array {
	/* bit 'prio' is set while runq[prio] holds at least one context */
	DECLARE_BITMAP(bitmap, MAX_PRIO);
	/* one list per priority; contexts are linked through ctx->rq */
	struct list_head runq[MAX_PRIO];
	/* guards the runqueue; also reused to guard ctx->cpus_allowed */
	spinlock_t runq_lock;
	/*
	 * Number of queued contexts.  spusched_timer is armed when this
	 * rises from zero and deleted when it drops back to zero.
	 */
	int nr_waiting;
};

static unsigned long spu_avenrun[3];
static struct spu_prio_array *spu_prio;
static struct task_struct *spusched_task;
static struct timer_list spusched_timer;

/*
 * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
 */
#define NORMAL_PRIO		120

/*
 * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
 * tick for every 10 CPU scheduler ticks.
*/#define SPUSCHED_TICK (10)/* * These are the 'tuning knobs' of the scheduler: * * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. */#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1)#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO)#define SCALE_PRIO(x, prio) \ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)/* * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: * [800ms ... 100ms ... 5ms] * * The higher a thread's priority, the bigger timeslices * it gets during one round of execution. But even the lowest * priority thread gets MIN_TIMESLICE worth of execution time. */void spu_set_timeslice(struct spu_context *ctx){ if (ctx->prio < NORMAL_PRIO) ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); else ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);}/* * Update scheduling information from the owning thread. */void __spu_update_sched_info(struct spu_context *ctx){ /* * 32-Bit assignment are atomic on powerpc, and we don't care about * memory ordering here because retriving the controlling thread is * per defintion racy. */ ctx->tid = current->pid; /* * We do our own priority calculations, so we normally want * ->static_prio to start with. Unfortunately thies field * contains junk for threads with a realtime scheduling * policy so we have to look at ->prio in this case. */ if (rt_prio(current->prio)) ctx->prio = current->prio; else ctx->prio = current->static_prio; ctx->policy = current->policy; /* * A lot of places that don't hold list_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. 
*/ spin_lock(&spu_prio->runq_lock); ctx->cpus_allowed = current->cpus_allowed; spin_unlock(&spu_prio->runq_lock);}void spu_update_sched_info(struct spu_context *ctx){ int node = ctx->spu->node; mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); mutex_unlock(&cbe_spu_info[node].list_mutex);}static int __node_allowed(struct spu_context *ctx, int node){ if (nr_cpus_node(node)) { cpumask_t mask = node_to_cpumask(node); if (cpus_intersects(mask, ctx->cpus_allowed)) return 1; } return 0;}static int node_allowed(struct spu_context *ctx, int node){ int rval; spin_lock(&spu_prio->runq_lock); rval = __node_allowed(ctx, node); spin_unlock(&spu_prio->runq_lock); return rval;}void do_notify_spus_active(void){ int node; /* * Wake up the active spu_contexts. * * When the awakened processes see their "notify_active" flag is set, * they will call spu_switch_notify(); */ for_each_online_node(node) { struct spu *spu; mutex_lock(&cbe_spu_info[node].list_mutex); list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if (spu->alloc_state != SPU_FREE) { struct spu_context *ctx = spu->ctx; set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags); mb(); wake_up_all(&ctx->stop_wq); } } mutex_unlock(&cbe_spu_info[node].list_mutex); }}/** * spu_bind_context - bind spu context to physical spu * @spu: physical spu to bind to * @ctx: context to bind */static void spu_bind_context(struct spu *spu, struct spu_context *ctx){ pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); if (ctx->flags & SPU_CREATE_NOSCHED) atomic_inc(&cbe_spu_info[spu->node].reserved_spus); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; spu->ctx = ctx; spu->flags = 0; ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; spu->tgid = current->tgid; spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = 
spufs_wbox_callback; spu->stop_callback = spufs_stop_callback; spu->mfc_callback = spufs_mfc_callback; spu->dma_callback = spufs_dma_callback; mb(); spu_unmap_mappings(ctx); spu_restore(&ctx->csa, spu); spu->timestamp = jiffies; spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);}/* * Must be used with the list_mutex held. */static inline int sched_spu(struct spu *spu){ BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));}static void aff_merge_remaining_ctxs(struct spu_gang *gang){ struct spu_context *ctx; list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { if (list_empty(&ctx->aff_list)) list_add(&ctx->aff_list, &gang->aff_list_head); } gang->aff_flags |= AFF_MERGED;}static void aff_set_offsets(struct spu_gang *gang){ struct spu_context *ctx; int offset; offset = -1; list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, aff_list) { if (&ctx->aff_list == &gang->aff_list_head) break; ctx->aff_offset = offset--; } offset = 0; list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { if (&ctx->aff_list == &gang->aff_list_head) break; ctx->aff_offset = offset++; } gang->aff_flags |= AFF_OFFSETS_SET;}static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, int group_size, int lowest_offset){ struct spu *spu; int node, n; /* * TODO: A better algorithm could be used to find a good spu to be * used as reference location for the ctxs chain. */ node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? 
node : 0; if (!node_allowed(ctx, node)) continue; mutex_lock(&cbe_spu_info[node].list_mutex); list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if ((!mem_aff || spu->has_mem_affinity) && sched_spu(spu)) { mutex_unlock(&cbe_spu_info[node].list_mutex); return spu; } } mutex_unlock(&cbe_spu_info[node].list_mutex); } return NULL;}static void aff_set_ref_point_location(struct spu_gang *gang){ int mem_aff, gs, lowest_offset; struct spu_context *ctx; struct spu *tmp; mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; lowest_offset = 0; gs = 0; list_for_each_entry(tmp, &gang->aff_list_head, aff_list) gs++; list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, aff_list) { if (&ctx->aff_list == &gang->aff_list_head) break; lowest_offset = ctx->aff_offset; } gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, lowest_offset);}static struct spu *ctx_location(struct spu *ref, int offset, int node){ struct spu *spu; spu = NULL; if (offset >= 0) { list_for_each_entry(spu, ref->aff_list.prev, aff_list) { BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu)) offset--; } } else { list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu)) offset++; } } return spu;}/* * affinity_check is called each time a context is going to be scheduled. * It returns the spu ptr on which the context must run. 
 */
static int has_affinity(struct spu_context *ctx)
{
	struct spu_gang *gang = ctx->gang;

	/* a context that is on no affinity list has no affinity */
	if (list_empty(&ctx->aff_list))
		return 0;

	/* set up the gang's reference spu on first use */
	if (!gang->aff_ref_spu) {
		if (!(gang->aff_flags & AFF_MERGED))
			aff_merge_remaining_ctxs(gang);
		if (!(gang->aff_flags & AFF_OFFSETS_SET))
			aff_set_offsets(gang);
		aff_set_ref_point_location(gang);
	}

	return gang->aff_ref_spu != NULL;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);
	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);

	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);

	if (ctx->gang){
		mutex_lock(&ctx->gang->aff_mutex);
		if (has_affinity(ctx)) {
			/* last scheduled member: drop the reference spu */
			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
				ctx->gang->aff_ref_spu = NULL;
		}
		mutex_unlock(&ctx->gang->aff_mutex);
	}

	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu_associate_mm(spu, NULL);
	spu->pid = 0;
	spu->tgid = 0;
	ctx->ops = &spu_backing_ops;
	spu->flags = 0;
	spu->ctx = NULL;

	/* account the events seen on this spu since the context was bound */
	ctx->stats.slb_flt +=
		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
	ctx->stats.class2_intr +=
		(spu->stats.class2_intr - ctx->stats.class2_intr_base);

	/* This maps the underlying spu state to idle */
	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
	ctx->spu = NULL;
}

/**
 * __spu_add_to_rq - add a context to the runqueue
 * @ctx:       context to add
 *
 * Does no locking itself.
 */
static void __spu_add_to_rq(struct spu_context *ctx)
{
	/*
	 * Unfortunately this code path can be called from multiple threads
	 * on behalf of a single context due to the way the problem state
	 * mmap support works.
	 *
	 * Fortunately we need to wake up all these threads at the same time
	 * and can simply skip the runqueue addition for every but the first
	 * thread getting into this codepath.
	 *
	 * It's still quite hacky, and long-term we should proxy all other
	 * threads through the owner thread so that spu_run is in control
	 * of all the scheduling activity for a given context.
	 */
	if (list_empty(&ctx->rq)) {
		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
		set_bit(ctx->prio, spu_prio->bitmap);
		/* first waiter: arm the scheduler tick timer */
		if (!spu_prio->nr_waiting++)
			__mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
	}
}

/*
 * Remove @ctx from the runqueue if it is queued.  Does no locking
 * itself.
 */
static void __spu_del_from_rq(struct spu_context *ctx)
{
	int prio = ctx->prio;

	if (!list_empty(&ctx->rq)) {
		/* last waiter gone: the tick timer is no longer needed */
		if (!--spu_prio->nr_waiting)
			del_timer(&spusched_timer);
		list_del_init(&ctx->rq);

		/* keep the bitmap in sync with the per-priority list */
		if (list_empty(&spu_prio->runq[prio]))
			clear_bit(prio, spu_prio->bitmap);
	}
}

static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	spin_lock(&spu_prio->runq_lock);
	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		__spu_add_to_rq(ctx);
		spin_unlock(&spu_prio->runq_lock);
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
		spin_lock(&spu_prio->runq_lock);
		__spu_del_from_rq(ctx);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -