cpuset.c
/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
 *  Copyright (C) 2006 Google, Inc
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *  2006 Rework by Paul Menage to use generic cgroups
 *  2008 Rework of the scheduler domains and CPU hotplug handling
 *       by Max Krasnyansky
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>

#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>

/*
 * Tracks how many cpusets are currently defined in system.
 * When there is only one cpuset (the root cpuset) we can
 * short circuit some hooks.
 */
int number_of_cpusets __read_mostly;

/* Forward declare cgroup structures */
struct cgroup_subsys cpuset_subsys;
struct cpuset;

/* See "Frequency meter" comments, below. */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};

struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */
	cpumask_t cpus_allowed;		/* CPUs allowed to tasks in cpuset */
	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */

	struct cpuset *parent;		/* my parent */

	/*
	 * Copy of global cpuset_mems_generation as of the most
	 * recent time this cpuset changed its mems_allowed.
	 */
	int mems_generation;

	struct fmeter fmeter;		/* memory_pressure filter */

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* used for walking a cpuset hierarchy */
	struct list_head stack_list;
};

/* Retrieve the cpuset for a cgroup */
static inline struct cpuset *cgroup_cs(struct cgroup *cont)
{
	return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
			    struct cpuset, css);
}

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
	return container_of(task_subsys_state(task, cpuset_subsys_id),
			    struct cpuset, css);
}

struct cpuset_hotplug_scanner {
	struct cgroup_scanner scan;
	struct cgroup *to;
};

/* bits in struct cpuset flags field */
typedef enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* convenient tests for these bits */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}

static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

/*
 * Increment this integer every time any cpuset changes its
 * mems_allowed value.  Users of cpusets can track this generation
 * number, and avoid having to lock and reload mems_allowed unless
 * the cpuset they're using changes generation.
 *
 * A single, global generation is needed because cpuset_attach_task() could
 * reattach a task to a different cpuset, which must not have its
 * generation numbers aliased with those of that task's previous cpuset.
 *
 * Generations are needed for mems_allowed because one task cannot
 * modify another's memory placement.  So we must enable every task,
 * on every visit to __alloc_pages(), to efficiently check whether
 * its current->cpuset->mems_allowed has changed, requiring an update
 * of its current->mems_allowed.
 *
 * Since writes to cpuset_mems_generation are guarded by the cgroup lock
 * there is no need to mark it atomic.
 */
static int cpuset_mems_generation;

static struct cpuset top_cpuset = {
	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
	.cpus_allowed = CPU_MASK_ALL,
	.mems_allowed = NODE_MASK_ALL,
};

/*
 * There are two global mutexes guarding cpuset structures.  The first
 * is the main control groups cgroup_mutex, accessed via
 * cgroup_lock()/cgroup_unlock().  The second is the cpuset-specific
 * callback_mutex, below.  They can nest.  It is ok to first take
 * cgroup_mutex, then nest callback_mutex.  We also require taking
 * task_lock() when dereferencing a task's cpuset pointer.  See "The
 * task_lock() exception", at the end of this comment.
 *
 * A task must hold both mutexes to modify cpusets.  If a task
 * holds cgroup_mutex, then it blocks others wanting that mutex,
 * ensuring that it is the only task able to also acquire callback_mutex
 * and be able to modify cpusets.  It can perform various checks on
 * the cpuset structure first, knowing nothing will change.
 * It can
 * also allocate memory while just holding cgroup_mutex.  While it is
 * performing these checks, various callback routines can briefly
 * acquire callback_mutex to query cpusets.  Once it is ready to make
 * the changes, it takes callback_mutex, blocking everyone else.
 *
 * Calls to the kernel memory allocator can not be made while holding
 * callback_mutex, as that would risk double tripping on callback_mutex
 * from one of the callbacks into the cpuset code from within
 * __alloc_pages().
 *
 * If a task is only holding callback_mutex, then it has read-only
 * access to cpusets.
 *
 * The task_struct fields mems_allowed and mems_generation may only
 * be accessed in the context of that task, so require no locks.
 *
 * The cpuset_common_file_read() handlers only hold callback_mutex across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
 * Accessing a task's cpuset should be done in accordance with the
 * guidelines for accessing subsystem state in kernel/cgroup.c
 */

static DEFINE_MUTEX(callback_mutex);

/*
 * This is ugly, but preserves the userspace API for existing cpuset
 * users.  If someone tries to mount the "cpuset" filesystem, we
 * silently switch it to mount "cgroup" instead
 */
static int cpuset_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	struct file_system_type *cgroup_fs = get_fs_type("cgroup");
	int ret = -ENODEV;

	if (cgroup_fs) {
		char mountopts[] =
			"cpuset,noprefix,"
			"release_agent=/sbin/cpuset_release_agent";
		ret = cgroup_fs->get_sb(cgroup_fs, flags,
					unused_dev_name, mountopts, mnt);
		put_filesystem(cgroup_fs);
	}
	return ret;
}

static struct file_system_type cpuset_fs_type = {
	.name = "cpuset",
	.get_sb = cpuset_get_sb,
};

/*
 * Return in *pmask the portion of a cpuset's cpus_allowed that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online cpus.  If we get
 * all the way to the top and still haven't found any online cpus,
 * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
 * task, return cpu_online_map.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_map.
 *
 * Call with callback_mutex held.
 */

static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
{
	while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
		cs = cs->parent;
	if (cs)
		cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
	else
		*pmask = cpu_online_map;
	BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
}

/*
 * Return in *pmask the portion of a cpuset's mems_allowed that
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  If we get all the way to the top and still haven't
 * found any online mems, return node_states[N_HIGH_MEMORY].
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_HIGH_MEMORY].
 *
 * Call with callback_mutex held.
 */

static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{
	while (cs && !nodes_intersects(cs->mems_allowed,
					node_states[N_HIGH_MEMORY]))
		cs = cs->parent;
	if (cs)
		nodes_and(*pmask, cs->mems_allowed,
				node_states[N_HIGH_MEMORY]);
	else
		*pmask = node_states[N_HIGH_MEMORY];
	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
}

/**
 * cpuset_update_task_memory_state - update task memory placement
 *
 * If the current task's cpuset's mems_allowed changed behind our
 * backs, update current->mems_allowed, mems_generation and task NUMA
 * mempolicy to the new value.
 *
 * Task mempolicy is updated by rebinding it relative to the
 * current->cpuset if a task has its memory placement changed.
 * Do not call this routine if in_interrupt().
 *
 * Call without callback_mutex or task_lock() held.  May be
 * called with or without cgroup_mutex held.  Thanks in part to
 * 'the_top_cpuset_hack', the task's cpuset pointer will never
 * be NULL.  This routine also might acquire callback_mutex during
 * call.
 *
 * Reading current->cpuset->mems_generation doesn't need task_lock
 * to guard the current->cpuset dereference, because it is guarded
 * from concurrent freeing of current->cpuset using RCU.
 *
 * The rcu_dereference() is technically probably not needed,
 * as I don't actually mind if I see a new cpuset pointer but
 * an old value of mems_generation.  However this really only
 * matters on alpha systems using cpusets heavily.  If I dropped
 * that rcu_dereference(), it would save them a memory barrier.
 * For all other arch's, rcu_dereference is a no-op anyway, and for
 * alpha systems not using cpusets, another planned optimization,
 * avoiding the rcu critical section for tasks in the root cpuset
 * which is statically allocated, so can't vanish, will make this
 * irrelevant.  Better to use RCU as intended, than to engage in
 * some cute trick to save a memory barrier that is impossible to
 * test, for alpha systems using cpusets heavily, which might not
 * even exist.
 *
 * This routine is needed to update the per-task mems_allowed data,
 * within the task's context, when it is trying to allocate memory
 * (in various mm/mempolicy.c routines) and notices that some other
 * task has been modifying its cpuset.
 */

void cpuset_update_task_memory_state(void)
{
	int my_cpusets_mem_gen;
	struct task_struct *tsk = current;
	struct cpuset *cs;

	if (task_cs(tsk) == &top_cpuset) {
		/* Don't need rcu for top_cpuset.  It's never freed. */
		my_cpusets_mem_gen = top_cpuset.mems_generation;
	} else {
		rcu_read_lock();
		my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
		rcu_read_unlock();
	}

	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
		mutex_lock(&callback_mutex);
		task_lock(tsk);
		cs = task_cs(tsk); /* Maybe changed when task not locked */
		guarantee_online_mems(cs, &tsk->mems_allowed);
		tsk->cpuset_mems_generation = cs->mems_generation;
		if (is_spread_page(cs))
			tsk->flags |= PF_SPREAD_PAGE;
		else
			tsk->flags &= ~PF_SPREAD_PAGE;
		if (is_spread_slab(cs))
			tsk->flags |= PF_SPREAD_SLAB;
		else
			tsk->flags &= ~PF_SPREAD_SLAB;
		task_unlock(tsk);
		mutex_unlock(&callback_mutex);
		mpol_rebind_task(tsk, &tsk->mems_allowed);
	}
}

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding cgroup_mutex.
 */

static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	return	cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
		nodes_subset(p->mems_allowed, q->mems_allowed) &&
		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
		is_mem_exclusive(p) <= is_mem_exclusive(q);
}

/*
 * validate_change() - Used to validate that any proposed cpuset change
 *		       follows the structural rules for cpusets.
 *
 * If we replaced the flag and mask values of the current cpuset
 * (cur) with those values in the trial cpuset (trial), would
 * our various subset and exclusive rules still be valid?  Presumes
 * cgroup_mutex held.
 *
 * 'cur' is the address of an actual, in-use cpuset.  Operations
 * such as list traversal that depend on the actual address of the
 * cpuset in the list must use cur below, not trial.
 *
 * 'trial' is the address of bulk structure copy of cur, with
 * perhaps one or more of the fields cpus_allowed, mems_allowed,
 * or flags changed to new, trial values.
 *
 * Return 0 if valid, -errno if not.
 */

static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
{
	struct cgroup *cont;
	struct cpuset *c, *par;

	/* Each of our child cpusets must be a subset of us */
	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
		if (!is_cpuset_subset(cgroup_cs(cont), trial))
			return -EBUSY;
	}

	/* Remaining checks don't apply to root cpuset */
	if (cur == &top_cpuset)
		return 0;

	par = cur->parent;

	/* We must be a subset of our parent cpuset */
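/*
 * Standalone, user-space sketch (not part of cpuset.c): the generation-
 * number pattern that the cpuset_mems_generation comment above describes
 * for mems_allowed.  A reader caches the generation it last saw and only
 * takes the lock to re-copy the mask when the global generation has moved
 * on.  All names here (placement_gen, toy_task, refresh_if_stale, ...) are
 * illustrative, not kernel APIs, and this single-threaded demo omits the
 * RCU details discussed above.
 */
#include <assert.h>
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t placement_lock = PTHREAD_MUTEX_INITIALIZER;
static int placement_gen = 1;			/* bumped on every change */
static uint64_t placement_mask = 0x0f;		/* currently allowed nodes */

struct toy_task {
	int cached_gen;				/* generation last copied */
	uint64_t cached_mask;			/* private copy of the mask */
};

/* Fast path on every "allocation": lock and re-copy only when stale. */
static void refresh_if_stale(struct toy_task *t)
{
	if (t->cached_gen == placement_gen)
		return;
	pthread_mutex_lock(&placement_lock);
	t->cached_mask = placement_mask;
	t->cached_gen = placement_gen;
	pthread_mutex_unlock(&placement_lock);
}

/* Writer side: change the mask, then advance the generation. */
static void change_placement(uint64_t new_mask)
{
	pthread_mutex_lock(&placement_lock);
	placement_mask = new_mask;
	placement_gen++;
	pthread_mutex_unlock(&placement_lock);
}

int main(void)
{
	struct toy_task t = { 0, 0 };

	refresh_if_stale(&t);
	assert(t.cached_mask == 0x0f);

	change_placement(0x03);		/* placement changes elsewhere */
	refresh_if_stale(&t);		/* stale generation forces a re-copy */
	assert(t.cached_mask == 0x03);
	return 0;
}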
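/*
 * Standalone, user-space sketch (illustrative only) of the walk-up-the-
 * hierarchy fallback used by guarantee_online_cpus()/guarantee_online_mems()
 * above: climb towards the root until some ancestor intersects the online
 * mask, otherwise fall back to the full online mask.  toy_node and
 * toy_guarantee_online are hypothetical names; plain bitmasks stand in
 * for cpumask_t/nodemask_t.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct toy_node {
	uint64_t allowed;
	struct toy_node *parent;
};

static uint64_t toy_guarantee_online(const struct toy_node *n, uint64_t online)
{
	while (n && (n->allowed & online) == 0)
		n = n->parent;
	return n ? (n->allowed & online) : online;
}

int main(void)
{
	struct toy_node root  = { .allowed = 0xff, .parent = NULL };
	struct toy_node child = { .allowed = 0x30, .parent = &root };

	/* CPUs 4-5 offline: the child's mask is empty, so the root's is used. */
	assert(toy_guarantee_online(&child, 0x0f) == 0x0f);
	/* CPU 4 online: the intersection with the child's own mask is used. */
	assert(toy_guarantee_online(&child, 0x1f) == 0x10);
	return 0;
}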
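/*
 * Standalone, user-space sketch (not part of cpuset.c) of the structural
 * rule that is_cpuset_subset() and validate_change() enforce above: masks
 * must be contained, and a cpuset may only be exclusive if the cpuset it is
 * compared against is exclusive too (hence the integer "<=" on the 0/1 flag
 * values).  toy_cpuset and toy_is_subset are hypothetical names modelling
 * cpus/mems as plain bitmasks.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct toy_cpuset {
	uint64_t cpus;		/* stands in for cpus_allowed */
	uint64_t mems;		/* stands in for mems_allowed */
	bool cpu_exclusive;
	bool mem_exclusive;
};

/* p is a subset of q: masks contained, exclusive flags only set if q's are */
static bool toy_is_subset(const struct toy_cpuset *p, const struct toy_cpuset *q)
{
	return (p->cpus & ~q->cpus) == 0 &&
	       (p->mems & ~q->mems) == 0 &&
	       p->cpu_exclusive <= q->cpu_exclusive &&
	       p->mem_exclusive <= q->mem_exclusive;
}

int main(void)
{
	struct toy_cpuset parent = { .cpus = 0x0f, .mems = 0x3 };
	struct toy_cpuset child  = { .cpus = 0x03, .mems = 0x1 };

	assert(toy_is_subset(&child, &parent));

	/* A child may not claim cpu_exclusive when its parent does not. */
	child.cpu_exclusive = true;
	assert(!toy_is_subset(&child, &parent));
	return 0;
}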