cpuset.c
/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>

#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <linux/mutex.h>

#define CPUSET_SUPER_MAGIC	0x27e0eb

/*
 * Tracks how many cpusets are currently defined in the system.
 * When there is only one cpuset (the root cpuset) we can
 * short circuit some hooks.
 */
int number_of_cpusets __read_mostly;

/* See "Frequency meter" comments, below. */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};

struct cpuset {
	unsigned long flags;		/* "unsigned long" so bitops work */
	cpumask_t cpus_allowed;		/* CPUs allowed to tasks in cpuset */
	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */

	/*
	 * Count is atomic so can incr (fork) or decr (exit) without a lock.
	 */
	atomic_t count;			/* count tasks using this cpuset */

	/*
	 * We link our 'sibling' struct into our parent's 'children'.
	 * Our children link their 'sibling' into our 'children'.
	 */
	struct list_head sibling;	/* my parent's children */
	struct list_head children;	/* my children */

	struct cpuset *parent;		/* my parent */
	struct dentry *dentry;		/* cpuset fs entry */

	/*
	 * Copy of global cpuset_mems_generation as of the most
	 * recent time this cpuset changed its mems_allowed.
	 */
	int mems_generation;

	struct fmeter fmeter;		/* memory_pressure filter */
};

/* bits in struct cpuset flags field */
typedef enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEMORY_MIGRATE,
	CS_REMOVED,
	CS_NOTIFY_ON_RELEASE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* convenient tests for these bits */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_removed(const struct cpuset *cs)
{
	return test_bit(CS_REMOVED, &cs->flags);
}

static inline int notify_on_release(const struct cpuset *cs)
{
	return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

/*
 * Increment this integer every time any cpuset changes its
 * mems_allowed value.  Users of cpusets can track this generation
 * number, and avoid having to lock and reload mems_allowed unless
 * the cpuset they're using changes generation.
 *
 * A single, global generation is needed because attach_task() could
 * reattach a task to a different cpuset, which must not have its
 * generation numbers aliased with those of that task's previous cpuset.
 *
 * Generations are needed for mems_allowed because one task cannot
 * modify another's memory placement.  So we must enable every task,
 * on every visit to __alloc_pages(), to efficiently check whether
 * its current->cpuset->mems_allowed has changed, requiring an update
 * of its current->mems_allowed.
 *
 * Since cpuset_mems_generation is guarded by manage_mutex,
 * there is no need to mark it atomic.
 */
static int cpuset_mems_generation;
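/*
 * Illustrative sketch, not part of cpuset.c: the generation-number idiom
 * described above, reduced to its essentials.  A reader keeps the
 * generation it last saw and only takes the lock to re-copy state when
 * the global counter has moved on.  All example_* names are hypothetical.
 */
#if 0	/* example only, compiled out */
static DEFINE_MUTEX(example_mutex);
static int example_gen;		/* bumped under example_mutex on each change */
static int example_state;	/* the state the generation count guards */

struct example_cache {
	int cached_gen;		/* generation at which the copy was taken */
	int cached_state;	/* private copy of example_state */
};

static void example_refresh(struct example_cache *c)
{
	if (c->cached_gen == example_gen)
		return;			/* fast path: nothing changed, no lock */
	mutex_lock(&example_mutex);	/* slow path: re-copy under the lock */
	c->cached_state = example_state;
	c->cached_gen = example_gen;
	mutex_unlock(&example_mutex);
}
#endif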
static struct cpuset top_cpuset = {
	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
	.cpus_allowed = CPU_MASK_ALL,
	.mems_allowed = NODE_MASK_ALL,
	.count = ATOMIC_INIT(0),
	.sibling = LIST_HEAD_INIT(top_cpuset.sibling),
	.children = LIST_HEAD_INIT(top_cpuset.children),
};

static struct vfsmount *cpuset_mount;
static struct super_block *cpuset_sb;

/*
 * We have two global cpuset mutexes below.  They can nest.
 * It is ok to first take manage_mutex, then nest callback_mutex.  We also
 * require taking task_lock() when dereferencing a task's cpuset pointer.
 * See "The task_lock() exception", at the end of this comment.
 *
 * A task must hold both mutexes to modify cpusets.  If a task
 * holds manage_mutex, then it blocks others wanting that mutex,
 * ensuring that it is the only task able to also acquire callback_mutex
 * and be able to modify cpusets.  It can perform various checks on
 * the cpuset structure first, knowing nothing will change.  It can
 * also allocate memory while just holding manage_mutex.  While it is
 * performing these checks, various callback routines can briefly
 * acquire callback_mutex to query cpusets.  Once it is ready to make
 * the changes, it takes callback_mutex, blocking everyone else.
 *
 * Calls to the kernel memory allocator cannot be made while holding
 * callback_mutex, as that would risk double tripping on callback_mutex
 * from one of the callbacks into the cpuset code from within
 * __alloc_pages().
 *
 * If a task is only holding callback_mutex, then it has read-only
 * access to cpusets.
 *
 * The task_struct fields mems_allowed and mems_generation may only
 * be accessed in the context of that task, so they require no locks.
 *
 * Any task can increment and decrement the count field without lock.
 * So in general, code holding manage_mutex or callback_mutex can't rely
 * on the count field not changing.  However, if the count goes to
 * zero, then only attach_task(), which holds both mutexes, can
 * increment it again.  A count of zero means that no tasks are
 * currently attached, so there is no way a task attached to that
 * cpuset can fork (the other way to increment the count).
 * So code holding manage_mutex or callback_mutex can safely assume that
 * if the count is zero, it will stay zero.  Similarly, if a task
 * holds manage_mutex or callback_mutex on a cpuset with zero count, it
 * knows that the cpuset won't be removed, as cpuset_rmdir() needs
 * both of those mutexes.
 *
 * The cpuset_common_file_write() handler for operations that modify
 * the cpuset hierarchy holds manage_mutex across the entire operation,
 * single-threading all such cpuset modifications across the system.
 *
 * The cpuset_common_file_read() handlers only hold callback_mutex across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
 * The fork and exit callbacks, cpuset_fork() and cpuset_exit(), don't
 * (usually) take either mutex.  These are the two most performance
 * critical pieces of code here.  The exception occurs on cpuset_exit(),
 * when a task in a notify_on_release cpuset exits.  Then manage_mutex
 * is taken, and if the cpuset count is zero, a usermode call is made
 * to /sbin/cpuset_release_agent with the name of the cpuset (path
 * relative to the root of the cpuset file system) as the argument.
 *
 * A cpuset can only be deleted if both its 'count' of using tasks
 * is zero, and its list of 'children' cpusets is empty.  Since all
 * tasks in the system use _some_ cpuset, and since there is always at
 * least one task in the system (init), top_cpuset always has either
 * child cpusets or using tasks (or both).  So we don't need a special
 * hack to ensure that top_cpuset cannot be deleted.
 *
 * The above "Tale of Two Mutexes" would be complete, but for:
 *
 *	The task_lock() exception
 *
 * The need for this exception arises from the action of attach_task(),
 * which overwrites one task's cpuset pointer with another.  It does
 * so using both mutexes, however there are several performance
 * critical places that need to reference task->cpuset without the
 * expense of grabbing a system global mutex.  Therefore except as
 * noted below, when dereferencing or, as in attach_task(), modifying
 * a task's cpuset pointer we use task_lock(), which acts on a spinlock
 * (task->alloc_lock) already in the task_struct routinely used for
 * such matters.
 *
 * P.S.  One more locking exception.  RCU is used to guard the
 * update of a task's cpuset pointer by attach_task() and the
 * access of task->cpuset->mems_generation via that pointer in
 * the routine cpuset_update_task_memory_state().
 */

static DEFINE_MUTEX(manage_mutex);
static DEFINE_MUTEX(callback_mutex);
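/*
 * Illustrative sketch, not part of cpuset.c: the nesting order that the
 * comment above prescribes.  A writer takes manage_mutex first, may
 * allocate while holding only that, and nests callback_mutex around the
 * actual update; readers take callback_mutex alone for short sections.
 * example_modify_cpuset() is a hypothetical name.
 */
#if 0	/* example only, compiled out */
static void example_modify_cpuset(void)
{
	mutex_lock(&manage_mutex);	/* excludes all other modifiers */
	/* ... validate the request, allocate any needed memory ... */
	mutex_lock(&callback_mutex);	/* briefly excludes readers too */
	/* ... apply the change to the cpuset hierarchy ... */
	mutex_unlock(&callback_mutex);
	mutex_unlock(&manage_mutex);
}
#endif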
/*
 * A couple of forward declarations required, due to cyclic reference loop:
 * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
 * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
 */

static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);

static struct backing_dev_info cpuset_backing_dev_info = {
	.ra_pages = 0,		/* No readahead */
	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

static struct inode *cpuset_new_inode(mode_t mode)
{
	struct inode *inode = new_inode(cpuset_sb);

	if (inode) {
		inode->i_mode = mode;
		inode->i_uid = current->fsuid;
		inode->i_gid = current->fsgid;
		inode->i_blocks = 0;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
	}
	return inode;
}

static void cpuset_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory? if so, kfree() associated cpuset */
	if (S_ISDIR(inode->i_mode)) {
		struct cpuset *cs = dentry->d_fsdata;
		BUG_ON(!(is_removed(cs)));
		kfree(cs);
	}
	iput(inode);
}

static struct dentry_operations cpuset_dops = {
	.d_iput = cpuset_diput,
};

static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
{
	struct dentry *d = lookup_one_len(name, parent, strlen(name));

	if (!IS_ERR(d))
		d->d_op = &cpuset_dops;
	return d;
}

static void remove_dir(struct dentry *d)
{
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}

/*
 * NOTE: the dentry must have been dget()'ed
 */
static void cpuset_d_remove_dir(struct dentry *dentry)
{
	struct list_head *node;

	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		list_del_init(node);
		if (d->d_inode) {
			d = dget_locked(d);
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		node = dentry->d_subdirs.next;
	}
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}

static struct super_operations cpuset_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
};

static int cpuset_fill_super(struct super_block *sb, void *unused_data,
							int unused_silent)
{
	struct inode *inode;
	struct dentry *root;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CPUSET_SUPER_MAGIC;
	sb->s_op = &cpuset_ops;
	cpuset_sb = sb;

	inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
	if (inode) {
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		/* directories start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);
	} else {
		return -ENOMEM;
	}

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = root;
	return 0;
}

static int cpuset_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
}

static struct file_system_type cpuset_fs_type = {
	.name = "cpuset",
	.get_sb = cpuset_get_sb,
	.kill_sb = kill_litter_super,
};
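/*
 * Illustrative usage, not part of cpuset.c: once cpuset_fs_type is
 * registered, userspace reaches the hierarchy by mounting it, e.g.
 *
 *	mount -t cpuset cpuset /dev/cpuset
 *
 * after which mkdir/rmdir under the mount point create and remove
 * cpusets via the cpuset_mkdir()/cpuset_rmdir() hooks declared above.
 */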
/* struct cftype:
 *
 * The files in the cpuset filesystem mostly have a very simple read/write
 * handling; some common function will take care of it.  Nevertheless some
 * cases (e.g. reading the tasks file) are special, and therefore this
 * structure is defined for every kind of file.
 *
 * When reading/writing to a file:
 *	- the cpuset to use is in file->f_path.dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is in file->f_path.dentry->d_fsdata
 */

struct cftype {
	char *name;
	int private;
	int (*open) (struct inode *inode, struct file *file);
	ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
							loff_t *ppos);
	int (*write) (struct file *file, const char __user *buf, size_t nbytes,
							loff_t *ppos);
	int (*release) (struct inode *inode, struct file *file);
};

static inline struct cpuset *__d_cs(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
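/*
 * Illustrative sketch, not part of cpuset.c: what a table entry for one
 * control file could look like using struct cftype above.  The names
 * example_read and cft_example are hypothetical; the 'private' field is
 * a tag that shared handlers can use to tell the files apart.
 */
#if 0	/* example only, compiled out */
static ssize_t example_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	return 0;		/* would format this cpuset's data for userspace */
}

static struct cftype cft_example = {
	.name = "example",	/* file name within each cpuset directory */
	.private = 0,		/* tag telling a shared handler which file */
	.read = example_read,
};
#endif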
/*
 * Call with manage_mutex held.  Writes path of cpuset into buf.
 * Returns 0 on success, -errno on error.
 */
static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
{
	char *start;