📄 cgroup.c

📁 Kernel code of linux kernel
💻 C
📖 第 1 页 / 共 5 页
字号:
 * increment the count).  So code holding cgroup_mutex can safely * assume that if the count is zero, it will stay zero. Similarly, if * a task holds cgroup_mutex on a cgroup with zero count, it * knows that the cgroup won't be removed, as cgroup_rmdir() * needs that mutex. * * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't * (usually) take cgroup_mutex.  These are the two most performance * critical pieces of code here.  The exception occurs on cgroup_exit(), * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex * is taken, and if the cgroup count is zero, a usermode call made * to the release agent with the name of the cgroup (path relative to * the root of cgroup file system) as the argument. * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty.  Since all * tasks in the system use _some_ cgroup, and since there is always at * least one task in the system (init, pid == 1), therefore, top_cgroup * always has either children cgroups and/or using tasks.  So we don't * need a special hack to ensure that top_cgroup cannot be deleted. * *	The task_lock() exception * * The need for this exception arises from the action of * cgroup_attach_task(), which overwrites one tasks cgroup pointer with * another.  It does so using cgroup_mutex, however there are * several performance critical places that need to reference * task->cgroup without the expense of grabbing a system global * mutex.  Therefore except as noted below, when dereferencing or, as * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * * P.S.  One more locking exception.  RCU is used to guard the * update of a tasks cgroup pointer by cgroup_attach_task() *//** * cgroup_lock - lock out any changes to cgroup structures * */void cgroup_lock(void){	mutex_lock(&cgroup_mutex);}/** * cgroup_unlock - release lock on cgroup changes * * Undo the lock taken in a previous cgroup_lock() call. */void cgroup_unlock(void){	mutex_unlock(&cgroup_mutex);}/* * A couple of forward declarations required, due to cyclic reference loop: * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir -> * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations * -> cgroup_mkdir. */static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);static int cgroup_populate_dir(struct cgroup *cgrp);static struct inode_operations cgroup_dir_inode_operations;static struct file_operations proc_cgroupstats_operations;static struct backing_dev_info cgroup_backing_dev_info = {	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,};static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb){	struct inode *inode = new_inode(sb);	if (inode) {		inode->i_mode = mode;		inode->i_uid = current->fsuid;		inode->i_gid = current->fsgid;		inode->i_blocks = 0;		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;	}	return inode;}/* * Call subsys's pre_destroy handler. * This is called before css refcnt check. */static void cgroup_call_pre_destroy(struct cgroup *cgrp){	struct cgroup_subsys *ss;	for_each_subsys(cgrp->root, ss)		if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])			ss->pre_destroy(ss, cgrp);	return;}static void cgroup_diput(struct dentry *dentry, struct inode *inode){	/* is dentry a directory ? if so, kfree() associated cgroup */	if (S_ISDIR(inode->i_mode)) {		struct cgroup *cgrp = dentry->d_fsdata;		struct cgroup_subsys *ss;		BUG_ON(!(cgroup_is_removed(cgrp)));		/* It's possible for external users to be holding css		 * reference counts on a cgroup; css_put() needs to		 * be able to access the cgroup after decrementing		 * the reference count in order to know if it needs to		 * queue the cgroup to be handled by the release		 * agent */		synchronize_rcu();		mutex_lock(&cgroup_mutex);		/*		 * Release the subsystem state objects.		 */		for_each_subsys(cgrp->root, ss) {			if (cgrp->subsys[ss->subsys_id])				ss->destroy(ss, cgrp);		}		cgrp->root->number_of_cgroups--;		mutex_unlock(&cgroup_mutex);		/* Drop the active superblock reference that we took when we		 * created the cgroup */		deactivate_super(cgrp->root->sb);		kfree(cgrp);	}	iput(inode);}static void remove_dir(struct dentry *d){	struct dentry *parent = dget(d->d_parent);	d_delete(d);	simple_rmdir(parent->d_inode, d);	dput(parent);}static void cgroup_clear_directory(struct dentry *dentry){	struct list_head *node;	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));	spin_lock(&dcache_lock);	node = dentry->d_subdirs.next;	while (node != &dentry->d_subdirs) {		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);		list_del_init(node);		if (d->d_inode) {			/* This should never be called on a cgroup			 * directory with child cgroups */			BUG_ON(d->d_inode->i_mode & S_IFDIR);			d = dget_locked(d);			spin_unlock(&dcache_lock);			d_delete(d);			simple_unlink(dentry->d_inode, d);			dput(d);			spin_lock(&dcache_lock);		}		node = dentry->d_subdirs.next;	}	spin_unlock(&dcache_lock);}/* * NOTE : the dentry must have been dget()'ed */static void cgroup_d_remove_dir(struct dentry *dentry){	cgroup_clear_directory(dentry);	spin_lock(&dcache_lock);	list_del_init(&dentry->d_u.d_child);	spin_unlock(&dcache_lock);	remove_dir(dentry);}static int rebind_subsystems(struct cgroupfs_root *root,			      unsigned long final_bits){	unsigned long added_bits, removed_bits;	struct cgroup *cgrp = &root->top_cgroup;	int i;	removed_bits = root->actual_subsys_bits & ~final_bits;	added_bits = final_bits & ~root->actual_subsys_bits;	/* Check that any added subsystems are currently free */	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {		unsigned long bit = 1UL << i;		struct cgroup_subsys *ss = subsys[i];		if (!(bit & added_bits))			continue;		if (ss->root != &rootnode) {			/* Subsystem isn't free */			return -EBUSY;		}	}	/* Currently we don't handle adding/removing subsystems when	 * any child cgroups exist. This is theoretically supportable	 * but involves complex error handling, so it's being left until	 * later */	if (!list_empty(&cgrp->children))		return -EBUSY;	/* Process each subsystem */	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {		struct cgroup_subsys *ss = subsys[i];		unsigned long bit = 1UL << i;		if (bit & added_bits) {			/* We're binding this subsystem to this hierarchy */			BUG_ON(cgrp->subsys[i]);			BUG_ON(!dummytop->subsys[i]);			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);			cgrp->subsys[i] = dummytop->subsys[i];			cgrp->subsys[i]->cgroup = cgrp;			list_add(&ss->sibling, &root->subsys_list);			rcu_assign_pointer(ss->root, root);			if (ss->bind)				ss->bind(ss, cgrp);		} else if (bit & removed_bits) {			/* We're removing this subsystem */			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);			if (ss->bind)				ss->bind(ss, dummytop);			dummytop->subsys[i]->cgroup = dummytop;			cgrp->subsys[i] = NULL;			rcu_assign_pointer(subsys[i]->root, &rootnode);			list_del(&ss->sibling);		} else if (bit & final_bits) {			/* Subsystem state should already exist */			BUG_ON(!cgrp->subsys[i]);		} else {			/* Subsystem state shouldn't exist */			BUG_ON(cgrp->subsys[i]);		}	}	root->subsys_bits = root->actual_subsys_bits = final_bits;	synchronize_rcu();	return 0;}static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs){	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;	struct cgroup_subsys *ss;	mutex_lock(&cgroup_mutex);	for_each_subsys(root, ss)		seq_printf(seq, ",%s", ss->name);	if (test_bit(ROOT_NOPREFIX, &root->flags))		seq_puts(seq, ",noprefix");	if (strlen(root->release_agent_path))		seq_printf(seq, ",release_agent=%s", root->release_agent_path);	mutex_unlock(&cgroup_mutex);	return 0;}struct cgroup_sb_opts {	unsigned long subsys_bits;	unsigned long flags;	char *release_agent;};/* Convert a hierarchy specifier into a bitmask of subsystems and * flags. */static int parse_cgroupfs_options(char *data,				     struct cgroup_sb_opts *opts){	char *token, *o = data ?: "all";	opts->subsys_bits = 0;	opts->flags = 0;	opts->release_agent = NULL;	while ((token = strsep(&o, ",")) != NULL) {		if (!*token)			return -EINVAL;		if (!strcmp(token, "all")) {			/* Add all non-disabled subsystems */			int i;			opts->subsys_bits = 0;			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {				struct cgroup_subsys *ss = subsys[i];				if (!ss->disabled)					opts->subsys_bits |= 1ul << i;			}		} else if (!strcmp(token, "noprefix")) {			set_bit(ROOT_NOPREFIX, &opts->flags);		} else if (!strncmp(token, "release_agent=", 14)) {			/* Specifying two release agents is forbidden */			if (opts->release_agent)				return -EINVAL;			opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);			if (!opts->release_agent)				return -ENOMEM;			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);			opts->release_agent[PATH_MAX - 1] = 0;		} else {			struct cgroup_subsys *ss;			int i;			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {				ss = subsys[i];				if (!strcmp(token, ss->name)) {					if (!ss->disabled)						set_bit(i, &opts->subsys_bits);					break;				}			}			if (i == CGROUP_SUBSYS_COUNT)				return -ENOENT;		}	}	/* We can't have an empty hierarchy */	if (!opts->subsys_bits)		return -EINVAL;	return 0;}static int cgroup_remount(struct super_block *sb, int *flags, char *data){	int ret = 0;	struct cgroupfs_root *root = sb->s_fs_info;	struct cgroup *cgrp = &root->top_cgroup;	struct cgroup_sb_opts opts;	mutex_lock(&cgrp->dentry->d_inode->i_mutex);	mutex_lock(&cgroup_mutex);	/* See what subsystems are wanted */	ret = parse_cgroupfs_options(data, &opts);	if (ret)		goto out_unlock;	/* Don't allow flags to change at remount */	if (opts.flags != root->flags) {		ret = -EINVAL;		goto out_unlock;	}	ret = rebind_subsystems(root, opts.subsys_bits);	/* (re)populate subsystem files */	if (!ret)		cgroup_populate_dir(cgrp);	if (opts.release_agent)		strcpy(root->release_agent_path, opts.release_agent); out_unlock:	if (opts.release_agent)		kfree(opts.release_agent);	mutex_unlock(&cgroup_mutex);	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);	return ret;}static struct super_operations cgroup_ops = {	.statfs = simple_statfs,	.drop_inode = generic_delete_inode,	.show_options = cgroup_show_options,	.remount_fs = cgroup_remount,};static void init_cgroup_root(struct cgroupfs_root *root){	struct cgroup *cgrp = &root->top_cgroup;	INIT_LIST_HEAD(&root->subsys_list);	INIT_LIST_HEAD(&root->root_list);	root->number_of_cgroups = 1;	cgrp->root = root;	cgrp->top_cgroup = cgrp;	INIT_LIST_HEAD(&cgrp->sibling);	INIT_LIST_HEAD(&cgrp->children);	INIT_LIST_HEAD(&cgrp->css_sets);	INIT_LIST_HEAD(&cgrp->release_list);}static int cgroup_test_super(struct super_block *sb, void *data){	struct cgroupfs_root *new = data;	struct cgroupfs_root *root = sb->s_fs_info;	/* First check subsystems */	if (new->subsys_bits != root->subsys_bits)	    return 0;	/* Next check flags */	if (new->flags != root->flags)		return 0;	return 1;}static int cgroup_set_super(struct super_block *sb, void *data){	int ret;	struct cgroupfs_root *root = data;	ret = set_anon_super(sb, NULL);	if (ret)		return ret;	sb->s_fs_info = root;	root->sb = sb;	sb->s_blocksize = PAGE_CACHE_SIZE;	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;	sb->s_magic = CGROUP_SUPER_MAGIC;	sb->s_op = &cgroup_ops;	return 0;}static int cgroup_get_rootdir(struct super_block *sb){	struct inode *inode =		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);	struct dentry *dentry;	if (!inode)		return -ENOMEM;	inode->i_fop = &simple_dir_operations;	inode->i_op = &cgroup_dir_inode_operations;	/* directories start off with i_nlink == 2 (for "." entry) */	inc_nlink(inode);	dentry = d_alloc_root(inode);	if (!dentry) {		iput(inode);		return -ENOMEM;	}	sb->s_root = dentry;	return 0;}static int cgroup_get_sb(struct file_system_type *fs_type,			 int flags, const char *unused_dev_name,			 void *data, struct vfsmount *mnt){	struct cgroup_sb_opts opts;	int ret = 0;	struct super_block *sb;	struct cgroupfs_root *root;	struct list_head tmp_cg_links;	/* First find the desired set of subsystems */	ret = parse_cgroupfs_options(data, &opts);	if (ret) {		if (opts.release_agent)			kfree(opts.release_agent);		return ret;	}	root = kzalloc(sizeof(*root), GFP_KERNEL);	if (!root) {		if (opts.release_agent)			kfree(opts.release_agent);		return -ENOMEM;	}	init_cgroup_root(root);	root->subsys_bits = opts.subsys_bits;	root->flags = opts.flags;	if (opts.release_agent) {		strcpy(root->release_agent_path, opts.release_agent);		kfree(opts.release_agent);	}	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);	if (IS_ERR(sb)) {		kfree(root);		return PTR_ERR(sb);	}	if (sb->s_fs_info != root) {		/* Reusing an existing superblock */		BUG_ON(sb->s_root == NULL);		kfree(root);		root = NULL;	} else {		/* New superblock */		struct cgroup *cgrp = &root->top_cgroup;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -