📄 cgroup.c
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}

/*
 * Stuff for reading the 'tasks' file.
 *
 * Reading this file can return large amounts of data if a cgroup has
 * *lots* of attached tasks. So it may need several calls to read(),
 * but we cannot guarantee that the information we produce is correct
 * unless we produce it entirely atomically.
 *
 * Upon tasks file open(), a struct ctr_struct is allocated, that
 * will have a pointer to an array (also allocated here). The struct
 * ctr_struct * is stored in file->private_data. Its resources will
 * be freed by release() when the file is closed. The array is used
 * to sprintf the PIDs and then used by read().
 */
struct ctr_struct {
	char *buf;
	int bufsz;
};

/*
 * Load into 'pidarray' up to 'npids' of the tasks using cgroup
 * 'cgrp'. Return actual number of pids loaded. No need to
 * task_lock(p) when reading out p->cgroup, since we're in an RCU
 * read section, so the css_set can't go away, and is
 * immutable after creation.
 */
static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
{
	int n = 0;
	struct cgroup_iter it;
	struct task_struct *tsk;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == npids))
			break;
		pidarray[n++] = task_pid_vnr(tsk);
	}
	cgroup_iter_end(cgrp, &it);
	return n;
}

/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations
	 */
	if (dentry->d_sb->s_op != &cgroup_ops)
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;
	rcu_read_lock();

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);
	rcu_read_unlock();

err:
	return ret;
}

static int cmppid(const void *a, const void *b)
{
	return *(pid_t *)a - *(pid_t *)b;
}

/*
 * Convert array 'a' of 'npids' pid_t's to a string of newline separated
 * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
 * count 'cnt' of how many chars would be written if buf were large enough.
 */
static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
{
	int cnt = 0;
	int i;

	for (i = 0; i < npids; i++)
		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
	return cnt;
}

/*
 * Handle an open on 'tasks' file. Prepare a buffer listing the
 * process id's of tasks currently attached to the cgroup being opened.
 *
 * Does not require any specific cgroup mutexes, and does not take any.
 */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct ctr_struct *ctr;
	pid_t *pidarray;
	int npids;
	char c;

	if (!(file->f_mode & FMODE_READ))
		return 0;

	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
	if (!ctr)
		goto err0;

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough. This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	npids = cgroup_task_count(cgrp);
	if (npids) {
		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
		if (!pidarray)
			goto err1;

		npids = pid_array_load(pidarray, npids, cgrp);
		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);

		/* Call pid_array_to_buf() twice, first just to get bufsz */
		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
		if (!ctr->buf)
			goto err2;
		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);

		kfree(pidarray);
	} else {
		ctr->buf = NULL;
		ctr->bufsz = 0;
	}
	file->private_data = ctr;
	return 0;

err2:
	kfree(pidarray);
err1:
	kfree(ctr);
err0:
	return -ENOMEM;
}

static ssize_t cgroup_tasks_read(struct cgroup *cgrp, struct cftype *cft,
				 struct file *file, char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct ctr_struct *ctr = file->private_data;

	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
}

static int cgroup_tasks_release(struct inode *unused_inode, struct file *file)
{
	struct ctr_struct *ctr;

	if (file->f_mode & FMODE_READ) {
		ctr = file->private_data;
		kfree(ctr->buf);
		kfree(ctr);
	}
	return 0;
}

static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					 struct cftype *cft)
{
	return notify_on_release(cgrp);
}

static int cgroup_write_notify_on_release(struct cgroup *cgrp,
					  struct cftype *cft, u64 val)
{
	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	return 0;
}

/*
 * for the common functions, 'private' gives the type of file
 */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.read = cgroup_tasks_read,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_tasks_release,
		.private = FILE_TASKLIST,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
		.private = FILE_NOTIFY_ON_RELEASE,
	},
};

static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
	.private = FILE_RELEASE_AGENT,
};

static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}

	return 0;
}

static void init_cgroup_css(struct cgroup_subsys_state *css,
			    struct cgroup_subsys *ss,
			    struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 0);
	css->flags = 0;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}

/*
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			  int mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/* Grab a reference on the superblock so the hierarchy doesn't
	 * get deleted on unmount if there are child cgroups. This
	 * can be done outside cgroup_mutex, since the sb can't
	 * disappear while someone has an open control file on the
	 * fs */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
	}

	list_add(&cgrp->sibling, &cgrp->parent->children);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

err_remove:
	list_del(&cgrp->sibling);
	root->number_of_cgroups--;

err_destroy:
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}

static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}

static int cgroup_has_css_refs(struct cgroup *cgrp)
{
	/* Check the reference count on each subsystem. Since we
	 * already established that there are no tasks in the
	 * cgroup, if the css refcount is also 0, then there should
	 * be no outstanding references, so the subsystem is safe to
	 * destroy. We scan across all subsystems rather than using
	 * the per-hierarchy linked list of mounted subsystems since
	 * we can be called via check_for_release() with no
	 * synchronization other than RCU, and the subsystem linked
	 * list isn't RCU-safe */
	int i;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;

		/* Skip subsystems not in this hierarchy */
		if (ss->root != cgrp->root)
			continue;
		css = cgrp->subsys[ss->subsys_id];
		/* When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed
		 * and the css deleted. But a false-positive doesn't
		 * matter, since it can only happen if the cgroup
		 * has been deleted and hence no longer needs the
		 * release agent to be called anyway. */
		if (css && atomic_read(&css->refcnt))
			return 1;
	}
	return 0;
}

static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	struct super_block *sb;
	struct cgroupfs_root *root;

	/* the vfs holds both inode->i_mutex already */

	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	parent = cgrp->parent;
	root = cgrp->root;
	sb = root->sb;

	/*
	 * Call pre_destroy handlers of subsys. Notify subsystems
	 * that rmdir() request comes.
	 */
	cgroup_call_pre_destroy(cgrp);

	if (cgroup_has_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);
	/* delete my sibling from parent->children */
	list_del(&cgrp->sibling);

	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	mutex_unlock(&cgroup_mutex);
	return 0;
}

static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's top cgroup. */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;
	need_mm_owne
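/*
 * Illustrative userspace sketch (not part of cgroup.c): one way the
 * 'tasks' file implemented by cgroup_tasks_open()/cgroup_tasks_read()
 * above might be consumed.  The buffer built at open() time is a
 * newline-separated list of decimal PIDs (see pid_array_to_buf()), so
 * a reader simply parses lines.  The mount point and group name below
 * are assumptions for the example; the file lives wherever the cgroup
 * filesystem happens to be mounted.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical path; substitute your own cgroupfs mount and group. */
	const char *tasks_path = "/dev/cgroup/mygroup/tasks";
	FILE *f = fopen(tasks_path, "r");
	char line[32];

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* One decimal PID per line, as written by pid_array_to_buf(). */
	while (fgets(line, sizeof(line), f))
		printf("attached pid: %s", line);
	fclose(f);
	return EXIT_SUCCESS;
}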