📄 cgroup.c
		return -ENODEV;
	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}

/*
 * seqfile ops/methods for returning structured data. Currently just
 * supports string->u64 maps, but can be extended in future.
 */

struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};

static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
	struct seq_file *sf = cb->state;
	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}

static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct cgroup_seqfile_state *state = m->private;
	struct cftype *cft = state->cft;
	if (cft->read_map) {
		struct cgroup_map_cb cb = {
			.fill = cgroup_map_add,
			.state = m,
		};
		return cft->read_map(state->cgroup, cft, &cb);
	}
	return cft->read_seq_string(state->cgroup, cft, m);
}

static int cgroup_seqfile_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	kfree(seq->private);
	return single_release(inode, file);
}

static struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};

static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	cft = __d_cft(file->f_dentry);
	if (!cft)
		return -ENODEV;
	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}

static int cgroup_file_release(struct inode *inode, struct file *file)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	if (cft->release)
		return cft->release(inode, file);
	return 0;
}

/*
 * cgroup_rename - Only allow simple rename of directories in place.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}

static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};

static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
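/*
 * Hypothetical sketch (not part of cgroup.c): how a controller's
 * read_map handler would use the cgroup_map_cb plumbing above. The
 * handler is invoked from cgroup_seqfile_show(), and each cb->fill()
 * call emits one "key value" line via cgroup_map_add(). The stat
 * names and the example_stats() helper are made up for illustration.
 */
#if 0	/* illustration only */
static int example_read_map(struct cgroup *cgrp, struct cftype *cft,
			    struct cgroup_map_cb *cb)
{
	u64 nr_total, nr_failed;

	example_stats(cgrp, &nr_total, &nr_failed);	/* hypothetical helper */
	cb->fill(cb, "total", nr_total);
	cb->fill(cb, "failed", nr_failed);
	return 0;
}
#endif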
static int cgroup_create_file(struct dentry *dentry, int mode,
				struct super_block *sb)
{
	static struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/* start with the directory inode held, so that we can
		 * populate it without racing with another mkdir */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}

/*
 * cgroup_create_dir - create a directory for an object.
 * @cgrp: the cgroup we create the directory for. It must have a valid
 *        ->parent field. And we are going to fill its ->dentry field.
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new directory.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
				int mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		cgrp->dentry = dentry;
		dget(dentry);
	}
	dput(dentry);

	return error;
}

int cgroup_add_file(struct cgroup *cgrp,
		       struct cgroup_subsys *subsys,
		       const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;

	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		error = cgroup_create_file(dentry, 0644 | S_IFREG,
						cgrp->root->sb);
		if (!error)
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}

int cgroup_add_files(struct cgroup *cgrp,
			struct cgroup_subsys *subsys,
			const struct cftype cft[],
			int count)
{
	int i, err;
	for (i = 0; i < count; i++) {
		err = cgroup_add_file(cgrp, subsys, &cft[i]);
		if (err)
			return err;
	}
	return 0;
}
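/*
 * Hypothetical sketch (not part of cgroup.c): how a subsystem would
 * typically expose its control files through cgroup_add_files() above,
 * usually from its populate callback. The "example" subsystem, the file
 * name and the example_weight_read() handler are made up; the read_u64
 * signature matches the handler path used by cgroup_file_read().
 */
#if 0	/* illustration only */
static u64 example_weight_read(struct cgroup *cgrp, struct cftype *cft)
{
	return 0;	/* would return the per-cgroup value */
}

static struct cftype example_files[] = {
	{
		.name = "weight",	/* shows up as "example.weight" unless ROOT_NOPREFIX */
		.read_u64 = example_weight_read,
	},
};

static int example_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	return cgroup_add_files(cgrp, ss, example_files,
				ARRAY_SIZE(example_files));
}
#endif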
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Return the number of tasks in the cgroup.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->ref.refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}

/*
 * Advance a list_head iterator.  The iterator should be positioned at
 * the start of a css_set
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}

/*
 * To reduce the fork() overhead for systems that are not actually
 * using their cgroups capability, we don't maintain the lists running
 * through each css_set to its tasks until we see the list actually
 * used - in other words after the first call to cgroup_iter_start().
 *
 * The tasklist_lock is not held here, as do_each_thread() and
 * while_each_thread() are protected by RCU.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}

void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}

struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	if (l == &res->cgroups->tasks) {
		/* We reached the end of this task list - move on to
		 * the next cg_cgroup_link */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}

void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
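/*
 * Hypothetical sketch (not part of cgroup.c): the intended calling
 * pattern for the iterator above. css_set_lock is read-held between
 * cgroup_iter_start() and cgroup_iter_end(), so the loop body must not
 * sleep. The example_count_running() name is made up for illustration.
 */
#if 0	/* illustration only */
static int example_count_running(struct cgroup *cgrp)
{
	struct cgroup_iter it;
	struct task_struct *tsk;
	int nr_running = 0;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (tsk->state == TASK_RUNNING)
			nr_running++;
	}
	cgroup_iter_end(cgrp, &it);
	return nr_running;
}
#endif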
static inline int started_after_time(struct task_struct *t1,
				     struct timespec *time,
				     struct task_struct *t2)
{
	int start_diff = timespec_compare(&t1->start_time, time);
	if (start_diff > 0) {
		return 1;
	} else if (start_diff < 0) {
		return 0;
	} else {
		/*
		 * Arbitrarily, if two processes started at the same
		 * time, we'll say that the lower pointer value
		 * started first. Note that t2 may have exited by now
		 * so this may not be a valid pointer any longer, but
		 * that's fine - it still serves to distinguish
		 * between two tasks started (effectively) simultaneously.
		 */
		return t1 > t2;
	}
}

/*
 * This function is a callback from heap_insert() and is used to order
 * the heap.
 * In this case we order the heap in descending task start time.
 */
static inline int started_after(void *p1, void *p2)
{
	struct task_struct *t1 = p1;
	struct task_struct *t2 = p2;
	return started_after_time(t1, &t2->start_time, t2);
}

/**
 * cgroup_scan_tasks - iterate though all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * Arguments include pointers to callback functions test_task() and
 * process_task().
 * Iterate through all the tasks in a cgroup, calling test_task() for each,
 * and if it returns true, call process_task() for it also.
 * The test_task pointer may be NULL, meaning always true (select all tasks).
 * Effectively duplicates cgroup_iter_{start,next,end}()
 * but does not lock css_set_lock for the call to process_task().
 * The struct cgroup_scanner may be embedded in any structure of the caller's
 * creation.
 * It is guaranteed that process_task() will act on every task that
 * is a member of the cgroup for the duration of this call. This
 * function may or may not call process_task() for tasks that exit
 * or move to a different cgroup during the call, or are forked or
 * move into the cgroup during the call.
 *
 * Note that test_task() may be called with locks held, and may in some
 * situations be called multiple times for the same task, so it should
 * be cheap.
 * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
 * pre-allocated and will be used for heap operations (and its "gt" member will
 * be overwritten), else a temporary heap will be used (allocation of which
 * may cause this function to fail).
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
	 * to determine which are of interest, and using the scanner's
	 * "process_task" callback to process any of them that need an update.
	 * Since we don't want to hold any locks during the task updates,
	 * gather tasks to be processed in a heap structure.
	 * The heap is sorted by descending task start time.
	 * If the statically-sized heap fills up, we overflow tasks that
	 * started later, and in future iterations only consider tasks that
	 * started after the latest task in the previous pass. This
	 * guarantees forward progress and that we don't miss any tasks.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's callback,
		 * if he provided one
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted, and pushed out a
			 * different task
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again
		 * in case some of them were in the middle of forking
		 * children that didn't get processed.
		 * Not the most efficient way to do it, but it avoids
		 * having to take callback_mutex in the fork path
		 */
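/*
 * Hypothetical sketch (not part of cgroup.c): a caller of
 * cgroup_scan_tasks() above. The scanner's cg, test_task, process_task
 * and heap fields are the ones the function dereferences; test_task may
 * be NULL to select every task. The example_* callbacks are made up.
 */
#if 0	/* illustration only */
static int example_test(struct task_struct *tsk, struct cgroup_scanner *scan)
{
	return !(tsk->flags & PF_EXITING);	/* skip exiting tasks */
}

static void example_process(struct task_struct *tsk,
			    struct cgroup_scanner *scan)
{
	/* called without css_set_lock held, one task at a time */
}

static void example_scan(struct cgroup *cgrp)
{
	struct cgroup_scanner scan = {
		.cg = cgrp,
		.test_task = example_test,
		.process_task = example_process,
		.heap = NULL,	/* let cgroup_scan_tasks() allocate one */
	};

	cgroup_scan_tasks(&scan);
}
#endif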