📄 cgroup.c
/*
 * Generic process-grouping system.
 *
 * Based originally on the cpuset system, extracted by Paul Menage
 * Copyright (C) 2006 Google, Inc
 *
 * Copyright notices from the original cpuset code:
 * --------------------------------------------------
 * Copyright (C) 2003 BULL SA.
 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 * Portions derived from Patrick Mochel's sysfs code.
 * sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 * 2003-10-10 Written by Simon Derr.
 * 2003-10-22 Updates by Stephen Hemminger.
 * 2004 May-July Rework by Paul Jackson.
 * ---------------------------------------------------
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include <linux/cgroup.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/sort.h>
#include <linux/kmod.h>
#include <linux/delayacct.h>
#include <linux/cgroupstats.h>
#include <linux/hash.h>
#include <linux/namei.h>

#include <asm/atomic.h>

static DEFINE_MUTEX(cgroup_mutex);

/* Generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) &_x ## _subsys,

static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};

/*
 * A cgroupfs_root represents the root of a cgroup hierarchy,
 * and may be associated with a superblock to form an active
 * hierarchy
 */
struct cgroupfs_root {
        struct super_block *sb;

        /*
         * The bitmask of subsystems intended to be attached to this
         * hierarchy
         */
        unsigned long subsys_bits;

        /* The bitmask of subsystems currently attached to this hierarchy */
        unsigned long actual_subsys_bits;

        /* A list running through the attached subsystems */
        struct list_head subsys_list;

        /* The root cgroup for this hierarchy */
        struct cgroup top_cgroup;

        /* Tracks how many cgroups are currently defined in hierarchy.*/
        int number_of_cgroups;

        /* A list running through the mounted hierarchies */
        struct list_head root_list;

        /* Hierarchy-specific flags */
        unsigned long flags;

        /* The path to use for release notifications. */
        char release_agent_path[PATH_MAX];
};

/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;

/* The list of hierarchy roots */

static LIST_HEAD(roots);
static int root_count;

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)
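/*
 * A minimal usage sketch (added for illustration, not part of the
 * original cgroup.c): testing whether a subsystem is intended for a
 * given hierarchy via the subsys_bits bitmask declared above. The
 * helper name "example_subsys_wanted" is hypothetical; the bit test
 * itself mirrors the one used later in find_existing_css_set().
 */
static inline int example_subsys_wanted(struct cgroupfs_root *root,
                                        int subsys_id)
{
        /* each compiled-in subsystem owns one bit, indexed by its id */
        return (root->subsys_bits & (1UL << subsys_id)) != 0;
}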
/* This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call. This avoids us having to do
 * extra work in the fork/exit path if none of the subsystems need to
 * be called.
 */
static int need_forkexit_callback __read_mostly;
static int need_mm_owner_callback __read_mostly;

/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
        return test_bit(CGRP_REMOVED, &cgrp->flags);
}

/* bits in struct cgroupfs_root flags field */
enum {
        ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
};

static int cgroup_is_releasable(const struct cgroup *cgrp)
{
        const int bits =
                (1 << CGRP_RELEASABLE) |
                (1 << CGRP_NOTIFY_ON_RELEASE);
        return (cgrp->flags & bits) == bits;
}

static int notify_on_release(const struct cgroup *cgrp)
{
        return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}

/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_root() allows you to iterate across the active hierarchies */
#define for_each_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/* the list of cgroups eligible for automatic release. Protected by
 * release_list_lock */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);

/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
        /*
         * List running through cg_cgroup_links associated with a
         * cgroup, anchored on cgroup->css_sets
         */
        struct list_head cgrp_link_list;
        /*
         * List running through cg_cgroup_links pointing at a
         * single css_set object, anchored on css_set->cg_links
         */
        struct list_head cg_link_list;
        struct css_set *cg;
};

/* The default css_set - used by init and its children prior to any
 * hierarchies being mounted. It contains a pointer to the root state
 * for each subsystem. Also used to anchor the list of css_sets. Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */

static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

/* css_set_lock protects the list of css_set objects, and the
 * chain of tasks off each css_set. Nests outside task->alloc_lock
 * due to cgroup_iter_start() */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/* hash table for cgroup groups. This improves the performance of
 * finding an existing css_set */
#define CSS_SET_HASH_BITS       7
#define CSS_SET_TABLE_SIZE      (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];

static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
{
        int i;
        int index;
        unsigned long tmp = 0UL;

        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
                tmp += (unsigned long)css[i];
        tmp = (tmp >> 16) ^ tmp;

        index = hash_long(tmp, CSS_SET_HASH_BITS);

        return &css_set_table[index];
}

/* We don't maintain the lists running through each css_set to its
 * task until after the first call to cgroup_iter_start(). This
 * reduces the fork()/exit() overhead for people who have cgroups
 * compiled into their kernel but not actually in use */
static int use_task_css_set_links __read_mostly;
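/*
 * A minimal usage sketch (added for illustration, not part of the
 * original cgroup.c): walking a hierarchy's attached subsystems with
 * the for_each_subsys() macro defined above. "example_count_subsys"
 * is a hypothetical helper; cgroup_mutex must be held so that
 * subsys_list cannot change underneath the walk.
 */
static int example_count_subsys(struct cgroupfs_root *root)
{
        struct cgroup_subsys *ss;
        int n = 0;

        for_each_subsys(root, ss)
                n++;
        return n;
}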
/* When we create or destroy a css_set, the operation simply
 * takes/releases a reference count on all the cgroups referenced
 * by subsystems in this css_set. This can end up multiple-counting
 * some cgroups, but that's OK - the ref-count is just a
 * busy/not-busy indicator; ensuring that we only count each cgroup
 * once would require taking a global lock to ensure that no
 * subsystems moved between hierarchies while we were doing so.
 *
 * Possible TODO: decide at boot time based on the number of
 * registered subsystems and the number of CPUs or NUMA nodes whether
 * it's better for performance to ref-count every subsystem, or to
 * take a global lock and only add one ref count to each hierarchy.
 */

/*
 * unlink a css_set from the list and free it
 */
static void unlink_css_set(struct css_set *cg)
{
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;

        write_lock(&css_set_lock);
        hlist_del(&cg->hlist);
        css_set_count--;

        list_for_each_entry_safe(link, saved_link, &cg->cg_links,
                                 cg_link_list) {
                list_del(&link->cg_link_list);
                list_del(&link->cgrp_link_list);
                kfree(link);
        }

        write_unlock(&css_set_lock);
}

static void __release_css_set(struct kref *k, int taskexit)
{
        int i;
        struct css_set *cg = container_of(k, struct css_set, ref);

        unlink_css_set(cg);

        rcu_read_lock();
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup *cgrp = cg->subsys[i]->cgroup;
                if (atomic_dec_and_test(&cgrp->count) &&
                    notify_on_release(cgrp)) {
                        if (taskexit)
                                set_bit(CGRP_RELEASABLE, &cgrp->flags);
                        check_for_release(cgrp);
                }
        }
        rcu_read_unlock();
        kfree(cg);
}

static void release_css_set(struct kref *k)
{
        __release_css_set(k, 0);
}

static void release_css_set_taskexit(struct kref *k)
{
        __release_css_set(k, 1);
}

/*
 * refcounted get/put for css_set objects
 */
static inline void get_css_set(struct css_set *cg)
{
        kref_get(&cg->ref);
}

static inline void put_css_set(struct css_set *cg)
{
        kref_put(&cg->ref, release_css_set);
}

static inline void put_css_set_taskexit(struct css_set *cg)
{
        kref_put(&cg->ref, release_css_set_taskexit);
}

/*
 * find_existing_css_set() is a helper for
 * find_css_set(), and checks to see whether an existing
 * css_set is suitable.
 *
 * oldcg: the cgroup group that we're using before the cgroup
 * transition
 *
 * cgrp: the cgroup that we're moving into
 *
 * template: location in which to build the desired set of subsystem
 * state objects for the new cgroup group
 */
static struct css_set *find_existing_css_set(
        struct css_set *oldcg,
        struct cgroup *cgrp,
        struct cgroup_subsys_state *template[])
{
        int i;
        struct cgroupfs_root *root = cgrp->root;
        struct hlist_head *hhead;
        struct hlist_node *node;
        struct css_set *cg;

        /* Build the set of subsystem state objects that we want to
         * see in the new css_set */
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                if (root->subsys_bits & (1UL << i)) {
                        /* Subsystem is in this hierarchy. So we want
                         * the subsystem state from the new
                         * cgroup */
                        template[i] = cgrp->subsys[i];
                } else {
                        /* Subsystem is not in this hierarchy, so we
                         * don't want to change the subsystem state */
                        template[i] = oldcg->subsys[i];
                }
        }

        hhead = css_set_hash(template);
        hlist_for_each_entry(cg, node, hhead, hlist) {
                if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
                        /* All subsystems matched */
                        return cg;
                }
        }

        /* No existing cgroup group matched */
        return NULL;
}

static void free_cg_links(struct list_head *tmp)
{
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;

        list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
                list_del(&link->cgrp_link_list);
                kfree(link);
        }
}
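/*
 * A minimal usage sketch (added for illustration, not part of the
 * original cgroup.c): the get/put pairing expected around a css_set.
 * "example_inspect_css_set" is a hypothetical helper. Dropping the
 * last reference invokes release_css_set(), which unlinks the set
 * from the hash table and releases each referenced cgroup.
 */
static void example_inspect_css_set(struct css_set *cg)
{
        get_css_set(cg);        /* pin the set while we look at it */
        /* ... cg->subsys[] may be dereferenced safely here ... */
        put_css_set(cg);        /* may free cg if this was the last ref */
}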
/*
 * allocate_cg_links() allocates "count" cg_cgroup_link structures
 * and chains them on tmp through their cgrp_link_list fields.
 * Returns 0 on success or a negative error
 */
static int allocate_cg_links(int count, struct list_head *tmp)
{
        struct cg_cgroup_link *link;
        int i;

        INIT_LIST_HEAD(tmp);
        for (i = 0; i < count; i++) {
                link = kmalloc(sizeof(*link), GFP_KERNEL);
                if (!link) {
                        free_cg_links(tmp);
                        return -ENOMEM;
                }
                list_add(&link->cgrp_link_list, tmp);
        }
        return 0;
}

/*
 * find_css_set() takes an existing cgroup group and a
 * cgroup object, and returns a css_set object that's
 * equivalent to the old group, but with the given cgroup
 * substituted into the appropriate hierarchy. Must be called with
 * cgroup_mutex held
 */
static struct css_set *find_css_set(
        struct css_set *oldcg, struct cgroup *cgrp)
{
        struct css_set *res;
        struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
        int i;

        struct list_head tmp_cg_links;
        struct cg_cgroup_link *link;

        struct hlist_head *hhead;

        /* First see if we already have a cgroup group that matches
         * the desired set */
        read_lock(&css_set_lock);
        res = find_existing_css_set(oldcg, cgrp, template);
        if (res)
                get_css_set(res);
        read_unlock(&css_set_lock);

        if (res)
                return res;

        res = kmalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
                return NULL;

        /* Allocate all the cg_cgroup_link objects that we'll need */
        if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
                kfree(res);
                return NULL;
        }

        kref_init(&res->ref);
        INIT_LIST_HEAD(&res->cg_links);
        INIT_LIST_HEAD(&res->tasks);
        INIT_HLIST_NODE(&res->hlist);

        /* Copy the set of subsystem state objects generated in
         * find_existing_css_set() */
        memcpy(res->subsys, template, sizeof(res->subsys));

        write_lock(&css_set_lock);
        /* Add reference counts and links from the new css_set. */
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup *cgrp = res->subsys[i]->cgroup;
                struct cgroup_subsys *ss = subsys[i];
                atomic_inc(&cgrp->count);
                /*
                 * We want to add a link once per cgroup, so we
                 * only do it for the first subsystem in each
                 * hierarchy
                 */
                if (ss->root->subsys_list.next == &ss->sibling) {
                        BUG_ON(list_empty(&tmp_cg_links));
                        link = list_entry(tmp_cg_links.next,
                                          struct cg_cgroup_link,
                                          cgrp_link_list);
                        list_del(&link->cgrp_link_list);
                        list_add(&link->cgrp_link_list,
                                 &cgrp->css_sets);
                        link->cg = res;
                        list_add(&link->cg_link_list,
                                 &res->cg_links);
                }
        }
        if (list_empty(&rootnode.subsys_list)) {
                link = list_entry(tmp_cg_links.next,
                                  struct cg_cgroup_link,
                                  cgrp_link_list);
                list_del(&link->cgrp_link_list);
                list_add(&link->cgrp_link_list, &dummytop->css_sets);
                link->cg = res;
                list_add(&link->cg_link_list, &res->cg_links);
        }

        BUG_ON(!list_empty(&tmp_cg_links));

        css_set_count++;

        /* Add this cgroup group to the hash table */
        hhead = css_set_hash(res->subsys);
        hlist_add_head(&res->hlist, hhead);

        write_unlock(&css_set_lock);

        return res;
}

/*
 * There is one global cgroup mutex. We also require taking
 * task_lock() when dereferencing a task's cgroup subsys pointers.
 * See "The task_lock() exception", at the end of this comment.
 *
 * A task must hold cgroup_mutex to modify cgroups.
 *
 * Any task can increment and decrement the count field without lock.
 * So in general, code holding cgroup_mutex can't rely on the count
 * field not changing.  However, if the count goes to zero, then only
 * cgroup_attach_task() can increment it again.  Because a count of zero
 * means that no tasks are currently attached, therefore there is no
 * way a task attached to that cgroup can fork (the other way to
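/*
 * A minimal usage sketch (added for illustration, not part of the
 * original cgroup.c): the shape of a find_css_set() call site, as in
 * task attachment. "example_retarget_css_set" and its signature are
 * hypothetical; cgroup_mutex must be held, and find_css_set() returns
 * the new set with a reference already taken for the caller.
 */
static int example_retarget_css_set(struct css_set *oldcg,
                                    struct cgroup *cgrp,
                                    struct css_set **newcg)
{
        *newcg = find_css_set(oldcg, cgrp);
        if (!*newcg)
                return -ENOMEM;
        /* the caller re-points its task at *newcg, then drops the
         * reference it held on the old set */
        put_css_set(oldcg);
        return 0;
}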