📄 cpuset.c
/*
 * for the common functions, 'private' gives the type of file
 */

static struct cftype cft_tasks = {
        .name = "tasks",
        .open = cpuset_tasks_open,
        .read = cpuset_tasks_read,
        .release = cpuset_tasks_release,
        .private = FILE_TASKLIST,
};

static struct cftype cft_cpus = {
        .name = "cpus",
        .private = FILE_CPULIST,
};

static struct cftype cft_mems = {
        .name = "mems",
        .private = FILE_MEMLIST,
};

static struct cftype cft_cpu_exclusive = {
        .name = "cpu_exclusive",
        .private = FILE_CPU_EXCLUSIVE,
};

static struct cftype cft_mem_exclusive = {
        .name = "mem_exclusive",
        .private = FILE_MEM_EXCLUSIVE,
};

static struct cftype cft_notify_on_release = {
        .name = "notify_on_release",
        .private = FILE_NOTIFY_ON_RELEASE,
};

static struct cftype cft_memory_migrate = {
        .name = "memory_migrate",
        .private = FILE_MEMORY_MIGRATE,
};

static struct cftype cft_memory_pressure_enabled = {
        .name = "memory_pressure_enabled",
        .private = FILE_MEMORY_PRESSURE_ENABLED,
};

static struct cftype cft_memory_pressure = {
        .name = "memory_pressure",
        .private = FILE_MEMORY_PRESSURE,
};

static struct cftype cft_spread_page = {
        .name = "memory_spread_page",
        .private = FILE_SPREAD_PAGE,
};

static struct cftype cft_spread_slab = {
        .name = "memory_spread_slab",
        .private = FILE_SPREAD_SLAB,
};

static int cpuset_populate_dir(struct dentry *cs_dentry)
{
        int err;

        if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
                return err;
        return 0;
}

/*
 * cpuset_create - create a cpuset
 * parent: cpuset that will be parent of the new cpuset.
 * name: name of the new cpuset. Will be strcpy'ed.
 * mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held
 */

static long cpuset_create(struct cpuset *parent, const char *name, int mode)
{
        struct cpuset *cs;
        int err;

        cs = kmalloc(sizeof(*cs), GFP_KERNEL);
        if (!cs)
                return -ENOMEM;

        mutex_lock(&manage_mutex);
        cpuset_update_task_memory_state();
        cs->flags = 0;
        if (notify_on_release(parent))
                set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
        if (is_spread_page(parent))
                set_bit(CS_SPREAD_PAGE, &cs->flags);
        if (is_spread_slab(parent))
                set_bit(CS_SPREAD_SLAB, &cs->flags);
        cs->cpus_allowed = CPU_MASK_NONE;
        cs->mems_allowed = NODE_MASK_NONE;
        atomic_set(&cs->count, 0);
        INIT_LIST_HEAD(&cs->sibling);
        INIT_LIST_HEAD(&cs->children);
        cs->mems_generation = cpuset_mems_generation++;
        fmeter_init(&cs->fmeter);

        cs->parent = parent;

        mutex_lock(&callback_mutex);
        list_add(&cs->sibling, &cs->parent->children);
        number_of_cpusets++;
        mutex_unlock(&callback_mutex);

        err = cpuset_create_dir(cs, name, mode);
        if (err < 0)
                goto err;

        /*
         * Release manage_mutex before cpuset_populate_dir() because it
         * will down() this new directory's i_mutex and if we race with
         * another mkdir, we might deadlock.
         */
        mutex_unlock(&manage_mutex);

        err = cpuset_populate_dir(cs->dentry);
        /* If err < 0, we have a half-filled directory - oh well ;) */
        return 0;
err:
        list_del(&cs->sibling);
        mutex_unlock(&manage_mutex);
        kfree(cs);
        return err;
}

static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
        struct cpuset *c_parent = dentry->d_parent->d_fsdata;

        /* the vfs holds inode->i_mutex already */
        return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
}

/*
 * Locking note on the strange update_flag() call below:
 *
 * If the cpuset being removed is marked cpu_exclusive, then simulate
 * turning cpu_exclusive off, which will call update_cpu_domains().
 * The lock_cpu_hotplug() call in update_cpu_domains() must not be
 * made while holding callback_mutex.  Elsewhere the kernel nests
 * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
 * nesting would risk an ABBA deadlock.
 */

static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
        struct cpuset *cs = dentry->d_fsdata;
        struct dentry *d;
        struct cpuset *parent;
        char *pathbuf = NULL;

        /* the vfs holds both inode->i_mutex already */

        mutex_lock(&manage_mutex);
        cpuset_update_task_memory_state();
        if (atomic_read(&cs->count) > 0) {
                mutex_unlock(&manage_mutex);
                return -EBUSY;
        }
        if (!list_empty(&cs->children)) {
                mutex_unlock(&manage_mutex);
                return -EBUSY;
        }
        if (is_cpu_exclusive(cs)) {
                int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
                if (retval < 0) {
                        mutex_unlock(&manage_mutex);
                        return retval;
                }
        }
        parent = cs->parent;
        mutex_lock(&callback_mutex);
        set_bit(CS_REMOVED, &cs->flags);
        list_del(&cs->sibling); /* delete my sibling from parent->children */
        spin_lock(&cs->dentry->d_lock);
        d = dget(cs->dentry);
        cs->dentry = NULL;
        spin_unlock(&d->d_lock);
        cpuset_d_remove_dir(d);
        dput(d);
        number_of_cpusets--;
        mutex_unlock(&callback_mutex);
        if (list_empty(&parent->children))
                check_for_release(parent, &pathbuf);
        mutex_unlock(&manage_mutex);
        cpuset_release_agent(pathbuf);
        return 0;
}

/*
 * cpuset_init_early - just enough so that the calls to
 * cpuset_update_task_memory_state() in early init code
 * are harmless.
 */

int __init cpuset_init_early(void)
{
        struct task_struct *tsk = current;

        tsk->cpuset = &top_cpuset;
        tsk->cpuset->mems_generation = cpuset_mems_generation++;
        return 0;
}

/**
 * cpuset_init - initialize cpusets at system boot
 *
 * Description: Initialize top_cpuset and the cpuset internal file system.
 **/

int __init cpuset_init(void)
{
        struct dentry *root;
        int err;

        top_cpuset.cpus_allowed = CPU_MASK_ALL;
        top_cpuset.mems_allowed = NODE_MASK_ALL;

        fmeter_init(&top_cpuset.fmeter);
        top_cpuset.mems_generation = cpuset_mems_generation++;

        init_task.cpuset = &top_cpuset;

        err = register_filesystem(&cpuset_fs_type);
        if (err < 0)
                goto out;
        cpuset_mount = kern_mount(&cpuset_fs_type);
        if (IS_ERR(cpuset_mount)) {
                printk(KERN_ERR "cpuset: could not mount!\n");
                err = PTR_ERR(cpuset_mount);
                cpuset_mount = NULL;
                goto out;
        }
        root = cpuset_mount->mnt_sb->s_root;
        root->d_fsdata = &top_cpuset;
        inc_nlink(root->d_inode);
        top_cpuset.dentry = root;
        root->d_inode->i_op = &cpuset_dir_inode_operations;
        number_of_cpusets = 1;
        err = cpuset_populate_dir(root);
        /* memory_pressure_enabled is in root cpuset only */
        if (err == 0)
                err = cpuset_add_file(root, &cft_memory_pressure_enabled);
out:
        return err;
}

/*
 * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
 * or memory nodes, we need to walk over the cpuset hierarchy,
 * removing that CPU or node from all cpusets.
 * If this removes the last CPU or node from a cpuset, then the
 * guarantee_online_cpus() or guarantee_online_mems() code will use
 * that emptied cpuset's parent's online CPUs or nodes.  Cpusets that
 * were already empty of CPUs or nodes are left empty.
 *
 * This routine is intentionally inefficient in a couple of regards.
 * It will check all cpusets in a subtree even if the top cpuset of
 * the subtree has no offline CPUs or nodes.  It checks both CPUs and
 * nodes, even though the caller could have been coded to know that
 * only one of CPUs or nodes needed to be checked on a given call.
 * This was done to minimize text size rather than cpu cycles.
 *
 * Call with both manage_mutex and callback_mutex held.
 *
 * Recursive, on depth of cpuset subtree.
 */

static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
{
        struct cpuset *c;

        /* Each of our child cpusets' mems must be online */
        list_for_each_entry(c, &cur->children, sibling) {
                guarantee_online_cpus_mems_in_subtree(c);
                if (!cpus_empty(c->cpus_allowed))
                        guarantee_online_cpus(c, &c->cpus_allowed);
                if (!nodes_empty(c->mems_allowed))
                        guarantee_online_mems(c, &c->mems_allowed);
        }
}

/*
 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
 * cpu_online_map and node_online_map.  Force the top cpuset to track
 * what's online after any CPU or memory node hotplug or unplug event.
 *
 * To ensure that we don't remove a CPU or node from the top cpuset
 * that is currently in use by a child cpuset (which would violate
 * the rule that cpusets must be subsets of their parent), we first
 * call the recursive routine guarantee_online_cpus_mems_in_subtree().
 *
 * Since there are two callers of this routine, one for CPU hotplug
 * events and one for memory node hotplug events, we could have coded
 * two separate routines here.  We code it as a single common routine
 * in order to minimize text size.
 */

static void common_cpu_mem_hotplug_unplug(void)
{
        mutex_lock(&manage_mutex);
        mutex_lock(&callback_mutex);

        guarantee_online_cpus_mems_in_subtree(&top_cpuset);
        top_cpuset.cpus_allowed = cpu_online_map;
        top_cpuset.mems_allowed = node_online_map;

        mutex_unlock(&callback_mutex);
        mutex_unlock(&manage_mutex);
}

/*
 * The top_cpuset tracks what CPUs and Memory Nodes are online,
 * period.  This is necessary in order to make cpusets transparent
 * (of no effect) on systems that are actively using CPU hotplug
 * but making no active use of cpusets.
 *
 * This routine ensures that top_cpuset.cpus_allowed tracks
 * cpu_online_map on each CPU hotplug (cpuhp) event.
 */

static int cpuset_handle_cpuhp(struct notifier_block *nb,
                                unsigned long phase, void *cpu)
{
        common_cpu_mem_hotplug_unplug();
        return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Keep top_cpuset.mems_allowed tracking node_online_map.
 * Call this routine anytime after you change node_online_map.
 * See also the previous routine cpuset_handle_cpuhp().
 */

void cpuset_track_online_nodes(void)
{
        common_cpu_mem_hotplug_unplug();
}
#endif

/**
 * cpuset_init_smp - initialize cpus_allowed
 *
 * Description: Finish top cpuset after cpu, node maps are initialized
 **/

void __init cpuset_init_smp(void)
{
        top_cpuset.cpus_allowed = cpu_online_map;
        top_cpuset.mems_allowed = node_online_map;

        hotcpu_notifier(cpuset_handle_cpuhp, 0);
}

/**
 * cpuset_fork - attach newly forked task to its parent's cpuset.
 * @child: pointer to task_struct of the newly forked child process.
 *
 * Description: A task inherits its parent's cpuset at fork().
 *
 * A pointer to the shared cpuset was automatically copied in fork.c
 * by dup_task_struct().
 * However, we ignore that copy, since it was not made under the
 * protection of task_lock(), so might no longer be a valid cpuset
 * pointer.  attach_task() might have already changed current->cpuset,
 * allowing the previously referenced cpuset to be removed and freed.
 * Instead, we task_lock(current) and copy its present value of
 * current->cpuset for our freshly forked child.
 *
 * At the point that cpuset_fork() is called, 'current' is the parent
 * task, and the passed argument 'child' points to the child task.
 **/

void cpuset_fork(struct task_struct *child)
{
        task_lock(current);
        child->cpuset = current->cpuset;
        atomic_inc(&child->cpuset->count);
        task_unlock(current);
}

/**
 * cpuset_exit - detach cpuset from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Description: Detach cpuset from @tsk and release it.
 *
 * Note that cpusets marked notify_on_release force every task in
 * them to take the global manage_mutex mutex when exiting.
 * This could impact scaling on very large systems.  Be reluctant to
 * use notify_on_release cpusets where very high task exit scaling
 * is required on large systems.
 *
 * Don't even think about dereferencing 'cs' after the cpuset use count
 * goes to zero, except inside a critical section guarded by manage_mutex
 * or callback_mutex.  Otherwise a zero cpuset use count is a license to
 * any other task to nuke the cpuset immediately, via cpuset_rmdir().
 *
 * This routine has to take manage_mutex, not callback_mutex, because
 * it is holding that mutex while calling check_for_release(),
 * which calls kmalloc(), so can't be called holding callback_mutex.
 *
 * the_top_cpuset_hack:
 *
 * Set the exiting task's cpuset to the root cpuset (top_cpuset).
 *
 * Don't leave a task unable to allocate memory, as that is an
 * accident waiting to happen should someone add a callout in
 * do_exit() after the cpuset_exit() call that might allocate.
 * If a task tries to allocate memory with an invalid cpuset,
 * it will oops in cpuset_update_task_memory_state().
 *
 * We call cpuset_exit() while the task is still competent to
 * handle notify_on_release(), then leave the task attached to
 * the root cpuset (top_cpuset) for the remainder of its exit.
 *
 * To do this properly, we would increment the reference count on
 * top_cpuset, and near the very end of the kernel/exit.c do_exit()
 * code we would add a second cpuse
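The control files named in the cft_* table above ("cpus", "mems", "tasks", and so on) show up as ordinary files inside each cpuset directory, and cpuset_mkdir()/cpuset_rmdir() back the mkdir/rmdir calls on that filesystem. As an illustrative sketch only, not part of cpuset.c, the userspace program below shows how those entry points might be driven; the /dev/cpuset mount point, the cpuset name "example", and the CPU/node values are assumptions made for the example, and error handling is minimal.

/*
 * Illustrative sketch only -- not part of cpuset.c.  Assumes the cpuset
 * filesystem is already mounted at /dev/cpuset (a conventional but
 * arbitrary mount point) and that CPUs 0-1 and memory node 0 exist.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, val, strlen(val));
        close(fd);
        return n < 0 ? -1 : 0;
}

int main(void)
{
        char pid[32];

        /* mkdir() reaches cpuset_mkdir() -> cpuset_create() above ... */
        if (mkdir("/dev/cpuset/example", 0755) && errno != EEXIST)
                return 1;

        /* ... and cpuset_populate_dir() created these control files. */
        write_str("/dev/cpuset/example/cpus", "0-1");
        write_str("/dev/cpuset/example/mems", "0");

        /* Attaching a task goes through the "tasks" file (cft_tasks). */
        snprintf(pid, sizeof(pid), "%d", getpid());
        write_str("/dev/cpuset/example/tasks", pid);

        /*
         * rmdir("/dev/cpuset/example") would reach cpuset_rmdir(), but
         * only succeeds once no tasks and no child cpusets remain in it.
         */
        return 0;
}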