📄 cpuset.c
	start = buf + buflen;
	*--start = '\0';
	for (;;) {
		int len = cs->dentry->d_name.len;
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, cs->dentry->d_name.name, len);
		cs = cs->parent;
		if (!cs)
			break;
		if (!cs->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	memmove(buf, start, buf + buflen - start);
	return 0;
}

/*
 * Notify userspace when a cpuset is released, by running
 * /sbin/cpuset_release_agent with the name of the cpuset (path
 * relative to the root of cpuset file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cpuset.
 *
 * This races with the possibility that some other task will be
 * attached to this cpuset before it is removed, or that some other
 * user task will 'mkdir' a child cpuset of this cpuset.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cpuset is no longer
 * unused, and this cpuset will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is 0, which means don't
 * wait.  The separate /sbin/cpuset_release_agent task is forked by
 * call_usermodehelper(), then control in this thread returns here,
 * without waiting for the release agent task.  We don't bother to
 * wait because the caller of this routine has no use for the exit
 * status of the /sbin/cpuset_release_agent task, so no sense holding
 * our caller up for that.
 *
 * When we had only one cpuset mutex, we had to call this
 * without holding it, to avoid deadlock when call_usermodehelper()
 * allocated memory.  With two locks, we could now call this while
 * holding manage_mutex, but we still don't, so as to minimize
 * the time manage_mutex is held.
 */
static void cpuset_release_agent(const char *pathbuf)
{
	char *argv[3], *envp[3];
	int i;

	if (!pathbuf)
		return;

	i = 0;
	argv[i++] = "/sbin/cpuset_release_agent";
	argv[i++] = (char *)pathbuf;
	argv[i] = NULL;

	i = 0;
	/* minimal command environment */
	envp[i++] = "HOME=/";
	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[i] = NULL;

	call_usermodehelper(argv[0], argv, envp, 0);
	kfree(pathbuf);
}

/*
 * Either cs->count of using tasks transitioned to zero, or the
 * cs->children list of child cpusets just became empty.  If this
 * cs is notify_on_release() and now both the user count is zero and
 * the list of children is empty, prepare cpuset path in a kmalloc'd
 * buffer, to be returned via ppathbuf, so that the caller can invoke
 * cpuset_release_agent() with it later on, once manage_mutex is dropped.
 * Call here with manage_mutex held.
 *
 * This check_for_release() routine is responsible for kmalloc'ing
 * pathbuf.  The above cpuset_release_agent() is responsible for
 * kfree'ing pathbuf.  The caller of these routines is responsible
 * for providing a pathbuf pointer, initialized to NULL, then
 * calling check_for_release() with manage_mutex held and the address
 * of the pathbuf pointer, then dropping manage_mutex, then calling
 * cpuset_release_agent() with pathbuf, as set by check_for_release().
 */
static void check_for_release(struct cpuset *cs, char **ppathbuf)
{
	if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
	    list_empty(&cs->children)) {
		char *buf;

		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!buf)
			return;
		if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
			kfree(buf);
		else
			*ppathbuf = buf;
	}
}
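/*
 * [Editor's note -- illustrative sketch, not part of cpuset.c.]  The
 * comment above spells out the caller protocol for the release-agent
 * machinery.  A hypothetical caller (the function name and the
 * atomic_dec() on cs->count are assumptions for illustration) would
 * follow that protocol roughly like this:
 */
static void example_drop_cpuset_user(struct cpuset *cs)
{
	char *pathbuf = NULL;			/* must start out NULL */

	mutex_lock(&manage_mutex);
	atomic_dec(&cs->count);			/* cs may now be releasable */
	check_for_release(cs, &pathbuf);	/* may kmalloc path into pathbuf */
	mutex_unlock(&manage_mutex);		/* drop before invoking the agent */

	cpuset_release_agent(pathbuf);		/* kfree's pathbuf; NULL is a no-op */
}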
/*
 * Return in *pmask the portion of a cpuset's cpus_allowed that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online cpus.  If we get
 * all the way to the top and still haven't found any online cpus,
 * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
 * task, return cpu_online_map.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_map.
 *
 * Call with callback_mutex held.
 */
static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
{
	while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
		cs = cs->parent;
	if (cs)
		cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
	else
		*pmask = cpu_online_map;
	BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
}

/*
 * Return in *pmask the portion of a cpuset's mems_allowed that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online mems.  If we get
 * all the way to the top and still haven't found any online mems,
 * return node_online_map.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_online_map.
 *
 * Call with callback_mutex held.
 */
static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{
	while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
		cs = cs->parent;
	if (cs)
		nodes_and(*pmask, cs->mems_allowed, node_online_map);
	else
		*pmask = node_online_map;
	BUG_ON(!nodes_intersects(*pmask, node_online_map));
}
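/*
 * [Editor's note -- illustrative sketch, not part of cpuset.c.]  Example
 * of how a caller could use guarantee_online_cpus() to (re)bind a task
 * to its cpuset's CPUs.  The function name here is made up; the locking
 * follows the "Call with callback_mutex held" rule stated above:
 */
static void example_rebind_task_cpus(struct task_struct *tsk, struct cpuset *cs)
{
	cpumask_t cpus;

	mutex_lock(&callback_mutex);
	guarantee_online_cpus(cs, &cpus);	/* never returns an empty mask */
	mutex_unlock(&callback_mutex);

	set_cpus_allowed(tsk, cpus);		/* safe: mask contains online CPUs */
}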
/**
 * cpuset_update_task_memory_state - update task memory placement
 *
 * If the current task's cpuset's mems_allowed changed behind our
 * backs, update current->mems_allowed, mems_generation and task NUMA
 * mempolicy to the new value.
 *
 * Task mempolicy is updated by rebinding it relative to the
 * current->cpuset if a task has its memory placement changed.
 * Do not call this routine if in_interrupt().
 *
 * Call without callback_mutex or task_lock() held.  May be
 * called with or without manage_mutex held.  Thanks in part to
 * 'the_top_cpuset_hack', the task's cpuset pointer will never
 * be NULL.  This routine also might acquire callback_mutex and
 * current->mm->mmap_sem during call.
 *
 * Reading current->cpuset->mems_generation doesn't need task_lock
 * to guard the current->cpuset dereference, because it is guarded
 * from concurrent freeing of current->cpuset by attach_task(),
 * using RCU.
 *
 * The rcu_dereference() is technically probably not needed,
 * as I don't actually mind if I see a new cpuset pointer but
 * an old value of mems_generation.  However this really only
 * matters on alpha systems using cpusets heavily.  If I dropped
 * that rcu_dereference(), it would save them a memory barrier.
 * For all other arch's, rcu_dereference is a no-op anyway, and for
 * alpha systems not using cpusets, another planned optimization,
 * avoiding the rcu critical section for tasks in the root cpuset
 * which is statically allocated, so can't vanish, will make this
 * irrelevant.  Better to use RCU as intended, than to engage in
 * some cute trick to save a memory barrier that is impossible to
 * test, for alpha systems using cpusets heavily, which might not
 * even exist.
 *
 * This routine is needed to update the per-task mems_allowed data,
 * within the task's context, when it is trying to allocate memory
 * (in various mm/mempolicy.c routines) and notices that some other
 * task has been modifying its cpuset.
 */
void cpuset_update_task_memory_state(void)
{
	int my_cpusets_mem_gen;
	struct task_struct *tsk = current;
	struct cpuset *cs;

	if (tsk->cpuset == &top_cpuset) {
		/* Don't need rcu for top_cpuset.  It's never freed. */
		my_cpusets_mem_gen = top_cpuset.mems_generation;
	} else {
		rcu_read_lock();
		cs = rcu_dereference(tsk->cpuset);
		my_cpusets_mem_gen = cs->mems_generation;
		rcu_read_unlock();
	}

	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
		mutex_lock(&callback_mutex);
		task_lock(tsk);
		cs = tsk->cpuset;	/* Maybe changed when task not locked */
		guarantee_online_mems(cs, &tsk->mems_allowed);
		tsk->cpuset_mems_generation = cs->mems_generation;
		if (is_spread_page(cs))
			tsk->flags |= PF_SPREAD_PAGE;
		else
			tsk->flags &= ~PF_SPREAD_PAGE;
		if (is_spread_slab(cs))
			tsk->flags |= PF_SPREAD_SLAB;
		else
			tsk->flags &= ~PF_SPREAD_SLAB;
		task_unlock(tsk);
		mutex_unlock(&callback_mutex);
		mpol_rebind_task(tsk, &tsk->mems_allowed);
	}
}

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding manage_mutex.
 */
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	return	cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
		nodes_subset(p->mems_allowed, q->mems_allowed) &&
		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
		is_mem_exclusive(p) <= is_mem_exclusive(q);
}

/*
 * validate_change() - Used to validate that any proposed cpuset change
 * follows the structural rules for cpusets.
 *
 * If we replaced the flag and mask values of the current cpuset
 * (cur) with those values in the trial cpuset (trial), would
 * our various subset and exclusive rules still be valid?  Presumes
 * manage_mutex held.
 *
 * 'cur' is the address of an actual, in-use cpuset.  Operations
 * such as list traversal that depend on the actual address of the
 * cpuset in the list must use cur below, not trial.
 *
 * 'trial' is the address of bulk structure copy of cur, with
 * perhaps one or more of the fields cpus_allowed, mems_allowed,
 * or flags changed to new, trial values.
 *
 * Return 0 if valid, -errno if not.
 */
static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
{
	struct cpuset *c, *par;

	/* Each of our child cpusets must be a subset of us */
	list_for_each_entry(c, &cur->children, sibling) {
		if (!is_cpuset_subset(c, trial))
			return -EBUSY;
	}

	/* Remaining checks don't apply to root cpuset */
	if (cur == &top_cpuset)
		return 0;

	par = cur->parent;

	/* We must be a subset of our parent cpuset */
	if (!is_cpuset_subset(trial, par))
		return -EACCES;

	/* If either I or some sibling (!= me) is exclusive, we can't overlap */
	list_for_each_entry(c, &par->children, sibling) {
		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
		    c != cur &&
		    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
			return -EINVAL;
		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
		    c != cur &&
		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
			return -EINVAL;
	}

	return 0;
}
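/*
 * [Editor's note -- illustrative sketch, not part of cpuset.c.]
 * validate_change() expects 'trial' to be a bulk copy of the live cpuset
 * with the proposed change already applied.  A hypothetical flag update
 * (the function name is made up; CS_MEM_EXCLUSIVE is one of the cpuset
 * flag bits) would therefore look roughly like:
 */
static int example_make_mem_exclusive(struct cpuset *cs)
{
	struct cpuset trialcs;
	int err;

	trialcs = *cs;				/* bulk structure copy */
	set_bit(CS_MEM_EXCLUSIVE, &trialcs.flags);	/* propose change on the copy */

	err = validate_change(cs, &trialcs);	/* manage_mutex held by caller */
	if (err < 0)
		return err;

	mutex_lock(&callback_mutex);
	set_bit(CS_MEM_EXCLUSIVE, &cs->flags);	/* commit only if still valid */
	mutex_unlock(&callback_mutex);
	return 0;
}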
/*
 * For a given cpuset cur, partition the system as follows
 * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
 *    exclusive child cpusets
 * b. All cpus in the current cpuset's cpus_allowed that are not part of any
 *    exclusive child cpusets
 * Build these two partitions by calling partition_sched_domains
 *
 * Call with manage_mutex held.  May nest a call to the
 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
 * Must not be called holding callback_mutex, because we must
 * not call lock_cpu_hotplug() while holding callback_mutex.
 */
static void update_cpu_domains(struct cpuset *cur)
{
	struct cpuset *c, *par = cur->parent;
	cpumask_t pspan, cspan;

	if (par == NULL || cpus_empty(cur->cpus_allowed))
		return;

	/*
	 * Get all cpus from parent's cpus_allowed not part of exclusive
	 * children
	 */
	pspan = par->cpus_allowed;
	list_for_each_entry(c, &par->children, sibling) {
		if (is_cpu_exclusive(c))
			cpus_andnot(pspan, pspan, c->cpus_allowed);
	}
	if (!is_cpu_exclusive(cur)) {
		cpus_or(pspan, pspan, cur->cpus_allowed);
		if (cpus_equal(pspan, cur->cpus_allowed))
			return;
		cspan = CPU_MASK_NONE;
	} else {
		if (cpus_empty(pspan))
			return;
		cspan = cur->cpus_allowed;
		/*
		 * Get all cpus from current cpuset's cpus_allowed not part
		 * of exclusive children
		 */
		list_for_each_entry(c, &cur->children, sibling) {
			if (is_cpu_exclusive(c))
				cpus_andnot(cspan, cspan, c->cpus_allowed);
		}
	}

	lock_cpu_hotplug();
	partition_sched_domains(&pspan, &cspan);
	unlock_cpu_hotplug();
}

/*
 * Call with manage_mutex held.  May take callback_mutex during call.
 */
static int update_cpumask(struct cpuset *cs, char *buf)
{
	struct cpuset trialcs;
	int retval, cpus_unchanged;

	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
	if (cs == &top_cpuset)
		return -EACCES;

	trialcs = *cs;

	/*
	 * We allow a cpuset's cpus_allowed to be empty; if it has attached
	 * tasks, we'll catch it later when we validate the change and return
	 * -ENOSPC.
	 */
	if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
		cpus_clear(trialcs.cpus_allowed);
	} else {
		retval = cpulist_parse(buf, trialcs.cpus_allowed);
		if (retval < 0)
			return retval;
	}
	cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
	/* cpus_allowed cannot be empty for a cpuset with attached tasks. */
	if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
		return -ENOSPC;
	retval = validate_change(cs, &trialcs);
	if (retval < 0)
		return retval;
	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
	mutex_lock(&callback_mutex);
	cs->cpus_allowed = trialcs.cpus_allowed;
	mutex_unlock(&callback_mutex);
	if (is_cpu_exclusive(cs) && !cpus_unchanged)
		update_cpu_domains(cs);
	return 0;
}
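/*
 * [Editor's note -- illustrative sketch, not part of cpuset.c.]
 * update_cpumask() parses buf with cpulist_parse(), so it accepts a
 * comma-separated list of CPU ranges.  A hypothetical direct caller
 * (the function name is made up; manage_mutex must be held, per the
 * comment above) might look like:
 */
static int example_set_cpus(struct cpuset *cs)
{
	char buf[] = "0-3,6";		/* cpulist format: ranges and singles */

	return update_cpumask(cs, buf);	/* manage_mutex held by caller */
}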
/*
 * cpuset_migrate_mm
 *
 * Migrate memory region from one set of nodes to another.
 *
 * Temporarily set task's mems_allowed to target nodes of migration,
 * so that the migration code can allocate pages on these nodes.
 *
 * Call holding manage_mutex, so our current->cpuset won't change
 * during this call, as manage_mutex holds off any attach_task()
 * calls.  Therefore we don't need to take task_lock around the
 * call to guarantee_online_mems(), as we know no one is changing
 * our task's cpuset.
 *
 * Hold callback_mutex around the two modifications of our task's
 * mems_allowed to synchronize with cpuset_mems_allowed().
 *
 * While the mm_struct we are migrating is typically from some
 * other task, the task_struct mems_allowed that we are hacking
 * is for our current task, which must allocate new pages for that
 * migrating memory region.
 *
 * We call cpuset_update_task_memory_state() before hacking
 * our task's mems_allowed, so that we are assured of being in
 * sync with our task's cpuset, and in particular, callbacks to
 * cpuset_update_task_memory_state() from nested page allocations
 * won't see any mismatch of our cpuset and task mems_generation
 * values, so won't overwrite our hacked task's mems_allowed
 * nodemask.
 */
static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
							const nodemask_t *to)
{
	struct task_struct *tsk = current;

	cpuset_update_task_memory_state();

	mutex_lock(&callback_mutex);
	tsk->mems_allowed = *to;
	mutex_unlock(&callback_mutex);
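	/*
	 * [Editor's note]  The pasted listing breaks off at this point,
	 * mid-function.  In the upstream kernel of this vintage the function
	 * continues roughly as below -- treat this as a reconstruction guided
	 * by the comment above (migrate, then restore mems_allowed under
	 * callback_mutex), not as text recovered from the pasted file:
	 */
	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);

	mutex_lock(&callback_mutex);
	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
	mutex_unlock(&callback_mutex);
}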