📄 cpuset.c
	if (!is_cpuset_subset(trial, par))
		return -EACCES;

	/*
	 * If either I or some sibling (!= me) is exclusive, we can't
	 * overlap
	 */
	list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
		c = cgroup_cs(cont);
		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
		    c != cur &&
		    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
			return -EINVAL;
		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
		    c != cur &&
		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
			return -EINVAL;
	}

	/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
	if (cgroup_task_count(cur->css.cgroup)) {
		if (cpus_empty(trial->cpus_allowed) ||
		    nodes_empty(trial->mems_allowed)) {
			return -ENOSPC;
		}
	}

	return 0;
}

/*
 * Helper routine for generate_sched_domains().
 * Do cpusets a, b have overlapping cpus_allowed masks?
 */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
	return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
}

static void
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
{
	if (dattr->relax_domain_level < c->relax_domain_level)
		dattr->relax_domain_level = c->relax_domain_level;
	return;
}

static void
update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
{
	LIST_HEAD(q);

	list_add(&c->stack_list, &q);
	while (!list_empty(&q)) {
		struct cpuset *cp;
		struct cgroup *cont;
		struct cpuset *child;

		cp = list_first_entry(&q, struct cpuset, stack_list);
		list_del(q.next);

		if (cpus_empty(cp->cpus_allowed))
			continue;

		if (is_sched_load_balance(cp))
			update_domain_attr(dattr, cp);

		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &q);
		}
	}
}
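The exclusivity rule enforced above reduces to a simple bitmask test. Below is a minimal userspace sketch of the sibling check, assuming a toy struct cs and a 64-bit mask_t in place of the kernel's struct cpuset and cpumask_t, and omitting the c != cur self-skip and the memory-node half of the check; the names here are illustrative, not kernel API.

#include <stdio.h>

typedef unsigned long long mask_t;	/* one bit per CPU, up to 64 CPUs */

struct cs {
	mask_t cpus;
	int cpu_exclusive;
};

/*
 * Sketch of the sibling test in validate_change(): a trial mask may
 * not intersect any sibling's mask when either side is exclusive.
 */
static int check_siblings(const struct cs *trial,
			  const struct cs *sibs, int nsibs)
{
	int i;

	for (i = 0; i < nsibs; i++) {
		if ((trial->cpu_exclusive || sibs[i].cpu_exclusive) &&
		    (trial->cpus & sibs[i].cpus))
			return -1;	/* stands in for -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct cs sibs[] = { { 0x3ULL, 1 }, { 0xcULL, 0 } };
	struct cs trial = { 0x6ULL, 0 };	/* overlaps exclusive sibling */

	printf("%d\n", check_siblings(&trial, sibs, 2));	/* prints -1 */
	return 0;
}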
/*
 * generate_sched_domains()
 *
 * This function builds a partial partition of the system's CPUs.
 * A 'partial partition' is a set of non-overlapping subsets whose
 * union is a subset of that set.
 * The output of this function needs to be passed to the kernel/sched.c
 * partition_sched_domains() routine, which will rebuild the scheduler's
 * load balancing domains (sched domains) as specified by that partial
 * partition.
 *
 * See "What is sched_load_balance" in Documentation/cpusets.txt
 * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
 * routine would rather not worry about failures to rebuild sched
 * domains when operating in the severe memory shortage situations
 * that could cause allocation failures below.
 *
 * Must be called with cgroup_lock held.
 *
 * The three key local variables below are:
 *    q    - a linked-list queue of cpuset pointers, used to implement a
 *           top-down scan of all cpusets.  This scan loads a pointer
 *           to each cpuset marked is_sched_load_balance into the
 *           array 'csa'.  For our purposes, rebuilding the scheduler's
 *           sched domains, we can ignore !is_sched_load_balance cpusets.
 *    csa  - (for CpuSet Array) Array of pointers to all the cpusets
 *           that need to be load balanced, for convenient iterative
 *           access by the subsequent code that finds the best partition,
 *           i.e. the set of domains (subsets) of CPUs such that the
 *           cpus_allowed of every cpuset marked is_sched_load_balance
 *           is a subset of one of these domains, while there are as
 *           many such domains as possible, each as small as possible.
 *    doms - Conversion of 'csa' to an array of cpumasks, for passing to
 *           the kernel/sched.c routine partition_sched_domains() in a
 *           convenient format, that can be easily compared to the prior
 *           value to determine what partition elements (sched domains)
 *           were changed (added or removed).
 *
 * Finding the best partition (set of domains):
 *    The triple nested loops below over i, j, k scan over the
 *    load balanced cpusets (using the array of cpuset pointers in
 *    csa[]) looking for pairs of cpusets that have overlapping
 *    cpus_allowed but don't have the same 'pn' partition number,
 *    and merges them into the same partition number.  It keeps
 *    looping on the 'restart' label until it can no longer find
 *    any such pairs.
 *
 *    The union of the cpus_allowed masks from the set of
 *    all cpusets having the same 'pn' value then forms the one
 *    element of the partition (one sched domain) to be passed to
 *    partition_sched_domains().
 */
static int generate_sched_domains(cpumask_t **domains,
			struct sched_domain_attr **attributes)
{
	LIST_HEAD(q);		/* queue of cpusets to be scanned */
	struct cpuset *cp;	/* scans q */
	struct cpuset **csa;	/* array of all cpuset ptrs */
	int csn;		/* how many cpuset ptrs in csa so far */
	int i, j, k;		/* indices for partition finding loops */
	cpumask_t *doms;	/* resulting partition; i.e. sched domains */
	struct sched_domain_attr *dattr;  /* attributes for custom domains */
	int ndoms;		/* number of sched domains in result */
	int nslot;		/* next empty doms[] cpumask_t slot */

	ndoms = 0;
	doms = NULL;
	dattr = NULL;
	csa = NULL;

	/* Special case for the 99% of systems with one, full, sched domain */
	if (is_sched_load_balance(&top_cpuset)) {
		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
		if (!doms)
			goto done;

		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
		if (dattr) {
			*dattr = SD_ATTR_INIT;
			update_domain_attr_tree(dattr, &top_cpuset);
		}
		*doms = top_cpuset.cpus_allowed;

		ndoms = 1;
		goto done;
	}

	csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
	if (!csa)
		goto done;
	csn = 0;

	list_add(&top_cpuset.stack_list, &q);
	while (!list_empty(&q)) {
		struct cgroup *cont;
		struct cpuset *child;	/* scans child cpusets of cp */

		cp = list_first_entry(&q, struct cpuset, stack_list);
		list_del(q.next);

		if (cpus_empty(cp->cpus_allowed))
			continue;

		/*
		 * All child cpusets contain a subset of the parent's cpus,
		 * so just skip them, and then update_domain_attr_tree()
		 * calculates the relax_domain_level of the corresponding
		 * sched domain.
		 */
		if (is_sched_load_balance(cp)) {
			csa[csn++] = cp;
			continue;
		}

		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &q);
		}
	}

	for (i = 0; i < csn; i++)
		csa[i]->pn = i;
	ndoms = csn;

restart:
	/* Find the best partition (set of sched domains) */
	for (i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		int apn = a->pn;

		for (j = 0; j < csn; j++) {
			struct cpuset *b = csa[j];
			int bpn = b->pn;

			if (apn != bpn && cpusets_overlap(a, b)) {
				for (k = 0; k < csn; k++) {
					struct cpuset *c = csa[k];

					if (c->pn == bpn)
						c->pn = apn;
				}
				ndoms--;	/* one less element */
				goto restart;
			}
		}
	}

	/*
	 * Now we know how many domains to create.
	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
	 */
	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
	if (!doms) {
		ndoms = 0;
		goto done;
	}

	/*
	 * The rest of the code, including the scheduler, can deal with
	 * the dattr==NULL case.  No need to abort if the alloc fails.
	 */
	dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);

	for (nslot = 0, i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		cpumask_t *dp;
		int apn = a->pn;

		if (apn < 0) {
			/* Skip completed partitions */
			continue;
		}

		dp = doms + nslot;

		if (nslot == ndoms) {
			static int warnings = 10;
			if (warnings) {
				printk(KERN_WARNING
					"rebuild_sched_domains confused:"
					" nslot %d, ndoms %d, csn %d, i %d,"
					" apn %d\n",
					nslot, ndoms, csn, i, apn);
				warnings--;
			}
			continue;
		}

		cpus_clear(*dp);
		if (dattr)
			*(dattr + nslot) = SD_ATTR_INIT;
		for (j = i; j < csn; j++) {
			struct cpuset *b = csa[j];

			if (apn == b->pn) {
				cpus_or(*dp, *dp, b->cpus_allowed);
				if (dattr)
					update_domain_attr_tree(dattr + nslot, b);

				/* Done with this partition */
				b->pn = -1;
			}
		}
		nslot++;
	}
	BUG_ON(nslot != ndoms);

done:
	kfree(csa);

	*domains    = doms;
	*attributes = dattr;
	return ndoms;
}
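The restart loop is the heart of generate_sched_domains(). The standalone sketch below reproduces just that merging step on plain 64-bit masks; find_partition and mask_t are hypothetical names standing in for the kernel types. Overlapping masks collapse into one partition number, and the number of surviving labels is the number of sched domains.

#include <stdio.h>

typedef unsigned long long mask_t;

/*
 * Mirror of the 'restart' loop above: whenever two masks with
 * different partition numbers overlap, relabel the whole second
 * class and start over.  Returns the resulting domain count.
 */
static int find_partition(const mask_t *cpus, int *pn, int n)
{
	int i, j, k, ndoms = n;

	for (i = 0; i < n; i++)
		pn[i] = i;
restart:
	for (i = 0; i < n; i++) {
		for (j = 0; j < n; j++) {
			if (pn[i] != pn[j] && (cpus[i] & cpus[j])) {
				int bpn = pn[j];

				for (k = 0; k < n; k++)
					if (pn[k] == bpn)
						pn[k] = pn[i];
				ndoms--;	/* one less element */
				goto restart;
			}
		}
	}
	return ndoms;
}

int main(void)
{
	/* cpusets on CPUs {0,1}, {1,2}, {4,5}: first two share CPU 1 */
	mask_t cpus[] = { 0x3ULL, 0x6ULL, 0x30ULL };
	int pn[3];

	printf("ndoms = %d\n", find_partition(cpus, pn, 3));	/* ndoms = 2 */
	return 0;
}

Note the result is a partial partition in the sense of the comment above: the two merged cpusets form one domain, the third forms another, and any CPU in no load-balanced cpuset belongs to no domain at all.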
/*
 * Rebuild scheduler domains.
 *
 * Call with neither cgroup_mutex held nor within get_online_cpus().
 * Takes both cgroup_mutex and get_online_cpus().
 *
 * Cannot be directly called from cpuset code handling changes
 * to the cpuset pseudo-filesystem, because it cannot be called
 * from code that already holds cgroup_mutex.
 */
static void do_rebuild_sched_domains(struct work_struct *unused)
{
	struct sched_domain_attr *attr;
	cpumask_t *doms;
	int ndoms;

	get_online_cpus();

	/* Generate domain masks and attrs */
	cgroup_lock();
	ndoms = generate_sched_domains(&doms, &attr);
	cgroup_unlock();

	/* Have scheduler rebuild the domains */
	partition_sched_domains(ndoms, doms, attr);

	put_online_cpus();
}

static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);

/*
 * Rebuild scheduler domains, asynchronously via workqueue.
 *
 * If the flag 'sched_load_balance' of any cpuset with non-empty
 * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
 * which has that flag enabled, or if any cpuset with a non-empty
 * 'cpus' is removed, then call this routine to rebuild the
 * scheduler's dynamic sched domains.
 *
 * The rebuild_sched_domains() and partition_sched_domains()
 * routines must nest cgroup_lock() inside get_online_cpus(),
 * but such cpuset changes as these must nest that locking the
 * other way, holding cgroup_lock() for much of the code.
 *
 * So in order to avoid an ABBA deadlock, the cpuset code handling
 * these user changes delegates the actual sched domain rebuilding
 * to a separate workqueue thread, which ends up processing the
 * above do_rebuild_sched_domains() function.
 */
static void async_rebuild_sched_domains(void)
{
	schedule_work(&rebuild_sched_domains_work);
}

/*
 * Accomplishes the same scheduler domain rebuild as the above
 * async_rebuild_sched_domains(), however it directly calls the
 * rebuild routine synchronously rather than calling it via an
 * asynchronous work thread.
 *
 * This can only be called from code that is not holding
 * cgroup_mutex (not nested in a cgroup_lock() call.)
 */
void rebuild_sched_domains(void)
{
	do_rebuild_sched_domains(NULL);
}
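The ABBA-avoidance pattern described above is not cpuset-specific: the path that already holds lock B never takes lock A itself; it only queues work, and a single worker takes the locks in the fixed A-then-B order. Below is a minimal userspace analogue using pthreads; all names are hypothetical, and the one-shot flag-and-condvar pair is only a stand-in for schedule_work() and the kernel workqueue, not an equivalent.

#include <pthread.h>
#include <stdio.h>

/* One pending unit of work, protected by wq_lock. */
static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq_cond = PTHREAD_COND_INITIALIZER;
static int wq_pending;

/* Stands in for the A-then-B path (get_online_cpus -> cgroup_lock). */
static void rebuild(void)
{
	printf("rebuilding domains with locks taken A-then-B\n");
}

/* Worker: the only context that ever runs rebuild(). */
static void *worker(void *arg)
{
	pthread_mutex_lock(&wq_lock);
	while (!wq_pending)
		pthread_cond_wait(&wq_cond, &wq_lock);
	wq_pending = 0;
	pthread_mutex_unlock(&wq_lock);

	rebuild();
	return NULL;
}

/* Called while the B lock is held: just queue, never take A here. */
static void async_rebuild(void)
{
	pthread_mutex_lock(&wq_lock);
	wq_pending = 1;
	pthread_cond_signal(&wq_cond);
	pthread_mutex_unlock(&wq_lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	async_rebuild();
	pthread_join(t, NULL);
	return 0;
}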
/**
 * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
 * @tsk: task to test
 * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
 *
 * Call with cgroup_mutex held.  May take callback_mutex during call.
 * Called for each task in a cgroup by cgroup_scan_tasks().
 * Return nonzero if this task's cpus_allowed mask should be changed (in other
 * words, if its mask is not equal to its cpuset's mask).
 */
static int cpuset_test_cpumask(struct task_struct *tsk,
			       struct cgroup_scanner *scan)
{
	return !cpus_equal(tsk->cpus_allowed,
			(cgroup_cs(scan->cg))->cpus_allowed);
}

/**
 * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
 * @tsk: task to test
 * @scan: struct cgroup_scanner containing the cgroup of the task
 *
 * Called by cgroup_scan_tasks() for each task in a cgroup whose
 * cpus_allowed mask needs to be changed.
 *
 * We don't need to re-check for the cgroup/cpuset membership, since we're
 * holding cgroup_lock() at this point.
 */
static void cpuset_change_cpumask(struct task_struct *tsk,
				  struct cgroup_scanner *scan)
{
	set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
}

/**
 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
 * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
 *
 * Called with cgroup_mutex held.
 *
 * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
 * calling callback functions for each.
 *
 * No return value.  It's guaranteed that cgroup_scan_tasks() always returns 0
 * if @heap != NULL.
 */
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
	struct cgroup_scanner scan;

	scan.cg = cs->css.cgroup;
	scan.test_task = cpuset_test_cpumask;
	scan.process_task = cpuset_change_cpumask;
	scan.heap = heap;
	cgroup_scan_tasks(&scan);
}

/**
 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
 * @cs: the cpuset to consider
 * @buf: buffer of cpu numbers written to this cpuset
 */
static int update_cpumask(struct cpuset *cs, const char *buf)
{
	struct ptr_heap heap;
	struct cpuset trialcs;
	int retval;
	int is_load_balanced;

	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
	if (cs == &top_cpuset)
		return -EACCES;

	trialcs = *cs;

	/*
	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
	 * Since cpulist_parse() fails on an empty mask, we special case
	 * that parsing.  The validate_change() call ensures that cpusets
	 * with tasks have cpus.
	 */
	if (!*buf) {
		cpus_clear(trialcs.cpus_allowed);
	} else {
		retval = cpulist_parse(buf, trialcs.cpus_allowed);
		if (retval < 0)
			return retval;
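cpulist_parse() above turns a CPU-list string such as "0-3,6" (the format written to a cpuset's 'cpus' file) into a cpumask. For reference, a minimal userspace sketch of that parsing, assuming a hypothetical parse_cpulist() over a 64-bit mask rather than the kernel's cpulist_parse()/cpumask_t:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long mask_t;

/*
 * Userspace analogue of cpulist_parse(): turn "0-3,6" into a bitmask.
 * Returns 0 on success, -1 on malformed input or a CPU >= 64.
 */
static int parse_cpulist(const char *s, mask_t *mask)
{
	*mask = 0;
	while (*s) {
		char *end;
		long lo = strtol(s, &end, 10), hi = lo;

		if (end == s || lo < 0 || lo > 63)
			return -1;
		if (*end == '-') {		/* "lo-hi" range */
			s = end + 1;
			hi = strtol(s, &end, 10);
			if (end == s || hi < lo || hi > 63)
				return -1;
		}
		while (lo <= hi)
			*mask |= 1ULL << lo++;
		if (*end == ',')
			end++;
		else if (*end)
			return -1;
		s = end;
	}
	return 0;
}

int main(void)
{
	mask_t m;

	if (parse_cpulist("0-3,6", &m) == 0)
		printf("mask = 0x%llx\n", m);	/* mask = 0x4f */
	return 0;
}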