📄 cpuset.c
		mmput(mm);
	}
}

/* The various types of files and directories in a cpuset file system */

typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_EXCLUSIVE:
		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_HARDWALL:
		retval = update_flag(CS_MEM_HARDWALL, cs, val);
		break;
	case FILE_SCHED_LOAD_BALANCE:
		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
		break;
	case FILE_MEMORY_MIGRATE:
		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
		break;
	case FILE_MEMORY_PRESSURE_ENABLED:
		cpuset_memory_pressure_enabled = !!val;
		break;
	case FILE_MEMORY_PRESSURE:
		retval = -EACCES;
		break;
	case FILE_SPREAD_PAGE:
		retval = update_flag(CS_SPREAD_PAGE, cs, val);
		cs->mems_generation = cpuset_mems_generation++;
		break;
	case FILE_SPREAD_SLAB:
		retval = update_flag(CS_SPREAD_SLAB, cs, val);
		cs->mems_generation = cpuset_mems_generation++;
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		retval = update_relax_domain_level(cs, val);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

/*
 * Common handling for a write to a "cpus" or "mems" file.
 */
static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
{
	int retval = 0;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (cft->private) {
	case FILE_CPULIST:
		retval = update_cpumask(cgroup_cs(cgrp), buf);
		break;
	case FILE_MEMLIST:
		retval = update_nodemask(cgroup_cs(cgrp), buf);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

/*
 * These ascii lists should be read in a single call, by using a user
 * buffer large enough to hold the entire map.  If read in smaller
 * chunks, there is no guarantee of atomicity.  Since the display format
 * used, list of ranges of sequential numbers, is variable length,
 * and since these maps can change value dynamically, one could read
 * gibberish by doing partial reads while a list was changing.
 * A single large read to a buffer that crosses a page boundary is
 * ok, because the result being copied to user land is not recomputed
 * across a page fault.
 */
static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
	cpumask_t mask;

	mutex_lock(&callback_mutex);
	mask = cs->cpus_allowed;
	mutex_unlock(&callback_mutex);

	return cpulist_scnprintf(page, PAGE_SIZE, mask);
}

static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
	nodemask_t mask;

	mutex_lock(&callback_mutex);
	mask = cs->mems_allowed;
	mutex_unlock(&callback_mutex);

	return nodelist_scnprintf(page, PAGE_SIZE, mask);
}

static ssize_t cpuset_common_file_read(struct cgroup *cont,
				       struct cftype *cft,
				       struct file *file,
				       char __user *buf,
				       size_t nbytes, loff_t *ppos)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;
	char *page;
	ssize_t retval = 0;
	char *s;

	if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
		return -ENOMEM;

	s = page;

	switch (type) {
	case FILE_CPULIST:
		s += cpuset_sprintf_cpulist(s, cs);
		break;
	case FILE_MEMLIST:
		s += cpuset_sprintf_memlist(s, cs);
		break;
	default:
		retval = -EINVAL;
		goto out;
	}
	*s++ = '\n';

	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
	free_page((unsigned long)page);
	return retval;
}

static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		return is_cpu_exclusive(cs);
	case FILE_MEM_EXCLUSIVE:
		return is_mem_exclusive(cs);
	case FILE_MEM_HARDWALL:
		return is_mem_hardwall(cs);
	case FILE_SCHED_LOAD_BALANCE:
		return is_sched_load_balance(cs);
	case FILE_MEMORY_MIGRATE:
		return is_memory_migrate(cs);
	case FILE_MEMORY_PRESSURE_ENABLED:
		return cpuset_memory_pressure_enabled;
	case FILE_MEMORY_PRESSURE:
		return fmeter_getrate(&cs->fmeter);
	case FILE_SPREAD_PAGE:
		return is_spread_page(cs);
	case FILE_SPREAD_SLAB:
		return is_spread_slab(cs);
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;

	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		return cs->relax_domain_level;
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

/*
 * for the common functions, 'private' gives the type of file
 */

static struct cftype files[] = {
	{
		.name = "cpus",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * NR_CPUS),
		.private = FILE_CPULIST,
	},

	{
		.name = "mems",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * MAX_NUMNODES),
		.private = FILE_MEMLIST,
	},

	{
		.name = "cpu_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_CPU_EXCLUSIVE,
	},

	{
		.name = "mem_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_EXCLUSIVE,
	},

	{
		.name = "mem_hardwall",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_HARDWALL,
	},

	{
		.name = "sched_load_balance",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SCHED_LOAD_BALANCE,
	},

	{
		.name = "sched_relax_domain_level",
		.read_s64 = cpuset_read_s64,
		.write_s64 = cpuset_write_s64,
		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
	},

	{
		.name = "memory_migrate",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_MIGRATE,
	},

	{
		.name = "memory_pressure",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_PRESSURE,
	},

	{
		.name = "memory_spread_page",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_PAGE,
	},

	{
.name = "memory_spread_slab", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, },};static struct cftype cft_memory_pressure_enabled = { .name = "memory_pressure_enabled", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED,};static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont){ int err; err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); if (err) return err; /* memory_pressure_enabled is in root cpuset only */ if (!cont->parent) err = cgroup_add_file(cont, ss, &cft_memory_pressure_enabled); return err;}/* * post_clone() is called at the end of cgroup_clone(). * 'cgroup' was just created automatically as a result of * a cgroup_clone(), and the current task is about to * be moved into 'cgroup'. * * Currently we refuse to set up the cgroup - thereby * refusing the task to be entered, and as a result refusing * the sys_unshare() or clone() which initiated it - if any * sibling cpusets have exclusive cpus or mem. * * If this becomes a problem for some users who wish to * allow that scenario, then cpuset_post_clone() could be * changed to grant parent->cpus_allowed-sibling_cpus_exclusive * (and likewise for mems) to the new cgroup. Called with cgroup_mutex * held. */static void cpuset_post_clone(struct cgroup_subsys *ss, struct cgroup *cgroup){ struct cgroup *parent, *child; struct cpuset *cs, *parent_cs; parent = cgroup->parent; list_for_each_entry(child, &parent->children, sibling) { cs = cgroup_cs(child); if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) return; } cs = cgroup_cs(cgroup); parent_cs = cgroup_cs(parent); cs->mems_allowed = parent_cs->mems_allowed; cs->cpus_allowed = parent_cs->cpus_allowed; return;}/* * cpuset_create - create a cpuset * ss: cpuset cgroup subsystem * cont: control group that the new cpuset will be part of */static struct cgroup_subsys_state *cpuset_create( struct cgroup_subsys *ss, struct cgroup *cont){ struct cpuset *cs; struct cpuset *parent; if (!cont->parent) { /* This is early initialization for the top cgroup */ top_cpuset.mems_generation = cpuset_mems_generation++; return &top_cpuset.css; } parent = cgroup_cs(cont->parent); cs = kmalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); cpuset_update_task_memory_state(); cs->flags = 0; if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpus_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); cs->mems_generation = cpuset_mems_generation++; fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; cs->parent = parent; number_of_cpusets++; return &cs->css ;}/* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which * will call async_rebuild_sched_domains(). */static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont){ struct cpuset *cs = cgroup_cs(cont); cpuset_update_task_memory_state(); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); number_of_cpusets--; kfree(cs);}struct cgroup_subsys cpuset_subsys = { .name = "cpuset", .create = cpuset_create, .destroy = cpuset_destroy, .can_attach = cpuset_can_attach, .attach = cpuset_attach, .populate = cpuset_populate, .post_clone = cpuset_post_clone, .subsys_id = cpuset_subsys_id, .early_init = 1,};