📄 cpuset.c
		mmput(mm);
	}
}

/* The various types of files and directories in a cpuset file system */

typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_EXCLUSIVE:
		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_HARDWALL:
		retval = update_flag(CS_MEM_HARDWALL, cs, val);
		break;
	case FILE_SCHED_LOAD_BALANCE:
		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
		break;
	case FILE_MEMORY_MIGRATE:
		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
		break;
	case FILE_MEMORY_PRESSURE_ENABLED:
		cpuset_memory_pressure_enabled = !!val;
		break;
	case FILE_MEMORY_PRESSURE:
		retval = -EACCES;
		break;
	case FILE_SPREAD_PAGE:
		retval = update_flag(CS_SPREAD_PAGE, cs, val);
		cs->mems_generation = cpuset_mems_generation++;
		break;
	case FILE_SPREAD_SLAB:
		retval = update_flag(CS_SPREAD_SLAB, cs, val);
		cs->mems_generation = cpuset_mems_generation++;
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		retval = update_relax_domain_level(cs, val);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

/*
 * Common handling for a write to a "cpus" or "mems" file.
 */
static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
{
	int retval = 0;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (cft->private) {
	case FILE_CPULIST:
		retval = update_cpumask(cgroup_cs(cgrp), buf);
		break;
	case FILE_MEMLIST:
		retval = update_nodemask(cgroup_cs(cgrp), buf);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

/*
 * These ascii lists should be read in a single call, by using a user
 * buffer large enough to hold the entire map.  If read in smaller
 * chunks, there is no guarantee of atomicity.  Since the display format
 * used, list of ranges of sequential numbers, is variable length,
 * and since these maps can change value dynamically, one could read
 * gibberish by doing partial reads while a list was changing.
 * A single large read to a buffer that crosses a page boundary is
 * ok, because the result being copied to user land is not recomputed
 * across a page fault.
 */
static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
	cpumask_t mask;

	mutex_lock(&callback_mutex);
	mask = cs->cpus_allowed;
	mutex_unlock(&callback_mutex);

	return cpulist_scnprintf(page, PAGE_SIZE, mask);
}

static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
	nodemask_t mask;

	mutex_lock(&callback_mutex);
	mask = cs->mems_allowed;
	mutex_unlock(&callback_mutex);

	return nodelist_scnprintf(page, PAGE_SIZE, mask);
}

static ssize_t cpuset_common_file_read(struct cgroup *cont,
				       struct cftype *cft,
				       struct file *file,
				       char __user *buf,
				       size_t nbytes, loff_t *ppos)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;
	char *page;
	ssize_t retval = 0;
	char *s;

	if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
		return -ENOMEM;

	s = page;

	switch (type) {
	case FILE_CPULIST:
		s += cpuset_sprintf_cpulist(s, cs);
		break;
	case FILE_MEMLIST:
		s += cpuset_sprintf_memlist(s, cs);
		break;
	default:
		retval = -EINVAL;
		goto out;
	}
	*s++ = '\n';

	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
	free_page((unsigned long)page);
	return retval;
}

static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		return is_cpu_exclusive(cs);
	case FILE_MEM_EXCLUSIVE:
		return is_mem_exclusive(cs);
	case FILE_MEM_HARDWALL:
		return is_mem_hardwall(cs);
	case FILE_SCHED_LOAD_BALANCE:
		return is_sched_load_balance(cs);
	case FILE_MEMORY_MIGRATE:
		return is_memory_migrate(cs);
	case FILE_MEMORY_PRESSURE_ENABLED:
		return cpuset_memory_pressure_enabled;
	case FILE_MEMORY_PRESSURE:
		return fmeter_getrate(&cs->fmeter);
	case FILE_SPREAD_PAGE:
		return is_spread_page(cs);
	case FILE_SPREAD_SLAB:
		return is_spread_slab(cs);
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;

	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		return cs->relax_domain_level;
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

/*
 * for the common functions, 'private' gives the type of file
 */

static struct cftype files[] = {
	{
		.name = "cpus",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * NR_CPUS),
		.private = FILE_CPULIST,
	},

	{
		.name = "mems",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * MAX_NUMNODES),
		.private = FILE_MEMLIST,
	},

	{
		.name = "cpu_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_CPU_EXCLUSIVE,
	},

	{
		.name = "mem_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_EXCLUSIVE,
	},

	{
		.name = "mem_hardwall",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_HARDWALL,
	},

	{
		.name = "sched_load_balance",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SCHED_LOAD_BALANCE,
	},

	{
		.name = "sched_relax_domain_level",
		.read_s64 = cpuset_read_s64,
		.write_s64 = cpuset_write_s64,
		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
	},

	{
		.name = "memory_migrate",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_MIGRATE,
	},

	{
		.name = "memory_pressure",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_PRESSURE,
	},

	{
		.name = "memory_spread_page",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_PAGE,
	},

	{
.name = "memory_spread_slab", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, },};static struct cftype cft_memory_pressure_enabled = { .name = "memory_pressure_enabled", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED,};static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont){ int err; err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); if (err) return err; /* memory_pressure_enabled is in root cpuset only */ if (!cont->parent) err = cgroup_add_file(cont, ss, &cft_memory_pressure_enabled); return err;}/* * post_clone() is called at the end of cgroup_clone(). * 'cgroup' was just created automatically as a result of * a cgroup_clone(), and the current task is about to * be moved into 'cgroup'. * * Currently we refuse to set up the cgroup - thereby * refusing the task to be entered, and as a result refusing * the sys_unshare() or clone() which initiated it - if any * sibling cpusets have exclusive cpus or mem. * * If this becomes a problem for some users who wish to * allow that scenario, then cpuset_post_clone() could be * changed to grant parent->cpus_allowed-sibling_cpus_exclusive * (and likewise for mems) to the new cgroup. Called with cgroup_mutex * held. */static void cpuset_post_clone(struct cgroup_subsys *ss, struct cgroup *cgroup){ struct cgroup *parent, *child; struct cpuset *cs, *parent_cs; parent = cgroup->parent; list_for_each_entry(child, &parent->children, sibling) { cs = cgroup_cs(child); if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) return; } cs = cgroup_cs(cgroup); parent_cs = cgroup_cs(parent); cs->mems_allowed = parent_cs->mems_allowed; cs->cpus_allowed = parent_cs->cpus_allowed; return;}/* * cpuset_create - create a cpuset * ss: cpuset cgroup subsystem * cont: control group that the new cpuset will be part of */static struct cgroup_subsys_state *cpuset_create( struct cgroup_subsys *ss, struct cgroup *cont){ struct cpuset *cs; struct cpuset *parent; if (!cont->parent) { /* This is early initialization for the top cgroup */ top_cpuset.mems_generation = cpuset_mems_generation++; return &top_cpuset.css; } parent = cgroup_cs(cont->parent); cs = kmalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); cpuset_update_task_memory_state(); cs->flags = 0; if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpus_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); cs->mems_generation = cpuset_mems_generation++; fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; cs->parent = parent; number_of_cpusets++; return &cs->css ;}/* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which * will call async_rebuild_sched_domains(). */static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont){ struct cpuset *cs = cgroup_cs(cont); cpuset_update_task_memory_state(); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); number_of_cpusets--; kfree(cs);}struct cgroup_subsys cpuset_subsys = { .name = "cpuset", .create = cpuset_create, .destroy = cpuset_destroy, .can_attach = cpuset_can_attach, .attach = cpuset_attach, .populate = cpuset_populate, .post_clone = cpuset_post_clone, .subsys_id = cpuset_subsys_id, .early_init = 1,};