📄 memcontrol.c
字号:
int retval = 0; struct mem_cgroup *mem = mem_cgroup_from_cont(cont); struct cgroup *parent = cont->parent; struct mem_cgroup *parent_mem = NULL; if (parent) parent_mem = mem_cgroup_from_cont(parent); cgroup_lock(); /* * If parent's use_hiearchy is set, we can't make any modifications * in the child subtrees. If it is unset, then the change can * occur, provided the current cgroup has no children. * * For the root cgroup, parent_mem is NULL, we allow value to be * set if there are no children. */ if ((!parent_mem || !parent_mem->use_hierarchy) && (val == 1 || val == 0)) { if (list_empty(&cont->children)) mem->use_hierarchy = val; else retval = -EBUSY; } else retval = -EINVAL; cgroup_unlock(); return retval;}static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft){ struct mem_cgroup *mem = mem_cgroup_from_cont(cont); u64 val = 0; int type, name; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (type) { case _MEM: val = res_counter_read_u64(&mem->res, name); break; case _MEMSWAP: if (do_swap_account) val = res_counter_read_u64(&mem->memsw, name); break; default: BUG(); break; } return val;}/* * The user of this function is... * RES_LIMIT. */static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, const char *buffer){ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); int type, name; unsigned long long val; int ret; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (name) { case RES_LIMIT: /* This function does all necessary parse...reuse it */ ret = res_counter_memparse_write_strategy(buffer, &val); if (ret) break; if (type == _MEM) ret = mem_cgroup_resize_limit(memcg, val); else ret = mem_cgroup_resize_memsw_limit(memcg, val); break; default: ret = -EINVAL; /* should be BUG() ? */ break; } return ret;}static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, unsigned long long *mem_limit, unsigned long long *memsw_limit){ struct cgroup *cgroup; unsigned long long min_limit, min_memsw_limit, tmp; min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT); min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); cgroup = memcg->css.cgroup; if (!memcg->use_hierarchy) goto out; while (cgroup->parent) { cgroup = cgroup->parent; memcg = mem_cgroup_from_cont(cgroup); if (!memcg->use_hierarchy) break; tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); min_limit = min(min_limit, tmp); tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT); min_memsw_limit = min(min_memsw_limit, tmp); }out: *mem_limit = min_limit; *memsw_limit = min_memsw_limit; return;}static int mem_cgroup_reset(struct cgroup *cont, unsigned int event){ struct mem_cgroup *mem; int type, name; mem = mem_cgroup_from_cont(cont); type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); switch (name) { case RES_MAX_USAGE: if (type == _MEM) res_counter_reset_max(&mem->res); else res_counter_reset_max(&mem->memsw); break; case RES_FAILCNT: if (type == _MEM) res_counter_reset_failcnt(&mem->res); else res_counter_reset_failcnt(&mem->memsw); break; } return 0;}static const struct mem_cgroup_stat_desc { const char *msg; u64 unit;} mem_cgroup_stat_desc[] = { [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, }, [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, [MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, }, [MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, },};static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, struct cgroup_map_cb *cb){ struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); struct mem_cgroup_stat *stat = &mem_cont->stat; int i; for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) { s64 val; val = mem_cgroup_read_stat(stat, i); val *= mem_cgroup_stat_desc[i].unit; cb->fill(cb, mem_cgroup_stat_desc[i].msg, val); } /* showing # of active pages */ { unsigned long active_anon, inactive_anon; unsigned long active_file, inactive_file; unsigned long unevictable; inactive_anon = mem_cgroup_get_all_zonestat(mem_cont, LRU_INACTIVE_ANON); active_anon = mem_cgroup_get_all_zonestat(mem_cont, LRU_ACTIVE_ANON); inactive_file = mem_cgroup_get_all_zonestat(mem_cont, LRU_INACTIVE_FILE); active_file = mem_cgroup_get_all_zonestat(mem_cont, LRU_ACTIVE_FILE); unevictable = mem_cgroup_get_all_zonestat(mem_cont, LRU_UNEVICTABLE); cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE); cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE); cb->fill(cb, "active_file", (active_file) * PAGE_SIZE); cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE); cb->fill(cb, "unevictable", unevictable * PAGE_SIZE); } { unsigned long long limit, memsw_limit; memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit); cb->fill(cb, "hierarchical_memory_limit", limit); if (do_swap_account) cb->fill(cb, "hierarchical_memsw_limit", memsw_limit); }#ifdef CONFIG_DEBUG_VM cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); { int nid, zid; struct mem_cgroup_per_zone *mz; unsigned long recent_rotated[2] = {0, 0}; unsigned long recent_scanned[2] = {0, 0}; for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); recent_rotated[0] += mz->reclaim_stat.recent_rotated[0]; recent_rotated[1] += mz->reclaim_stat.recent_rotated[1]; recent_scanned[0] += mz->reclaim_stat.recent_scanned[0]; recent_scanned[1] += mz->reclaim_stat.recent_scanned[1]; } cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); cb->fill(cb, "recent_rotated_file", recent_rotated[1]); cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); cb->fill(cb, "recent_scanned_file", recent_scanned[1]); }#endif return 0;}static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft){ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); return get_swappiness(memcg);}static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, u64 val){ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct mem_cgroup *parent; if (val > 100) return -EINVAL; if (cgrp->parent == NULL) return -EINVAL; parent = mem_cgroup_from_cont(cgrp->parent); cgroup_lock(); /* If under hierarchy, only empty-root can set this value */ if ((parent->use_hierarchy) || (memcg->use_hierarchy && !list_empty(&cgrp->children))) { cgroup_unlock(); return -EINVAL; } spin_lock(&memcg->reclaim_param_lock); memcg->swappiness = val; spin_unlock(&memcg->reclaim_param_lock); cgroup_unlock(); return 0;}static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), .read_u64 = mem_cgroup_read, }, { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), .write_string = mem_cgroup_write, .read_u64 = mem_cgroup_read, }, { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { .name = "stat", .read_map = mem_control_stat_show, }, { .name = "force_empty", .trigger = mem_cgroup_force_empty_write, }, { .name = "use_hierarchy", .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, { .name = "swappiness", .read_u64 = mem_cgroup_swappiness_read, .write_u64 = mem_cgroup_swappiness_write, },};#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAPstatic struct cftype memsw_cgroup_files[] = { { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), .read_u64 = mem_cgroup_read, }, { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), .write_string = mem_cgroup_write, .read_u64 = mem_cgroup_read, }, { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, },};static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss){ if (!do_swap_account) return 0; return cgroup_add_files(cont, ss, memsw_cgroup_files, ARRAY_SIZE(memsw_cgroup_files));};#elsestatic int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss){ return 0;}#endifstatic int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node){ struct mem_cgroup_per_node *pn; struct mem_cgroup_per_zone *mz; enum lru_list l; int zone, tmp = node; /* * This routine is called against possible nodes. * But it's BUG to call kmalloc() against offline node. * * TODO: this routine can waste much memory for nodes which will * never be onlined. It's better to use memory hotplug callback * function. */ if (!node_state(node, N_NORMAL_MEMORY)) tmp = -1; pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp); if (!pn) return 1; mem->info.nodeinfo[node] = pn; memset(pn, 0, sizeof(*pn)); for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; for_each_lru(l) INIT_LIST_HEAD(&mz->lists[l]); } return 0;}static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node){ kfree(mem->info.nodeinfo[node]);}static int mem_cgroup_size(void){ int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu); return sizeof(struct mem_cgroup) + cpustat_size;}static struct mem_cgroup *mem_cgroup_alloc(void){ struct mem_cgroup *mem; int size = mem_cgroup_size(); if (size < PAGE_SIZE) mem = kmalloc(size, GFP_KERNEL); else mem = vmalloc(size); if (mem) memset(mem, 0, size); return mem;}/* * At destroying mem_cgroup, references from swap_cgroup can remain. * (scanning all at force_empty is too costly...) * * Instead of clearing all references at force_empty, we remember * the number of reference from swap_cgroup and free mem_cgroup when * it goes down to 0. * * Removal of cgroup itself succeeds regardless of refs from swap. */static void __mem_cgroup_free(struct mem_cgroup *mem){ int node; for_each_node_state(node, N_POSSIBLE) free_mem_cgroup_per_zone_info(mem, node); if (mem_cgroup_size() < PAGE_SIZE) kfree(mem); else vfree(mem);}static void mem_cgroup_get(struct mem_cgroup *mem){ atomic_inc(&mem->refcnt);}static void mem_cgroup_put(struct mem_cgroup *mem){ if (atomic_dec_and_test(&mem->refcnt)) { struct mem_cgroup *parent = parent_mem_cgroup(mem); __mem_cgroup_free(mem); if (parent) mem_cgroup_put(parent); }}/* * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. */static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem){ if (!mem->res.parent) return NULL; return mem_cgroup_from_res_counter(mem->res.parent, res);}#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAPstatic void __init enable_swap_cgroup(void){ if (!mem_cgroup_disabled() && really_do_swap_account) do_swap_account = 1;}#elsestatic void __init enable_swap_cgroup(void){}#endifstatic struct cgroup_subsys_state * __refmem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont){ struct mem_cgroup *mem, *parent; int node; mem = mem_cgroup_alloc(); if (!mem) return ERR_PTR(-ENOMEM); for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; /* root ? */ if (cont->parent == NULL) { enable_swap_cgroup(); parent = NULL; } else { parent = mem_cgroup_from_cont(cont->parent); mem->use_hierarchy = parent->use_hierarchy; } if (parent && parent->use_hierarchy) { res_counter_init(&mem->res, &parent->res); res_counter_init(&mem->memsw, &parent->memsw); /* * We increment refcnt of the parent to ensure that we can * safely access it on res_counter_charge/uncharge. * This refcnt will be decremented when freeing this * mem_cgroup(see mem_cgroup_put). */ mem_cgroup_get(parent); } else { res_counter_init(&mem->res, NULL); res_counter_init(&mem->memsw, NULL); } mem->last_scanned_child = NULL; spin_lock_init(&mem->reclaim_param_lock); if (parent) mem->swappiness = get_swappiness(parent); atomic_set(&mem->refcnt, 1); return &mem->css;free_out: __mem_cgroup_free(mem); return ERR_PTR(-ENOMEM);}static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cont){ struct mem_cgroup *mem = mem_cgroup_from_cont(cont); mem_cgroup_force_empty(mem, false);}static void mem_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont){ struct mem_cgroup *mem = mem_cgroup_from_cont(cont); struct mem_cgroup *last_scanned_child = mem->last_scanned_child; if (last_scanned_child) { VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); mem_cgroup_put(last_scanned_child); } mem_cgroup_put(mem);}static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont){ int ret; ret = cgroup_add_files(cont, ss, mem_cgroup_files, ARRAY_SIZE(mem_cgroup_files)); if (!ret) ret = register_memsw_files(cont, ss); return ret;}static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, struct task_struct *p){ mutex_lock(&memcg_tasklist); /* * FIXME: It's better to move charges of this process from old * memcg to new memcg. But it's just on TODO-List now. */ mutex_unlock(&memcg_tasklist);}struct cgroup_subsys mem_cgroup_subsys = { .name = "memory", .subsys_id = mem_cgroup_subsys_id, .create = mem_cgroup_create, .pre_destroy = mem_cgroup_pre_destroy, .destroy = mem_cgroup_destroy, .populate = mem_cgroup_populate, .attach = mem_cgroup_move_task, .early_init = 0,};#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAPstatic int __init disable_swap_account(char *s){ really_do_swap_account = 0; return 1;}__setup("noswapaccount", disable_swap_account);#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -