📄 memcontrol.c
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					struct list_head *dst,
					unsigned long *scanned, int order,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					int active, int file)
{
	unsigned long nr_taken = 0;
	struct page *page;
	unsigned long scan;
	LIST_HEAD(pc_list);
	struct list_head *src;
	struct page_cgroup *pc, *tmp;
	int nid = z->zone_pgdat->node_id;
	int zid = zone_idx(z);
	struct mem_cgroup_per_zone *mz;
	int lru = LRU_FILE * !!file + !!active;

	BUG_ON(!mem_cont);
	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
	src = &mz->lists[lru];

	scan = 0;
	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
		if (scan >= nr_to_scan)
			break;

		page = pc->page;
		if (unlikely(!PageCgroupUsed(pc)))
			continue;
		if (unlikely(!PageLRU(page)))
			continue;

		scan++;
		if (__isolate_lru_page(page, mode, file) == 0) {
			list_move(&page->lru, dst);
			nr_taken++;
		}
	}

	*scanned = scan;
	return nr_taken;
}

#define mem_cgroup_from_res_counter(counter, member)	\
	container_of(counter, struct mem_cgroup, member)

/*
 * This routine finds the DFS walk successor. This routine should be
 * called with hierarchy_mutex held
 */
static struct mem_cgroup *
__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
{
	struct cgroup *cgroup, *curr_cgroup, *root_cgroup;

	curr_cgroup = curr->css.cgroup;
	root_cgroup = root_mem->css.cgroup;

	if (!list_empty(&curr_cgroup->children)) {
		/*
		 * Walk down to children
		 */
		cgroup = list_entry(curr_cgroup->children.next,
						struct cgroup, sibling);
		curr = mem_cgroup_from_cont(cgroup);
		goto done;
	}

visit_parent:
	if (curr_cgroup == root_cgroup) {
		/* caller handles NULL case */
		curr = NULL;
		goto done;
	}

	/*
	 * Goto next sibling
	 */
	if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
		cgroup = list_entry(curr_cgroup->sibling.next,
						struct cgroup, sibling);
		curr = mem_cgroup_from_cont(cgroup);
		goto done;
	}

	/*
	 * Go up to next parent and next parent's sibling if need be
	 */
	curr_cgroup = curr_cgroup->parent;
	goto visit_parent;

done:
	return curr;
}

/*
 * Visit the first child (need not be the first child as per the ordering
 * of the cgroup list, since we track last_scanned_child) of @mem and use
 * that to reclaim free pages from.
 */
static struct mem_cgroup *
mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
{
	struct cgroup *cgroup;
	struct mem_cgroup *orig, *next;
	bool obsolete;

	/*
	 * Scan all children under the mem_cgroup mem
	 */
	mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);

	orig = root_mem->last_scanned_child;
	obsolete = mem_cgroup_is_obsolete(orig);

	if (list_empty(&root_mem->css.cgroup->children)) {
		/*
		 * root_mem might have children before and last_scanned_child
		 * may point to one of them. We put it later.
		 */
		if (orig)
			VM_BUG_ON(!obsolete);
		next = NULL;
		goto done;
	}

	if (!orig || obsolete) {
		cgroup = list_first_entry(&root_mem->css.cgroup->children,
				struct cgroup, sibling);
		next = mem_cgroup_from_cont(cgroup);
	} else
		next = __mem_cgroup_get_next_node(orig, root_mem);

done:
	if (next)
		mem_cgroup_get(next);
	root_mem->last_scanned_child = next;
	if (orig)
		mem_cgroup_put(orig);
	mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
	return (next) ? next : root_mem;
}

static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
{
	if (do_swap_account) {
		if (res_counter_check_under_limit(&mem->res) &&
			res_counter_check_under_limit(&mem->memsw))
			return true;
	} else
		if (res_counter_check_under_limit(&mem->res))
			return true;
	return false;
}

static unsigned int get_swappiness(struct mem_cgroup *memcg)
{
	struct cgroup *cgrp = memcg->css.cgroup;
	unsigned int swappiness;

	/* root ? */
	if (cgrp->parent == NULL)
		return vm_swappiness;

	spin_lock(&memcg->reclaim_param_lock);
	swappiness = memcg->swappiness;
	spin_unlock(&memcg->reclaim_param_lock);

	return swappiness;
}

/*
 * Dance down the hierarchy if needed to reclaim memory. We remember the
 * last child we reclaimed from, so that we don't end up penalizing
 * one child extensively based on its position in the children list.
 *
 * root_mem is the original ancestor that we've been reclaiming from.
 */
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
						gfp_t gfp_mask, bool noswap)
{
	struct mem_cgroup *next_mem;
	int ret = 0;

	/*
	 * Reclaim unconditionally and don't check for return value.
	 * We need to reclaim in the current group and down the tree.
	 * One might think about checking for children before reclaiming,
	 * but there might be left over accounting, even after children
	 * have left.
	 */
	ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
					   get_swappiness(root_mem));
	if (mem_cgroup_check_under_limit(root_mem))
		return 1;	/* indicate reclaim has succeeded */
	if (!root_mem->use_hierarchy)
		return ret;

	next_mem = mem_cgroup_get_next_node(root_mem);

	while (next_mem != root_mem) {
		if (mem_cgroup_is_obsolete(next_mem)) {
			next_mem = mem_cgroup_get_next_node(root_mem);
			continue;
		}
		ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
						   get_swappiness(next_mem));
		if (mem_cgroup_check_under_limit(root_mem))
			return 1;	/* indicate reclaim has succeeded */
		next_mem = mem_cgroup_get_next_node(root_mem);
	}
	return ret;
}

bool mem_cgroup_oom_called(struct task_struct *task)
{
	bool ret = false;
	struct mem_cgroup *mem;
	struct mm_struct *mm;

	rcu_read_lock();
	mm = task->mm;
	if (!mm)
		mm = &init_mm;
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
		ret = true;
	rcu_read_unlock();
	return ret;
}

/*
 * Unlike the exported interface, an "oom" parameter is added. If oom==true,
 * the oom-killer can be invoked.
 */
static int __mem_cgroup_try_charge(struct mm_struct *mm,
			gfp_t gfp_mask, struct mem_cgroup **memcg,
			bool oom)
{
	struct mem_cgroup *mem, *mem_over_limit;
	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
	struct res_counter *fail_res;

	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
		/* Don't account this! */
		*memcg = NULL;
		return 0;
	}

	/*
	 * We always charge the cgroup the mm_struct belongs to.
	 * The mm_struct's mem_cgroup changes on task migration if the
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	mem = *memcg;
	if (likely(!mem)) {
		mem = try_get_mem_cgroup_from_mm(mm);
		*memcg = mem;
	} else {
		css_get(&mem->css);
	}
	if (unlikely(!mem))
		return 0;

	VM_BUG_ON(mem_cgroup_is_obsolete(mem));

	while (1) {
		int ret;
		bool noswap = false;

		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
		if (likely(!ret)) {
			if (!do_swap_account)
				break;
			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
							&fail_res);
			if (likely(!ret))
				break;
			/* mem+swap counter fails */
			res_counter_uncharge(&mem->res, PAGE_SIZE);
			noswap = true;
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									memsw);
		} else
			/* mem counter fails */
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									res);

		if (!(gfp_mask & __GFP_WAIT))
			goto nomem;

		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
							noswap);
		if (ret)
			continue;

		/*
		 * try_to_free_mem_cgroup_pages() might not give us a full
		 * picture of reclaim. Some pages are reclaimed and might be
		 * moved to swap cache or just unmapped from the cgroup.
		 * Check the limit again to see if the reclaim reduced the
		 * current usage of the cgroup before giving up.
		 */
		if (mem_cgroup_check_under_limit(mem_over_limit))
			continue;

		if (!nr_retries--) {
			if (oom) {
				mutex_lock(&memcg_tasklist);
				mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
				mutex_unlock(&memcg_tasklist);
				mem_over_limit->last_oom_jiffies = jiffies;
			}
			goto nomem;
		}
	}
	return 0;
nomem:
	css_put(&mem->css);
	return -ENOMEM;
}

static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
	struct mem_cgroup *mem;
	swp_entry_t ent;

	if (!PageSwapCache(page))
		return NULL;

	ent.val = page_private(page);
	mem = lookup_swap_cgroup(ent);
	if (!mem)
		return NULL;
	if (!css_tryget(&mem->css))
		return NULL;
	return mem;
}

/*
 * commit a charge got by __mem_cgroup_try_charge() and make the page_cgroup
 * enter the USED state. If already USED, uncharge and return.
 */
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
				     struct page_cgroup *pc,
				     enum charge_type ctype)
{
	/* try_charge() can return NULL to *memcg, taking care of it. */
	if (!mem)
		return;

	lock_page_cgroup(pc);
	if (unlikely(PageCgroupUsed(pc))) {
		unlock_page_cgroup(pc);
		res_counter_uncharge(&mem->res, PAGE_SIZE);
		if (do_swap_account)
			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
		css_put(&mem->css);
		return;
	}
	pc->mem_cgroup = mem;
	smp_wmb();
	pc->flags = pcg_default_flags[ctype];

	mem_cgroup_charge_statistics(mem, pc, true);

	unlock_page_cgroup(pc);
}

/**
 * mem_cgroup_move_account - move account of the page
 * @pc:	page_cgroup of the page.
 * @from: mem_cgroup which the page is moved from.
 * @to:	mem_cgroup which the page is moved to. @from != @to.
 *
 * The caller must confirm the following.
 * - page is not on LRU (isolate_page() is useful.)
 *
 * returns 0 on success,
 * returns -EBUSY when lock is busy or "pc" is unstable.
 *
 * This function does "uncharge" from the old cgroup but doesn't do "charge"
 * to the new cgroup. That should be done by the caller.
 */
static int mem_cgroup_move_account(struct page_cgroup *pc,
	struct mem_cgroup *from, struct mem_cgroup *to)
{
	struct mem_cgroup_per_zone *from_mz, *to_mz;
	int nid, zid;
	int ret = -EBUSY;

	VM_BUG_ON(from == to);
	VM_BUG_ON(PageLRU(pc->page));

	nid = page_cgroup_nid(pc);
	zid = page_cgroup_zid(pc);
	from_mz = mem_cgroup_zoneinfo(from, nid, zid);
	to_mz = mem_cgroup_zoneinfo(to, nid, zid);

	if (!trylock_page_cgroup(pc))
		return ret;

	if (!PageCgroupUsed(pc))
		goto out;

	if (pc->mem_cgroup != from)
		goto out;

	res_counter_uncharge(&from->res, PAGE_SIZE);
	mem_cgroup_charge_statistics(from, pc, false);
	if (do_swap_account)
		res_counter_uncharge(&from->memsw, PAGE_SIZE);
	css_put(&from->css);

	css_get(&to->css);
	pc->mem_cgroup = to;
	mem_cgroup_charge_statistics(to, pc, true);
	ret = 0;
out:
	unlock_page_cgroup(pc);
	return ret;
}

/*
 * move charges to its parent.
 */
static int mem_cgroup_move_parent(struct page_cgroup *pc,
				  struct mem_cgroup *child,
				  gfp_t gfp_mask)
{
	struct page *page = pc->page;
	struct cgroup *cg = child->css.cgroup;
	struct cgroup *pcg = cg->parent;
	struct mem_cgroup *parent;
	int ret;

	/* Is ROOT ? */
	if (!pcg)
		return -EINVAL;

	parent = mem_cgroup_from_cont(pcg);

	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
	if (ret || !parent)
		return ret;

	if (!get_page_unless_zero(page)) {
		ret = -EBUSY;
		goto uncharge;
	}

	ret = isolate_lru_page(page);

	if (ret)
		goto cancel;

	ret = mem_cgroup_move_account(pc, child, parent);

	putback_lru_page(page);
	if (!ret) {
		put_page(page);
		/* drop extra refcnt by try_charge() */
		css_put(&parent->css);
		return 0;
	}

cancel:
	put_page(page);
uncharge:
	/* drop extra refcnt by try_charge() */
	css_put(&parent->css);
	/* uncharge if move fails */
	res_counter_uncharge(&parent->res, PAGE_SIZE);
	if (do_swap_account)
		res_counter_uncharge(&parent->memsw, PAGE_SIZE);
	return ret;
}

/*
 * Charge the memory controller for page usage.
 * Return
 * 0 if the charge was successful
 * < 0 if the cgroup is over its limit
 */
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, enum charge_type ctype,
				struct mem_cgroup *memcg)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	int ret;

	pc = lookup_page_cgroup(page);
	/* can happen at boot */
	if (unlikely(!pc))
		return 0;
	prefetchw(pc);

	mem = memcg;
	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
	if (ret || !mem)
		return ret;

	__mem_cgroup_commit_charge(mem, pc, ctype);
	return 0;
}

int mem_cgroup_newpage_charge(struct page *page,
			      struct mm_struct *mm, gfp_t gfp_mask)
{
	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * If already mapped, we don't have to account.
	 * If page cache, page->mapping has address_space.
	 * But page->mapping may have an out-of-use anon_vma pointer;
	 * detect it by the PageAnon() check: a newly-mapped-anon page's
	 * page->mapping is NULL.
	 */
	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
		return 0;
	if (unlikely(!mm))
		mm = &init_mm;
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask)
{
	struct mem_cgroup *mem = NULL;
	int ret;

	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * Corner case handling. This is usually called from
	 * add_to_page_cache(), but some filesystems (shmem) precharge the
	 * page before calling it and then call add_to_page_cache() with
	 * GFP_NOWAIT.
	 *
	 * For the GFP_NOWAIT case, the page may be pre-charged before calling
	 * add_to_page_cache(). (See shmem.c.) Check it here and avoid
	 * charging twice. (It works but has to pay a bit larger cost.)
	 * And when the page is SwapCache, it should take swap information
	 * into account. This is under lock_page() now.
	 */
	if (!(gfp_mask & __GFP_WAIT)) {
		struct page_cgroup *pc;

		pc = lookup_page_cgroup(page);
		if (!pc)
			return 0;
		lock_page_cgroup(pc);
		if (PageCgroupUsed(pc)) {
			unlock_page_cgroup(pc);
			return 0;
		}
		unlock_page_cgroup(pc);
	}

	if (do_swap_account && PageSwapCache(page)) {
		mem = try_get_mem_cgroup_from_swapcache(page);
		if (mem)
			mm = NULL;
		else
			mem = NULL;
		/* SwapCache may be still linked to LRU now. */
		mem_cgroup_lru_del_before_commit_swapcache(page);
	}

	if (unlikely(!mm && !mem))
		mm = &init_mm;

	if (page_is_file_cache(page))
		return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);