mempolicy.c
{
	struct mempolicy *pol = NULL;
	struct sp_node *sn;

	if (!sp->root.rb_node)
		return NULL;
	spin_lock(&sp->lock);
	sn = sp_lookup(sp, idx, idx+1);
	if (sn) {
		mpol_get(sn->policy);
		pol = sn->policy;
	}
	spin_unlock(&sp->lock);
	return pol;
}

static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
	pr_debug("deleting %lx-%lx\n", n->start, n->end);
	rb_erase(&n->nd, &sp->root);
	mpol_put(n->policy);
	kmem_cache_free(sn_cache, n);
}

static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
				struct mempolicy *pol)
{
	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);

	if (!n)
		return NULL;
	n->start = start;
	n->end = end;
	mpol_get(pol);
	pol->flags |= MPOL_F_SHARED;	/* for unref */
	n->policy = pol;
	return n;
}

/* Replace a policy range. */
static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
				 unsigned long end, struct sp_node *new)
{
	struct sp_node *n, *new2 = NULL;

restart:
	spin_lock(&sp->lock);
	n = sp_lookup(sp, start, end);
	/* Take care of old policies in the same range. */
	while (n && n->start < end) {
		struct rb_node *next = rb_next(&n->nd);
		if (n->start >= start) {
			if (n->end <= end)
				sp_delete(sp, n);
			else
				n->start = end;
		} else {
			/* Old policy spanning whole new range. */
			if (n->end > end) {
				if (!new2) {
					spin_unlock(&sp->lock);
					new2 = sp_alloc(end, n->end, n->policy);
					if (!new2)
						return -ENOMEM;
					goto restart;
				}
				n->end = start;
				sp_insert(sp, new2);
				new2 = NULL;
				break;
			} else
				n->end = start;
		}
		if (!next)
			break;
		n = rb_entry(next, struct sp_node, nd);
	}
	if (new)
		sp_insert(sp, new);
	spin_unlock(&sp->lock);
	if (new2) {
		mpol_put(new2->policy);
		kmem_cache_free(sn_cache, new2);
	}
	return 0;
}

/**
 * mpol_shared_policy_init - initialize shared policy for inode
 * @sp: pointer to inode shared policy
 * @mpol: struct mempolicy to install
 *
 * Install non-NULL @mpol in inode's shared policy rb-tree.
 * On entry, the current task has a reference on a non-NULL @mpol.
 * This must be released on exit.
 */
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
{
	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
	spin_lock_init(&sp->lock);

	if (mpol) {
		struct vm_area_struct pvma;
		struct mempolicy *new;

		/* contextualize the tmpfs mount point mempolicy */
		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
		mpol_put(mpol);			/* drop our ref on sb mpol */
		if (IS_ERR(new))
			return;			/* no valid nodemask intersection */

		/* Create pseudo-vma that contains just the policy */
		memset(&pvma, 0, sizeof(struct vm_area_struct));
		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
		mpol_put(new);			/* drop initial ref */
	}
}

int mpol_set_shared_policy(struct shared_policy *info,
			struct vm_area_struct *vma, struct mempolicy *npol)
{
	int err;
	struct sp_node *new = NULL;
	unsigned long sz = vma_pages(vma);

	pr_debug("set_shared_policy %lx sz %lu %d %d %lx\n",
		 vma->vm_pgoff,
		 sz, npol ? npol->mode : -1,
		 npol ? npol->flags : -1,
		 npol ? nodes_addr(npol->v.nodes)[0] : -1);

	if (npol) {
		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
		if (!new)
			return -ENOMEM;
	}
	err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff + sz, new);
	if (err && new)
		kmem_cache_free(sn_cache, new);
	return err;
}
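/*
 * Worked example (illustrative, not part of mempolicy.c): a trace of
 * shared_policy_replace() above.  Suppose the tree holds one node covering
 * pgoff range [0, 100) with policy A, and policy B is installed over
 * [40, 60).  The "old policy spanning whole new range" branch fires:
 * new2 = sp_alloc(60, 100, A) is allocated (the lock is dropped and retaken,
 * hence the restart), the old node is truncated to [0, 40), new2 is
 * inserted, then the new [40, 60) node is inserted.  Result:
 * [0,40)=A, [40,60)=B, [60,100)=A.  A caller would set that up roughly as
 * below; "policy_b" is a hypothetical, already-referenced mempolicy.
 */
#if 0	/* illustrative only */
	struct sp_node *new = sp_alloc(40, 60, policy_b); /* takes a ref on policy_b */

	if (new && shared_policy_replace(sp, 40, 60, new))
		kmem_cache_free(sn_cache, new);	/* replace failed with -ENOMEM */
#endif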
/* Free a backing policy store on inode delete. */
void mpol_free_shared_policy(struct shared_policy *p)
{
	struct sp_node *n;
	struct rb_node *next;

	if (!p->root.rb_node)
		return;
	spin_lock(&p->lock);
	next = rb_first(&p->root);
	while (next) {
		n = rb_entry(next, struct sp_node, nd);
		next = rb_next(&n->nd);
		rb_erase(&n->nd, &p->root);
		mpol_put(n->policy);
		kmem_cache_free(sn_cache, n);
	}
	spin_unlock(&p->lock);
}

/* assumes fs == KERNEL_DS */
void __init numa_policy_init(void)
{
	nodemask_t interleave_nodes;
	unsigned long largest = 0;
	int nid, prefer = 0;

	policy_cache = kmem_cache_create("numa_policy",
					 sizeof(struct mempolicy),
					 0, SLAB_PANIC, NULL);

	sn_cache = kmem_cache_create("shared_policy_node",
				     sizeof(struct sp_node),
				     0, SLAB_PANIC, NULL);

	/*
	 * Set interleaving policy for system init. Interleaving is only
	 * enabled across suitably sized nodes (default is >= 16MB), or
	 * fall back to the largest node if they're all smaller.
	 */
	nodes_clear(interleave_nodes);
	for_each_node_state(nid, N_HIGH_MEMORY) {
		unsigned long total_pages = node_present_pages(nid);

		/* Preserve the largest node */
		if (largest < total_pages) {
			largest = total_pages;
			prefer = nid;
		}

		/* Interleave this node? */
		if ((total_pages << PAGE_SHIFT) >= (16 << 20))
			node_set(nid, interleave_nodes);
	}

	/* All too small, use the largest */
	if (unlikely(nodes_empty(interleave_nodes)))
		node_set(prefer, interleave_nodes);

	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
		printk("numa_policy_init: interleaving failed\n");
}

/* Reset policy of current process to default */
void numa_default_policy(void)
{
	do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
}
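/*
 * Note on the threshold above: (total_pages << PAGE_SHIFT) >= (16 << 20)
 * compares a node's present memory in bytes against 16MB.  With 4KB pages
 * (PAGE_SHIFT == 12), a node therefore needs at least 4096 present pages
 * (4096 << 12 == 16 << 20) to join the boot-time interleave set; smaller
 * nodes are skipped unless every node is too small, in which case the
 * single largest node is used instead.
 */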
/*
 * Parse and format mempolicy from/to strings
 */

/*
 * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
 * Used only for mpol_parse_str() and mpol_to_str()
 */
#define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
static const char * const policy_types[] =
	{ "default", "prefer", "bind", "interleave", "local" };

#ifdef CONFIG_TMPFS
/**
 * mpol_parse_str - parse string to mempolicy
 * @str: string containing mempolicy to parse
 * @mpol: pointer to struct mempolicy pointer, returned on success.
 * @no_context: flag whether to "contextualize" the mempolicy
 *
 * Format of input:
 *	<mode>[=<flags>][:<nodelist>]
 *
 * if @no_context is true, save the input nodemask in w.user_nodemask in
 * the returned mempolicy.  This will be used to "clone" the mempolicy in
 * a specific context [cpuset] at a later time.  Used to parse tmpfs mpol
 * mount option.  Note that if 'static' or 'relative' mode flags were
 * specified, the input nodemask will already have been saved.  Saving
 * it again is redundant, but safe.
 *
 * On success, returns 0, else 1
 */
int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
{
	struct mempolicy *new = NULL;
	unsigned short uninitialized_var(mode);
	unsigned short uninitialized_var(mode_flags);
	nodemask_t nodes;
	char *nodelist = strchr(str, ':');
	char *flags = strchr(str, '=');
	int i;
	int err = 1;

	if (nodelist) {
		/* NUL-terminate mode or flags string */
		*nodelist++ = '\0';
		if (nodelist_parse(nodelist, nodes))
			goto out;
		if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
			goto out;
	} else
		nodes_clear(nodes);

	if (flags)
		*flags++ = '\0';	/* terminate mode string */

	for (i = 0; i <= MPOL_LOCAL; i++) {
		if (!strcmp(str, policy_types[i])) {
			mode = i;
			break;
		}
	}
	if (i > MPOL_LOCAL)
		goto out;

	switch (mode) {
	case MPOL_PREFERRED:
		/*
		 * Insist on a nodelist of one node only
		 */
		if (nodelist) {
			char *rest = nodelist;
			while (isdigit(*rest))
				rest++;
			if (!*rest)
				err = 0;
		}
		break;
	case MPOL_INTERLEAVE:
		/*
		 * Default to online nodes with memory if no nodelist
		 */
		if (!nodelist)
			nodes = node_states[N_HIGH_MEMORY];
		err = 0;
		break;
	case MPOL_LOCAL:
		/*
		 * Don't allow a nodelist; mpol_new() checks flags
		 */
		if (nodelist)
			goto out;
		mode = MPOL_PREFERRED;
		break;

	/*
	 * case MPOL_BIND:    mpol_new() enforces non-empty nodemask.
	 * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
	 */
	}

	mode_flags = 0;
	if (flags) {
		/*
		 * Currently, we only support two mutually exclusive
		 * mode flags.
		 */
		if (!strcmp(flags, "static"))
			mode_flags |= MPOL_F_STATIC_NODES;
		else if (!strcmp(flags, "relative"))
			mode_flags |= MPOL_F_RELATIVE_NODES;
		else
			err = 1;
	}

	new = mpol_new(mode, mode_flags, &nodes);
	if (IS_ERR(new))
		err = 1;
	else if (no_context)
		new->w.user_nodemask = nodes;	/* save for contextualization */

out:
	/* Restore string for error message */
	if (nodelist)
		*--nodelist = ':';
	if (flags)
		*--flags = '=';
	if (!err)
		*mpol = new;
	return err;
}
#endif /* CONFIG_TMPFS */
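/*
 * Illustrative sketch (not part of mempolicy.c): mpol_parse_str() accepts
 * the tmpfs "mpol=" mount-option grammar <mode>[=<flags>][:<nodelist>],
 * e.g. "default", "prefer:2", "bind:0-3", "interleave=relative:0,2" or
 * "local".  A minimal caller might look like the following; the variable
 * names and printk are illustrative.  Note that @str is temporarily
 * modified (the ':' and '=' are NUL'd out) and restored before return.
 */
#if 0	/* illustrative only */
	char opt[] = "interleave=static:0-3";
	struct mempolicy *mpol;

	if (!mpol_parse_str(opt, &mpol, 1)) {	/* no_context: contextualize later */
		/* mpol->w.user_nodemask now holds nodes 0-3 */
		mpol_put(mpol);
	} else
		printk(KERN_ERR "invalid mpol string '%s'\n", opt);
#endif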
/**
 * mpol_to_str - format a mempolicy structure for printing
 * @buffer: to contain formatted mempolicy string
 * @maxlen: length of @buffer
 * @pol: pointer to mempolicy to be formatted
 * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
 *
 * Convert a mempolicy into a string.
 * Returns the number of characters in buffer (if positive)
 * or an error (negative)
 */
int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
{
	char *p = buffer;
	int l;
	nodemask_t nodes;
	unsigned short mode;
	unsigned short flags = pol ? pol->flags : 0;

	/*
	 * Sanity check: room for longest mode, flag and some nodes
	 */
	VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);

	if (!pol || pol == &default_policy)
		mode = MPOL_DEFAULT;
	else
		mode = pol->mode;

	switch (mode) {
	case MPOL_DEFAULT:
		nodes_clear(nodes);
		break;

	case MPOL_PREFERRED:
		nodes_clear(nodes);
		if (flags & MPOL_F_LOCAL)
			mode = MPOL_LOCAL;	/* pseudo-policy */
		else
			node_set(pol->v.preferred_node, nodes);
		break;

	case MPOL_BIND:
		/* Fall through */
	case MPOL_INTERLEAVE:
		if (no_context)
			nodes = pol->w.user_nodemask;
		else
			nodes = pol->v.nodes;
		break;

	default:
		BUG();
	}

	l = strlen(policy_types[mode]);
	if (buffer + maxlen < p + l + 1)
		return -ENOSPC;

	strcpy(p, policy_types[mode]);
	p += l;

	if (flags & MPOL_MODE_FLAGS) {
		if (buffer + maxlen < p + 2)
			return -ENOSPC;
		*p++ = '=';

		/*
		 * Currently, the only defined flags are mutually exclusive
		 */
		if (flags & MPOL_F_STATIC_NODES)
			p += snprintf(p, buffer + maxlen - p, "static");
		else if (flags & MPOL_F_RELATIVE_NODES)
			p += snprintf(p, buffer + maxlen - p, "relative");
	}

	if (!nodes_empty(nodes)) {
		if (buffer + maxlen < p + 2)
			return -ENOSPC;
		*p++ = ':';
		p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
	}
	return p - buffer;
}

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

static void gather_stats(struct page *page, void *private, int pte_dirty)
{
	struct numa_maps *md = private;
	int count = page_mapcount(page);

	md->pages++;
	if (pte_dirty || PageDirty(page))
		md->dirty++;

	if (PageSwapCache(page))
		md->swapcache++;

	if (PageActive(page) || PageUnevictable(page))
		md->active++;

	if (PageWriteback(page))
		md->writeback++;

	if (PageAnon(page))
		md->anon++;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)]++;
}

#ifdef CONFIG_HUGETLB_PAGE
static void check_huge_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		struct numa_maps *md)
{
	unsigned long addr;
	struct page *page;
	struct hstate *h = hstate_vma(vma);
	unsigned long sz = huge_page_size(h);

	for (addr = start; addr < end; addr += sz) {
		pte_t *ptep = huge_pte_offset(vma->vm_mm,
						addr & huge_page_mask(h));
		pte_t pte;

		if (!ptep)
			continue;

		pte = *ptep;
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		if (!page)
			continue;

		gather_stats(page, md, pte_dirty(*ptep));
	}
}
#else
static inline void check_huge_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		struct numa_maps *md)
{
}
#endif
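/*
 * Illustrative sketch (not part of mempolicy.c): mpol_to_str() emits the
 * same grammar mpol_parse_str() accepts, e.g. "default", "prefer:1",
 * "bind=static:0-3", "interleave:0,2" or "local".  Per the VM_BUG_ON
 * above, @buffer must hold at least
 * strlen("interleave") + strlen("relative") + 16 characters.
 */
#if 0	/* illustrative only */
	char buf[64];	/* comfortably above the VM_BUG_ON minimum */

	if (mpol_to_str(buf, sizeof(buf), pol, 0) > 0)
		printk(KERN_DEBUG "vma policy: %s\n", buf);
#endif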
/*
 * Display pages allocated per node and memory policy via /proc.
 */
int show_numa_map(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *vma = v;
	struct numa_maps *md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mempolicy *pol;
	int n;
	char buffer[50];

	if (!mm)
		return 0;

	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
	if (!md)
		return 0;

	pol = get_vma_policy(priv->task, vma, vma->vm_start);
	mpol_to_str(buffer, sizeof(buffer), pol, 0);
	mpol_cond_put(pol);

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_printf(m, " file=");
		seq_path(m, &file->f_path, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_printf(m, " heap");
	} else if (vma->vm_start <= mm->start_stack &&
			vma->vm_end >= mm->start_stack) {
		seq_printf(m, " stack");
	}

	if (is_vm_hugetlb_page(vma)) {
		check_huge_range(vma, vma->vm_start, vma->vm_end, md);
		seq_printf(m, " huge");
	} else {
		check_pgd_range(vma, vma->vm_start, vma->vm_end,
			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
	}

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(n, N_HIGH_MEMORY)
		if (md->node[n])
			seq_printf(m, " N%d=%lu", n, md->node[n]);
out:
	seq_putc(m, '\n');
	kfree(md);

	if (m->count < m->size)
		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
	return 0;
}
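/*
 * Example output (illustrative values, not real data): show_numa_map()
 * produces one /proc/<pid>/numa_maps line per VMA, such as:
 *
 *	00400000 default file=/bin/bash mapped=12 mapmax=4 N0=8 N1=4
 *	2aaaaac00000 interleave:0-3 anon=16 dirty=16 active=12 N0=4 N1=4 N2=4 N3=4
 *	7fffffffe000 default stack anon=3 dirty=3 N0=3
 *
 * "mapped=" is md->pages and appears only when it differs from both the
 * anon and dirty counts; the per-node "N<nid>=" fields cover every node
 * with at least one page mapped in the VMA.
 */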