migrate.c
 */
		rc = mapping->a_ops->migratepage(mapping, newpage, page);
	else
		rc = fallback_migrate_page(mapping, newpage, page);

	if (!rc) {
		remove_migration_ptes(page, newpage);
	} else
		newpage->mapping = NULL;

	unlock_page(newpage);

	return rc;
}

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
			struct page *page, int force)
{
	int rc = 0;
	int *result = NULL;
	struct page *newpage = get_new_page(page, private, &result);
	int rcu_locked = 0;
	int charge = 0;
	struct mem_cgroup *mem;

	if (!newpage)
		return -ENOMEM;

	if (page_count(page) == 1) {
		/* page was freed from under us. So we are done. */
		goto move_newpage;
	}

	/* prepare cgroup just returns 0 or -ENOMEM */
	rc = -EAGAIN;

	if (!trylock_page(page)) {
		if (!force)
			goto move_newpage;
		lock_page(page);
	}

	/* charge against new page */
	charge = mem_cgroup_prepare_migration(page, &mem);
	if (charge == -ENOMEM) {
		rc = -ENOMEM;
		goto unlock;
	}
	BUG_ON(charge);

	if (PageWriteback(page)) {
		if (!force)
			goto uncharge;
		wait_on_page_writeback(page);
	}
	/*
	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
	 * we cannot notice that anon_vma is freed while we migrate a page.
	 * This rcu_read_lock() delays freeing the anon_vma pointer until the
	 * end of migration. File cache pages are no problem because of
	 * page_lock(). File caches may use write_page() or lock_page() in
	 * migration, so only anon pages need care here.
	 */
	if (PageAnon(page)) {
		rcu_read_lock();
		rcu_locked = 1;
	}

	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read into, it is added to the LRU
	 * and treated as swapcache but it has no rmap yet.
	 * Calling try_to_unmap() against a page->mapping==NULL page will
	 * trigger a BUG. So handle it here.
	 * 2. An orphaned page (see truncate_complete_page) might have
	 * fs-private metadata. The page can be picked up due to memory
	 * offlining. Everywhere else except page reclaim, the page is
	 * invisible to the vm, so the page can not be migrated. So try to
	 * free the metadata, so the page can be freed.
	 */
	if (!page->mapping) {
		if (!PageAnon(page) && PagePrivate(page)) {
			/*
			 * Go direct to try_to_free_buffers() here because
			 * a) that's what try_to_release_page() would do anyway
			 * b) we may be under rcu_read_lock() here, so we can't
			 *    use GFP_KERNEL which is what try_to_release_page()
			 *    needs to be effective.
			 */
			try_to_free_buffers(page);
		}
		goto rcu_unlock;
	}

	/* Establish migration ptes or remove ptes */
	try_to_unmap(page, 1);

	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page);

	if (rc)
		remove_migration_ptes(page, page);
rcu_unlock:
	if (rcu_locked)
		rcu_read_unlock();
uncharge:
	if (!charge)
		mem_cgroup_end_migration(mem, page, newpage);
unlock:
	unlock_page(page);

	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be
		 * restored.
		 */
		list_del(&page->lru);
		putback_lru_page(page);
	}

move_newpage:

	/*
	 * Move the new page to the LRU. If migration was not successful
	 * then this will free the page.
	 */
	putback_lru_page(newpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(newpage);
	}
	return rc;
}

/*
 * migrate_pages
 *
 * The function takes one list of pages to migrate and a function
 * that determines from the page to be migrated and the private data
 * the target of the move and allocates the page.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because the list has become empty
 * or no retryable pages exist anymore. All pages will be
 * returned to the LRU or freed.
 *
 * Return: Number of pages not migrated or error code.
 */
int migrate_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private)
{
	int retry = 1;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();

			rc = unmap_and_move(get_new_page, private,
						page, pass > 2);

			switch (rc) {
			case -ENOMEM:
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case 0:
				break;
			default:
				/* Permanent failure */
				nr_failed++;
				break;
			}
		}
	}
	rc = 0;
out:
	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	putback_lru_pages(from);

	if (rc)
		return rc;

	return nr_failed + retry;
}

#ifdef CONFIG_NUMA
/*
 * Move a list of individual pages
 */
struct page_to_node {
	unsigned long addr;
	struct page *page;
	int node;
	int status;
};

static struct page *new_page_node(struct page *p, unsigned long private,
		int **result)
{
	struct page_to_node *pm = (struct page_to_node *)private;

	while (pm->node != MAX_NUMNODES && pm->page != p)
		pm++;

	if (pm->node == MAX_NUMNODES)
		return NULL;

	*result = &pm->status;

	return alloc_pages_node(pm->node,
				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
}

/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node number must contain a valid target node.
 * The pm array ends with node = MAX_NUMNODES.
 */
static int do_move_page_to_node_array(struct mm_struct *mm,
				      struct page_to_node *pm,
				      int migrate_all)
{
	int err;
	struct page_to_node *pp;
	LIST_HEAD(pagelist);

	migrate_prep();
	down_read(&mm->mmap_sem);

	/*
	 * Build a list of pages to migrate
	 */
	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
		struct vm_area_struct *vma;
		struct page *page;

		err = -EFAULT;
		vma = find_vma(mm, pp->addr);
		if (!vma || !vma_migratable(vma))
			goto set_status;

		page = follow_page(vma, pp->addr, FOLL_GET);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		if (!page)
			goto set_status;

		if (PageReserved(page))		/* Check for zero page */
			goto put_and_set;

		pp->page = page;
		err = page_to_nid(page);

		if (err == pp->node)
			/*
			 * Node already in the right place
			 */
			goto put_and_set;

		err = -EACCES;
		if (page_mapcount(page) > 1 &&
				!migrate_all)
			goto put_and_set;

		err = isolate_lru_page(page);
		if (!err)
			list_add_tail(&page->lru, &pagelist);
put_and_set:
		/*
		 * Either remove the duplicate refcount from
		 * isolate_lru_page() or drop the page ref if it was
		 * not isolated.
		 */
		put_page(page);
set_status:
		pp->status = err;
	}

	err = 0;
	if (!list_empty(&pagelist))
		err = migrate_pages(&pagelist, new_page_node,
				(unsigned long)pm);

	up_read(&mm->mmap_sem);
	return err;
}

/*
 * Migrate an array of page addresses onto an array of nodes and fill
 * the corresponding array of status.
 */
static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
			 unsigned long nr_pages,
			 const void __user * __user *pages,
			 const int __user *nodes,
			 int __user *status, int flags)
{
	struct page_to_node *pm;
	nodemask_t task_nodes;
	unsigned long chunk_nr_pages;
	unsigned long chunk_start;
	int err;

	task_nodes = cpuset_mems_allowed(task);

	err = -ENOMEM;
	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
	if (!pm)
		goto out;
	/*
	 * Store a chunk of page_to_node array in a page,
	 * but keep the last one as a marker
	 */
	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;

	for (chunk_start = 0;
	     chunk_start < nr_pages;
	     chunk_start += chunk_nr_pages) {
		int j;

		if (chunk_start + chunk_nr_pages > nr_pages)
			chunk_nr_pages = nr_pages - chunk_start;

		/* fill the chunk pm with addrs and nodes from user-space */
		for (j = 0; j < chunk_nr_pages; j++) {
			const void __user *p;
			int node;

			err = -EFAULT;
			if (get_user(p, pages + j + chunk_start))
				goto out_pm;
			pm[j].addr = (unsigned long) p;

			if (get_user(node, nodes + j + chunk_start))
				goto out_pm;

			err = -ENODEV;
			if (!node_state(node, N_HIGH_MEMORY))
				goto out_pm;

			err = -EACCES;
			if (!node_isset(node, task_nodes))
				goto out_pm;

			pm[j].node = node;
		}

		/* End marker for this chunk */
		pm[chunk_nr_pages].node = MAX_NUMNODES;

		/* Migrate this chunk */
		err = do_move_page_to_node_array(mm, pm,
						 flags & MPOL_MF_MOVE_ALL);
		if (err < 0)
			goto out_pm;

		/* Return status information */
		for (j = 0; j < chunk_nr_pages; j++)
			if (put_user(pm[j].status, status + j + chunk_start)) {
				err = -EFAULT;
				goto out_pm;
			}
	}
	err = 0;

out_pm:
	free_page((unsigned long)pm);
out:
	return err;
}

/*
 * Determine the nodes of an array of pages and store it in an array of status.
 */
static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
				const void __user **pages, int *status)
{
	unsigned long i;

	down_read(&mm->mmap_sem);

	for (i = 0; i < nr_pages; i++) {
		unsigned long addr = (unsigned long)(*pages);
		struct vm_area_struct *vma;
		struct page *page;
		int err = -EFAULT;

		vma = find_vma(mm, addr);
		if (!vma)
			goto set_status;

		page = follow_page(vma, addr, 0);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		/* Use PageReserved to check for zero page */
		if (!page || PageReserved(page))
			goto set_status;

		err = page_to_nid(page);
set_status:
		*status = err;

		pages++;
		status++;
	}

	up_read(&mm->mmap_sem);
}

/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */
static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
			 const void __user * __user *pages,
			 int __user *status)
{
#define DO_PAGES_STAT_CHUNK_NR 16
	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
	int err;

	for (i = 0; i < nr_pages; i += chunk_nr) {
		if (chunk_nr + i > nr_pages)
			chunk_nr = nr_pages - i;

		err = copy_from_user(chunk_pages, &pages[i],
				     chunk_nr * sizeof(*chunk_pages));
		if (err) {
			err = -EFAULT;
			goto out;
		}

		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);

		err = copy_to_user(&status[i], chunk_status,
				   chunk_nr * sizeof(*chunk_status));
		if (err) {
			err = -EFAULT;
			goto out;
		}
	}
	err = 0;

out:
	return err;
}

/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
		const void __user * __user *, pages,
		const int __user *, nodes,
		int __user *, status, int, flags)
{
	const struct cred *cred = current_cred(), *tcred;
	struct task_struct *task;
	struct mm_struct *mm;
	int err;

	/* Check flags */
	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
		return -EINVAL;

	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	/* Find the mm_struct */
	read_lock(&tasklist_lock);
	task = pid ? find_task_by_vpid(pid) : current;
	if (!task) {
		read_unlock(&tasklist_lock);
		return -ESRCH;
	}
	mm = get_task_mm(task);
	read_unlock(&tasklist_lock);

	if (!mm)
		return -EINVAL;

	/*
	 * Check if this process has the right to modify the specified
	 * process. The right exists if the process has administrative
	 * capabilities, superuser privileges or the same
	 * userid as the target process.
	 */
	rcu_read_lock();
	tcred = __task_cred(task);
	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
	    !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		err = -EPERM;
		goto out;
	}
	rcu_read_unlock();

	err = security_task_movememory(task);
	if (err)
		goto out;

	if (nodes) {
		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
				    flags);
	} else {
		err = do_pages_stat(mm, nr_pages, pages, status);
	}

out:
	mmput(mm);
	return err;
}

/*
 * Call migration functions in the vma_ops that may prepare
 * memory in a vm for migration. migration functions may perform
 * the migration for vmas that do not have an underlying page struct.
 */
int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
	const nodemask_t *from, unsigned long flags)
{
	struct vm_area_struct *vma;
	int err = 0;

	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
		if (vma->vm_ops && vma->vm_ops->migrate) {
			err = vma->vm_ops->migrate(vma, to, from, flags);
			if (err)
				break;
		}
	}
	return err;
}
#endif
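
For reference, here is a minimal userspace sketch (not part of migrate.c) showing how the move_pages(2) path above is typically exercised. It assumes libnuma is installed and provides the move_pages() wrapper in <numaif.h>; the choice of node 0 as the target is an arbitrary assumption for illustration. Passing a nodes array goes through do_pages_move(), while passing NULL for nodes goes through do_pages_stat() and only reports where each page currently resides.

/* Hypothetical demo: query and move one page with move_pages(2).
 * Build (assuming libnuma is present): gcc move_pages_demo.c -lnuma
 */
#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *pages[1];
	int nodes[1] = { 0 };	/* assumed target node for this sketch */
	int status[1];
	long rc;

	/* Allocate one page-aligned page and touch it so it is mapped. */
	void *buf = aligned_alloc(page_size, page_size);
	if (!buf)
		return 1;
	memset(buf, 0, page_size);
	pages[0] = buf;

	/* nodes == NULL: only report the node each page is on (do_pages_stat). */
	rc = move_pages(0, 1, pages, NULL, status, 0);
	printf("query: rc=%ld node/status=%d\n", rc, status[0]);

	/* nodes != NULL: request migration to node 0 (do_pages_move). */
	rc = move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE);
	printf("move:  rc=%ld node/status=%d\n", rc, status[0]);

	free(buf);
	return 0;
}

After the move call, each status entry holds either the node the page now lives on or a negative errno value set via pp->status in do_move_page_to_node_array() above (for example -EACCES when the page is mapped by multiple processes and MPOL_MF_MOVE_ALL was not requested).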