📄 memory.c
字号:
break_cow(vma, new_page, address, page_table); lru_cache_add(new_page); /* Free the old page.. */ new_page = old_page; } spin_unlock(&mm->page_table_lock); page_cache_release(new_page); page_cache_release(old_page); return 1; /* Minor fault */bad_wp_page: spin_unlock(&mm->page_table_lock); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1;no_mem: page_cache_release(old_page); return -1;}static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff){ do { struct mm_struct *mm = mpnt->vm_mm; unsigned long start = mpnt->vm_start; unsigned long end = mpnt->vm_end; unsigned long len = end - start; unsigned long diff; /* mapping wholly truncated? */ if (mpnt->vm_pgoff >= pgoff) { zap_page_range(mm, start, len); continue; } /* mapping wholly unaffected? */ len = len >> PAGE_SHIFT; diff = pgoff - mpnt->vm_pgoff; if (diff >= len) continue; /* Ok, partially affected.. */ start += diff << PAGE_SHIFT; len = (len - diff) << PAGE_SHIFT; zap_page_range(mm, start, len); } while ((mpnt = mpnt->vm_next_share) != NULL);}/* * Handle all mappings that got truncated by a "truncate()" * system call. * * NOTE! We have to be ready to update the memory sharing * between the file and the memory map for a potential last * incomplete page. Ugly, but necessary. */int vmtruncate(struct inode * inode, loff_t offset){ unsigned long pgoff; struct address_space *mapping = inode->i_mapping; unsigned long limit; if (inode->i_size < offset) goto do_expand; inode->i_size = offset; spin_lock(&mapping->i_shared_lock); if (!mapping->i_mmap && !mapping->i_mmap_shared) goto out_unlock; pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (mapping->i_mmap != NULL) vmtruncate_list(mapping->i_mmap, pgoff); if (mapping->i_mmap_shared != NULL) vmtruncate_list(mapping->i_mmap_shared, pgoff);out_unlock: spin_unlock(&mapping->i_shared_lock); truncate_inode_pages(mapping, offset); goto out_truncate;do_expand: limit = current->rlim[RLIMIT_FSIZE].rlim_cur; if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) goto out; inode->i_size = offset;out_truncate: if (inode->i_op && inode->i_op->truncate) { lock_kernel(); inode->i_op->truncate(inode); unlock_kernel(); } return 0;out_sig: send_sig(SIGXFSZ, current, 0);out: return -EFBIG;}/* * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue * the 'original' request together with the readahead ones... */void swapin_readahead(swp_entry_t entry){ int i, num; struct page *new_page; unsigned long offset; /* * Get the number of handles we should do readahead io to. */ num = valid_swaphandles(entry, &offset); for (i = 0; i < num; offset++, i++) { /* Ok, do the async read-ahead now */ new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset)); if (!new_page) break; page_cache_release(new_page); } return;}/* * We hold the mm semaphore and the page_table_lock on entry and * should release the pagetable lock on exit.. */static int do_swap_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, pte_t * page_table, pte_t orig_pte, int write_access){ struct page *page; swp_entry_t entry = pte_to_swp_entry(orig_pte); pte_t pte; int ret = 1; spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry); page = read_swap_cache_async(entry); if (!page) { /* * Back out if somebody else faulted in this pte while * we released the page table lock. */ int retval; spin_lock(&mm->page_table_lock); retval = pte_same(*page_table, orig_pte) ? -1 : 1; spin_unlock(&mm->page_table_lock); return retval; } /* Had to read the page from swap area: Major fault */ ret = 2; } mark_page_accessed(page); lock_page(page); /* * Back out if somebody else faulted in this pte while we * released the page table lock. */ spin_lock(&mm->page_table_lock); if (!pte_same(*page_table, orig_pte)) { spin_unlock(&mm->page_table_lock); unlock_page(page); page_cache_release(page); return 1; } /* The page isn't present yet, go ahead with the fault. */ swap_free(entry); if (vm_swap_full()) remove_exclusive_swap_page(page); mm->rss++; pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = pte_mkdirty(pte_mkwrite(pte)); unlock_page(page); flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(page_table, pte); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); spin_unlock(&mm->page_table_lock); return ret;}/* * We are called with the MM semaphore and page_table_lock * spinlock held to protect against concurrent faults in * multithreaded programs. */static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr){ pte_t entry; /* Read-only mapping of ZERO_PAGE. */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); /* ..except if it's a write access */ if (write_access) { struct page *page; /* Allocate our own private page. */ spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); if (!page) goto no_mem; clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); if (!pte_none(*page_table)) { page_cache_release(page); spin_unlock(&mm->page_table_lock); return 1; } mm->rss++; flush_page_to_ram(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add(page); mark_page_accessed(page); } set_pte(page_table, entry); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */no_mem: return -1;}/* * do_no_page() tries to create a new page mapping. It aggressively * tries to share with existing pages, but makes a separate copy if * the "write_access" parameter is true in order to avoid the next * page fault. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. * * This is called with the MM semaphore held and the page table * spinlock held. Exit with the spinlock released. */static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t *page_table){ struct page * new_page; pte_t entry; if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, write_access, address); spin_unlock(&mm->page_table_lock); new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); if (new_page == NULL) /* no page was available -- SIGBUS */ return 0; if (new_page == NOPAGE_OOM) return -1; /* * Should we do an early C-O-W break? */ if (write_access && !(vma->vm_flags & VM_SHARED)) { struct page * page = alloc_page(GFP_HIGHUSER); if (!page) { page_cache_release(new_page); return -1; } copy_user_highpage(page, new_page, address); page_cache_release(new_page); lru_cache_add(page); new_page = page; } spin_lock(&mm->page_table_lock); /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid * for other architectures too. * * Note that if write_access is true, we either now have * an exclusive copy of the page, or this is a shared mapping, * so we can make it writable and dirty to avoid having to * handle that later. */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { ++mm->rss; flush_page_to_ram(new_page); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); } else { /* One of our sibling threads was faster, back out. */ page_cache_release(new_page); spin_unlock(&mm->page_table_lock); return 1; } /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); spin_unlock(&mm->page_table_lock); return 2; /* Major fault */}/* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * Note the "page_table_lock". It is to protect against kswapd removing * pages from under us. Note that kswapd only ever _removes_ pages, never * adds them. As such, once we have noticed that the page is not present, * we can drop the lock early. * * The adding of pages is protected by the MM semaphore (which we hold), * so we don't need to worry about a page being suddenly been added into * our VM. * * We enter with the pagetable spinlock held, we are supposed to * release it when done. */static inline int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t * pte){ pte_t entry; entry = *pte; if (!pte_present(entry)) { /* * If it truly wasn't present, we know that kswapd * and the PTE updates will not touch it later. So * drop the lock. */ if (pte_none(entry)) return do_no_page(mm, vma, address, write_access, pte); return do_swap_page(mm, vma, address, pte, entry, write_access); } if (write_access) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); spin_unlock(&mm->page_table_lock); return 1;}/* * By the time we get here, we already hold the mm semaphore */int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, int write_access){ pgd_t *pgd; pmd_t *pmd; current->state = TASK_RUNNING; pgd = pgd_offset(mm, address); /* * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. */ spin_lock(&mm->page_table_lock); pmd = pmd_alloc(mm, pgd, address); if (pmd) { pte_t * pte = pte_alloc(mm, pmd, address); if (pte) return handle_pte_fault(mm, vma, address, write_access, pte); } spin_unlock(&mm->page_table_lock); return -1;}/* * Allocate page middle directory. * * We've already handled the fast-path in-line, and we own the * page table lock. * * On a two-level page table, this ends up actually being entirely * optimized away. */pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address){ pmd_t *new; /* "fast" allocation can happen without dropping the lock.. */ new = pmd_alloc_one_fast(mm, address); if (!new) { spin_unlock(&mm->page_table_lock); new = pmd_alloc_one(mm, address); spin_lock(&mm->page_table_lock); if (!new) return NULL; /* * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ if (!pgd_none(*pgd)) { pmd_free(new); goto out; } } pgd_populate(mm, pgd, new);out: return pmd_offset(pgd, address);}/* * Allocate the page table directory. * * We've already handled the fast-path in-line, and we own the * page table lock. */pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address){ if (pmd_none(*pmd)) { pte_t *new; /* "fast" allocation can happen without dropping the lock.. */ new = pte_alloc_one_fast(mm, address); if (!new) { spin_unlock(&mm->page_table_lock); new = pte_alloc_one(mm, address); spin_lock(&mm->page_table_lock); if (!new) return NULL; /* * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ if (!pmd_none(*pmd)) { pte_free(new); goto out; } } pmd_populate(mm, pmd, new); }out: return pte_offset(pmd, address);}int make_pages_present(unsigned long addr, unsigned long end){ int ret, len, write; struct vm_area_struct * vma; vma = find_vma(current->mm, addr); write = (vma->vm_flags & VM_WRITE) != 0; if (addr >= end) BUG(); if (end > vma->vm_end) BUG(); len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; ret = get_user_pages(current, current->mm, addr, len, write, 0, NULL, NULL); return ret == len ? 0 : -1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -